From bdb47b4f0cf4d3d9d3b7903fd26966958629613a Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 30 Apr 2024 08:13:36 -0700 Subject: [PATCH 001/126] docs: Add hexagon sysemu docs Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- MAINTAINERS | 2 + docs/devel/hexagon-sys.rst | 106 +++++++++++++++++++++++++++++++++ docs/devel/index-internals.rst | 1 + docs/system/hexagon/cdsp.rst | 10 ++++ docs/system/target-hexagon.rst | 100 +++++++++++++++++++++++++++++++ docs/system/targets.rst | 1 + 6 files changed, 220 insertions(+) create mode 100644 docs/devel/hexagon-sys.rst create mode 100644 docs/system/hexagon/cdsp.rst create mode 100644 docs/system/target-hexagon.rst diff --git a/MAINTAINERS b/MAINTAINERS index 5df6020ed5454..c343ef6c13e09 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -240,6 +240,8 @@ F: disas/hexagon.c F: configs/targets/hexagon-linux-user/default.mak F: docker/dockerfiles/debian-hexagon-cross.docker F: gdb-xml/hexagon*.xml +F: docs/system/target-hexagon.rst +F: docs/devel/hexagon-sys.rst T: git https://github.com/quic/qemu.git hex-next Hexagon idef-parser diff --git a/docs/devel/hexagon-sys.rst b/docs/devel/hexagon-sys.rst new file mode 100644 index 0000000000000..3972261a2bbed --- /dev/null +++ b/docs/devel/hexagon-sys.rst @@ -0,0 +1,106 @@ +.. _Hexagon-System-arch: + +Hexagon System Architecture +=========================== + +The Hexagon architecture has some unique elements which are described here. + +Interrupts +---------- +When interrupts arrive at a Hexagon DSP core, they are priority-steered to +be handled by an eligible hardware thread with the lowest priority. + +Memory +------ +Each hardware thread has an ``SSR.ASID`` field that contains its Address +Space Identifier. This value is concatenated with a 32-bit virtual address; +the MMU can then resolve this extended virtual address to a physical address. + +TLBs +---- +The format of a TLB entry is shown below. + +.. 
note:: + The Small Core DSPs have a different TLB format which is not yet + supported. + +.. admonition:: Diagram + + .. code:: text + + 6 5 4 3 + 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |v|g|x|A|A| | | + |a|l|P|1|0| ASID | Virtual Page | + |l|b| | | | | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + 3 2 1 0 + 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | | | | | | | + |x|w|r|u|Cacheab| Physical Page |S| + | | | | | | | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + +* ASID: the address-space identifier +* A1, A0: the behavior of these cache line attributes is not modeled by QEMU. +* xP: the extra-physical bit is the most significant physical address bit. +* S: the S bit and the LSBs of the physical page indicate the page size +* val: this is the 'valid' bit; when set, it indicates that page matching + should consider this entry. + +.. list-table:: Page sizes + :widths: 25 25 50 + :header-rows: 1 + + * - S-bit + - Phys page LSBs + - Page size + * - 1 + - N/A + - 4KB + * - 0 + - 0b1 + - 16KB + * - 0 + - 0b10 + - 64KB + * - 0 + - 0b100 + - 256KB + * - 0 + - 0b1000 + - 1MB + * - 0 + - 0b10000 + - 4MB + * - 0 + - 0b100000 + - 16MB + +* glb: if the global bit is set, the ASID is not considered when matching + TLBs. +* Cacheab: the cacheability attributes of TLBs are not modeled; these bits + are ignored. +* RWX: read-, write-, execute-enable bits. Indicates if user programs + are permitted to read/write/execute the given page. +* U: indicates if user programs can access this page. + +Scheduler +--------- +The Hexagon system architecture has a feature to assist the guest OS +task scheduler. The guest OS can enable this feature by setting +``SCHEDCFG.EN``. 
The ``BESTWAIT`` register is programmed by the guest OS +to indicate the priority of the highest-priority task waiting to run on a +hardware thread. The reschedule interrupt is triggered when any hardware +thread's priority in ``STID.PRIO`` is worse than ``BESTWAIT.PRIO``. When +it is triggered, the ``BESTWAIT.PRIO`` value is reset to 0x1ff. + +HVX Coprocessor +--------------- +The Supervisor Status Register field ``SSR.XA`` binds a DSP hardware thread +to one of the eight possible HVX contexts. The guest OS is responsible for +managing this resource. diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index 7a0678cbdd3ab..0471db80645f7 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -14,6 +14,7 @@ Details about QEMU's various subsystems including how to add features to them. block-coroutine-wrapper clocks ebpf_rss + hexagon-sys migration/index multi-process reset diff --git a/docs/system/hexagon/cdsp.rst b/docs/system/hexagon/cdsp.rst new file mode 100644 index 0000000000000..f755fbe0a5bab --- /dev/null +++ b/docs/system/hexagon/cdsp.rst @@ -0,0 +1,10 @@ +Compute DSP +=========== + +A Hexagon CDSP is designed as a computation offload device for an SoC. The +``V66G_1024`` machine contains: + +* L2VIC interrupt controller +* QTimer timer device + +This machine will support any Hexagon CPU, but will default to ``v66``. diff --git a/docs/system/target-hexagon.rst b/docs/system/target-hexagon.rst new file mode 100644 index 0000000000000..e12a93d15d4f7 --- /dev/null +++ b/docs/system/target-hexagon.rst @@ -0,0 +1,100 @@ +.. _Hexagon-System-emulator: + +Hexagon System emulator +----------------------- + +Use the ``qemu-system-hexagon`` executable to simulate a 32-bit Hexagon +machine. + +Hexagon Machines +================ + +Hexagon DSPs are suited to various functions and generally appear in a +"DSP subsystem" of a larger system-on-chip (SoC). 
+ +Hexagon DSPs are often included in a subsystem that looks like the diagram +below. Instructions are loaded into DDR before the DSP is brought out of +reset and the first instructions are fetched from DDR via the EVB/reset vector. + +In a real system, a TBU/SMMU would normally arbitrate AXI accesses but +we don't have a need to model that for QEMU. + +Hexagon DSP cores use simultaneous multithreading (SMT) with as many as 8 +hardware threads. + +.. admonition:: Diagram + + .. code:: text + + AHB (local) bus AXI (global) bus + │ │ + │ │ + ┌─────────┐ │ ┌─────────────────┐ │ + │ L2VIC ├──┤ │ │ │ + │ ├──┼───────► ├───────┤ + └─────▲───┘ │ │ Hexagon DSP │ │ + │ │ │ │ │ ┌─────┐ + │ │ │ N threads │ │ │ DDR │ + │ ├───────┤ │ │ │ │ + ┌────┴──┐ │ │ │ ├────────┤ │ + │QTimer ├───┤ │ │ │ │ │ + │ │ │ │ │ │ │ │ + └───────┘ │ │ ┌─────────┐ │ │ │ │ + │ │ ┌─────────┐│ │ │ │ │ + ┌───────┐ │ │ │ HVX xM ││ │ │ │ │ + │QDSP6SS├───┤ │ │ │┘ │ │ │ │ + └───────┘ │ │ └─────────┘ │ │ └─────┘ + │ │ │ │ + ┌───────┐ │ └─────────────────┘ │ + │ CSR ├───┤ + └───────┘ │ ┌──────┐ ┌───────────┐ + │ │ TCM │ │ VTCM │ + │ │ │ │ + └──────┘ │ │ + │ │ + │ │ + │ │ + └───────────┘ + +Components +---------- +Other than l2vic and HVX, the components below are not implemented in QEMU. + +* L2VIC: the L2 vectored interrupt controller. Supports 1024 input + interrupts, edge- or level-triggered. The core ISA has system registers + ``VID``, ``VID1`` which read through to the L2VIC device. +* QTimer: ARMSSE-based programmable timer device. Its interrupts are + wired to the L2VIC. System registers ``TIMER``, ``UTIMER`` read + through to the QTimer device. +* QDSP6SS: DSP subsystem features, accessible to the entire SoC, including + DSP NMI, watchdog, reset, etc. +* CSR: Configuration/Status Registers. +* TCM: DSP-exclusive tightly-coupled memory. This memory can be used for + DSPs when isolated from DDR and in some bootstrapping modes. +* VTCM: DSP-exclusive vector tightly-coupled memory. 
This memory is accessed + by some HVX instructions. +* HVX: the vector coprocessor supports 64 and 128-byte vector registers. + 64-byte mode is not implemented in QEMU. + + +Bootstrapping +------------- +Hexagon systems do not generally have access to a block device. So, for +QEMU the typical use case involves loading a binary or ELF file into memory +and executing from the indicated start address:: + + $ qemu-system-hexagon -kernel ./prog -append 'arg1 arg2' + +Semihosting +----------- +Hexagon supports a semihosting interface similar to other architectures'. +The ``trap0`` instruction can activate these semihosting calls so that the +guest software can access the host console and filesystem. Semihosting +is not yet implemented in QEMU hexagon. + + +Hexagon Features +================ +.. toctree:: + hexagon/cdsp + diff --git a/docs/system/targets.rst b/docs/system/targets.rst index 224fadae71c45..e6dcdb9d41610 100644 --- a/docs/system/targets.rst +++ b/docs/system/targets.rst @@ -29,3 +29,4 @@ Contents: target-sparc64 target-i386 target-xtensa + target-hexagon From 04c7ff3e16fc318a904f9bc20d16a5e7e344bc67 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 25 Oct 2024 23:20:05 -0500 Subject: [PATCH 002/126] docs/system: Add hexagon CPU emulation Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- docs/system/hexagon/emulation.rst | 16 ++++++++++++++++ docs/system/target-hexagon.rst | 1 + 2 files changed, 17 insertions(+) create mode 100644 docs/system/hexagon/emulation.rst diff --git a/docs/system/hexagon/emulation.rst b/docs/system/hexagon/emulation.rst new file mode 100644 index 0000000000000..03a6092a12816 --- /dev/null +++ b/docs/system/hexagon/emulation.rst @@ -0,0 +1,16 @@ +.. _Hexagon Emulation: + +Hexagon CPU architecture support +================================ + +QEMU's TCG emulation includes support for v65, v66, v67, v68, v69, v71, v73. 
+It also has support for the following architecture extensions: + +- HVX (Hexagon Vector eXtensions) + +For information on the specifics of the HVX extension, please refer +to the `Qualcomm Hexagon V69 HVX Programmer's Reference Manual +<https://docs.qualcomm.com/bundle/publicresource/80-N2040-49_REV_AA_Qualcomm_Hexagon_V69_HVX_ProgrammerS_Reference_Manual.pdf>`_. + +.. code-block:: bash + diff --git a/docs/system/target-hexagon.rst b/docs/system/target-hexagon.rst index e12a93d15d4f7..b2ffee91eb02e 100644 --- a/docs/system/target-hexagon.rst +++ b/docs/system/target-hexagon.rst @@ -96,5 +96,6 @@ is not yet implemented in QEMU hexagon. Hexagon Features ================ .. toctree:: + hexagon/emulation hexagon/cdsp From 5665b0e0195f606c7303d96053969ecb0832dbec Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 8 Aug 2024 11:17:57 -0700 Subject: [PATCH 003/126] target/hexagon: Fix badva reference, delete CAUSE The BADVA reg is referred to with the wrong identifier. The CAUSE reg field of SSR is not yet modeled, we will dump the SSR in a subsequent commit. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index a9beb9a17572e..1abc98b98d7af 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -216,8 +216,7 @@ static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) qemu_fprintf(f, " cs0 = 0x00000000\n"); qemu_fprintf(f, " cs1 = 0x00000000\n"); #else - print_reg(f, env, HEX_REG_CAUSE); - print_reg(f, env, HEX_REG_BADVA); + print_reg(f, env, HEX_SREG_BADVA); print_reg(f, env, HEX_REG_CS0); print_reg(f, env, HEX_REG_CS1); #endif From c943a9b2d5f1a0b28ccf63ab95c4d510d410aa41 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 17 May 2024 19:50:15 -0700 Subject: [PATCH 004/126] target/hexagon: Add missing A_CALL attr, hintjumpr to multi_cof Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 758e5fd12dfed..e60e8efabc936 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -247,7 +247,11 @@ def need_next_PC(tag): def need_pkt_has_multi_cof(tag): - return "A_COF" in attribdict[tag] + if "A_JUMP" in attribdict[tag] or "A_CALL" in attribdict[tag]: + if tag == "J4_hintjumpr": + return False + return True + return False def need_pkt_need_commit(tag): From afb3b58b4e2b7701349b3310a1c21da276aa7496 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 17 May 2024 17:59:23 -0700 Subject: [PATCH 005/126] target/hexagon: Add System/Guest register definitions Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_analyze_funcs.py | 21 +++- target/hexagon/hex_common.py | 163 ++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 3 deletions(-) diff --git a/target/hexagon/gen_analyze_funcs.py 
b/target/hexagon/gen_analyze_funcs.py index 3ac7cc2cfe577..dfdf5f3b87ba5 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -22,6 +22,8 @@ import string import hex_common +def has_analyze_func(reg, mode): + return callable(getattr(reg, f"analyze_{mode}", None)) ## ## Generate the code to analyze the instruction @@ -42,6 +44,14 @@ def gen_analyze_func(f, tag, regs, imms): f.write(f"static void analyze_{tag}(DisasContext *ctx)\n") f.write("{\n") + if hex_common.tag_ignore(tag): + f.write("}\n\n") + return + + if ("A_PRIV" in hex_common.attribdict[tag] or + "A_GUEST" in hex_common.attribdict[tag]): + f.write("#ifndef CONFIG_USER_ONLY\n") + f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n") if (hex_common.is_hvx_insn(tag)): if hex_common.has_hvx_helper(tag): @@ -58,22 +68,27 @@ def gen_analyze_func(f, tag, regs, imms): for regno, register in enumerate(regs): reg_type, reg_id = register reg = hex_common.get_register(tag, reg_type, reg_id) - reg.decl_reg_num(f, regno) + if has_analyze_func(reg, "read") or has_analyze_func(reg, "write"): + reg.decl_reg_num(f, regno) ## Analyze the register reads for regno, register in enumerate(regs): reg_type, reg_id = register reg = hex_common.get_register(tag, reg_type, reg_id) - if reg.is_read(): + if reg.is_read() and has_analyze_func(reg, "read"): reg.analyze_read(f, regno) ## Analyze the register writes for regno, register in enumerate(regs): reg_type, reg_id = register reg = hex_common.get_register(tag, reg_type, reg_id) - if reg.is_written(): + if reg.is_written() and has_analyze_func(reg, "write"): reg.analyze_write(f, tag, regno) + if ("A_PRIV" in hex_common.attribdict[tag] or + "A_GUEST" in hex_common.attribdict[tag]): + f.write("#endif /* !CONFIG_USER_ONLY */\n") + f.write("}\n\n") diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index e60e8efabc936..b0c86cb3babd6 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -33,6 +33,41 @@ 
overrides = {} # tags with helper overrides idef_parser_enabled = {} # tags enabled for idef-parser + +def is_sysemu_tag(tag): + return "A_PRIV" in attribdict[tag] or "A_GUEST" in attribdict[tag] + + +def tag_ignore(tag): + tag_skips = ( + "Y6_diag", + "Y6_diag0", + "Y6_diag1", + ) + attr_skips = ( + "A_FAKEINSN", + "A_MAPPING", + ) + return tag in tag_skips or \ + any(attr in attribdict[tag] for attr in attr_skips) + + +def get_sys_tags(): + return sorted( + tag for tag in frozenset(tags) if is_sysemu_tag(tag) + ) + + +def get_user_tags(): + return sorted( + tag for tag in frozenset(tags) if not is_sysemu_tag(tag) + ) + + +def get_all_tags(): + return get_user_tags() + get_sys_tags() + + # We should do this as a hash for performance, # but to keep order let's keep it as a list. def uniquify(seq): @@ -370,12 +405,16 @@ def helper_proto_type(self): return "s32" def helper_arg_type(self): return "int32_t" + def is_pair(self): + return False class Pair(Scalar): def helper_proto_type(self): return "s64" def helper_arg_type(self): return "int64_t" + def is_pair(self): + return True class Hvx: def is_scalar_reg(self): @@ -1013,6 +1052,120 @@ def analyze_write(self, f, tag, regno): ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper); """)) +class GuestRegister(Register): + def gen_check_impl(self, f, regno): + if self.is_written(): + f.write(code_fmt(f"""\ + if (!greg_writable(insn->regno[{regno}], + {str(self.is_pair()).lower()})) {{ + return; + }} + """)) + else: + f.write(code_fmt(f"""\ +check_greg_impl(insn->regno[{regno}], {str(self.is_pair()).lower()}); + """)) + +class GuestDest(GuestRegister, Single, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_greg({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_greg_write(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + 
def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_greg_write(ctx, {self.reg_num}); + """)) + +class GuestSource(GuestRegister, Single, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno); + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_greg({self.reg_tcg()}, {self.reg_num}); + """)) + +class GuestPairDest(GuestRegister, Pair, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_greg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_greg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_greg_write_pair(ctx, {self.reg_num}); + """)) + +class GuestPairSource(GuestRegister, Pair, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_greg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + +class SystemDest(Register, Single, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_sreg({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_sreg_write(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_sreg_write(ctx, {self.reg_num}); + """)) + +class SystemSource(Register, Single, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno); + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_sreg({self.reg_tcg()}, {self.reg_num}); + """)) + +class SystemPairDest(Register, 
Pair, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_sreg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_sreg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_sreg_write_pair(ctx, {self.reg_num}); + """)) + +class SystemPairSource(Register, Pair, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_sreg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + def init_registers(): regs = { GprDest("R", "d"), @@ -1059,6 +1212,16 @@ def init_registers(): QRegSource("Q", "u"), QRegSource("Q", "v"), QRegReadWrite("Q", "x"), + + # system regs + GuestDest("G", "d"), + GuestSource("G", "s"), + GuestPairDest("G", "dd"), + GuestPairSource("G", "ss"), + SystemDest("S", "d"), + SystemSource("S", "s"), + SystemPairDest("S", "dd"), + SystemPairSource("S", "ss"), } for reg in regs: registers[f"{reg.regtype}{reg.regid}"] = reg From 3e4cb9b925ac82067afa6ecb42aba7ffcf647cb2 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 16:15:39 -0500 Subject: [PATCH 006/126] target/hexagon: Make gen_exception_end_tb non-static Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/translate.c | 9 ++++----- target/hexagon/translate.h | 2 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index fe7858703c8cb..2e9a934fc6c1a 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -185,13 +185,12 @@ static void gen_end_tb(DisasContext *ctx) ctx->base.is_jmp = DISAS_NORETURN; } -static void gen_exception_end_tb(DisasContext *ctx, int excp) +void hex_gen_exception_end_tb(DisasContext *ctx, int 
excp) { gen_exec_counters(ctx); tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); gen_exception_raw(excp); ctx->base.is_jmp = DISAS_NORETURN; - } static int read_packet_words(CPUHexagonState *env, DisasContext *ctx, @@ -558,7 +557,7 @@ static void gen_insn(DisasContext *ctx) ctx->insn->generate(ctx); mark_store_width(ctx); } else { - gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE); + hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE); } } @@ -912,7 +911,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) nwords = read_packet_words(env, ctx, words); if (!nwords) { - gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); + hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); return; } @@ -927,7 +926,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) gen_commit_packet(ctx); ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; } else { - gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); + hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); } } diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index d251e2233fda7..2bd125297a820 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -281,6 +281,8 @@ extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX]; +void hex_gen_exception_end_tb(DisasContext *ctx, int excp); + void process_store(DisasContext *ctx, int slot_num); FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2) From 669bdb46bd432ef7f5ec97dea6a3485ff61579c0 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 17 May 2024 18:56:07 -0700 Subject: [PATCH 007/126] target/hexagon: Switch to tag_ignore(), generate via get_{user,sys}_tags() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_helper_funcs.py | 21 +++++++++------------ target/hexagon/gen_helper_protos.py | 23 ++++++++++++----------- 
target/hexagon/gen_idef_parser_funcs.py | 2 ++ target/hexagon/gen_op_attribs.py | 2 +- target/hexagon/gen_opcodes_def.py | 5 ++++- target/hexagon/gen_tcg_func_table.py | 14 ++------------ 6 files changed, 30 insertions(+), 37 deletions(-) diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index c1f806ac4b25b..dd8ab60598557 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -109,26 +109,23 @@ def main(): tagimms = hex_common.get_tagimms() with open(args.out, "w") as f: - for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: + for tag in hex_common.get_user_tags(): + if hex_common.tag_ignore(tag): continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - if tag == "Y6_diag": - continue - if tag == "Y6_diag0": + if hex_common.skip_qemu_helper(tag): continue - if tag == "Y6_diag1": + if hex_common.is_idef_parser_enabled(tag): continue + gen_helper_function(f, tag, tagregs, tagimms) + + f.write("#if !defined(CONFIG_USER_ONLY)\n") + for tag in hex_common.get_sys_tags(): if hex_common.skip_qemu_helper(tag): continue if hex_common.is_idef_parser_enabled(tag): continue - gen_helper_function(f, tag, tagregs, tagimms) + f.write("#endif\n") if __name__ == "__main__": diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index 77f8e0a6a322c..59c8bdd05c0f6 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -59,27 +59,28 @@ def main(): tagimms = hex_common.get_tagimms() with open(args.out, "w") as f: - for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: + for tag in hex_common.get_user_tags(): + if hex_common.tag_ignore(tag): continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - 
if tag == "Y6_diag": - continue - if tag == "Y6_diag0": + + if hex_common.skip_qemu_helper(tag): continue - if tag == "Y6_diag1": + if hex_common.is_idef_parser_enabled(tag): continue + gen_helper_prototype(f, tag, tagregs, tagimms) + + f.write("#if !defined(CONFIG_USER_ONLY)\n") + for tag in hex_common.get_sys_tags(): + if hex_common.tag_ignore(tag): + continue if hex_common.skip_qemu_helper(tag): continue if hex_common.is_idef_parser_enabled(tag): continue gen_helper_prototype(f, tag, tagregs, tagimms) + f.write("#endif\n") if __name__ == "__main__": diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index 2f6e826f76d60..32bce9b002863 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -60,6 +60,8 @@ def main(): f.write('#include "macros.h.inc"\n\n') for tag in hex_common.tags: + if hex_common.tag_ignore(tag): + continue ## Skip the priv instructions if "A_PRIV" in hex_common.attribdict[tag]: continue diff --git a/target/hexagon/gen_op_attribs.py b/target/hexagon/gen_op_attribs.py index bbbb02df3a23b..94dd1f876b21c 100755 --- a/target/hexagon/gen_op_attribs.py +++ b/target/hexagon/gen_op_attribs.py @@ -38,7 +38,7 @@ def main(): ## Generate all the attributes associated with each instruction ## with open(args.out, "w") as f: - for tag in hex_common.tags: + for tag in hex_common.get_all_tags(): f.write( f"OP_ATTRIB({tag},ATTRIBS(" f'{",".join(sorted(hex_common.attribdict[tag]))}))\n' diff --git a/target/hexagon/gen_opcodes_def.py b/target/hexagon/gen_opcodes_def.py index 94a19ff412e2e..17ba3f9db95e8 100755 --- a/target/hexagon/gen_opcodes_def.py +++ b/target/hexagon/gen_opcodes_def.py @@ -37,7 +37,10 @@ def main(): ## Generate a list of all the opcodes ## with open(args.out, "w") as f: - for tag in hex_common.tags: + for tag in hex_common.get_user_tags(): + f.write(f"OPCODE({tag}),\n") + + for tag in hex_common.get_sys_tags(): f.write(f"OPCODE({tag}),\n") diff --git 
a/target/hexagon/gen_tcg_func_table.py b/target/hexagon/gen_tcg_func_table.py index 299a39b1aa02b..70c8db5c44c88 100755 --- a/target/hexagon/gen_tcg_func_table.py +++ b/target/hexagon/gen_tcg_func_table.py @@ -41,19 +41,9 @@ def main(): f.write("#define HEXAGON_FUNC_TABLE_H\n\n") f.write("const SemanticInsn opcode_genptr[XX_LAST_OPCODE] = {\n") + for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: - continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - if tag == "Y6_diag": - continue - if tag == "Y6_diag0": - continue - if tag == "Y6_diag1": + if hex_common.tag_ignore(tag): continue f.write(f" [{tag}] = generate_{tag},\n") From 5aed2a2b666a4bfa317b2613dad3e317b0d208de Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 17 May 2024 19:07:25 -0700 Subject: [PATCH 008/126] target/hexagon: Add privilege check, use tag_ignore() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_bits.h | 2 ++ target/hexagon/gen_tcg_funcs.py | 32 +++++++++++++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index ff596e2a94c98..6582bb4f16fc3 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -37,6 +37,8 @@ enum hex_cause { HEX_CAUSE_PC_NOT_ALIGNED = 0x01e, HEX_CAUSE_PRIV_NO_UREAD = 0x024, HEX_CAUSE_PRIV_NO_UWRITE = 0x025, + HEX_CAUSE_PRIV_USER_NO_GINSN = 0x01a, + HEX_CAUSE_PRIV_USER_NO_SINSN = 0x01b, }; #define PACKET_WORDS_MAX 4 diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index c2ba91ddc0444..65bfa046b8671 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -21,7 +21,7 @@ import re import string import hex_common - +from textwrap import dedent ## ## Generate the TCG code to call the helper @@ -50,6 +50,18 @@ def gen_tcg_func(f, tag, 
regs, imms): f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n") + if "A_PRIV" in hex_common.attribdict[tag]: + f.write(dedent("""\ +#ifdef CONFIG_USER_ONLY + hex_gen_exception_end_tb(ctx, HEX_CAUSE_PRIV_USER_NO_SINSN); +#else +""")) + if "A_GUEST" in hex_common.attribdict[tag]: + f.write(dedent("""\ +#ifdef CONFIG_USER_ONLY + hex_gen_exception_end_tb(ctx, HEX_CAUSE_PRIV_USER_NO_GINSN); +#else +""")) if hex_common.need_ea(tag): f.write(" TCGv EA G_GNUC_UNUSED = tcg_temp_new();\n") @@ -97,6 +109,11 @@ def gen_tcg_func(f, tag, regs, imms): if reg.is_written(): reg.log_write(f, tag) + if ( + "A_PRIV" in hex_common.attribdict[tag] + or "A_GUEST" in hex_common.attribdict[tag] + ): + f.write("#endif /* CONFIG_USER_ONLY */\n") f.write("}\n\n") @@ -121,18 +138,7 @@ def main(): f.write('#include "idef-generated-emitter.h.inc"\n\n') for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: - continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - if tag == "Y6_diag": - continue - if tag == "Y6_diag0": - continue - if tag == "Y6_diag1": + if hex_common.tag_ignore(tag): continue gen_def_tcg_func(f, tag, tagregs, tagimms) From 00f9aaf4e94578b431210ef7a663d2dca25747d1 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 19 May 2024 21:52:51 -0500 Subject: [PATCH 009/126] target/hexagon: Add memory order definition Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu-param.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/hexagon/cpu-param.h b/target/hexagon/cpu-param.h index 45ee7b46409c7..ccaf6a9d28d6f 100644 --- a/target/hexagon/cpu-param.h +++ b/target/hexagon/cpu-param.h @@ -23,4 +23,9 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 36 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +/* + * Hexagon processors have a strong memory model. 
+ */ +#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL) + #endif From 762ed4e33da0b5d9dc9dbbabdd712bf1a730c1b9 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 19 May 2024 21:54:00 -0500 Subject: [PATCH 010/126] target/hexagon: Add a placeholder fp exception Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/arch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index d053d6848715c..87c2f6a53f6ca 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -208,6 +208,11 @@ void arch_fpop_start(CPUHexagonState *env) * model it in qemu user mode. */ #define RAISE_FP_EXCEPTION do {} while (0) +#else + /* + * To be implemented. + */ +#define RAISE_FP_EXCEPTION do { g_assert_not_reached(); } while (0) #endif #define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \ From f25012d280fcf9c921120ca80eb250a0cb99a8e9 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 14:27:37 -0500 Subject: [PATCH 011/126] target/hexagon: Add guest, system reg number defs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These registers are defined in the Qualcomm Hexagon V71 Programmer's Reference Manual - https://docs.qualcomm.com/bundle/publicresource/80-N2040-51_REV_AB_Hexagon_V71_ProgrammerS_Reference_Manual.pdf Refer to §11.9.1 SYSTEM GUEST, §11.9.2 SYSTEM MONITOR. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.h | 5 ++ target/hexagon/hex_regs.h | 115 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index f78c8f9c2a006..5e15a8560a3b9 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -20,6 +20,11 @@ #include "fpu/softfloat-types.h" +#define NUM_GREGS 32 +#define GREG_WRITES_MAX 32 +#define NUM_SREGS 64 +#define SREG_WRITES_MAX 64 + #include "cpu-qom.h" #include "exec/cpu-defs.h" #include "hex_regs.h" diff --git a/target/hexagon/hex_regs.h b/target/hexagon/hex_regs.h index bddfc28021c60..ea8c62eba9ce5 100644 --- a/target/hexagon/hex_regs.h +++ b/target/hexagon/hex_regs.h @@ -81,4 +81,119 @@ enum { HEX_REG_UTIMERHI = 63, }; +#ifndef CONFIG_USER_ONLY + +#define HEX_GREG_VALUES \ + DECL_HEX_GREG(G0, 0) \ + DECL_HEX_GREG(GELR, 0) \ + DECL_HEX_GREG(G1, 1) \ + DECL_HEX_GREG(GSR, 1) \ + DECL_HEX_GREG(G2, 2) \ + DECL_HEX_GREG(GOSP, 2) \ + DECL_HEX_GREG(G3, 3) \ + DECL_HEX_GREG(GBADVA, 3) \ + DECL_HEX_GREG(GCYCLE_1T, 10) \ + DECL_HEX_GREG(GCYCLE_2T, 11) \ + DECL_HEX_GREG(GCYCLE_3T, 12) \ + DECL_HEX_GREG(GCYCLE_4T, 13) \ + DECL_HEX_GREG(GCYCLE_5T, 14) \ + DECL_HEX_GREG(GCYCLE_6T, 15) \ + DECL_HEX_GREG(GPMUCNT4, 16) \ + DECL_HEX_GREG(GPMUCNT5, 17) \ + DECL_HEX_GREG(GPMUCNT6, 18) \ + DECL_HEX_GREG(GPMUCNT7, 19) \ + DECL_HEX_GREG(GPCYCLELO, 24) \ + DECL_HEX_GREG(GPCYCLEHI, 25) \ + DECL_HEX_GREG(GPMUCNT0, 26) \ + DECL_HEX_GREG(GPMUCNT1, 27) \ + DECL_HEX_GREG(GPMUCNT2, 28) \ + DECL_HEX_GREG(GPMUCNT3, 29) \ + DECL_HEX_GREG_DONE + +#define DECL_HEX_GREG_DONE +#define DECL_HEX_GREG(name, val) HEX_GREG_ ##name = val, +enum hex_greg { + HEX_GREG_VALUES +}; +#undef DECL_HEX_GREG +#undef DECL_HEX_GREG_DONE + +#define DECL_HEX_GREG_DONE 0 +#define DECL_HEX_GREG(_, val) (1 << val) | +static inline bool greg_implemented(enum hex_greg greg) +{ +#if NUM_GREGS > 32 +#error "NUM_GREGS too large for greg_implemented(): update 
`impl_bitmap`" +#endif + static int32_t impl_bitmap = HEX_GREG_VALUES; + return impl_bitmap & (1 << greg); +} +#undef DECL_HEX_GREG +#undef DECL_HEX_GREG_DONE + +#endif /* CONFIG_USER_ONLY */ + +enum { + HEX_SREG_SGP0 = 0, + HEX_SREG_SGP1 = 1, + HEX_SREG_STID = 2, + HEX_SREG_ELR = 3, + HEX_SREG_BADVA0 = 4, + HEX_SREG_BADVA1 = 5, + HEX_SREG_SSR = 6, + HEX_SREG_CCR = 7, + HEX_SREG_HTID = 8, + HEX_SREG_BADVA = 9, + HEX_SREG_IMASK = 10, + HEX_SREG_GEVB = 11, + HEX_SREG_GLB_START = 16, + HEX_SREG_EVB = 16, + HEX_SREG_MODECTL = 17, + HEX_SREG_SYSCFG = 18, + HEX_SREG_IPENDAD = 20, + HEX_SREG_VID = 21, + HEX_SREG_VID1 = 22, + HEX_SREG_BESTWAIT = 23, + HEX_SREG_IEL = 24, + HEX_SREG_SCHEDCFG = 25, + HEX_SREG_IAHL = 26, + HEX_SREG_CFGBASE = 27, + HEX_SREG_DIAG = 28, + HEX_SREG_REV = 29, + HEX_SREG_PCYCLELO = 30, + HEX_SREG_PCYCLEHI = 31, + HEX_SREG_ISDBST = 32, + HEX_SREG_ISDBCFG0 = 33, + HEX_SREG_ISDBCFG1 = 34, + HEX_SREG_LIVELOCK = 35, + HEX_SREG_BRKPTPC0 = 36, + HEX_SREG_BRKPTCFG0 = 37, + HEX_SREG_BRKPTPC1 = 38, + HEX_SREG_BRKPTCFG1 = 39, + HEX_SREG_ISDBMBXIN = 40, + HEX_SREG_ISDBMBXOUT = 41, + HEX_SREG_ISDBEN = 42, + HEX_SREG_ISDBGPR = 43, + HEX_SREG_PMUCNT4 = 44, + HEX_SREG_PMUCNT5 = 45, + HEX_SREG_PMUCNT6 = 46, + HEX_SREG_PMUCNT7 = 47, + HEX_SREG_PMUCNT0 = 48, + HEX_SREG_PMUCNT1 = 49, + HEX_SREG_PMUCNT2 = 50, + HEX_SREG_PMUCNT3 = 51, + HEX_SREG_PMUEVTCFG = 52, + HEX_SREG_PMUSTID0 = 53, + HEX_SREG_PMUEVTCFG1 = 54, + HEX_SREG_PMUSTID1 = 55, + HEX_SREG_TIMERLO = 56, + HEX_SREG_TIMERHI = 57, + HEX_SREG_PMUCFG = 58, + HEX_SREG_S59 = 59, + HEX_SREG_S60 = 60, + HEX_SREG_S61 = 61, + HEX_SREG_S62 = 62, + HEX_SREG_S63 = 63, +}; + #endif From 4bab16a4227f1124be9e40f27e8d4d341220c9d7 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 28 May 2024 22:09:54 -0500 Subject: [PATCH 012/126] target/hexagon: Add guest, system reg number state Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 17 +++++++++++++++++ target/hexagon/cpu.h | 
8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 1abc98b98d7af..bf4a9bd6266e3 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -287,6 +287,14 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); /* Default NaN value: sign bit set, all frac bits set */ set_float_default_nan_pattern(0b11111111, &env->fp_status); + +#ifndef CONFIG_USER_ONLY + if (cs->cpu_index == 0 && env->g_sreg) { + memset(env->g_sreg, 0, sizeof(target_ulong) * NUM_SREGS); + } + memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); + memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); +#endif } static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info) @@ -313,6 +321,15 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) qemu_init_vcpu(cs); cpu_reset(cs); +#ifndef CONFIG_USER_ONLY + if (cs->cpu_index == 0) { + env->g_sreg = g_new0(target_ulong, NUM_SREGS); + } else { + CPUState *cpu0 = qemu_get_cpu(0); + CPUHexagonState *env0 = cpu_env(cpu0); + env->g_sreg = env0->g_sreg; + } +#endif mcc->parent_realize(dev, errp); } diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 5e15a8560a3b9..5dde4f8e880cb 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -86,6 +86,14 @@ typedef struct CPUArchState { target_ulong stack_start; uint8_t slot_cancelled; + +#ifndef CONFIG_USER_ONLY + /* Some system registers are per thread and some are global. 
*/ + target_ulong t_sreg[NUM_SREGS]; + target_ulong *g_sreg; + + target_ulong greg[NUM_GREGS]; +#endif target_ulong new_value_usr; MemLog mem_log_stores[STORES_MAX]; From e19dc3c37ebd5ce7ad8d99fa7ceaf52e1930880b Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 17:28:56 -0500 Subject: [PATCH 013/126] target/hexagon: Add TCG values for sreg, greg Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/translate.c | 7 +++++++ target/hexagon/translate.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 2e9a934fc6c1a..71c137be308fe 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -61,6 +61,13 @@ TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; +#ifndef CONFIG_USER_ONLY +TCGv hex_greg[NUM_GREGS]; +TCGv hex_t_sreg[NUM_SREGS]; +TCGv_ptr hex_g_sreg_ptr; +TCGv hex_g_sreg[NUM_SREGS]; +#endif + static const char * const hexagon_prednames[] = { "p0", "p1", "p2", "p3" }; diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 2bd125297a820..f611c854dcbd9 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -280,6 +280,13 @@ extern TCGv_i64 hex_llsc_val_i64; extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX]; +#ifndef CONFIG_USER_ONLY +extern TCGv hex_greg[NUM_GREGS]; +extern TCGv hex_t_sreg[NUM_SREGS]; +extern TCGv_ptr hex_g_sreg_ptr; +extern TCGv hex_g_sreg[NUM_SREGS]; +#endif + void hex_gen_exception_end_tb(DisasContext *ctx, int excp); From a33f529166132960370f55952512a85693b35874 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 16:27:30 -0500 Subject: [PATCH 014/126] target/hexagon: Add guest/sys reg writes to DisasContext Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/translate.h | 36 
++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index f611c854dcbd9..0eaa3db03e815 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -39,6 +39,14 @@ typedef struct DisasContext { int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); +#ifndef CONFIG_USER_ONLY + int greg_log[GREG_WRITES_MAX]; + int greg_log_idx; + int sreg_log[SREG_WRITES_MAX]; + int sreg_log_idx; + TCGv t_sreg_new_value[NUM_SREGS]; + TCGv greg_new_value[NUM_GREGS]; +#endif int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); @@ -79,6 +87,34 @@ typedef struct DisasContext { bool is_gather_store_insn(DisasContext *ctx); +#ifndef CONFIG_USER_ONLY +static inline void ctx_log_greg_write(DisasContext *ctx, int rnum) +{ + if (rnum <= HEX_GREG_G3) { + ctx->greg_log[ctx->greg_log_idx] = rnum; + ctx->greg_log_idx++; + } +} + +static inline void ctx_log_greg_write_pair(DisasContext *ctx, int rnum) +{ + ctx_log_greg_write(ctx, rnum); + ctx_log_greg_write(ctx, rnum + 1); +} + +static inline void ctx_log_sreg_write(DisasContext *ctx, int rnum) +{ + ctx->sreg_log[ctx->sreg_log_idx] = rnum; + ctx->sreg_log_idx++; +} + +static inline void ctx_log_sreg_write_pair(DisasContext *ctx, int rnum) +{ + ctx_log_sreg_write(ctx, rnum); + ctx_log_sreg_write(ctx, rnum + 1); +} +#endif + static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) { if (!test_bit(pnum, ctx->pregs_written)) { From 203d40c82d7041a5565de0784c7281b2d6aa253f Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 17:04:25 -0500 Subject: [PATCH 015/126] target/hexagon: Add imported macro, attr defs for sysemu Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/attribs_def.h.inc | 414 +++++++++++++++++++-- target/hexagon/imported/macros.def | 558 
+++++++++++++++++++++++++++++ 2 files changed, 942 insertions(+), 30 deletions(-) mode change 100755 => 100644 target/hexagon/imported/macros.def diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 9e3a05f882810..e6523a739b103 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -19,20 +19,41 @@ DEF_ATTRIB(AA_DUMMY, "Dummy Zeroth Attribute", "", "") /* Misc */ +DEF_ATTRIB(FAKEINSN, "Not a real instruction", "", "") +DEF_ATTRIB(MAPPING, "Not real -- asm mapped", "", "") +DEF_ATTRIB(CONDMAPPING, "Not real -- mapped based on values", "", "") DEF_ATTRIB(EXTENSION, "Extension instruction", "", "") +DEF_ATTRIB(SHARED_EXTENSION, "Shared extension instruction", "", "") +DEF_ATTRIB(CABAC, + "Cabac Instruction. Used in conjuction with QDSP6_CABAC_PRESENT", "", + "") +DEF_ATTRIB(EXPERIMENTAL, "This may not work correctly not supported by RTL.", + "", "") DEF_ATTRIB(PRIV, "Not available in user or guest mode", "", "") DEF_ATTRIB(GUEST, "Not available in user mode", "", "") DEF_ATTRIB(FPOP, "Floating Point Operation", "", "") +DEF_ATTRIB(FPDOUBLE, "Double-precision Floating Point Operation", "", "") +DEF_ATTRIB(FPSINGLE, "Single-precision Floating Point Operation", "", "") +DEF_ATTRIB(SFMAKE, "Single Float Make", "", "") +DEF_ATTRIB(DFMAKE, "Single Float Make", "", "") + +DEF_ATTRIB(NO_TIMING_LOG, "Does not get logged to the timing model", "", "") DEF_ATTRIB(EXTENDABLE, "Immediate may be extended", "", "") +DEF_ATTRIB(EXT_UPPER_IMMED, "Extend upper case immediate", "", "") +DEF_ATTRIB(EXT_LOWER_IMMED, "Extend lower case immediate", "", "") +DEF_ATTRIB(MUST_EXTEND, "Immediate must be extended", "", "") +DEF_ATTRIB(NA_NT, "Non-Allocating Non-Temporal instruction", "", "") +DEF_ATTRIB(INVPRED, "The predicate is inverted for true/false sense", "", "") DEF_ATTRIB(ARCHV2, "V2 architecture", "", "") DEF_ATTRIB(ARCHV3, "V3 architecture", "", "") DEF_ATTRIB(ARCHV4, "V4 architecture", "", "") DEF_ATTRIB(ARCHV5, "V5 
architecture", "", "") +DEF_ATTRIB(PACKED, "Packable instruction", "", "") DEF_ATTRIB(SUBINSN, "sub-instruction", "", "") /* Load and Store attributes */ @@ -46,21 +67,48 @@ DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "") DEF_ATTRIB(SCALAR_LOAD, "Load is scalar", "", "") DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "") -DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "") -DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "") -DEF_ATTRIB(REGWRSIZE_4B, "Memory width is 4 bytes", "", "") -DEF_ATTRIB(REGWRSIZE_8B, "Memory width is 8 bytes", "", "") +DEF_ATTRIB(REGWRSIZE_1B, "ETM Memory width is 1 byte", "", "") +DEF_ATTRIB(REGWRSIZE_2B, "ETM Memory width is 2 bytes", "", "") +DEF_ATTRIB(REGWRSIZE_4B, "ETM Memory width is 4 bytes", "", "") +DEF_ATTRIB(REGWRSIZE_8B, "ETM Memory width is 8 bytes", "", "") DEF_ATTRIB(MEMLIKE, "Memory-like instruction", "", "") DEF_ATTRIB(MEMLIKE_PACKET_RULES, "follows Memory-like packet rules", "", "") +DEF_ATTRIB(CACHEOP, "Cache operation", "", "") +DEF_ATTRIB(COPBYADDRESS, "Cache operation by address", "", "") +DEF_ATTRIB(COPBYIDX, "Cache operation by index", "", "") DEF_ATTRIB(RELEASE, "Releases a lock", "", "") DEF_ATTRIB(ACQUIRE, "Acquires a lock", "", "") +DEF_ATTRIB(LLSC, "load-locked/store-conditional instruction", "", "") DEF_ATTRIB(RLS_INNER, "Store release inner visibility", "", "") +DEF_ATTRIB(RLS_OUTER, "Store release outer visibility", "", "") DEF_ATTRIB(RLS_ALL_THREAD, "Store release among all threads", "", "") DEF_ATTRIB(RLS_SAME_THREAD, "Store release with the same thread", "", "") +/* Load and Store Addressing Mode Attributes */ +DEF_ATTRIB(EA_REG_ONLY, "EA = input register only", "", "") +DEF_ATTRIB(EA_IMM_ONLY, "EA = immediate only", "", "") +DEF_ATTRIB(EA_REG_PLUS_IMM, "EA = register plus immediate", "", "") +DEF_ATTRIB(EA_REG_PLUS_REGSCALED, "EA = register plus scaled register", "", "") +DEF_ATTRIB(EA_IMM_PLUS_REGSCALED, "EA = 
immediate plus scaled register", "", "") +DEF_ATTRIB(EA_BREV_REG, "EA = bit-reversed input register", "", "") +DEF_ATTRIB(EA_GP_IMM, "EA = GP plus immediate (unless extended)", "", "") +DEF_ATTRIB(EA_PAGECROSS, "EA calculation can have a Page Cross Stall", "", "") + +DEF_ATTRIB(PM_ANY, "Post Modify", "", "") +DEF_ATTRIB(PM_I, "Post Modify by Immediate", "", "") +DEF_ATTRIB(PM_M, "Post Modify by M register", "", "") +DEF_ATTRIB(PM_CIRI, "Post Modify with Circular Addressing by immediate", "", "") +DEF_ATTRIB(PM_CIRR, "Post Modify with Circular Addressing by I field", "", "") + +DEF_ATTRIB(VMEM, "VMEM-type", "", "") +DEF_ATTRIB(VBUF, "Touches the VBUF", "", "") +DEF_ATTRIB(VDBG, "Vector debugging instruction", "", "") + /* V6 Vector attributes */ DEF_ATTRIB(CVI, "Executes on the HVX extension", "", "") +DEF_ATTRIB(NT_VMEM, "Non-temporal memory access", "", "") +DEF_ATTRIB(VMEMU, "Unaligned memory access", "", "") DEF_ATTRIB(CVI_NEW, "New value memory instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VM, "Memory instruction executes on HVX", "", "") @@ -69,109 +117,415 @@ DEF_ATTRIB(CVI_VP_VS, "Double vector permute/shft insn executes on HVX", "", "") DEF_ATTRIB(CVI_VX, "Multiply instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VX_DV, "Double vector multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VS, "Shift instruction executes on HVX", "", "") -DEF_ATTRIB(CVI_VS_3SRC, "This shift needs to borrow a source register", "", "") +DEF_ATTRIB( + CVI_VS_3SRC, + "This shift instruction needs to borrow a source register from the VP slot", + "", "") DEF_ATTRIB(CVI_VS_VX, "Permute/shift and multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VA, "ALU instruction executes on HVX", "", "") +DEF_ATTRIB(CVI_VA_2SRC, + "This alu instruction executes on multimedia vector engine and " + "requires two vectro sources", + "", "") DEF_ATTRIB(CVI_VA_DV, "Double vector alu instruction executes on HVX", "", "") DEF_ATTRIB(CVI_4SLOT, "Consumes all the vector execution 
resources", "", "") DEF_ATTRIB(CVI_TMP, "Transient Memory Load not written to register", "", "") DEF_ATTRIB(CVI_REMAP, "Register Renaming not written to register file", "", "") +DEF_ATTRIB(CVI_TMP_SRC, "Transient reassign", "", "") +DEF_ATTRIB(CVI_EXTRACT, "HVX Extract Instruction that goes through L2", "", "") +DEF_ATTRIB(CVI_EARLY, "HVX instructions that require early sources", "", "") +DEF_ATTRIB(CVI_LATE, "HVX insn that always require late sources", "", "") +DEF_ATTRIB(CVI_VV_LATE, "HVX insn that always require late Vv source", "", "") +DEF_ATTRIB(CVI_REQUIRES_TMPLOAD, ".tmp load must be included in packet", "", "") +DEF_ATTRIB(CVI_PUMP_2X, "Goes through the pipeline twice", "", "") +DEF_ATTRIB(CVI_PUMP_4X, "Goes through the pipeline four times", "", "") DEF_ATTRIB(CVI_GATHER, "CVI Gather operation", "", "") DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "") DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "") +DEF_ATTRIB(CVI_GATHER_RELEASE, "CVI Store Release for gather", "", "") DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "") +DEF_ATTRIB(CVI_SCATTER_WORD_ACC, "CVI Scatter Word Accum (second pass)", "", "") +DEF_ATTRIB(CVI_SCATTER_ACC, "CVI Scatter Accumulate", "", "") +DEF_ATTRIB(CVI_VX_VSRC0_IS_DST, + "For the assembler to handle the special case of non-linear " + "instructions with Vxx specified both as src and dst in syntax ", + "", "") + +DEF_ATTRIB(CVI_VX_ACC_FWD, "VX Accumulator Forwarding", "", "") + +DEF_ATTRIB(CVI_VX_NO_TMP_LD, + "VX Accumulator renaming not allowed from tmp load instruction", "", + "") + +DEF_ATTRIB(RESTRICT_CVI_NOVP, + "Instructions with this attribute are assigned to the original " + "shift unit and can not be assigned to the shift/permute unit", + "", "") + +DEF_ATTRIB(CVI_GATHER_ADDR_2B, "CVI Scatter/Gather address is halfword", "", "") +DEF_ATTRIB(CVI_GATHER_ADDR_4B, "CVI Scatter/Gather address is word", "", "") + +DEF_ATTRIB(VFETCH, "memory fetch op to L2 for a single 
vector", "", "") + DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "") -DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "") +DEF_ATTRIB(HVX_FLT, "This a floating point HVX instruction.", "", "") + +DEF_ATTRIB( + VTCM_ALLBANK_ACCESS, + "This instruction allocates in all VTCM schedulers due to a region access.", + "", "") +DEF_ATTRIB(XUMINOR, "XU minor SMTable instruction", "", "") + +DEF_ATTRIB(SYNC_MARKER, "This instruction needs a sync marker.", "", "") + /* Change-of-flow attributes */ DEF_ATTRIB(JUMP, "Jump-type instruction", "", "") +DEF_ATTRIB(DIRECT, "Uses an PC-relative immediate field", "", "") DEF_ATTRIB(INDIRECT, "Absolute register jump", "", "") +DEF_ATTRIB(CJUMP, "Conditional jump", "", "") DEF_ATTRIB(CALL, "Function call instruction", "", "") +DEF_ATTRIB(RET, "Function return instruction", "", "") +DEF_ATTRIB(PERM, "Permute instruction", "", "") DEF_ATTRIB(COF, "Change-of-flow instruction", "", "") DEF_ATTRIB(HINTED_COF, "This instruction is a hinted change-of-flow", "", "") DEF_ATTRIB(CONDEXEC, "May be cancelled by a predicate", "", "") +DEF_ATTRIB(DOTOLD, "Uses a predicate generated in a previous packet", "", "") +DEF_ATTRIB(DOTNEW, "Uses a predicate generated in the same packet", "", "") DEF_ATTRIB(DOTNEWVALUE, "Uses a register value generated in this pkt", "", "") DEF_ATTRIB(NEWCMPJUMP, "Compound compare and jump", "", "") DEF_ATTRIB(NVSTORE, "New-value store", "", "") DEF_ATTRIB(MEMOP, "memop", "", "") -DEF_ATTRIB(ROPS_2, "Compound instruction worth 2 RISC-ops", "", "") -DEF_ATTRIB(ROPS_3, "Compound instruction worth 3 RISC-ops", "", "") +DEF_ATTRIB(ROPS_2, "Compound instruction worth 2 wimpy RISC-ops", "", "") +DEF_ATTRIB(ROPS_3, "Compound instruction worth 3 wimpy RISC-ops", "", "") /* access to implicit registers */ DEF_ATTRIB(IMPLICIT_WRITES_LR, "Writes the link register", "", "UREG.LR") +DEF_ATTRIB(IMPLICIT_READS_LR, "Reads the link register", "UREG.LR", "") +DEF_ATTRIB(IMPLICIT_READS_LC0, "Reads 
loop count for loop 0", "UREG.LC0", "") +DEF_ATTRIB(IMPLICIT_READS_LC1, "Reads loop count for loop 1", "UREG.LC1", "") +DEF_ATTRIB(IMPLICIT_READS_SA0, "Reads start address for loop 0", "UREG.SA0", "") +DEF_ATTRIB(IMPLICIT_READS_SA1, "Reads start address for loop 1", "UREG.SA1", "") +DEF_ATTRIB(IMPLICIT_WRITES_PC, "Writes the program counter", "", "UREG.PC") +DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the program counter", "UREG.PC", "") DEF_ATTRIB(IMPLICIT_WRITES_SP, "Writes the stack pointer", "", "UREG.SP") +DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the stack pointer", "UREG.SP", "") DEF_ATTRIB(IMPLICIT_WRITES_FP, "Writes the frame pointer", "", "UREG.FP") +DEF_ATTRIB(IMPLICIT_READS_FP, "Reads the frame pointer", "UREG.FP", "") +DEF_ATTRIB(IMPLICIT_WRITES_GP, "Writes the GP register", "", "UREG.GP") +DEF_ATTRIB(IMPLICIT_READS_GP, "Reads the GP register", "UREG.GP", "") DEF_ATTRIB(IMPLICIT_WRITES_LC0, "Writes loop count for loop 0", "", "UREG.LC0") DEF_ATTRIB(IMPLICIT_WRITES_LC1, "Writes loop count for loop 1", "", "UREG.LC1") DEF_ATTRIB(IMPLICIT_WRITES_SA0, "Writes start addr for loop 0", "", "UREG.SA0") DEF_ATTRIB(IMPLICIT_WRITES_SA1, "Writes start addr for loop 1", "", "UREG.SA1") +DEF_ATTRIB(IMPLICIT_WRITES_R00, "Writes Register 0", "", "UREG.R00") DEF_ATTRIB(IMPLICIT_WRITES_P0, "Writes Predicate 0", "", "UREG.P0") DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1") DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", "UREG.P2") DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3") -DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_R00, "Reads Register 0", "UREG.R00", "") +DEF_ATTRIB(IMPLICIT_READS_P0, "Reads Predicate 0", "UREG.P0", "") 
+DEF_ATTRIB(IMPLICIT_READS_P1, "Reads Predicate 1", "UREG.P1", "") +DEF_ATTRIB(IMPLICIT_READS_P3, "Reads Predicate 3", "UREG.P3", "") +DEF_ATTRIB(IMPLICIT_READS_Q3, "Reads Vector Predicate 3", "UREG.Q3", "") +DEF_ATTRIB(IMPLICIT_READS_CS, "Reads the CS/M register", "UREG.CS", "") +DEF_ATTRIB(IMPLICIT_READS_FRAMEKEY, "Reads FRAMEKEY", "UREG.FRAMEKEY", "") +DEF_ATTRIB(IMPLICIT_READS_FRAMELIMIT, "Reads FRAMELIMIT", "UREG.FRAMELIMIT", "") +DEF_ATTRIB(IMPLICIT_READS_ELR, "Reads the ELR register", "MREG.ELR", "") +DEF_ATTRIB(IMPLICIT_READS_SGP0, "Reads the SGP0 register", "MREG.SGP0", "") +DEF_ATTRIB(IMPLICIT_READS_SGP1, "Reads the SGP1 register", "MREG.SGP1", "") +DEF_ATTRIB(IMPLICIT_WRITES_SGP0, "Reads the SGP0 register", "", "MREG.SGP0") +DEF_ATTRIB(IMPLICIT_WRITES_SGP1, "Reads the SGP1 register", "", "MREG.SGP1") +DEF_ATTRIB(IMPLICIT_WRITES_STID_PRIO_ANYTHREAD, "Reads", "", "MREG.STID.PRIO") +DEF_ATTRIB(IMPLICIT_WRITES_SRBIT, "Writes the OVF bit", "", "UREG.SR.OVF") +DEF_ATTRIB(IMPLICIT_WRITES_FPFLAGS, "May write FP flags", "", "UREG.SR.FPFLAGS") +DEF_ATTRIB(IMPLICIT_WRITES_LPCFG, "Writes the loop config", "", "UREG.SR.LPCFG") +DEF_ATTRIB(IMPLICIT_WRITES_CVBITS, "Writes the CV flags", "", "UREG.SR.CV") +DEF_ATTRIB(IMPLICIT_READS_FPRND, "May read FP rnd mode", "UREG.SR.FPRND", "") +DEF_ATTRIB(IMPLICIT_READS_SSR, "May read SSR values", "MREG.SSR", "") +DEF_ATTRIB(IMPLICIT_READS_CCR, "May read CCR values", "MREG.CCR", "") +DEF_ATTRIB(IMPLICIT_WRITES_CCR, "May write CCR values", "", "MREG.CCR") +DEF_ATTRIB(IMPLICIT_WRITES_SSR, "May write SSR values", "", "MREG.SSR") +DEF_ATTRIB(IMPLICIT_READS_GELR, "May read GELR values", "GREG.GELR", "") +DEF_ATTRIB(IMPLICIT_READS_GEVB, "May read GEVB values", "MREG.GEVB", "") +DEF_ATTRIB(IMPLICIT_READS_GSR, "May read GSR values", "GREG.GSR", "") +DEF_ATTRIB(IMPLICIT_READS_GOSP, "May read GOSP values", "GREG.GOSP", "") +DEF_ATTRIB(IMPLICIT_WRITES_GELR, "May write GELR values", "", "GREG.GELR") +DEF_ATTRIB(IMPLICIT_WRITES_GSR, "May write 
GSR values", "", "GREG.GSR") +DEF_ATTRIB(IMPLICIT_WRITES_GOSP, "May write GOSP values", "", "GREG.GOSP") +DEF_ATTRIB(IMPLICIT_READS_IPENDAD_IPEND, "May read", "MREG.IPENDAD.IPEND", "") +DEF_ATTRIB(IMPLICIT_WRITES_IPENDAD_IPEND, "May write", "", "MREG.IPENDAD.IPEND") +DEF_ATTRIB(IMPLICIT_READS_IPENDAD_IAD, "May read", "MREG.IPENDAD.IAD", "") +DEF_ATTRIB(IMPLICIT_WRITES_IPENDAD_IAD, "May write", "", "MREG.IPENDAD.IAD") +DEF_ATTRIB(IMPLICIT_WRITES_IMASK_ANYTHREAD, "May write", "", "MREG.IMASK") +DEF_ATTRIB(IMPLICIT_READS_IMASK_ANYTHREAD, "May read", "MREG.IMASK", "") +DEF_ATTRIB(IMPLICIT_READS_SYSCFG_K0LOCK, "May read", "MREG.SYSCFG.K0LOCK", "") +DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_K0LOCK, "May write", "", "MREG.SYSCFG.K0LOCK") +DEF_ATTRIB(IMPLICIT_READS_SYSCFG_TLBLOCK, "May read", "MREG.SYSCFG.TLBLOCK", "") +DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_TLBLOCK, "May wr", "", "MREG.SYSCFG.TLBLOCK") +DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_GCA, "May write", "", "MREG.SYSCFG.GCA") +DEF_ATTRIB(IMPLICIT_READS_SYSCFG_GCA, "May read", "MREG.SYSCFG.GCA", "") DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "") -DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the SP register", "", "") + +/* Other things the instruction does */ +DEF_ATTRIB(ACC, "Has a multiply", "", "") +DEF_ATTRIB(MPY, "Has a multiply", "", "") +DEF_ATTRIB(SATURATE, "Does signed saturation", "", "") +DEF_ATTRIB(USATURATE, "Does unsigned saturation", "", "") +DEF_ATTRIB(CIRCADDR, "Uses circular addressing mode", "", "") +DEF_ATTRIB(BREVADDR, "Uses bit reverse addressing mode", "", "") +DEF_ATTRIB(BIDIRSHIFTL, "Uses a bidirectional shift left", "", "") +DEF_ATTRIB(BIDIRSHIFTR, "Uses a bidirectional shift right", "", "") +DEF_ATTRIB(BRANCHADDER, "Contains a PC-plus-immediate operation.", "", "") +DEF_ATTRIB(CRSLOT23, "Can execute in slot 2 or slot 3 (CR)", "", "") DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "") DEF_ATTRIB(DEALLOCRET, "dealloc_return", "", "") DEF_ATTRIB(DEALLOCFRAME, "deallocframe", "", "") 
-DEF_ATTRIB(CRSLOT23, "Can execute in slot 2 or slot 3 (CR)", "", "") +/* Instruction Types */ + +DEF_ATTRIB(IT_ALU, "ALU type", "", "") +DEF_ATTRIB(IT_ALU_ADDSUB, "ALU add or subtract type", "", "") +DEF_ATTRIB(IT_ALU_MINMAX, "ALU MIN or MAX type", "", "") +DEF_ATTRIB(IT_ALU_MOVE, "ALU data movement type", "", "") +DEF_ATTRIB(IT_ALU_LOGICAL, "ALU logical operation type", "", "") +DEF_ATTRIB(IT_ALU_SHIFT, "ALU shift operation type", "", "") +DEF_ATTRIB(IT_ALU_SHIFT_AND_OP, "ALU shift and additional op type", "", "") +DEF_ATTRIB(IT_ALU_CMP, "ALU compare operation type", "", "") + +DEF_ATTRIB(IT_LOAD, "Loads from memory", "", "") +DEF_ATTRIB(IT_STORE, "Stores to memory", "", "") + +DEF_ATTRIB(IT_MPY, "Multiply type", "", "") +DEF_ATTRIB(IT_MPY_32, "32-bit Multiply type", "", "") + +DEF_ATTRIB(IT_COF, "Change-of-flow type", "", "") +DEF_ATTRIB(IT_HWLOOP, "Sets up hardware loop registers", "", "") + +DEF_ATTRIB(IT_MISC, "misc instruction type", "", "") + DEF_ATTRIB(IT_NOP, "nop instruction", "", "") DEF_ATTRIB(IT_EXTENDER, "constant extender instruction", "", "") +/* Exceptions the instruction can generate */ + +DEF_ATTRIB(EXCEPTION_TLB, "Can generate a TLB Miss Exception", "", "") +DEF_ATTRIB(EXCEPTION_ACCESS, "Can generate Access Violation Exception", "", "") +DEF_ATTRIB(EXCEPTION_SWI, "Software Interrupt (trap) exception", "", "") + + +/* Documentation Notes */ +DEF_ATTRIB(NOTE_ARCHV2, "Only available in the V2 architecture", "", "") + +DEF_ATTRIB(NOTE_PACKET_PC, "The PC is the addr of the start of the pkt", "", "") + +DEF_ATTRIB(NOTE_PACKET_NPC, "Next PC is the address following pkt", "", "") + +DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "") + +DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "") + +DEF_ATTRIB(NOTE_RELATIVE_ADDRESS, "A PC-relative address is formed", "", "") + +DEF_ATTRIB(NOTE_LA_RESTRICT, "Cannot be in the last pkt of a HW loop", "", "") + +DEF_ATTRIB(NOTE_OOBVSHIFT, "Possible shift overflow", "", 
"") +DEF_ATTRIB(NOTE_BIDIRSHIFT, "Bidirectional shift", "", "") + +DEF_ATTRIB(NOTE_CVFLAGS, "Sets the Carry and Overflow flags in USR.", "", "") +DEF_ATTRIB(NOTE_SR_OVF_WHEN_SATURATING, "Might set OVF bit", "", "") +DEF_ATTRIB(NOTE_STNT, + "Non Temporal Data. The :nt appendix is a hint to the " + "microarchitecture indicating that the life of the cache line is " + "short. This information is used throughout the cache hierarchy to " + "make replacement and allocation decisions.", + "", "") +DEF_ATTRIB(NOTE_PRIV, "Monitor-level feature", "", "") +DEF_ATTRIB(NOTE_GUEST, "Guest-level feature", "", "") +DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "") +DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "") +DEF_ATTRIB(NOTE_NOSLOT1, "Packet with this insn must have slot 1 empty", "", "") +DEF_ATTRIB(NOTE_SLOT1_AOK, "Packet must have slot 1 empty or ALU32", "", "") +DEF_ATTRIB(NOTE_NOSLOT01, "Packet must have both slot 1 and 2 empty", "", "") +DEF_ATTRIB(NOTE_NEEDS_MEMLD, "Must be grouped with a memory load", "", "") +DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "") +DEF_ATTRIB(NOTE_COMPAT_ACCURACY, "In the future accuracy may increase", "", "") +DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "") +DEF_ATTRIB(NOTE_DEPRECATED, "Will be deprecated in a future version.", "", "") +DEF_ATTRIB(NOTE_NONAPALIV1, "may not work correctly in Napali V1.", "", "") +DEF_ATTRIB(NOTE_NOLAHAINAV1, "This may not work correctly in Lahaina V1.", "", + "") +DEF_ATTRIB(NOTE_BADTAG_UNDEF, "Undefined if a tag is non-present", "", "") +DEF_ATTRIB(NOTE_NOSLOT2_MPY, "Packet cannot have a slot 2 multiply", "", "") +DEF_ATTRIB(NOTE_HVX_ONLY, "Only available on a core with HVX.", "", "") + +DEF_ATTRIB(NOTE_NOCOF_RESTRICT, "Cannot be grouped with any COF", "", "") +DEF_ATTRIB(NOTE_BRANCHADDER_MAX1, "One PC-plus-offset calculation", "", "") + +DEF_ATTRIB(NOTE_CRSLOT23, "Execute on either slot2 or slot3 (CR)", "", "") 
+DEF_ATTRIB(NOTE_EXTENSION_AUDIO, "Hexagon audio extensions", "", "") +DEF_ATTRIB(NOTE_FETCHNT, + "Non Temporal Data Cache Prefetch. The :nt appendix is a hint to " + "the microarchitecture indicating that the life of the cache line " + "fetched is short. This information is used throughout the cache " + "hierarchy to make replacement and allocation decisions.", + "", "") +DEF_ATTRIB(NOTE_VECX_V67, "This instruction is only available on V67", "", "") + +DEF_ATTRIB(NOTE_NOVP, + "This instruction cannot be paired with a HVX permute instruction", + "", "") +DEF_ATTRIB(NOTE_VA_UNARY, + "If a packet contains this instruction and a HVX ALU op then the " + "ALU OP must be unary.", + "", "") + + +/* V6 MMVector Notes for Documentation */ +DEF_ATTRIB(NOTE_ANY_RESOURCE, "Can use any HVX resource.", "", "") +DEF_ATTRIB(NOTE_ANY2_RESOURCE, "Uses any pair of the HVX resources", "", "") +DEF_ATTRIB(NOTE_PERMUTE_RESOURCE, "Uses the HVX permute resource.", "", "") +DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "") +DEF_ATTRIB(NOTE_MPY_RESOURCE, "Uses a HVX multiply resource.", "", "") +DEF_ATTRIB(NOTE_MPYDV_RESOURCE, "Uses both HVX multiply resources.", "", "") +DEF_ATTRIB(NOTE_NT_VMEM, "Non-temporal hint to the micro-architecture", "", "") +DEF_ATTRIB(NOTE_ALL_RESOURCE, "Uses all HVX resources.", "", "") +DEF_ATTRIB(NOTE_VMEM, "Immediates are in multiples of vector length.", "", "") +DEF_ATTRIB(NOTE_ANY_VS_VX_RESOURCE, "Consumes two resources", "", "") + +DEF_ATTRIB(NOTE_RT8, "Input scalar register Rt is limited to R0-R7", "", "") + +DEF_ATTRIB(NOTE_MX, "This is in-memory matrix multiply instruction.", "", "") +DEF_ATTRIB(NOTE_VX_ACC_FWD, + "The accumulator (Vxx) source of this instruction must be generate " + "in the previous packet to avoid a stall. 
The accumulator cannot " + "come from a .tmp operation.", + "", "") +DEF_ATTRIB(NOTE_TMP_NO_VX, + "The tmp load instruction destination register cannot be an " + "accumulator register.", + "", "") + +DEF_ATTRIB( + NOTE_NO_ECC, + "ECC is not supported for scatter and gather instructions. Enabling ECC " + "with unprotected access instructions result in undetermined behavior.", + "", "") + +/* FP8 instructions */ +DEF_ATTRIB(HVX_FP8, "HVX FP8 extension instruction", "", "") +DEF_ATTRIB(HVX_IEEE_FP_OUT_8, "HVX IEEE FP extension instruction: 8-bit output", + "", "") + /* Restrictions to make note of */ +DEF_ATTRIB(RESTRICT_LOOP_LA, "Cannot be in the last packet of a loop", "", "") +DEF_ATTRIB(RESTRICT_NEEDS_MEMLD, "Must be grouped with a load", "", "") DEF_ATTRIB(RESTRICT_COF_MAX1, "One change-of-flow per packet", "", "") DEF_ATTRIB(RESTRICT_NOPACKET, "Not allowed in a packet", "", "") +DEF_ATTRIB(RESTRICT_NOSRMOVE, "Do not write SR in the same packet", "", "") DEF_ATTRIB(RESTRICT_SLOT0ONLY, "Must execute on slot0", "", "") DEF_ATTRIB(RESTRICT_SLOT1ONLY, "Must execute on slot1", "", "") DEF_ATTRIB(RESTRICT_SLOT2ONLY, "Must execute on slot2", "", "") DEF_ATTRIB(RESTRICT_SLOT3ONLY, "Must execute on slot3", "", "") +DEF_ATTRIB(RESTRICT_NOSLOT2_MPY, "A packet cannot have a slot 2 mpy", "", "") DEF_ATTRIB(RESTRICT_NOSLOT1, "No slot 1 instruction in parallel", "", "") +DEF_ATTRIB(RESTRICT_SLOT1_AOK, "Slot 1 insn must be empty or A-type", "", "") +DEF_ATTRIB(RESTRICT_NOSLOT01, "No slot 0 or 1 instructions in parallel", "", "") +DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "") +DEF_ATTRIB(RESTRICT_NOSLOT0_LOAD, "Packet must not have a slot 1 load", "", "") +DEF_ATTRIB(RESTRICT_NOCOF, "Cannot be grouped with any COF", "", "") +DEF_ATTRIB(RESTRICT_BRANCHADDER_MAX1, "One PC-plus-offset calculation", "", "") DEF_ATTRIB(RESTRICT_PREFERSLOT0, "Try to encode into slot 0", "", "") +DEF_ATTRIB(RESTRICT_SINGLE_MEM_FIRST, "Single memory op must be last", "", 
"") DEF_ATTRIB(RESTRICT_PACKET_AXOK, "May exist with A-type or X-type", "", "") +DEF_ATTRIB(RESTRICT_PACKET_SOMEREGS_OK, "Relaxed grouping rules", "", "") +DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "") + +DEF_ATTRIB(PAIR_1OF2, "For assembler", "", "") +DEF_ATTRIB(PAIR_2OF2, "For assembler", "", "") +DEF_ATTRIB(NOTE_MX_PAIR, + "Weights and Activations need to be paired in a packet.", "", "") +DEF_ATTRIB(NOTE_RESTRICT_CVI_NOVP, + "This instruction cannot use the permute/shift resource", "", "") + +/* Performance based preferences */ +DEF_ATTRIB(PREFER_SLOT3, "Complex XU prefering slot3", "", "") + +DEF_ATTRIB(RELAX_COF_1ST, "COF can be fisrt in assembly order", "", "") +DEF_ATTRIB(RELAX_COF_2ND, "COF can be second in assembly order", "", "") DEF_ATTRIB(ICOP, "Instruction cache op", "", "") +DEF_ATTRIB(INTRINSIC_RETURNS_UNSIGNED, "Intrinsic returns an unsigned", "", "") + +DEF_ATTRIB(PRED_BIT_1, "The branch uses bit 1 as the prediction bit", "", "") +DEF_ATTRIB(PRED_BIT_4, "The branch uses bit 4 as the prediction bit", "", "") +DEF_ATTRIB(PRED_BIT_8, "The branch uses bit 8 as the prediction bit", "", "") +DEF_ATTRIB(PRED_BIT_12, "The branch uses bit 12 as the prediction bit", "", "") +DEF_ATTRIB(PRED_BIT_13, "The branch uses bit 13 as the prediction bit", "", "") +DEF_ATTRIB(PRED_BIT_7, "The branch uses bit 7 as the prediction bit", "", "") +DEF_ATTRIB(HWLOOP0_SETUP, "Sets up HW loop0", "", "") +DEF_ATTRIB(HWLOOP1_SETUP, "Sets up HW loop1", "", "") DEF_ATTRIB(HWLOOP0_END, "Ends HW loop0", "", "") DEF_ATTRIB(HWLOOP1_END, "Ends HW loop1", "", "") DEF_ATTRIB(RET_TYPE, "return type", "", "") +DEF_ATTRIB(HINTJR, "hintjr type", "", "") DEF_ATTRIB(DCZEROA, "dczeroa type", "", "") +DEF_ATTRIB(ICTAGOP, "ictag op type", "", "") DEF_ATTRIB(ICFLUSHOP, "icflush op type", "", "") DEF_ATTRIB(DCFLUSHOP, "dcflush op type", "", "") +DEF_ATTRIB(DCTAGOP, "dctag op type", "", "") DEF_ATTRIB(L2FLUSHOP, "l2flush op type", "", "") +DEF_ATTRIB(L2TAGOP, "l2tag 
op type", "", "") DEF_ATTRIB(DCFETCH, "dcfetch type", "", "") +DEF_ATTRIB(BIMODAL_BRANCH, "Updates the bimodal branch predictor", "", "") +DEF_ATTRIB(VECINSN, "Long Vector Instruction", "", "") +DEF_ATTRIB(MEMSIZE_32B, "Memory width is 32 bytes", "", "") +DEF_ATTRIB(FOUR_PHASE, "Four Phase Instruction", "", "") DEF_ATTRIB(L2FETCH, "Instruction is l2fetch type", "", "") +DEF_ATTRIB(PREDUSE_BSB, "Instructions need back-skip-back scheduling", "", "") DEF_ATTRIB(ICINVA, "icinva", "", "") DEF_ATTRIB(DCCLEANINVA, "dccleaninva", "", "") +DEF_ATTRIB(EXTENSION_AUDIO, "audio extension", "", "") + +DEF_ATTRIB(MEMCPY, "memcpy or dma-type instruction", "", "") DEF_ATTRIB(NO_INTRINSIC, "Don't generate an intrisic", "", "") -/* Documentation Notes */ -DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "") -DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "") -DEF_ATTRIB(NOTE_PRIV, "Monitor-level feature", "", "") -DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "") -DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "") -DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "") -DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "") -DEF_ATTRIB(NOTE_NOVP, "Cannot be paired with a HVX permute instruction", "", "") -DEF_ATTRIB(NOTE_VA_UNARY, "Combined with HVX ALU op (must be unary)", "", "") +DEF_ATTRIB(NO_XML, "Don't generate a XML docs for this instruction", "", "") -/* V6 MMVector Notes for Documentation */ -DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "") -/* Restrictions to make note of */ -DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "") -DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "") +DEF_ATTRIB(DMA, "User-DMA instruction", "", "") +DEF_ATTRIB(VERIF_DMASTEP, + "Hiphop needs to step dma prior to executing this packet", "", "") +DEF_ATTRIB(VERIF_DMATICK, + "DMA gets a tick in verif mode for 
this instruction after a commit", + "", "") + +DEF_ATTRIB(HVX_IEEE_FP, "HVX IEEE FP extension instruction", "", "") +DEF_ATTRIB(NOTE_HVX_IEEE_FP, + "Only supported on the HVX cores with the IEEE FP extension", "", "") + +DEF_ATTRIB(HVX_IEEE_FP_DV_ONE, + "HVX IEEE FP extension instruction - dual pipes: P2 and P3 - output " + "only on P2", + "", "") +DEF_ATTRIB(HVX_IEEE_FP_ACC, "HVX IEEE FP accumulate instruction", "", "") +DEF_ATTRIB(HVX_IEEE_BF, + "HVX IEEE BF extension instruction: 16-bit bfloat input", "", "") +DEF_ATTRIB(HVX_IEEE_FP_OUT_BF, + "HVX IEEE FP extension instruction: 16-bit bfloat output", "", "") +DEF_ATTRIB(HVX_IEEE_FP_OUT_16, + "HVX IEEE FP extension instruction: 16-bit output", "", "") +DEF_ATTRIB(HVX_IEEE_FP_OUT_32, + "HVX IEEE FP extension instruction: 32-bit output", "", "") +DEF_ATTRIB(HVX_IEEE_FP_BINARY_LATE, + "HVX IEEE FP extension instruction: Both inputs can arrive late", "", + "") /* Keep this as the last attribute: */ DEF_ATTRIB(ZZ_LASTATTRIB, "Last attribute in the file", "", "") diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def old mode 100755 new mode 100644 index 4bbcfdd5e194a..f24f89f361263 --- a/target/hexagon/imported/macros.def +++ b/target/hexagon/imported/macros.def @@ -353,6 +353,12 @@ DEF_MACRO( () ) +DEF_MACRO( + fREAD_SSR, /* read SSR register */ + (READ_RREG(REG_SSR)), /* behavior */ + () +) + DEF_MACRO( fWRITE_LR, /* write lr */ WRITE_RREG(REG_LR,A), /* behavior */ @@ -371,12 +377,36 @@ DEF_MACRO( (A_IMPLICIT_WRITES_SP) ) +DEF_MACRO( + fWRITE_GOSP, /* write gosp */ + WRITE_RREG(REG_GOSP,A), /* behavior */ + (A_IMPLICIT_WRITES_GOSP) +) + DEF_MACRO( fREAD_SP, /* read stack pointer */ (READ_RREG(REG_SP)), /* behavior */ () ) +DEF_MACRO( + fREAD_GOSP, /* read guest other stack pointer */ + (READ_RREG(REG_GOSP)), /* behavior */ + () +) + +DEF_MACRO( + fREAD_GELR, /* read GELR register */ + (READ_RREG(REG_GELR)), /* behavior */ + () +) + +DEF_MACRO( + fREAD_GEVB, /* read guest
event vector base (GEVB) */ + (READ_RREG(REG_GEVB)), /* behavior */ + () +) + DEF_MACRO( fREAD_CSREG, /* read CS register */ (READ_RREG(REG_CSA+N)), /* behavior */ @@ -570,6 +600,11 @@ DEF_MACRO( WRITE_PREG(3,VAL), /* behavior */ (A_IMPLICIT_WRITES_P3) ) +DEF_MACRO( + fWRITE_P3_LATE, /* write Predicate 3, marked as a late predicate write */ + {WRITE_PREG(3,VAL); fHIDE(MARK_LATE_PRED_WRITE(3))} , /* behavior */ + (A_IMPLICIT_WRITES_P3,A_RESTRICT_LATEPRED) +) DEF_MACRO( fPART1, /* write Predicate 0 */ @@ -660,6 +695,7 @@ DEF_MACRO( ((size8s_t)((size2s_t)(A))), /* optional attributes */ ) + DEF_MACRO( fCAST2_8u, /* macro name */ ((size8u_t)((size2u_t)(A))), @@ -1532,18 +1568,209 @@ DEF_MACRO(fECHO, /* OS interface and stop/wait */ /********************************************/ +DEF_MACRO(RUNNABLE_THREADS_MAX, + (thread->processor_ptr->runnable_threads_max), + () +) + +DEF_MACRO(THREAD_IS_ON, + ((PROC->arch_proc_options->thread_enable_mask>>TNUM) & 0x1), + () +) + +DEF_MACRO(THREAD_EN_MASK, + ((PROC->arch_proc_options->thread_enable_mask)), + () +) + + + +DEF_MACRO(READ_IMASK, + (((TH) >= (thread->processor_ptr->runnable_threads_max)) ?
0 : (thread->processor_ptr->thread[TH]->Regs[REG_IMASK])), + () +) +DEF_MACRO(WRITE_IMASK, + if ((TH) < (thread->processor_ptr->runnable_threads_max)) { thread->processor_ptr->thread[TH]->Regs[REG_IMASK]=(VAL & reg_mutability[REG_IMASK] ); }, + (A_IMPLICIT_WRITES_IMASK_ANYTHREAD) +) + + +DEF_MACRO(WRITE_PRIO, + { + if ((TH) < (thread->processor_ptr->runnable_threads_max)) { + size4u_t tid_reg = thread->processor_ptr->thread[TH]->Regs[REG_TID]; + fINSERT_BITS(tid_reg, reg_field_info[STID_PRIO].width, reg_field_info[STID_PRIO].offset, VAL); + LOG_OTHER_THREAD_REG_WRITE(thread,REG_TID,tid_reg,TH); + } + }, + (A_IMPLICIT_WRITES_STID_PRIO_ANYTHREAD) +) + + +DEF_MACRO(DO_IASSIGNW, + { + int i; + int intbitpos = ((REG>>16)&0xF); + for (i=0;i<RUNNABLE_THREADS_MAX;i++) { + if(( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) { + fINSERT_BITS(thread->processor_ptr->thread[i]->Regs[REG_IMASK],1, intbitpos, (REG>>i) & 1); + } + } + }, + (A_IMPLICIT_WRITES_IMASK_ANYTHREAD) +) + + + + +DEF_MACRO(fDO_NMI, + { + int i; + for (i=0;i<RUNNABLE_THREADS_MAX;i++) { + if( ( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1) ) { + if (SREG & (1<<i)) { + register_nmi_interrupt(thread->processor_ptr->thread[i]); + } + } + } + }, +) + +DEF_MACRO(fDO_TRACE, + { + fHIDE(HEX_CALLBACK(thread->processor_ptr->options->trace_callback, + thread->system_ptr,thread->processor_ptr, + thread->threadId,SREG);) + }, +) + +DEF_MACRO(DO_IASSIGNR, + { + int i; + int result=0; + int intbitpos = ((SREG>>16)&0xF); + for (i=0;i<RUNNABLE_THREADS_MAX;i++) { + if(( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) { + result |= (((thread->processor_ptr->thread[i]->Regs[REG_IMASK]>>intbitpos)&1)<<i); + } + } + DREG=result; + }, + () +) + +DEF_MACRO(DO_SWI, + {fHIDE(HEX_CALLBACK(thread->processor_ptr->options->swi_callback, + thread->system_ptr,thread->processor_ptr, + thread->threadId,REG)); + 
LOG_GLOBAL_REG_WRITE(REG_IPEND,(GLOBAL_REG_READ(REG_IPEND) | (REG & GLOBAL_REG_READ(REG_IEL)))); + }, + (A_EXCEPTION_SWI) +) + +DEF_MACRO(DO_CSWI, + LOG_GLOBAL_REG_WRITE(REG_IPEND,GLOBAL_REG_READ(REG_IPEND) & ~((REG) & GLOBAL_REG_READ(REG_IEL)));, + () +) + +DEF_MACRO(DO_CIAD, + sys_ciad(thread,VAL); LOG_GLOBAL_REG_WRITE(REG_IAD,GLOBAL_REG_READ(REG_IAD) & ~(VAL));, + (A_EXCEPTION_SWI) +) + +DEF_MACRO(DO_SIAD, + sys_siad(thread,VAL); LOG_GLOBAL_REG_WRITE(REG_IAD,GLOBAL_REG_READ(REG_IAD) | (VAL));, + (A_EXCEPTION_SWI) +) + +DEF_MACRO(fBREAK, + {isdb_brkpt_insn(thread->processor_ptr,thread->threadId);}, + () +) + DEF_MACRO(fPAUSE, {sys_pause(thread, insn->slot, IMM);}, () ) + DEF_MACRO(fTRAP, warn("Trap NPC=%x ",fREAD_NPC()); warn("Trap exception, PCYCLE=%lld TYPE=%d NPC=%x IMM=0x%x",thread->processor_ptr->pstats[pcycles],TRAPTYPE,fREAD_NPC(),IMM); register_trap_exception(thread,fREAD_NPC(),TRAPTYPE,IMM);, + (A_EXCEPTION_SWI) +) + +DEF_MACRO(fINTERNAL_CLEAR_SAMEPAGE, + /* force re-xlate at next fetch, refresh of in_user_mode, etc */ + /* Permissions change too... 
*/ + sys_utlb_invalidate(thread->processor_ptr,thread), + /* NOTHING */ +) + +DEF_MACRO(fCLEAR_RTE_EX, + { + fLOG_REG_FIELD(SSR,SSR_EX,0); + fINTERNAL_CLEAR_SAMEPAGE(); + }, + () +) + +DEF_MACRO(fTLB_LOCK_AVAILABLE, + (fREAD_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK) == 0), () ) +DEF_MACRO(fK0_LOCK_AVAILABLE, + (fREAD_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_K0LOCK) == 0), + () +) + +DEF_MACRO(fSET_TLB_LOCK, + { + if (fTLB_LOCK_AVAILABLE()) { + fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK,1); + } else { + sys_waiting_for_tlb_lock(thread); + } + }, + () +) + +DEF_MACRO(fSET_K0_LOCK, + { + if (fK0_LOCK_AVAILABLE() && sys_k0lock_queue_ready(thread)) { + warn("k0lock: T%d: PC=0x%x: PCycle=%lld",thread->threadId,thread->Regs[REG_PC],thread->processor_ptr->pstats[pcycles]); + fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_K0LOCK,1); + } else { + warn("k0lock_waiting: T%d: PC=0x%x: PCycle=%lld",thread->threadId,thread->Regs[REG_PC],thread->processor_ptr->pstats[pcycles]); + sys_waiting_for_k0_lock(thread); + } + }, + () +) + +DEF_MACRO(fCLEAR_TLB_LOCK, + { + int i; + fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK,0); + for (i = 0; i < RUNNABLE_THREADS_MAX; i++) { + if(( (thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) { + thread->processor_ptr->thread[i]->cu_tlb_lock_waiting = 0; + } + } + }, + () +) + +DEF_MACRO(fCLEAR_K0_LOCK, + do { + warn("k0unlock: T%d: PC=0x%x: Pcycle=%lld",thread->threadId,thread->Regs[REG_PC], thread->processor_ptr->pstats[pcycles]); + sys_initiate_clear_k0_lock(thread); + } while (0), + () +) + DEF_MACRO(fALIGN_REG_FIELD_VALUE, ((VAL)<<reg_field_info[FIELD].offset), /* */ @@ -1554,6 +1781,26 @@ DEF_MACRO(fGET_REG_FIELD_MASK, /* */ ) +DEF_MACRO(fLOG_REG_FIELD, + LOG_MASKED_REG_WRITE(thread,REG_##REG, + fALIGN_REG_FIELD_VALUE(FIELD,VAL), + fGET_REG_FIELD_MASK(FIELD)), + () +) + +DEF_MACRO(fWRITE_GLOBAL_REG_FIELD, + fINSERT_BITS(thread->processor_ptr->global_regs[REG_##REG], + reg_field_info[FIELD].width, + reg_field_info[FIELD].offset,VAL), +) + 
+DEF_MACRO(fLOG_GLOBAL_REG_FIELD, + LOG_MASKED_GLOBAL_REG_WRITE(REG_##REG, + fALIGN_REG_FIELD_VALUE(FIELD,VAL), + fGET_REG_FIELD_MASK(FIELD)), + () +) + DEF_MACRO(fREAD_REG_FIELD, fEXTRACTU_BITS(thread->Regs[REG_##REG], reg_field_info[FIELD].width, @@ -1561,6 +1808,13 @@ DEF_MACRO(fREAD_REG_FIELD, /* ATTRIBS */ ) +DEF_MACRO(fREAD_GLOBAL_REG_FIELD, + fEXTRACTU_BITS(thread->processor_ptr->global_regs[REG_##REG], + reg_field_info[FIELD].width, + reg_field_info[FIELD].offset), + /* ATTRIBS */ +) + DEF_MACRO(fGET_FIELD, fEXTRACTU_BITS(VAL, reg_field_info[FIELD].width, @@ -1576,6 +1830,185 @@ DEF_MACRO(fSET_FIELD, /* ATTRIBS */ ) +DEF_MACRO(fSET_RUN_MODE_NOW, + {thread->processor_ptr->global_regs[REG_MODECTL] |= (1<<TNUM); + thread->last_commit_cycle = thread->processor_ptr->pcycle_counter; + sys_recalc_num_running_threads(thread->processor_ptr);}, +) + +DEF_MACRO(fIN_DEBUG_MODE, + (thread->debug_mode || (fREAD_GLOBAL_REG_FIELD(ISDBST,ISDBST_DEBUGMODE) & 1<<TNUM)), + () +) +DEF_MACRO(fIN_DEBUG_MODE_NO_ISDB, + (thread->debug_mode), + () +) + + +DEF_MACRO(fIN_DEBUG_MODE_WARN, + { + if (fREAD_GLOBAL_REG_FIELD(ISDBST,ISDBST_DEBUGMODE) & 1<<TNUM) + warn("In ISDB debug mode, but TB told me to step normally"); + }, + () +) + +DEF_MACRO(fCLEAR_RUN_MODE, + {fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_E, + fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) & ~(1<<(TNUM)))}, + /* NOTHING */ +) + +DEF_MACRO(fCLEAR_RUN_MODE_NOW, + do { + fWRITE_GLOBAL_REG_FIELD(MODECTL,MODECTL_E, + fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) & ~(1<<(TNUM))); + sys_recalc_num_running_threads(thread->processor_ptr); + } while (0), + /* NOTHING */ +) + +DEF_MACRO(fGET_RUN_MODE, + ((thread->processor_ptr->global_regs[REG_MODECTL]>>TNUM)&0x1), +) + +DEF_MACRO(fSET_WAIT_MODE, + {fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_W, + fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_W) | 1<<(TNUM))}, + /* NOTHING */ +) + +DEF_MACRO(fCLEAR_WAIT_MODE, + {thread->processor_ptr->global_regs[REG_MODECTL] &= ~(1<<(TNUM+16)); + thread->last_commit_cycle 
= thread->processor_ptr->pcycle_counter; + sys_recalc_num_running_threads(thread->processor_ptr);}, +) + +DEF_MACRO(fGET_WAIT_MODE, + ((thread->processor_ptr->global_regs[REG_MODECTL]>>(TNUM+16))&0x1), +) + + +DEF_MACRO(fRESET_THREAD, + register_reset_interrupt(T,NUM), +) + +DEF_MACRO(fREAD_CURRENT_EVB, + (GLOBAL_REG_READ(REG_EVB)), + /* nothing */ +) + +DEF_MACRO(fREAD_ELR, + READ_RREG(REG_ELR), + () +) + +DEF_MACRO(fPOW2_HELP_ROUNDUP, + ((VAL) | ((VAL) >> 1) | ((VAL) >> 2) | ((VAL) >> 4) | ((VAL) >> 8) | ((VAL) >> 16)), + () +) + +DEF_MACRO(fPOW2_ROUNDUP, + fPOW2_HELP_ROUNDUP((VAL)-1)+1, + () +) + +DEF_MACRO(fTLB_IDXMASK, + ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(thread->processor_ptr->arch_proc_options->jtlb_size)) - 1)), + () +) + +DEF_MACRO(fTLB_NONPOW2WRAP, + (((INDEX) >= thread->processor_ptr->arch_proc_options->jtlb_size) ? ((INDEX) - thread->processor_ptr->arch_proc_options->jtlb_size) : (INDEX)), + /* ATTRIBS */ +) + +DEF_MACRO(fTLBW, + do {size4u_t __myidx = fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX)); + TLB_REG_WRITE(__myidx,VALUE); + fHIDE(HEX_CALLBACK(thread->processor_ptr->options->tlbw_callback,thread->system_ptr,thread->processor_ptr,thread->threadId,__myidx);) + fHIDE(sys_tlb_write(thread,__myidx,VALUE);)} while (0), + /* ATTRIBS */ +) + +DEF_MACRO(fTLB_ENTRY_OVERLAP, + fHIDE( (sys_check_overlap(thread,VALUE)!=-2) ), + /* ATTRIBS */ +) + +DEF_MACRO(fTLB_ENTRY_OVERLAP_IDX, + fHIDE(sys_check_overlap(thread,VALUE)), + /* ATTRIBS */ +) + + +DEF_MACRO(fTLBR, + TLB_REG_READ(fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))), + /* ATTRIBS */ +) + +DEF_MACRO(fTLBP, + tlb_lookup(thread,((TLBHI)>>12),((TLBHI)<<12),1), + /* attribs */ +) + + + +DEF_MACRO(READ_SGP0, + READ_RREG(REG_SGP), + () +) + +DEF_MACRO(READ_SGP1, + READ_RREG(REG_SGP+1), + () +) + +DEF_MACRO(READ_SGP10, + READ_RREG_PAIR(REG_SGP), + () +) + +DEF_MACRO(READ_UGP, + READ_RREG(REG_UGP), +) + +DEF_MACRO(WRITE_SGP0, + WRITE_RREG(REG_SGP,VAL), + (A_IMPLICIT_WRITES_SGP0) +) + +DEF_MACRO(WRITE_SGP1, + 
WRITE_RREG(REG_SGP+1,VAL), + (A_IMPLICIT_WRITES_SGP1) +) + +DEF_MACRO(WRITE_SGP10, + WRITE_RREG_PAIR(REG_SGP,VAL), + (A_IMPLICIT_WRITES_SGP0,A_IMPLICIT_WRITES_SGP1) +) + +DEF_MACRO(WRITE_UGP, + WRITE_RREG(REG_UGP,VAL), +) + +DEF_MACRO(fSTART, + fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_E, fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) | (((REG & ((1<<RUNNABLE_THREADS_MAX)-1))) & THREAD_EN_MASK(thread->processor_ptr))), + () +) + +DEF_MACRO(fRESUME, + fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_W, + fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_W) & (~(REG))), + () +) + +DEF_MACRO(fGET_TNUM, + thread->threadId, + () +) + /********************************************/ /* Cache Management */ /********************************************/ @@ -1602,6 +2035,11 @@ DEF_MACRO(fISYNC, ) +DEF_MACRO(fICFETCH, + , + () +) + DEF_MACRO(fDCFETCH, sys_dcfetch(thread, (REG), insn->slot), (A_MEMLIKE) @@ -1615,6 +2053,34 @@ DEF_MACRO(fICINVA, (A_ICINVA) ) +DEF_MACRO(fDCTAGR, + ({DST=sys_dctagr(thread, INDEX, insn->slot,DSTREGNO);})/* FIXME */, + () +) + +DEF_MACRO(fDCTAGW, + (sys_dctagw(thread, INDEX, PART2, insn->slot)), + () +) +DEF_MACRO(fICTAGR, + ({DST=sys_ictagr(thread, INDEX, insn->slot,REGNO);}), + () +) + +DEF_MACRO(fICDATAR, + ({DST=sys_icdatar(thread, INDEX, insn->slot);}), + () +) + +DEF_MACRO(fICTAGW, + (sys_ictagw(thread, INDEX, PART2, insn->slot)), + () +) +DEF_MACRO(fICDATAW, + ({ fHIDE(); }), + () +) + DEF_MACRO(fL2FETCH, sys_l2fetch(thread, ADDR,HEIGHT,WIDTH,STRIDE,FLAGS, insn->slot), (A_MEMLIKE,A_L2FETCH) @@ -1635,6 +2101,12 @@ DEF_MACRO(fDCZEROA, (A_MEMLIKE) ) +DEF_MACRO(fDCINVA, + sys_dcinva(thread, (REG)), + (A_MEMLIKE) +) + + DEF_MACRO(fCHECKFORPRIV, {sys_check_privs(thread); if (EXCEPTION_DETECTED) return; }, () @@ -1645,6 +2117,16 @@ DEF_MACRO(fCHECKFORGUEST, () ) +DEF_MACRO(fTAKEN_INTERRUPT_EDGECLEAR, + { proc->global_regs[REG_IPEND] &= ~(INT_NUMTOMASK(intnum) & proc->global_regs[REG_IEL]); }, + () +) + +DEF_MACRO(fSET_IAD, + { sys_siad(thread,INT_NUMTOMASK(intnum)); 
thread->processor_ptr->global_regs[REG_IAD] |= INT_NUMTOMASK(intnum); }, + () +) + DEF_MACRO(fBRANCH_SPECULATE_STALL, { sys_speculate_branch_stall(thread, insn->slot, JUMP_COND(JUMP_PRED_SET), @@ -1664,3 +2146,79 @@ DEF_MACRO(IV1DEAD, , () ) + +DEF_MACRO(fIN_MONITOR_MODE, + sys_in_monitor_mode(thread), + () +) + +DEF_MACRO(fIN_USER_MODE, + sys_in_user_mode(thread), + () +) + +DEF_MACRO(fIN_GUEST_MODE, + sys_in_guest_mode(thread), + () +) + +DEF_MACRO(fGRE_ENABLED, + fREAD_REG_FIELD(CCR,CCR_GRE), + () +) + +DEF_MACRO(fGTE_ENABLED, + fREAD_REG_FIELD(CCR,CCR_GTE), + () +) + +DEF_MACRO(fTRAP1_VIRTINSN, + ((fIN_GUEST_MODE()) + && (fGRE_ENABLED()) + && ( ((IMM) == 1) + || ((IMM) == 3) + || ((IMM) == 4) + || ((IMM) == 6))), + () +) + +DEF_MACRO(fVIRTINSN_RTE, + do { + thread->trap1_info = TRAP1_VIRTINSN_RTE; + fLOG_REG_FIELD(SSR,SSR_SS,fREAD_REG_FIELD(GSR,GSR_SS)); + fLOG_REG_FIELD(CCR,CCR_GIE,fREAD_REG_FIELD(GSR,GSR_IE)); + fLOG_REG_FIELD(SSR,SSR_GM,!fREAD_REG_FIELD(GSR,GSR_UM)); + fBRANCH((fREAD_GELR() & -4),COF_TYPE_RTE); + fINTERNAL_CLEAR_SAMEPAGE(); + } while (0), + (A_IMPLICIT_WRITES_CCR,A_IMPLICIT_WRITES_SSR) +) + +DEF_MACRO(fVIRTINSN_SETIE, + do { + fLOG_REG_FIELD(CCR,CCR_GIE,(REG) & 1); + REG = fREAD_REG_FIELD(CCR,CCR_GIE); + thread->trap1_info = TRAP1_VIRTINSN_SETIE; + } while (0), + (A_IMPLICIT_WRITES_CCR) +) + +DEF_MACRO(fVIRTINSN_GETIE, + { + thread->trap1_info = TRAP1_VIRTINSN_GETIE; + REG = fREAD_REG_FIELD(CCR,CCR_GIE); + }, + () +) + +DEF_MACRO(fVIRTINSN_SPSWAP, + do { + if (fREAD_REG_FIELD(GSR,GSR_UM)) { + size4u_t TEMP = REG; + REG = fREAD_GOSP(); + fWRITE_GOSP(TEMP); + thread->trap1_info = TRAP1_VIRTINSN_SPSWAP; + } + } while (0), + (A_IMPLICIT_WRITES_GOSP) +) From bd00c0d9eadcebd5913b2c51605f86e85dcee450 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 8 Sep 2024 18:52:49 -0700 Subject: [PATCH 016/126] target/hexagon: Define DCache states Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_bits.h | 7
+++++++ 1 file changed, 7 insertions(+) diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index 6582bb4f16fc3..5d26815eb9bcf 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -41,6 +41,13 @@ enum hex_cause { HEX_CAUSE_PRIV_USER_NO_SINSN = 0x01b, }; +enum data_cache_state { + HEX_DC_STATE_INVALID = 0x0, + HEX_DC_STATE_VALID = 0x1, + HEX_DC_STATE_RESERVED = 0x2, + HEX_DC_STATE_UNUSED_WT = 0x3, +}; + #define PACKET_WORDS_MAX 4 static inline uint32_t parse_bits(uint32_t encoding) From 55580c0c9fc818901d20445a1a5c113112b002f0 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 17:04:51 -0500 Subject: [PATCH 017/126] target/hexagon: Add new macro definitions for sysemu Also: add nop TCG overrides for break,unpause,fetchbo,dczeroa break: this hardware breakpoint instruction is used with the in-silicon debugger feature, this is not modeled. unpause: this instruction is used to resume hardware threads that are stalled by pause instructions. pause is modeled as a nop, or in RR mode as an EXCP_YIELD. This instruction is safe to ignore. Since cache/prefetch functions are not modeled, dczero and fetchbo are safe to ignore. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_tcg.h | 9 ++ target/hexagon/macros.h | 28 ++++- target/hexagon/op_helper.c | 1 + target/hexagon/sys_macros.h | 238 ++++++++++++++++++++++++++++++++++++ 4 files changed, 272 insertions(+), 4 deletions(-) create mode 100644 target/hexagon/sys_macros.h diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 8a3b801287c7a..71f8a0e2d0848 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -488,6 +488,7 @@ /* dczeroa clears the 32 byte cache line at the address given */ #define fGEN_TCG_Y2_dczeroa(SHORTCODE) SHORTCODE +#define fGEN_TCG_Y2_dczeroa_nt(SHORTCODE) SHORTCODE /* In linux-user mode, these are not modelled, suppress compiler warning */ #define fGEN_TCG_Y2_dcinva(SHORTCODE) \ @@ -1133,6 +1134,9 @@ RdV, tcg_constant_tl(0)); \ } while (0) +#define fGEN_TCG_Y2_break(SHORTCODE) +#define fGEN_TCG_J2_unpause(SHORTCODE) + #define fGEN_TCG_J2_pause(SHORTCODE) \ do { \ uiV = uiV; \ @@ -1342,6 +1346,11 @@ RsV = RsV; \ uiV = uiV; \ } while (0) +#define fGEN_TCG_Y2_dcfetchbo_nt(SHORTCODE) \ + do { \ + RsV = RsV; \ + uiV = uiV; \ + } while (0) #define fGEN_TCG_L2_loadw_aq(SHORTCODE) SHORTCODE #define fGEN_TCG_L4_loadd_aq(SHORTCODE) SHORTCODE diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index ee3d4c88e7bdf..6e4a3a16970c2 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -537,9 +537,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #ifdef CONFIG_USER_ONLY #define fFRAMECHECK(ADDR, EA) do { } while (0) /* Not modelled in linux-user */ -#else -/* System mode not implemented yet */ -#define fFRAMECHECK(ADDR, EA) g_assert_not_reached(); #endif #ifdef QEMU_GENERATE @@ -630,8 +627,18 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fCONSTLL(A) A##LL #define fECHO(A) (A) -#define fTRAP(TRAPTYPE, IMM) helper_raise_exception(env, HEX_EXCP_TRAP0) +#ifdef CONFIG_USER_ONLY +#define 
fTRAP(TRAPTYPE, IMM) \ + do { \ + hexagon_raise_exception_err(env, HEX_EVENT_TRAP0, PC); \ + } while (0) +#endif + +#define fDO_TRACE(SREG) +#define fBREAK() +#define fUNPAUSE() #define fPAUSE(IMM) +#define fDCFETCH(REG) #define fALIGN_REG_FIELD_VALUE(FIELD, VAL) \ ((VAL) << reg_field_info[FIELD].offset) @@ -648,10 +655,23 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) ctx->dczero_addr = tcg_temp_new(); \ tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \ } while (0) +#else +#define fDCZEROA(REG) ((void) REG) #endif #define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \ STRBITNUM) /* Nothing */ +#ifdef CONFIG_USER_ONLY +/* + * This macro can only be true in guest mode. + * In user mode, the 4 VIRTINSN's can't be reached + */ +#define fTRAP1_VIRTINSN(IMM) (false) +#define fVIRTINSN_SPSWAP(IMM, REG) g_assert_not_reached() +#define fVIRTINSN_GETIE(IMM, REG) g_assert_not_reached() +#define fVIRTINSN_SETIE(IMM, REG) g_assert_not_reached() +#define fVIRTINSN_RTE(IMM, REG) g_assert_not_reached() +#endif #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 6da8db8ea5c59..4feec232983a4 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -24,6 +24,7 @@ #include "cpu.h" #include "internal.h" #include "macros.h" +#include "sys_macros.h" #include "arch.h" #include "hex_arch_types.h" #include "fma_emu.h" diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h new file mode 100644 index 0000000000000..3c4c3c7aa5ece --- /dev/null +++ b/target/hexagon/sys_macros.h @@ -0,0 +1,238 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_SYS_MACROS_H +#define HEXAGON_SYS_MACROS_H + +/* + * Macro definitions for Hexagon system mode + */ + +#ifndef CONFIG_USER_ONLY + +#define READ_SREG(NUM) arch_get_system_reg(env, NUM) +#define READ_SGP0() arch_get_system_reg(env, HEX_SREG_SGP0) +#define READ_SGP1() arch_get_system_reg(env, HEX_SREG_SGP1) +#define READ_SGP10() ((uint64_t)arch_get_system_reg(env, HEX_SREG_SGP0) | \ + ((uint64_t)arch_get_system_reg(env, HEX_SREG_SGP1) << 32)) + +#define WRITE_SREG(NUM, VAL) log_sreg_write(env, NUM, VAL, slot) +#define WRITE_SGP0(VAL) log_sreg_write(env, HEX_SREG_SGP0, VAL, slot) +#define WRITE_SGP1(VAL) log_sreg_write(env, HEX_SREG_SGP1, VAL, slot) +#define WRITE_SGP10(VAL) \ + do { \ + log_sreg_write(env, HEX_SREG_SGP0, (VAL) & 0xFFFFFFFF, slot); \ + log_sreg_write(env, HEX_SREG_SGP1, (VAL) >> 32, slot); \ + } while (0) + +#ifdef QEMU_GENERATE +#define GET_SSR_FIELD(RES, FIELD) \ + GET_FIELD(RES, FIELD, hex_t_sreg[HEX_SREG_SSR]) +#else + +#define GET_SSR_FIELD(FIELD, REGIN) \ + (uint32_t)GET_FIELD(FIELD, REGIN) +#define GET_SYSCFG_FIELD(FIELD, REGIN) \ + (uint32_t)GET_FIELD(FIELD, REGIN) +#define SET_SYSTEM_FIELD(ENV, REG, FIELD, VAL) \ + do { \ + uint32_t regval = arch_get_system_reg(ENV, REG); \ + fINSERT_BITS(regval, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + arch_set_system_reg(ENV, REG, regval); \ + } while (0) +#define SET_SSR_FIELD(ENV, FIELD, VAL) \ + SET_SYSTEM_FIELD(ENV, HEX_SREG_SSR, FIELD, VAL) +#define SET_SYSCFG_FIELD(ENV, FIELD, VAL) \ + SET_SYSTEM_FIELD(ENV, HEX_SREG_SYSCFG, FIELD, VAL) + +#define CCR_FIELD_SET(ENV, FIELD) \ + (!!GET_FIELD(FIELD, arch_get_system_reg(ENV, HEX_SREG_CCR))) + +/* + * Direct-to-guest is not implemented yet, continuing would cause unexpected + * behavior, so we abort. 
+ */ +#define ASSERT_DIRECT_TO_GUEST_UNSET(ENV, EXCP) \ + do { \ + switch (EXCP) { \ + case HEX_EVENT_TRAP0: \ + g_assert(!CCR_FIELD_SET(ENV, CCR_GTE)); \ + break; \ + case HEX_EVENT_IMPRECISE: \ + case HEX_EVENT_PRECISE: \ + case HEX_EVENT_FPTRAP: \ + g_assert(!CCR_FIELD_SET(ENV, CCR_GEE)); \ + break; \ + default: \ + if ((EXCP) >= HEX_EVENT_INT0) { \ + g_assert(!CCR_FIELD_SET(ENV, CCR_GIE)); \ + } \ + break; \ + } \ + } while (0) +#endif + +#define fREAD_ELR() (READ_SREG(HEX_SREG_ELR)) + +#define fLOAD_PHYS(NUM, SIZE, SIGN, SRC1, SRC2, DST) { \ + const uintptr_t rs = ((unsigned long)(unsigned)(SRC1)) & 0x7ff; \ + const uintptr_t rt = ((unsigned long)(unsigned)(SRC2)) << 11; \ + const uintptr_t addr = rs + rt; \ + cpu_physical_memory_read(addr, &DST, sizeof(uint32_t)); \ +} + +#define fPOW2_HELP_ROUNDUP(VAL) \ + ((VAL) | \ + ((VAL) >> 1) | \ + ((VAL) >> 2) | \ + ((VAL) >> 4) | \ + ((VAL) >> 8) | \ + ((VAL) >> 16)) +#define fPOW2_ROUNDUP(VAL) (fPOW2_HELP_ROUNDUP((VAL) - 1) + 1) + +#define fFRAMECHECK(ADDR, EA) g_assert_not_reached(); + +#define fTRAP(TRAPTYPE, IMM) \ + register_trap_exception(env, TRAPTYPE, IMM, PC) + +#define fVIRTINSN_SPSWAP(IMM, REG) +#define fVIRTINSN_GETIE(IMM, REG) { REG = 0xdeafbeef; } +#define fVIRTINSN_SETIE(IMM, REG) +#define fVIRTINSN_RTE(IMM, REG) +#define fGRE_ENABLED() GET_FIELD(CCR_GRE, READ_SREG(HEX_SREG_CCR)) +#define fTRAP1_VIRTINSN(IMM) \ + (fGRE_ENABLED() && \ + (((IMM) == 1) || ((IMM) == 3) || ((IMM) == 4) || ((IMM) == 6))) + +/* Not modeled in qemu */ + +#define MARK_LATE_PRED_WRITE(RNUM) +#define fICINVIDX(REG) +#define fICKILL() +#define fDCKILL() +#define fL2KILL() +#define fL2UNLOCK() +#define fL2CLEAN() +#define fL2CLEANINV() +#define fL2CLEANPA(REG) +#define fL2CLEANINVPA(REG) +#define fL2CLEANINVIDX(REG) +#define fL2CLEANIDX(REG) +#define fL2INVIDX(REG) +#define fL2TAGR(INDEX, DST, DSTREG) +#define fL2UNLOCKA(VA) ((void) VA) +#define fL2TAGW(INDEX, PART2) +#define fDCCLEANIDX(REG) +#define fDCCLEANINVIDX(REG) + +/* 
Always succeed: */ +#define fL2LOCKA(EA, PDV, PDN) ((void) EA, PDV = 0xFF) +#define fCLEAR_RTE_EX() \ + do { \ + uint32_t tmp = 0; \ + tmp = arch_get_system_reg(env, HEX_SREG_SSR); \ + fINSERT_BITS(tmp, reg_field_info[SSR_EX].width, \ + reg_field_info[SSR_EX].offset, 0); \ + log_sreg_write(env, HEX_SREG_SSR, tmp, slot); \ + } while (0) + +#define fDCINVIDX(REG) +#define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */ + +#define fSET_TLB_LOCK() g_assert_not_reached() +#define fCLEAR_TLB_LOCK() g_assert_not_reached() + +#define fSET_K0_LOCK() g_assert_not_reached() +#define fCLEAR_K0_LOCK() g_assert_not_reached() + +#define fTLB_IDXMASK(INDEX) \ + ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1)) + +#define fTLB_NONPOW2WRAP(INDEX) \ + (((INDEX) >= env_archcpu(env)->num_tlbs) ? \ + ((INDEX) - env_archcpu(env)->num_tlbs) : \ + (INDEX)) + + +#define fTLBW(INDEX, VALUE) \ + hex_tlbw(env, (INDEX), (VALUE)) +#define fTLBW_EXTENDED(INDEX, VALUE) \ + hex_tlbw(env, (INDEX), (VALUE)) +#define fTLB_ENTRY_OVERLAP(VALUE) \ + (hex_tlb_check_overlap(env, VALUE, -1) != -2) +#define fTLB_ENTRY_OVERLAP_IDX(VALUE) \ + hex_tlb_check_overlap(env, VALUE, -1) +#define fTLBR(INDEX) \ + (env->hex_tlb->entries[fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))]) +#define fTLBR_EXTENDED(INDEX) \ + (env->hex_tlb->entries[fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))]) +#define fTLBP(TLBHI) \ + hex_tlb_lookup(env, ((TLBHI) >> 12), ((TLBHI) << 12)) +#define iic_flush_cache(p) + +#define fIN_DEBUG_MODE(TNUM) \ + ((GET_FIELD(ISDBST_DEBUGMODE, arch_get_system_reg(env, HEX_SREG_ISDBST)) \ + & (0x1 << (TNUM))) != 0) + +#define fIN_DEBUG_MODE_NO_ISDB(TNUM) false +#define fIN_DEBUG_MODE_WARN(TNUM) false + +#ifdef QEMU_GENERATE + +/* + * Read tags back as zero for now: + * + * tag value in RD[31:10] for 32k, RD[31:9] for 16k + */ +#define fICTAGR(RS, RD, RD2) \ + do { \ + RD = ctx->zero; \ + } while (0) +#define fICTAGW(RS, RD) +#define fICDATAR(RS, RD) \ + do { \ + RD = ctx->zero; \ + 
} while (0) +#define fICDATAW(RS, RD) + +#define fDCTAGW(RS, RT) +/* tag: RD[23:0], state: RD[30:29] */ +#define fDCTAGR(INDEX, DST, DST_REG_NUM) \ + do { \ + DST = ctx->zero; \ + } while (0) +#else + +/* + * Read tags back as zero for now: + * + * tag value in RD[31:10] for 32k, RD[31:9] for 16k + */ +#define fICTAGR(RS, RD, RD2) \ + do { \ + RD = 0x00; \ + } while (0) +#define fICTAGW(RS, RD) +#define fICDATAR(RS, RD) \ + do { \ + RD = 0x00; \ + } while (0) +#define fICDATAW(RS, RD) + +#define fDCTAGW(RS, RT) +/* tag: RD[23:0], state: RD[30:29] */ +#define fDCTAGR(INDEX, DST, DST_REG_NUM) \ + do { \ + DST = HEX_DC_STATE_INVALID | 0x00; \ + } while (0) +#endif + +#endif + +#define NUM_TLB_REGS(x) (env_archcpu(env)->num_tlbs) + +#endif From eeed2ee5cd3d100eef58e2cef2fa1a64a99d16da Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 17:05:26 -0500 Subject: [PATCH 018/126] target/hexagon: Add handlers for guest/sysreg r/w This commit provides handlers to generate TCG for guest and system register reads and writes. They will be leveraged by a future commit. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/genptr.c | 159 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 2c5e15cfcf6f9..488d0b4b978b1 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -23,6 +23,7 @@ #include "exec/helper-gen.h" #include "insn.h" #include "opcodes.h" +#include "sys_macros.h" #include "translate.h" #define QEMU_GENERATE /* Used internally by macros.h */ #include "macros.h" @@ -128,6 +129,164 @@ TCGv get_result_pred(DisasContext *ctx, int pnum) } } +#ifndef CONFIG_USER_ONLY +G_GNUC_UNUSED +static bool greg_writable(int rnum, bool pair) +{ + if (pair) { + if (rnum < HEX_GREG_G3) { + return true; + } + qemu_log_mask(LOG_UNIMP, + "Warning: ignoring write to guest register pair G%d:%d\n", + rnum + 1, rnum); + } else { + if (rnum <= HEX_GREG_G3) { + return true; + } + qemu_log_mask(LOG_UNIMP, + "Warning: ignoring write to guest register G%d\n", rnum); + } + return false; +} + +G_GNUC_UNUSED +static void check_greg_impl(int rnum, bool pair) +{ + if (pair && (!greg_implemented(rnum) || !greg_implemented(rnum + 1))) { + qemu_log_mask(LOG_UNIMP, + "Warning: guest register pair G%d:%d is unimplemented or " + "reserved. Read will yield 0.\n", + rnum + 1, rnum); + } else if (!pair && !greg_implemented(rnum)) { + qemu_log_mask(LOG_UNIMP, + "Warning: guest register G%d is unimplemented or reserved." 
+ " Read will yield 0.\n", rnum); + } +} + +G_GNUC_UNUSED +static inline void gen_log_greg_write(DisasContext *ctx, int rnum, TCGv val) +{ + tcg_gen_mov_tl(ctx->greg_new_value[rnum], val); +} + +G_GNUC_UNUSED +static void gen_log_greg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) +{ + TCGv val32 = tcg_temp_new(); + + /* Low word */ + tcg_gen_extrl_i64_i32(val32, val); + gen_log_greg_write(ctx, rnum, val32); + + /* High word */ + tcg_gen_extrh_i64_i32(val32, val); + gen_log_greg_write(ctx, rnum + 1, val32); +} + +static const target_ulong sreg_immut_masks[NUM_SREGS] = { + [HEX_SREG_STID] = 0xff00ff00, + [HEX_SREG_ELR] = 0x00000003, + [HEX_SREG_SSR] = 0x00008000, + [HEX_SREG_CCR] = 0x10e0ff24, + [HEX_SREG_HTID] = IMMUTABLE, + [HEX_SREG_IMASK] = 0xffff0000, + [HEX_SREG_GEVB] = 0x000000ff, + [HEX_SREG_EVB] = 0x000000ff, + [HEX_SREG_MODECTL] = IMMUTABLE, + [HEX_SREG_SYSCFG] = 0x80001c00, + [HEX_SREG_IPENDAD] = IMMUTABLE, + [HEX_SREG_VID] = 0xfc00fc00, + [HEX_SREG_VID1] = 0xfc00fc00, + [HEX_SREG_BESTWAIT] = 0xfffffe00, + [HEX_SREG_SCHEDCFG] = 0xfffffef0, + [HEX_SREG_CFGBASE] = IMMUTABLE, + [HEX_SREG_REV] = IMMUTABLE, + [HEX_SREG_ISDBST] = IMMUTABLE, + [HEX_SREG_ISDBCFG0] = 0xe0000000, + [HEX_SREG_BRKPTPC0] = 0x00000003, + [HEX_SREG_BRKPTCFG0] = 0xfc007000, + [HEX_SREG_BRKPTPC1] = 0x00000003, + [HEX_SREG_BRKPTCFG1] = 0xfc007000, + [HEX_SREG_ISDBMBXIN] = IMMUTABLE, + [HEX_SREG_ISDBEN] = 0xfffffffe, + [HEX_SREG_TIMERLO] = IMMUTABLE, + [HEX_SREG_TIMERHI] = IMMUTABLE, +}; + +G_GNUC_UNUSED +static void gen_log_sreg_write(DisasContext *ctx, int rnum, TCGv val) +{ + const target_ulong reg_mask = sreg_immut_masks[rnum]; + + if (reg_mask != IMMUTABLE) { + if (rnum < HEX_SREG_GLB_START) { + gen_masked_reg_write(val, hex_t_sreg[rnum], reg_mask); + tcg_gen_mov_tl(ctx->t_sreg_new_value[rnum], val); + } else { + gen_masked_reg_write(val, hex_g_sreg[rnum], reg_mask); + gen_helper_sreg_write(tcg_env, tcg_constant_i32(rnum), val); + } + } +} + +G_GNUC_UNUSED +static void 
gen_log_sreg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) +{ + TCGv val32 = tcg_temp_new(); + + /* Low word */ + tcg_gen_extrl_i64_i32(val32, val); + gen_log_sreg_write(ctx, rnum, val32); + + /* High word */ + tcg_gen_extrh_i64_i32(val32, val); + gen_log_sreg_write(ctx, rnum + 1, val32); +} + +G_GNUC_UNUSED +static void gen_read_sreg(TCGv dst, int reg_num) +{ + if (reg_num >= HEX_SREG_GLB_START || reg_num == HEX_SREG_BADVA) { + gen_helper_sreg_read(dst, tcg_env, tcg_constant_i32(reg_num)); + } else { + tcg_gen_mov_tl(dst, hex_t_sreg[reg_num]); + } +} + +G_GNUC_UNUSED +static void gen_read_sreg_pair(TCGv_i64 dst, int reg_num) +{ + if (reg_num < HEX_SREG_GLB_START) { + if (reg_num + 1 == HEX_SREG_BADVA) { + TCGv badva = tcg_temp_new(); + gen_helper_sreg_read(badva, tcg_env, + tcg_constant_i32(HEX_SREG_BADVA)); + tcg_gen_concat_i32_i64(dst, hex_t_sreg[reg_num], badva); + } else { + tcg_gen_concat_i32_i64(dst, hex_t_sreg[reg_num], + hex_t_sreg[reg_num + 1]); + } + } else { + gen_helper_sreg_read_pair(dst, tcg_env, tcg_constant_i32(reg_num)); + } +} + +G_GNUC_UNUSED +static void gen_read_greg(TCGv dst, int reg_num) +{ + gen_helper_greg_read(dst, tcg_env, tcg_constant_i32(reg_num)); +} + +G_GNUC_UNUSED +static void gen_read_greg_pair(TCGv_i64 dst, int reg_num) +{ + gen_helper_greg_read_pair(dst, tcg_env, tcg_constant_i32(reg_num)); +} +#endif + + void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { TCGv pred = get_result_pred(ctx, pnum); From 19899f600856d389f47af92551a2928f2dab68ae Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 20 May 2024 17:54:50 -0500 Subject: [PATCH 019/126] target/hexagon: Add placeholder greg/sreg r/w helpers Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/helper.h | 9 +++++++++ target/hexagon/op_helper.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index f8baa599c88cd..fddbd99a197d7
100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -107,3 +107,12 @@ DEF_HELPER_4(probe_noshuf_load, void, env, i32, int, int) DEF_HELPER_2(probe_pkt_scalar_store_s0, void, env, int) DEF_HELPER_2(probe_hvx_stores, void, env, int) DEF_HELPER_2(probe_pkt_scalar_hvx_stores, void, env, int) + +#if !defined(CONFIG_USER_ONLY) +DEF_HELPER_2(sreg_read, i32, env, i32) +DEF_HELPER_2(sreg_read_pair, i64, env, i32) +DEF_HELPER_2(greg_read, i32, env, i32) +DEF_HELPER_2(greg_read_pair, i64, env, i32) +DEF_HELPER_3(sreg_write, void, env, i32, i32) +DEF_HELPER_3(sreg_write_pair, void, env, i32, i64) +#endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 4feec232983a4..ccd806836cf7d 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1315,6 +1315,40 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV) } } +#ifndef CONFIG_USER_ONLY +void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val) +{ + g_assert_not_reached(); +} + +void HELPER(sreg_write_pair)(CPUHexagonState *env, uint32_t reg, uint64_t val) + +{ + g_assert_not_reached(); +} + +uint32_t HELPER(sreg_read)(CPUHexagonState *env, uint32_t reg) +{ + g_assert_not_reached(); +} + +uint64_t HELPER(sreg_read_pair)(CPUHexagonState *env, uint32_t reg) +{ + g_assert_not_reached(); +} + +uint32_t HELPER(greg_read)(CPUHexagonState *env, uint32_t reg) +{ + g_assert_not_reached(); +} + +uint64_t HELPER(greg_read_pair)(CPUHexagonState *env, uint32_t reg) +{ + g_assert_not_reached(); +} +#endif + + /* These macros can be referenced in the generated helper functions */ #define warn(...) /* Nothing */ #define fatal(...) 
g_assert_not_reached(); From 0a9f7356c41b9f4f231d330e08b713844eccd850 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 8 Sep 2024 12:15:28 -0700 Subject: [PATCH 020/126] target/hexagon: Add vmstate representation Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 3 +++ target/hexagon/internal.h | 4 ++++ target/hexagon/machine.c | 25 +++++++++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 target/hexagon/machine.c diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index bf4a9bd6266e3..7c070f5123d9a 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -371,6 +371,9 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->gdb_stop_before_watchpoint = true; cc->gdb_core_xml_file = "hexagon-core.xml"; cc->disas_set_info = hexagon_cpu_disas_set_info; +#ifndef CONFIG_USER_ONLY + dc->vmsd = &vmstate_hexagon_cpu; +#endif cc->tcg_ops = &hexagon_tcg_ops; } diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 32e96f00d97a1..96581413165c6 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -31,4 +31,8 @@ void hexagon_debug(CPUHexagonState *env); extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS]; +#ifndef CONFIG_USER_ONLY +extern const VMStateDescription vmstate_hexagon_cpu; +#endif + #endif diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c new file mode 100644 index 0000000000000..d9d71edf7718a --- /dev/null +++ b/target/hexagon/machine.c @@ -0,0 +1,25 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "migration/cpu.h" +#include "cpu.h" + + +const VMStateDescription vmstate_hexagon_cpu = { + .name = "cpu", + .version_id = 0, + .minimum_version_id = 0, + .fields = (const VMStateField[]) { + VMSTATE_CPU(), + VMSTATE_UINTTL_ARRAY(env.gpr, HexagonCPU, TOTAL_PER_THREAD_REGS), + VMSTATE_UINTTL_ARRAY(env.pred, HexagonCPU, NUM_PREGS), + VMSTATE_UINTTL_ARRAY(env.t_sreg, HexagonCPU, NUM_SREGS), + VMSTATE_UINTTL_ARRAY(env.greg, HexagonCPU, NUM_GREGS), + VMSTATE_END_OF_LIST() + }, +}; + From 01ce2ee5099f99acf50542faa4dc81bed0486452 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 26 May 2024 19:11:55 -0500 Subject: [PATCH 021/126] target/hexagon: Make A_PRIV, "J2_trap*" insts need_env() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index b0c86cb3babd6..f7d8b0d092d73 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -250,7 +250,9 @@ def need_env(tag): "A_LOAD" in attribdict[tag] or "A_CVI_GATHER" in attribdict[tag] or "A_CVI_SCATTER" in attribdict[tag] or - "A_IMPLICIT_WRITES_USR" in attribdict[tag]) + "A_IMPLICIT_WRITES_USR" in attribdict[tag] or + "A_PRIV" in attribdict[tag] or + "J2_trap" in tag) def need_slot(tag): From 269ac168332dda133cf4ff6e69786038acbf20bf Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 23 May 2024 21:42:44 -0500 Subject: [PATCH 022/126] target/hexagon: Define register fields for system regs Define the register fields for ssr, schedcfg, stid, bestwait, ccr, modectl, imask, ipendad. Define the fields for TLB entries.
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/reg_fields_def.h.inc | 96 +++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/target/hexagon/reg_fields_def.h.inc b/target/hexagon/reg_fields_def.h.inc index f2a58d486c55e..156a3514e77d0 100644 --- a/target/hexagon/reg_fields_def.h.inc +++ b/target/hexagon/reg_fields_def.h.inc @@ -39,3 +39,99 @@ DEF_REG_FIELD(USR_FPDBZE, 26, 1) DEF_REG_FIELD(USR_FPOVFE, 27, 1) DEF_REG_FIELD(USR_FPUNFE, 28, 1) DEF_REG_FIELD(USR_FPINPE, 29, 1) + +DEF_REG_FIELD(IPENDAD_IAD, 16, 16) +DEF_REG_FIELD(IPENDAD_IPEND, 0, 16) + +DEF_REG_FIELD(SCHEDCFG_EN, 8, 1) +DEF_REG_FIELD(SCHEDCFG_INTNO, 0, 4) +DEF_REG_FIELD(BESTWAIT_PRIO, 0, 10) + + +/* PTE (aka TLB entry) fields */ +DEF_REG_FIELD(PTE_PPD, 0, 24) +DEF_REG_FIELD(PTE_C, 24, 4) +DEF_REG_FIELD(PTE_U, 28, 1) +DEF_REG_FIELD(PTE_R, 29, 1) +DEF_REG_FIELD(PTE_W, 30, 1) +DEF_REG_FIELD(PTE_X, 31, 1) +DEF_REG_FIELD(PTE_VPN, 32, 20) +DEF_REG_FIELD(PTE_ASID, 52, 7) +DEF_REG_FIELD(PTE_ATR0, 59, 1) +DEF_REG_FIELD(PTE_ATR1, 60, 1) +DEF_REG_FIELD(PTE_PA35, 61, 1) +DEF_REG_FIELD(PTE_G, 62, 1) +DEF_REG_FIELD(PTE_V, 63, 1) + +/* SYSCFG fields */ +DEF_REG_FIELD(SYSCFG_MMUEN, 0, 1) +DEF_REG_FIELD(SYSCFG_ICEN, 1, 1) +DEF_REG_FIELD(SYSCFG_DCEN, 2, 1) +DEF_REG_FIELD(SYSCFG_ISDBTRUSTED, 3, 1) +DEF_REG_FIELD(SYSCFG_GIE, 4, 1) +DEF_REG_FIELD(SYSCFG_ISDBREADY, 5, 1) +DEF_REG_FIELD(SYSCFG_PCYCLEEN, 6, 1) +DEF_REG_FIELD(SYSCFG_V2X, 7, 1) +DEF_REG_FIELD(SYSCFG_IGNOREDABORT, 8, 1) +DEF_REG_FIELD(SYSCFG_PM, 9, 1) +DEF_REG_FIELD(SYSCFG_TLBLOCK, 11, 1) +DEF_REG_FIELD(SYSCFG_K0LOCK, 12, 1) +DEF_REG_FIELD(SYSCFG_BQ, 13, 1) +DEF_REG_FIELD(SYSCFG_PRIO, 14, 1) +DEF_REG_FIELD(SYSCFG_DMT, 15, 1) +DEF_REG_FIELD(SYSCFG_L2CFG, 16, 3) +DEF_REG_FIELD(SYSCFG_ITCM, 19, 1) +DEF_REG_FIELD(SYSCFG_L2NWA, 21, 1) +DEF_REG_FIELD(SYSCFG_L2NRA, 22, 1) +DEF_REG_FIELD(SYSCFG_L2WB, 23, 1) +DEF_REG_FIELD(SYSCFG_L2P, 24, 1) +DEF_REG_FIELD(SYSCFG_SLVCTL0, 25, 2) +DEF_REG_FIELD(SYSCFG_SLVCTL1, 27, 2) 
+DEF_REG_FIELD(SYSCFG_L2PARTSIZE, 29, 2) +DEF_REG_FIELD(SYSCFG_L2GCA, 31, 1) + +/* SSR fields */ +DEF_REG_FIELD(SSR_CAUSE, 0, 8) +DEF_REG_FIELD(SSR_ASID, 8, 7) +DEF_REG_FIELD(SSR_UM, 16, 1) +DEF_REG_FIELD(SSR_EX, 17, 1) +DEF_REG_FIELD(SSR_IE, 18, 1) +DEF_REG_FIELD(SSR_GM, 19, 1) +DEF_REG_FIELD(SSR_V0, 20, 1) +DEF_REG_FIELD(SSR_V1, 21, 1) +DEF_REG_FIELD(SSR_BVS, 22, 1) +DEF_REG_FIELD(SSR_CE, 23, 1) +DEF_REG_FIELD(SSR_PE, 24, 1) +DEF_REG_FIELD(SSR_BP, 25, 1) +DEF_REG_FIELD(SSR_XE2, 26, 1) +DEF_REG_FIELD(SSR_XA, 27, 3) +DEF_REG_FIELD(SSR_SS, 30, 1) +DEF_REG_FIELD(SSR_XE, 31, 1) + +/* misc registers */ +DEF_REG_FIELD(IMASK_MASK, 0, 16) + +DEF_REG_FIELD(STID_PRIO, 16, 8) +DEF_REG_FIELD(STID_STID, 0, 8) + +/* MODECTL fields */ +DEF_REG_FIELD(MODECTL_E, 0, 8) +DEF_REG_FIELD(MODECTL_W, 16, 8) + +DEF_REG_FIELD(CCR_L1ICP, 0, 2) +DEF_REG_FIELD(CCR_L1DCP, 3, 2) +DEF_REG_FIELD(CCR_L2CP, 6, 2) + +DEF_REG_FIELD(CCR_HFI, 16, 1) +DEF_REG_FIELD(CCR_HFD, 17, 1) +DEF_REG_FIELD(CCR_HFIL2, 18, 1) +DEF_REG_FIELD(CCR_HFDL2, 19, 1) +DEF_REG_FIELD(CCR_SFD, 20, 1) + +DEF_REG_FIELD(CCR_GIE, 24, 1) +DEF_REG_FIELD(CCR_GTE, 25, 1) +DEF_REG_FIELD(CCR_GEE, 26, 1) +DEF_REG_FIELD(CCR_GRE, 27, 1) +DEF_REG_FIELD(CCR_VV1, 29, 1) +DEF_REG_FIELD(CCR_VV2, 30, 1) +DEF_REG_FIELD(CCR_VV3, 31, 1) From 0a39a723dfb136ec974e61fd5e03b7afd3d2caa6 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 18:34:38 -0700 Subject: [PATCH 023/126] target/hexagon: Implement do_raise_exception() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/internal.h | 5 +++++ target/hexagon/op_helper.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 96581413165c6..7cf7bcaa6cd8b 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -31,6 +31,11 @@ void hexagon_debug(CPUHexagonState *env); extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS]; +void G_NORETURN 
do_raise_exception(CPUHexagonState *env, + uint32_t exception, + target_ulong PC, + uintptr_t retaddr); + #ifndef CONFIG_USER_ONLY extern const VMStateDescription vmstate_hexagon_cpu; #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index ccd806836cf7d..1aa5b32b1f732 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -37,6 +37,26 @@ #define SF_MANTBITS 23 /* Exceptions processing helpers */ +G_NORETURN +void do_raise_exception(CPUHexagonState *env, uint32_t exception, + target_ulong PC, uintptr_t retaddr) +{ + CPUState *cs = env_cpu(env); +#ifdef CONFIG_USER_ONLY + qemu_log_mask(CPU_LOG_INT, "%s: 0x%08x\n", __func__, exception); +#else + qemu_log_mask(CPU_LOG_INT, "%s: 0x%08x, @ %08" PRIx32 "\n", + __func__, exception, PC); + + ASSERT_DIRECT_TO_GUEST_UNSET(env, exception); +#endif + + env->gpr[HEX_REG_PC] = PC; + cs->exception_index = exception; + cs->halted = false; /* must precede the noreturn exit below */ + cpu_loop_exit_restore(cs, retaddr); +} + G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc) From 0ef74b83b42fb2c386fd086846dec2865dc14986 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 29 May 2024 13:06:36 -0500 Subject: [PATCH 024/126] target/hexagon: Add system reg insns Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_common.py | 15 +- target/hexagon/imported/encode_pp.def | 213 +++++++++++++++------ target/hexagon/imported/system.idef | 262 +++++++++++++++++++++++--- target/hexagon/macros.h | 2 + 4 files changed, 410 insertions(+), 82 deletions(-) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index f7d8b0d092d73..4c77fcf4db440 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -1239,11 +1239,18 @@ def init_registers(): for reg in new_regs: new_registers[f"{reg.regtype}{reg.regid}"] = reg +def is_new_reg(tag, regid): + if regid[0] in "NO": + return True + return regid[0] == "P" and \ +
f"{regid}N" in semdict[tag] and \ + f"{regid}V" not in semdict[tag] + def get_register(tag, regtype, regid): - if f"{regtype}{regid}V" in semdict[tag]: - return registers[f"{regtype}{regid}"] - else: - return new_registers[f"{regtype}{regid}"] + regid = f"{regtype}{regid}" + is_new = is_new_reg(tag, regid) + reg = new_registers[regid] if is_new else registers[regid] + return reg def helper_ret_type(tag, regs): ## If there is a scalar result, it is the return type diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 0cd30a5e85755..37faf62b1b7ea 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ */ /* + * encode32.def * Encodings for 32 bit instructions * */ @@ -341,6 +342,8 @@ DEF_ENC32(L4_pload##TAG##tnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP110tti 1--ddd DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP111tti 1--ddddd") + + /* 0 000 misc: dealloc,loadw_locked,dcfetch */ STD_LD_ENC(bzw4,"0 101") STD_LD_ENC(bzw2,"0 011") @@ -375,6 +378,7 @@ DEF_ANTICLASS32(ICLASS_LD" 1110 000----- PP------ --------",LD_ADDR_POST_REG) DEF_ENC32(L2_deallocframe, ICLASS_LD" 000 0 000 sssss PP0----- ---ddddd") DEF_ENC32(L4_return, ICLASS_LD" 011 0 000 sssss PP0000-- ---ddddd") + DEF_ENC32(L4_return_t, ICLASS_LD" 011 0 000 sssss PP0100vv ---ddddd") DEF_ENC32(L4_return_f, ICLASS_LD" 011 0 000 sssss PP1100vv ---ddddd") DEF_ENC32(L4_return_tnew_pt, ICLASS_LD" 011 0 000 sssss PP0110vv ---ddddd") @@ -382,15 +386,18 @@ DEF_ENC32(L4_return_fnew_pt, ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd") DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv 
---ddddd") DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd") -DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") - +/** Load Acquire Store Release Encoding **/ +DEF_ENC32(L2_loadw_locked, ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") +DEF_ENC32(L4_loadd_locked, ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") DEF_ENC32(L2_loadw_aq, ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd") DEF_ENC32(L4_loadd_aq, ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd") -DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") -DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") + +DEF_ENC32(S2_storew_locked, ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") +DEF_ENC32(S4_stored_locked, ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") + DEF_ENC32(S2_storew_rl_at_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd") DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") @@ -398,13 +405,11 @@ DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") DEF_ENC32(S4_stored_rl_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd") DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd") -DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") -DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii") -DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") - - - +DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") +DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") +DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii 001iiiii") +DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") @@ -488,13 +493,17 @@ STD_PST_ENC(rinew, "1 101","10ttt") /* x bus/cache */ /* x store/cache */ DEF_ENC32(S2_allocframe, ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii") -DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") 
-DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") +DEF_ENC32(Y5_l2locka, ICLASS_ST" 000 01 11sssss PP1----- ------dd") DEF_ENC32(Y2_dczeroa, ICLASS_ST" 000 01 10sssss PP0----- --------") -DEF_ENC32(Y2_barrier, ICLASS_ST" 100 00 00----- PP------ 000-----") +DEF_ENC32(Y2_barrier, ICLASS_ST" 100 00 00----- PP------ 000-----") DEF_ENC32(Y2_syncht, ICLASS_ST" 100 00 10----- PP------ --------") +DEF_ENC32(Y2_l2kill, ICLASS_ST" 100 00 01----- PP-000-- --------") +DEF_ENC32(Y5_l2gunlock, ICLASS_ST" 100 00 01----- PP-010-- --------") +DEF_ENC32(Y5_l2gclean, ICLASS_ST" 100 00 01----- PP-100-- --------") +DEF_ENC32(Y5_l2gcleaninv, ICLASS_ST" 100 00 01----- PP-110-- --------") +DEF_ENC32(Y2_l2cleaninvidx,ICLASS_ST" 100 00 11sssss PP------ --------") @@ -502,9 +511,34 @@ DEF_ENC32(Y2_dccleana, ICLASS_ST" 000 00 00sssss PP------ --------") DEF_ENC32(Y2_dcinva, ICLASS_ST" 000 00 01sssss PP------ --------") DEF_ENC32(Y2_dccleaninva, ICLASS_ST" 000 00 10sssss PP------ --------") -DEF_ENC32(Y4_l2fetch, ICLASS_ST" 011 00 00sssss PP-ttttt 000-----") +/* Super */ +DEF_ENC32(Y2_dckill, ICLASS_ST" 001 00 00----- PP------ --------") +DEF_ENC32(Y2_dccleanidx, ICLASS_ST" 001 00 01sssss PP------ --------") +DEF_ENC32(Y2_dcinvidx, ICLASS_ST" 001 00 10sssss PP------ --------") +DEF_ENC32(Y2_dccleaninvidx,ICLASS_ST" 001 00 11sssss PP------ --------") + +DEF_ENC32(Y2_dctagw ,ICLASS_ST" 010 00 00sssss PP-ttttt --------") +DEF_ENC32(Y2_dctagr ,ICLASS_ST" 010 00 01sssss PP------ ---ddddd") + +DEF_ENC32(Y4_l2tagw ,ICLASS_ST" 010 00 10sssss PP0ttttt --------") +DEF_ENC32(Y4_l2tagr ,ICLASS_ST" 010 00 11sssss PP------ ---ddddd") + +DEF_ENC32(Y4_l2fetch, ICLASS_ST" 011 00 00sssss PP-ttttt 000-----") +DEF_ENC32(Y5_l2cleanidx, ICLASS_ST" 011 00 01sssss PP------ --------") +DEF_ENC32(Y5_l2invidx, ICLASS_ST" 011 00 10sssss PP------ --------") +DEF_ENC32(Y5_l2unlocka, ICLASS_ST" 011 00 11sssss PP------ --------") DEF_ENC32(Y5_l2fetch, ICLASS_ST" 011 01 00sssss PP-ttttt --------") 
+DEF_ENC32(Y6_l2gcleanpa, ICLASS_ST" 011 01 01----- PP-ttttt --------") +DEF_ENC32(Y6_l2gcleaninvpa,ICLASS_ST" 011 01 10----- PP-ttttt --------") + + + + + + + + /*******************************/ /* */ /* */ @@ -547,13 +581,23 @@ DEF_ENC32(J2_jumprfnewpt, ICLASS_J" 0011 011sssss PP-11-uu --------") DEF_FIELDROW_DESC32(ICLASS_J" 0100 -------- PP------ --------","[#4] (#u8) ") DEF_ENC32(J2_trap0, ICLASS_J" 0100 00------ PP-iiiii ---iii--") -DEF_ENC32(J2_pause, ICLASS_J" 0100 01------ PP-iiiii ---iii--") +DEF_ENC32(J2_trap1, ICLASS_J" 0100 10-xxxxx PP-iiiii ---iii--") +DEF_ENC32(J2_pause, ICLASS_J" 0100 01----ii PP-iiiii ---iii--") + +DEF_FIELDROW_DESC32(ICLASS_J" 0101 -------- PP------ --------","[#5] Rd=(Rs) ") +DEF_ENC32(Y2_icdatar, ICLASS_J" 0101 101sssss PP------ ---ddddd") +DEF_ENC32(Y2_ictagr, ICLASS_J" 0101 111sssss PP------ ---ddddd") +DEF_ENC32(Y2_ictagw, ICLASS_J" 0101 110sssss PP0ttttt --------") +DEF_ENC32(Y2_icdataw, ICLASS_J" 0101 110sssss PP1ttttt --------") DEF_FIELDROW_DESC32(ICLASS_J" 0110 -------- PP------ --------","[#6] icop(Rs) ") DEF_ENC32(Y2_icinva, ICLASS_J" 0110 110sssss PP000--- --------") +DEF_ENC32(Y2_icinvidx, ICLASS_J" 0110 110sssss PP001--- --------") +DEF_ENC32(Y2_ickill, ICLASS_J" 0110 110----- PP010--- --------") DEF_FIELDROW_DESC32(ICLASS_J" 0111 -------- PP------ --------","[#7] () ") DEF_ENC32(Y2_isync, ICLASS_J" 0111 11000000 PP0---00 00000010") +DEF_ENC32(J2_rte, ICLASS_J" 0111 111----- PP00---- 000-----") /* JUMP */ DEF_FIELDROW_DESC32(ICLASS_J" 100- -------- PP------ --------","[#8,9] PC=(#r22)") @@ -591,7 +635,6 @@ DEF_ENC32(J2_callf, ICLASS_J" 1101 ii1iiiii PPi-0-uu iiiiiii-") /*******************************/ -/* EJP: this has to match what we have in htmldocs.py... 
so I will call it CJ, we can change it */ DEF_CLASS32(ICLASS_CJ" 0--- -------- PP------ --------",CJ) DEF_FIELDROW_DESC32(ICLASS_CJ" 00-- -------- -------- --------","[#0-3] pd=cmp.xx(R,#u5) ; if ([!]p0.new) jump:[h] #s9:2 ") @@ -738,12 +781,19 @@ DEF_ENC32(J2_jumprltezpt,ICLASS_CR" 0001 11isssss PPi1iiii iiiiiii-") DEF_FIELDROW_DESC32( ICLASS_CR" 0010 -------- PP------ --------","[#2] Cd=Rs ") DEF_ENC32(A2_tfrrcr, ICLASS_CR" 0010 001sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgrcr, ICLASS_CR" 0010 000sssss PP------ ---ddddd") +DEF_ENC32(Y4_trace, ICLASS_CR" 0010 010sssss PP------ 000-----") +DEF_ENC32(Y6_diag, ICLASS_CR" 0010 010sssss PP------ 001-----") +DEF_ENC32(Y6_diag0, ICLASS_CR" 0010 010sssss PP-ttttt 010-----") +DEF_ENC32(Y6_diag1, ICLASS_CR" 0010 010sssss PP-ttttt 011-----") DEF_FIELDROW_DESC32( ICLASS_CR" 0011 -------- PP------ --------","[#3] Cdd=Rss ") DEF_ENC32(A4_tfrpcp, ICLASS_CR" 0011 001sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgpcp, ICLASS_CR" 0011 000sssss PP------ ---ddddd") DEF_FIELDROW_DESC32( ICLASS_CR" 1000 -------- PP------ --------","[#8] Rdd=Css ") DEF_ENC32(A4_tfrcpp, ICLASS_CR" 1000 000sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgcpp, ICLASS_CR" 1000 001sssss PP------ ---ddddd") DEF_FIELDROW_DESC32( ICLASS_CR" 1001 -------- PP------ --------","[#9] (#r8,#U10)") DEF_ENC32(J2_ploop1si, ICLASS_CR" 1001 101IIIII PP-iiiii IIIii-II") @@ -754,6 +804,7 @@ DEF_ENC32(J2_loop1i, ICLASS_CR" 1001 001IIIII PP-iiiii IIIii-II") DEF_FIELDROW_DESC32( ICLASS_CR" 1010 -------- PP------ --------","[#10] Rd=Cs ") DEF_ENC32(A2_tfrcrr, ICLASS_CR" 1010 000sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgcrr, ICLASS_CR" 1010 001sssss PP------ ---ddddd") DEF_ENC32(C4_addipc, ICLASS_CR" 1010 01001001 PP-iiiii i--ddddd") @@ -776,8 +827,66 @@ DEF_ENC32(C4_and_orn, ICLASS_CR" 1011 1011--ss PP0---tt uu----dd") DEF_ENC32(C4_or_andn, ICLASS_CR" 1011 1101--ss PP0---tt uu----dd") DEF_ENC32(C4_or_orn, ICLASS_CR" 1011 1111--ss PP0---tt uu----dd") -DEF_ENC32(C4_fastcorner9, 
ICLASS_CR"1011 0000--ss PP1---tt 1--1--dd") -DEF_ENC32(C4_fastcorner9_not, ICLASS_CR"1011 0001--ss PP1---tt 1--1--dd") +DEF_ENC32(C4_fastcorner9, ICLASS_CR"1011 0000--ss PP1---tt 1--1--dd") +DEF_ENC32(C4_fastcorner9_not, ICLASS_CR"1011 0001--ss PP1---tt 1--1--dd") + + + +/* Supervisor CR ops */ +/* Interrupts */ +DEF_FIELDROW_DESC32( ICLASS_CR" 0100 -------- PP------ --------","[#4] (Rs,Pt)") +DEF_ENC32(Y2_swi, ICLASS_CR" 0100 000sssss PP------ 000-----") +DEF_ENC32(Y2_cswi, ICLASS_CR" 0100 000sssss PP------ 001-----") +DEF_ENC32(Y2_iassignw, ICLASS_CR" 0100 000sssss PP------ 010-----") +DEF_ENC32(Y2_ciad, ICLASS_CR" 0100 000sssss PP------ 011-----") +DEF_ENC32(Y2_setimask, ICLASS_CR" 0100 100sssss PP----tt 000-----") +DEF_ENC32(Y2_setprio, ICLASS_CR" 0100 100sssss PP----tt 001-----") +DEF_ENC32(Y4_siad, ICLASS_CR" 0100 100sssss PP------ 011-----") + +DEF_ENC32(Y2_wait, ICLASS_CR" 0100 010sssss PP------ 000-----") +DEF_ENC32(Y2_resume, ICLASS_CR" 0100 010sssss PP------ 001-----") +DEF_ENC32(Y2_stop, ICLASS_CR" 0100 011sssss PP------ 000-----") +DEF_ENC32(Y2_start, ICLASS_CR" 0100 011sssss PP------ 001-----") +DEF_ENC32(Y4_nmi, ICLASS_CR" 0100 011sssss PP------ 010-----") + +DEF_FIELDROW_DESC32( ICLASS_CR" 0101 -------- PP------ --------","[#5] Rx ") +DEF_ENC32(Y2_crswap0, ICLASS_CR" 0101 000xxxxx PP------ --------") +DEF_ENC32(Y4_crswap1, ICLASS_CR" 0101 001xxxxx PP------ --------") + +DEF_FIELDROW_DESC32( ICLASS_CR" 0110 -------- PP------ --------","[#6] Rd=(Rs)") +DEF_ENC32(Y2_getimask, ICLASS_CR" 0110 000sssss PP------ ---ddddd") +DEF_ENC32(Y2_iassignr, ICLASS_CR" 0110 011sssss PP------ ---ddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 0111 -------- PP------ --------","[#7] cr=Rs ") +DEF_ENC32(Y2_tfrsrcr, ICLASS_CR" 0111 00-sssss PP------ -ddddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1100 -------- PP------ --------","[#12] ") +DEF_ENC32(Y2_break, ICLASS_CR" 1100 001----- PP------ 000-----") +DEF_ENC32(Y2_tlblock, ICLASS_CR" 1100 001----- PP------ 001-----") 
+DEF_ENC32(Y2_tlbunlock,ICLASS_CR" 1100 001----- PP------ 010-----") +DEF_ENC32(Y2_k0lock, ICLASS_CR" 1100 001----- PP------ 011-----") +DEF_ENC32(Y2_k0unlock, ICLASS_CR" 1100 001----- PP------ 100-----") +DEF_ENC32(Y2_tlbp, ICLASS_CR" 1100 100sssss PP------ ---ddddd") +DEF_ENC32(Y5_tlboc, ICLASS_CR" 1100 111sssss PP------ ---ddddd") +DEF_ENC32(Y5_tlbasidi, ICLASS_CR" 1100 101sssss PP------ --------") +DEF_ENC32(Y2_tlbr, ICLASS_CR" 1100 010sssss PP------ ---ddddd") +DEF_ENC32(Y2_tlbw, ICLASS_CR" 1100 000sssss PP0ttttt --------") +DEF_ENC32(Y5_ctlbw, ICLASS_CR" 1100 110sssss PP0ttttt ---ddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1101 -------- PP------ --------","[#13] Rxx ") +DEF_ENC32(Y4_crswap10, ICLASS_CR" 1101 10-xxxxx PP------ ---00000") +DEF_ENC32(Y4_tfrspcp, ICLASS_CR" 1101 00-sssss PP------ -ddddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1110 -------- PP------ --------","[#14] Rd=cr ") +DEF_ENC32(Y2_tfrscrr, ICLASS_CR" 1110 1sssssss PP------ ---ddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1111 -------- PP------ --------","[#15] Rdd=Sss ") +DEF_ENC32(Y4_tfrscpp, ICLASS_CR" 1111 0sssssss PP------ ---ddddd") + + + + + @@ -956,9 +1065,9 @@ MPY_ENC(F2_dfmin, "1000","ddddd","0","0","1","1","11") MPY_ENC(F2_dfmax, "1000","ddddd","0","1","0","0","11") MPY_ENC(F2_dfmpyll, "1000","ddddd","0","1","0","1","11") -MPY_ENC(M7_dcmpyrw, "1000","ddddd","0","0","0","1","10") +MPY_ENC(M7_dcmpyrw, "1000","ddddd","0","0","0","1","10") MPY_ENC(M7_dcmpyrwc, "1000","ddddd","0","0","1","1","10") -MPY_ENC(M7_dcmpyiw, "1000","ddddd","0","1","1","0","10") +MPY_ENC(M7_dcmpyiw, "1000","ddddd","0","1","1","0","10") MPY_ENC(M7_dcmpyiwc, "1000","ddddd","0","1","1","1","10") @@ -967,14 +1076,14 @@ DEF_FIELDROW_DESC32(ICLASS_M" 1001 -------- PP------ --------","[#9] Rd=(Rss,Rtt MPY_ENC(M2_vdmpyrs_s0, "1001","ddddd","0","0","0","0","00") MPY_ENC(M2_vdmpyrs_s1, "1001","ddddd","0","0","0","1","00") -MPY_ENC(M7_wcmpyrw, "1001","ddddd","0","0","1","0","00") +MPY_ENC(M7_wcmpyrw, 
"1001","ddddd","0","0","1","0","00") MPY_ENC(M7_wcmpyrw_rnd, "1001","ddddd","0","0","1","1","00") -MPY_ENC(M7_wcmpyiw, "1001","ddddd","0","1","0","0","00") +MPY_ENC(M7_wcmpyiw, "1001","ddddd","0","1","0","0","00") MPY_ENC(M7_wcmpyiw_rnd, "1001","ddddd","0","1","0","1","00") -MPY_ENC(M7_wcmpyrwc, "1001","ddddd","0","1","1","0","00") +MPY_ENC(M7_wcmpyrwc, "1001","ddddd","0","1","1","0","00") MPY_ENC(M7_wcmpyrwc_rnd, "1001","ddddd","0","1","1","1","00") -MPY_ENC(M7_wcmpyiwc, "1001","ddddd","1","0","0","0","00") +MPY_ENC(M7_wcmpyiwc, "1001","ddddd","1","0","0","0","00") MPY_ENC(M7_wcmpyiwc_rnd, "1001","ddddd","1","0","0","1","00") @@ -1030,10 +1139,10 @@ MPY_ENC(F2_dfmpylh, "1010","xxxxx","0","0","0","0","11") MPY_ENC(F2_dfmpyhh, "1010","xxxxx","0","0","0","1","11") -MPY_ENC(M7_dcmpyrw_acc, "1010","xxxxx","0","0","0","1","10") -MPY_ENC(M7_dcmpyrwc_acc, "1010","xxxxx","0","0","1","1","10") -MPY_ENC(M7_dcmpyiw_acc, "1010","xxxxx","0","1","1","0","10") -MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") +MPY_ENC(M7_dcmpyrw_acc, "1010","xxxxx","0","0","0","1","10") +MPY_ENC(M7_dcmpyrwc_acc, "1010","xxxxx","0","0","1","1","10") +MPY_ENC(M7_dcmpyiw_acc, "1010","xxxxx","0","1","1","0","10") +MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") @@ -1063,7 +1172,6 @@ SP_MPY(M2_mpy_sat_rnd, "1100","ddddd","1","1","0") SP_MPY(M2_mpyu, "1100","ddddd","0","0","1") DEF_FIELDROW_DESC32(ICLASS_M" 1101 -------- PP------ --------","[#13] Rd=(Rs,Rt)") -/* EJP: same as mpyi MPY_ENC(M2_mpyui, "1101","ddddd","0","0","1","0","00") */ MPY_ENC(M2_mpyi, "1101","ddddd","0","0","0","0","00") MPY_ENC(M2_mpy_up, "1101","ddddd","0","0","0","0","01") MPY_ENC(M2_mpyu_up, "1101","ddddd","0","0","1","0","01") @@ -1266,7 +1374,6 @@ DEF_ENC32(C2_cmovenewif,ICLASS_ALU2op" 1110 1uu0iiii PP1iiiii iiiddddd") DEF_ENC32(C2_cmoveit, ICLASS_ALU2op" 1110 0uu0iiii PP0iiiii iiiddddd") DEF_ENC32(C2_cmoveif, ICLASS_ALU2op" 1110 1uu0iiii PP0iiiii iiiddddd") - DEF_FIELDROW_DESC32( ICLASS_ALU2op" 1111 
-------- PP------ --------","[#15] nop") DEF_ENC32(A2_nop, ICLASS_ALU2op" 1111 -------- PP------ --------") @@ -1408,9 +1515,6 @@ DEF_FIELDROW_DESC32(ICLASS_ALU3op" 1110 -------- PP------ --------","[#14] Rese - - - /*******************************/ /* */ /* */ @@ -1508,7 +1612,6 @@ SH_RRI6_ENC(S6_rol_i_##TAGEND,MAJ4,MIN3,SMOD1 "11",DSTCHARS) DEF_FIELDROW_DESC32(ICLASS_S2op" 0000 -------- PP------ --------","[#0] Rdd=(Rss,#u6)") -/* EJP: there is actually quite a bit of space here, look at the reserved bits */ I6SHIFTTYPES(p, "0000","000","0","ddddd") I5SHIFTTYPES_NOROL(vw, "0000","010","0","ddddd") I4SHIFTTYPES(vh, "0000","100","0","ddddd") @@ -1620,8 +1723,8 @@ SH2_RR_ENC(A2_roundsat, "1000","110","-","001","ddddd") SH_RRI5_ENC(S2_asr_i_svw_trun, "1000","110", "010","ddddd") SH_RRI5_ENC(A4_bitspliti, "1000","110", "100","ddddd") -SH_RRI5_ENC(A7_clip, "1000","110", "101","ddddd") -SH_RRI5_ENC(A7_vclip, "1000","110", "110","ddddd") +SH_RRI5_ENC(A7_clip, "1000","110", "101","ddddd") +SH_RRI5_ENC(A7_vclip, "1000","110", "110","ddddd") SH2_RR_ENC(S4_clbpnorm, "1000","011","-","000","ddddd") @@ -1743,10 +1846,11 @@ SH_RRR_ENC(S2_shuffob, "0001","00-","-","10-","ddddd") SH_RRR_ENC(S2_shuffeh, "0001","00-","-","11-","ddddd") SH_RRR_ENC(S2_shuffoh, "0001","10-","-","000","ddddd") +// 001 SH_RRR_ENC(S2_vtrunewh, "0001","10-","-","010","ddddd") -SH_RRR_ENC(S6_vtrunehb_ppp, "0001","10-","-","011","ddddd") +SH_RRR_ENC(S6_vtrunehb_ppp, "0001","10-","-","011","ddddd") SH_RRR_ENC(S2_vtrunowh, "0001","10-","-","100","ddddd") -SH_RRR_ENC(S6_vtrunohb_ppp, "0001","10-","-","101","ddddd") +SH_RRR_ENC(S6_vtrunohb_ppp, "0001","10-","-","101","ddddd") SH_RRR_ENC(S2_lfsp, "0001","10-","-","110","ddddd") SH_RRR_ENC(S4_vxaddsubw, "0001","01-","-","000","ddddd") @@ -1780,8 +1884,6 @@ SH_RRR_ENC(S4_vrcrotate, "0011","11-","i","11i","ddddd") DEF_FIELDROW_DESC32(ICLASS_S3op" 0100 -------- PP------ --------","[#4] Rd=(Rs,Rt,#u3)") DEF_ENC32(S2_addasl_rrri, ICLASS_S3op" 0100 000 sssss PP0ttttt 
iiiddddd") - - DEF_FIELDROW_DESC32(ICLASS_S3op" 0101 -------- PP------ --------","[#5] Rd=(Rss,Rt)") SH_RRR_ENC(S2_asr_r_svw_trun, "0101","---","-","010","ddddd") SH_RRR_ENC(M4_cmpyi_wh, "0101","---","-","100","ddddd") @@ -1841,6 +1943,7 @@ DEF_FIELDROW_DESC32(ICLASS_S3op" 1010 -------- PP------ --------","[#10] Rxx=(Rs SH_RRR_ENC(S2_insertp_rp, "1010","0--","0","---","xxxxx") SH_RRR_ENC(M4_xor_xacc, "1010","10-","0","000","xxxxx") + DEF_FIELDROW_DESC32(ICLASS_S3op" 1011 -------- PP------ --------","[#11] Rxx=(Rss,Rt)") RSHIFTTYPES(p_or, "1011","000","-","-","xxxxx") RSHIFTTYPES(p_and, "1011","010","-","-","xxxxx") @@ -1848,19 +1951,19 @@ RSHIFTTYPES(p_nac, "1011","100","-","-","xxxxx") RSHIFTTYPES(p_acc, "1011","110","-","-","xxxxx") RSHIFTTYPES(p_xor, "1011","011","-","-","xxxxx") -SH_RRR_ENCX(A4_vrmaxh, "1011","001","0","001","uuuuu") -SH_RRR_ENCX(A4_vrmaxuh, "1011","001","1","001","uuuuu") -SH_RRR_ENCX(A4_vrmaxw, "1011","001","0","010","uuuuu") -SH_RRR_ENCX(A4_vrmaxuw, "1011","001","1","010","uuuuu") +SH_RRR_ENCX(A4_vrmaxh, "1011","001","0","001","uuuuu") +SH_RRR_ENCX(A4_vrmaxuh, "1011","001","1","001","uuuuu") +SH_RRR_ENCX(A4_vrmaxw, "1011","001","0","010","uuuuu") +SH_RRR_ENCX(A4_vrmaxuw, "1011","001","1","010","uuuuu") -SH_RRR_ENCX(A4_vrminh, "1011","001","0","101","uuuuu") -SH_RRR_ENCX(A4_vrminuh, "1011","001","1","101","uuuuu") -SH_RRR_ENCX(A4_vrminw, "1011","001","0","110","uuuuu") -SH_RRR_ENCX(A4_vrminuw, "1011","001","1","110","uuuuu") +SH_RRR_ENCX(A4_vrminh, "1011","001","0","101","uuuuu") +SH_RRR_ENCX(A4_vrminuh, "1011","001","1","101","uuuuu") +SH_RRR_ENCX(A4_vrminw, "1011","001","0","110","uuuuu") +SH_RRR_ENCX(A4_vrminuw, "1011","001","1","110","uuuuu") -SH_RRR_ENC(S2_vrcnegh, "1011","001","1","111","xxxxx") +SH_RRR_ENC(S2_vrcnegh, "1011","001","1","111","xxxxx") -SH_RRR_ENC(S4_vrcrotate_acc, "1011","101","i","--i","xxxxx") +SH_RRR_ENC(S4_vrcrotate_acc, "1011","101","i","--i","xxxxx") DEF_FIELDROW_DESC32(ICLASS_S3op" 1100 -------- PP------ 
--------","[#12] Rx=(Rs,Rt)") @@ -1874,11 +1977,6 @@ DEF_FIELDROW_DESC32(ICLASS_S3op" 1101 -------- PP------ --------","[#13] Reserve DEF_FIELDROW_DESC32(ICLASS_S3op" 1110 -------- PP------ --------","[#14] Reserved") -DEF_FIELDROW_DESC32(ICLASS_S3op" 1111 -------- PP------ --------","[#14] User Instruction") - - - - @@ -2129,3 +2227,6 @@ OP_OPI_RI(lsr,"1") DEF_FIELDROW_DESC32(ICLASS_ALU64" 1111 -------- PP------ --------","[#15] Rd=(Rs,Ru,#u6:2)") DEF_ENC32(M4_mpyri_addr_u2, ICLASS_ALU64" 1111 0ii sssss PPiddddd iiiuuuuu") DEF_ENC32(M4_mpyri_addr, ICLASS_ALU64" 1111 1ii sssss PPiddddd iiiuuuuu") + + + diff --git a/target/hexagon/imported/system.idef b/target/hexagon/imported/system.idef index 7c6568e75e429..fd7ef18b3e340 100644 --- a/target/hexagon/imported/system.idef +++ b/target/hexagon/imported/system.idef @@ -25,44 +25,262 @@ /* User->OS interface */ /********************************************/ -Q6INSN(J2_trap0,"trap0(#u8)",ATTRIBS(A_COF), +Q6INSN(J2_trap0,"trap0(#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Trap to Operating System", - fTRAP(0,uiV); + fTRAP(0,uiV); ) -Q6INSN(J2_pause,"pause(#u8)",ATTRIBS(A_COF), +Q6INSN(J2_trap1,"trap1(Rx32,#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"Trap to Operating System", + /* + * Note: if RxV is not written, we get the same as the input. 
+ * Since trap1 is SOLO, this means the register will effectively not be updated + */ + if (!fTRAP1_VIRTINSN(uiV)) { + fTRAP(1,uiV); + } else if (uiV == 1) { + fVIRTINSN_RTE(uiV,RxV); + } else if (uiV == 3) { + fVIRTINSN_SETIE(uiV,RxV); + } else if (uiV == 4) { + fVIRTINSN_GETIE(uiV,RxV); + } else if (uiV == 6) { + fVIRTINSN_SPSWAP(uiV,RxV); + }) + +Q6INSN(J2_pause,"pause(#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Enter low-power state for #u8 cycles",{fPAUSE(uiV);}) -Q6INSN(Y2_icinva,"icinva(Rs32)",ATTRIBS(A_ICOP,A_ICFLUSHOP),"Instruction Cache Invalidate Address",{fEA_REG(RsV); fICINVA(EA);}) +Q6INSN(J2_rte, "rte", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NO_TIMING_LOG), +"Return from Exception", +{ +fHIDE(if((thread->timing_on) && (thread->status & EXEC_STATUS_REPLAY)) { return; }) +fHIDE(CALLBACK(thread->processor_ptr->options->rte_callback, + thread->system_ptr,thread->processor_ptr, + thread->threadId,0);) +fCLEAR_RTE_EX(); +fBRANCH(fREAD_ELR(),COF_TYPE_RTE);}) + + +/********************************************/ +/* Interrupt Management */ +/********************************************/ + +Q6INSN(Y2_swi,"swi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Software Interrupt",{DO_SWI(RsV);}) +Q6INSN(Y2_cswi,"cswi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Cancel Software Interrupt",{DO_CSWI(RsV);}) +Q6INSN(Y2_ciad,"ciad(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Re-enable interrupt in IAD",{DO_CIAD(RsV);}) +Q6INSN(Y4_siad,"siad(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Disable interrupt in IAD",{DO_SIAD(RsV);}) +Q6INSN(Y2_iassignr,"Rd32=iassignr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Read interrupt to thread assignments",{DO_IASSIGNR(RsV,RdV);}) +Q6INSN(Y2_iassignw,"iassignw(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Write interrupt to thread assignments",{DO_IASSIGNW(RsV);}) + + 
+Q6INSN(Y2_getimask,"Rd32=getimask(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Read imask register of another thread", +{RdV = READ_IMASK(RsV & thread->processor_ptr->thread_system_mask); }) + +Q6INSN(Y2_setimask,"setimask(Pt4,Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Change imask register of another thread", +{fPREDUSE_TIMING();WRITE_IMASK(PtV & thread->processor_ptr->thread_system_mask,RsV); }) + + + +/********************************************/ +/* TLB management */ +/********************************************/ + +Q6INSN(Y2_tlbw,"tlbw(Rss32,Rt32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"Write TLB entry", {fTLBW(RtV,RssV);}) + +Q6INSN(Y5_ctlbw,"Rd32=ctlbw(Rss32,Rt32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"Conditional Write TLB entry", +{ + if (fTLB_ENTRY_OVERLAP( (1LL<<63) | RssV )) { + RdV=fTLB_ENTRY_OVERLAP_IDX( (1LL<<63) | RssV); + } else { + fTLBW(RtV,RssV); + RdV=0x80000000; + } +}) + +Q6INSN(Y5_tlboc,"Rd32=tlboc(Rss32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"TLB overlap check", +{ + if (fTLB_ENTRY_OVERLAP( (1LL<<63) | RssV )) { + RdV=fTLB_ENTRY_OVERLAP_IDX( (1LL<<63) | RssV); + } else { + RdV=0x80000000; + } +}) + +Q6INSN(Y2_tlbr,"Rdd32=tlbr(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Read TLB entry", +{RddV = fTLBR(RsV);}) + +Q6INSN(Y2_tlbp,"Rd32=tlbp(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Probe TLB", {RdV=fTLBP(RsV);}) + +Q6INSN(Y5_tlbasidi,"tlbinvasid(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Invalidate ASID", +{ + fHIDE(int i;) + fHIDE(unsigned int NUM_TLB_ENTRIES = NUM_TLB_REGS(thread->processor_ptr);) + for (i = 0; i < NUM_TLB_ENTRIES; i++) { + if ((fGET_FIELD(fTLBR(i),PTE_G) == 0) && + (fGET_FIELD(fTLBR(i),PTE_ASID) == fEXTRACTU_RANGE(RsV,26,20))) { + fTLBW(i,fTLBR(i) & ~(1ULL << 63)); + } + } +}) + 
+Q6INSN(Y2_tlblock,"tlblock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG), "Lock TLB", +{fSET_TLB_LOCK();}) + +Q6INSN(Y2_tlbunlock,"tlbunlock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Unlock TLB", +{fCLEAR_TLB_LOCK();}) + +Q6INSN(Y2_k0lock,"k0lock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG), "Lock K0", +{fSET_K0_LOCK();}) + +Q6INSN(Y2_k0unlock,"k0unlock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Unlock K0", +{fCLEAR_K0_LOCK();}) + +/********************************************/ +/* Supervisor Reg Management */ +/********************************************/ + +Q6INSN(Y2_crswap0,"crswap(Rx32,sgp0)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general pointer 0 with GPR", +{fHIDE(size4s_t tmp;) tmp = RxV; RxV = READ_SGP0(); WRITE_SGP0(tmp);}) +Q6INSN(Y4_crswap1,"crswap(Rx32,sgp1)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general pointer 1 with GPR", +{fHIDE(size4s_t tmp;) tmp = RxV; RxV = READ_SGP1(); WRITE_SGP1(tmp);}) + +Q6INSN(Y4_crswap10,"crswap(Rxx32,sgp1:0)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general purpose 0/1 with GPR Pair", +{fHIDE(size8s_t tmp;) tmp = RxxV; RxxV=READ_SGP10(); WRITE_SGP10(tmp);}) + +Q6INSN(Y2_tfrscrr,"Rd32=Ss128",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer Supervisor Reg to GPR", {RdV=SsV;}) +Q6INSN(Y2_tfrsrcr,"Sd128=Rs32",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer GPR to Supervisor Reg", {SdV=RsV;}) +Q6INSN(Y4_tfrscpp,"Rdd32=Sss128",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer Supervisor Reg to GPR", {RddV=SssV;}) +Q6INSN(Y4_tfrspcp,"Sdd128=Rss32",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer GPR to Supervisor Reg", {SddV=RssV;}) + +Q6INSN(G4_tfrgcrr,"Rd32=Gs32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer Guest Reg to GPR", {RdV=GsV;}) +Q6INSN(G4_tfrgrcr,"Gd32=Rs32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer GPR to Guest Reg", {GdV=RsV;}) +Q6INSN(G4_tfrgcpp,"Rdd32=Gss32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer Guest Reg to GPR", {RddV=GssV;}) 
+Q6INSN(G4_tfrgpcp,"Gdd32=Rss32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer GPR to Guest Reg", {GddV=RssV;}) -Q6INSN(Y2_isync,"isync",ATTRIBS(),"Memory Synchronization",{fISYNC();}) -Q6INSN(Y2_barrier,"barrier",ATTRIBS(A_RESTRICT_SLOT0ONLY),"Memory Barrier",{fBARRIER();}) -Q6INSN(Y2_syncht,"syncht",ATTRIBS(A_RESTRICT_SLOT0ONLY),"Memory Synchronization",{fSYNCH();}) -Q6INSN(Y2_dcfetchbo,"dcfetch(Rs32+#u11:3)",ATTRIBS(A_RESTRICT_PREFERSLOT0,A_DCFETCH),"Data Cache Prefetch",{fEA_RI(RsV,uiV); fDCFETCH(EA);}) +Q6INSN(Y2_setprio,"setprio(Pt4,Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Change TID Prio of another thread", +{fPREDUSE_TIMING();WRITE_PRIO(PtV & thread->processor_ptr->thread_system_mask,RsV); }) -Q6INSN(Y2_dczeroa,"dczeroa(Rs32)",ATTRIBS(A_STORE,A_RESTRICT_SLOT0ONLY,A_DCZEROA),"Zero an aligned 32-byte cacheline",{fEA_REG(RsV); fDCZEROA(EA);}) -Q6INSN(Y2_dccleana,"dccleana(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Clean Address",{fEA_REG(RsV); fDCCLEANA(EA);}) -Q6INSN(Y2_dccleaninva,"dccleaninva(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Clean and Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) -Q6INSN(Y2_dcinva,"dcinva(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) -Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY),"L2 Cache Prefetch", +/********************************************/ +/* Power Management / Thread on/off */ +/********************************************/ +Q6INSN(Y6_diag,"diag(Rs32)",ATTRIBS(),"Send value to Diag trace module",{ +}) +Q6INSN(Y6_diag0,"diag0(Rss32,Rtt32)",ATTRIBS(),"Send values of two register to DIAG Trace. Set X=0",{ +}) +Q6INSN(Y6_diag1,"diag1(Rss32,Rtt32)",ATTRIBS(),"Send values of two register to DIAG Trace. 
Set X=1",{ +}) + + +Q6INSN(Y4_trace,"trace(Rs32)",ATTRIBS(A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Send value to ETM trace",{ + fDO_TRACE(RsV); +}) + +Q6INSN(Y2_stop,"stop(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Stop thread(s)",{ + fHIDE(RsV=RsV;) + if (!fIN_DEBUG_MODE_NO_ISDB(fGET_TNUM())) fCLEAR_RUN_MODE(fGET_TNUM()); +}) + +Q6INSN(Y4_nmi,"nmi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG),"Raise NMI on thread(s)",{ + fDO_NMI(RsV); +}) + +Q6INSN(Y2_start,"start(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Start thread(s)",fSTART(RsV);) + +Q6INSN(Y2_wait,"wait(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG),"Make thread(s) wait",{ + fHIDE(RsV=RsV;) + if (!fIN_DEBUG_MODE(fGET_TNUM())) fSET_WAIT_MODE(fGET_TNUM()); + fIN_DEBUG_MODE_WARN(fGET_TNUM()); +}) + +Q6INSN(Y2_resume,"resume(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Make thread(s) stop waiting",fRESUME(RsV);) + +Q6INSN(Y2_break,"brkpt",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Breakpoint",{fBREAK();}) + + +/********************************************/ +/* Cache Management */ +/********************************************/ + +Q6INSN(Y2_ictagr,"Rd32=ictagr(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Tag Read",{fICTAGR(RsV,RdV,RdN);}) +Q6INSN(Y2_ictagw,"ictagw(Rs32,Rt32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Tag Write",{fICTAGW(RsV,RtV);}) +Q6INSN(Y2_icdataw,"icdataw(Rs32,Rt32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Data Write",{fICDATAW(RsV,RtV);}) +Q6INSN(Y2_icdatar,"Rd32=icdatar(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction 
Cache Data Read",{fICDATAR(RsV, RdV);}) +Q6INSN(Y2_icinva,"icinva(Rs32)",ATTRIBS(A_ICOP,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYADDRESS,A_ICFLUSHOP),"Instruction Cache Invalidate Address",{fEA_REG(RsV); fICINVA(EA);}) +Q6INSN(Y2_icinvidx,"icinvidx(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICFLUSHOP),"Instruction Cache Invalidate Index",{fICINVIDX(RsV);}) +Q6INSN(Y2_ickill,"ickill",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_ICFLUSHOP),"Instruction Cache Invalidate",{fICKILL();}) + +Q6INSN(Y2_isync,"isync",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Memory Synchronization",{fISYNC();}) +Q6INSN(Y2_barrier,"barrier",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK),"Memory Barrier",{fBARRIER();}) +Q6INSN(Y2_syncht,"syncht",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET),"Memory Synchronization",{fSYNCH();}) + + +Q6INSN(Y2_dcfetchbo,"dcfetch(Rs32+#u11:3)",ATTRIBS(A_RESTRICT_PREFERSLOT0,A_DCFETCH,A_RESTRICT_NOSLOT1_STORE),"Data Cache Prefetch",{fEA_RI(RsV,uiV); fDCFETCH(EA);}) +Q6INSN(Y2_dckill,"dckill",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_DCFLUSHOP),"Data Cache Invalidate",{fDCKILL();}) + + +Q6INSN(Y2_dczeroa,"dczeroa(Rs32)",ATTRIBS(A_STORE,A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCZEROA),"Zero an aligned 32-byte cacheline",{fEA_REG(RsV); fDCZEROA(EA);}) +Q6INSN(Y2_dccleana,"dccleana(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Clean Address",{fEA_REG(RsV); fDCCLEANA(EA);}) +Q6INSN(Y2_dccleanidx,"dccleanidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Clean Index",{fDCCLEANIDX(RsV);}) 
+Q6INSN(Y2_dccleaninva,"dccleaninva(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Clean and Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) +Q6INSN(Y2_dccleaninvidx,"dccleaninvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Clean and Invalidate Index",{fDCCLEANINVIDX(RsV);}) +Q6INSN(Y2_dcinva,"dcinva(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) +Q6INSN(Y2_dcinvidx,"dcinvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Invalidate Index",{fDCINVIDX(RsV);}) +Q6INSN(Y2_dctagr,"Rd32=dctagr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCTAGOP),"Data Cache Tag Read",{fDCTAGR(RsV,RdV,RdN);}) +Q6INSN(Y2_dctagw,"dctagw(Rs32,Rt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_DCTAGOP),"Data Cache Tag Write",{fDCTAGW(RsV,RtV);}) + + +Q6INSN(Y2_l2kill,"l2kill",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Cache Invalidate",{fL2KILL();}) +Q6INSN(Y4_l2tagw,"l2tagw(Rs32,Rt32)",ATTRIBS(A_PRIV,A_NOTE_BADTAG_UNDEF,A_NOTE_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_L2TAGOP),"L2 Cache Tag Write",{fL2TAGW(RsV,RtV);}) +Q6INSN(Y4_l2tagr,"Rd32=l2tagr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_BADTAG_UNDEF,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2TAGOP),"L2 Cache Tag Read",{fL2TAGR(RsV,RdV,RdN);}) + 
+Q6INSN(Y2_l2cleaninvidx,"l2cleaninvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Clean and Invalidate Index",{fL2CLEANINVIDX(RsV); }) +Q6INSN(Y5_l2cleanidx,"l2cleanidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Clean by Index",{fL2CLEANIDX(RsV); }) +Q6INSN(Y5_l2invidx,"l2invidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Invalidate by Index",{fL2INVIDX(RsV); }) + + + +Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch", { fL2FETCH(RsV, - (RtV&0xff), /*height*/ - ((RtV>>8)&0xff), /*width*/ - ((RtV>>16)&0xffff), /*stride*/ - 0); /*extra attrib flags*/ + (RtV&0xff), /*height*/ + ((RtV>>8)&0xff), /*width*/ + ((RtV>>16)&0xffff), /*stride*/ + 0); /*extra attrib flags*/ }) -Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY),"L2 Cache Prefetch", +Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch", { fL2FETCH(RsV, - fGETUHALF(0,RttV), /*height*/ - fGETUHALF(1,RttV), /*width*/ - fGETUHALF(2,RttV), /*stride*/ - fGETUHALF(3,RttV)); /*flags*/ + fGETUHALF(0,RttV), /*height*/ + fGETUHALF(1,RttV), /*width*/ + fGETUHALF(2,RttV), /*stride*/ + fGETUHALF(3,RttV)); /*flags*/ }) + +Q6INSN(Y5_l2locka,"Pd4=l2locka(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_CACHEOP,A_COPBYADDRESS,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_LATEPRED,A_NOTE_LATEPRED), +"Lock L2 cache line by address", { fEA_REG(RsV); fL2LOCKA(EA,PdV,PdN); fHIDE(MARK_LATE_PRED_WRITE(PdN)) }) + + +Q6INSN(Y5_l2unlocka,"l2unlocka(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_CACHEOP,A_COPBYADDRESS,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK), "UnLock L2 cache line by address", { 
fEA_REG(RsV); fL2UNLOCKA(EA); }) + + + +Q6INSN(Y5_l2gunlock,"l2gunlock",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Unlock",{fL2UNLOCK();}) + +Q6INSN(Y5_l2gclean,"l2gclean",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean",{fL2CLEAN();}) + +Q6INSN(Y5_l2gcleaninv,"l2gcleaninv",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate",{fL2CLEANINV();}) + +Q6INSN(Y6_l2gcleanpa,"l2gclean(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean by PA Range",{fL2CLEANPA(RttV);}) + +Q6INSN(Y6_l2gcleaninvpa,"l2gcleaninv(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate by PA Range",{fL2CLEANINVPA(RttV);}) + diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 6e4a3a16970c2..b0e9610d98d55 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -675,3 +675,5 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #endif #endif + +#define fPREDUSE_TIMING() From 6498d068c02bf38947285c9433b68ae722c5988c Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 8 Jul 2024 13:28:08 -0700 Subject: [PATCH 025/126] target/hexagon: Add sysemu TCG overrides Define TCG overrides for setprio(), crswap(,sgp{0,1,1:0}). 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 36 +++++++++++++++++++++++++++++++ target/hexagon/cpu_helper.h | 32 ++++++++++++++++++++++++++++ target/hexagon/gen_tcg_sys.h | 41 ++++++++++++++++++++++++++++++++++++ target/hexagon/genptr.c | 4 ++++ target/hexagon/helper.h | 1 + target/hexagon/hex_common.py | 2 ++ target/hexagon/meson.build | 14 ++++++------ target/hexagon/op_helper.c | 7 ++++++ 8 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 target/hexagon/cpu_helper.c create mode 100644 target/hexagon/cpu_helper.h create mode 100644 target/hexagon/gen_tcg_sys.h diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c new file mode 100644 index 0000000000000..6e4bc85580e6b --- /dev/null +++ b/target/hexagon/cpu_helper.c @@ -0,0 +1,36 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu_helper.h" +#include "system/cpus.h" +#ifdef CONFIG_USER_ONLY +#include "qemu.h" +#include "exec/helper-proto.h" +#else +#include "hw/boards.h" +#include "hw/hexagon/hexagon.h" +#endif +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/log.h" +#include "tcg/tcg-op.h" +#include "internal.h" +#include "macros.h" +#include "sys_macros.h" +#include "arch.h" + + +#ifndef CONFIG_USER_ONLY + +uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg) +{ + g_assert_not_reached(); +} + + +#endif diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h new file mode 100644 index 0000000000000..194bcbf451379 --- /dev/null +++ b/target/hexagon/cpu_helper.h @@ -0,0 +1,32 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_CPU_HELPER_H +#define HEXAGON_CPU_HELPER_H + +static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, + uint32_t val) +{ + g_assert(reg < TOTAL_PER_THREAD_REGS); + g_assert_not_reached(); +} + +static inline uint32_t arch_get_thread_reg(CPUHexagonState *env, uint32_t reg) +{ + g_assert(reg < TOTAL_PER_THREAD_REGS); + g_assert_not_reached(); +} + +static inline void arch_set_system_reg(CPUHexagonState *env, uint32_t reg, + uint32_t val) +{ + g_assert_not_reached(); +} + +uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg); + +#endif + diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h new file mode 100644 index 0000000000000..362703ab45e8a --- /dev/null +++ b/target/hexagon/gen_tcg_sys.h @@ -0,0 +1,41 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_GEN_TCG_SYS_H +#define HEXAGON_GEN_TCG_SYS_H + +#define fGEN_TCG_Y2_setprio(SHORTCODE) \ + gen_helper_setprio(tcg_env, PtV, RsV) + +#define fGEN_TCG_Y2_crswap0(SHORTCODE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_mov_tl(tmp, RxV); \ + tcg_gen_mov_tl(RxV, hex_t_sreg[HEX_SREG_SGP0]); \ + tcg_gen_mov_tl(ctx->t_sreg_new_value[HEX_SREG_SGP0], tmp); \ + } while (0) + +#define fGEN_TCG_Y4_crswap1(SHORTCODE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_mov_tl(tmp, RxV); \ + tcg_gen_mov_tl(RxV, hex_t_sreg[HEX_SREG_SGP1]); \ + tcg_gen_mov_tl(ctx->t_sreg_new_value[HEX_SREG_SGP1], tmp); \ + } while (0) + +#define fGEN_TCG_Y4_crswap10(SHORTCODE) \ + do { \ + g_assert_not_reached(); \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + tcg_gen_mov_i64(tmp, RxxV); \ + tcg_gen_concat_i32_i64(RxxV, \ + hex_t_sreg[HEX_SREG_SGP0], \ + hex_t_sreg[HEX_SREG_SGP1]); \ + tcg_gen_extrl_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP0], tmp); \ + tcg_gen_extrh_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP1], 
tmp); \ + } while (0) + +#endif diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 488d0b4b978b1..5554c9515c4db 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -31,6 +31,10 @@ #undef QEMU_GENERATE #include "gen_tcg.h" #include "gen_tcg_hvx.h" +#ifndef CONFIG_USER_ONLY +#include "gen_tcg_sys.h" +#endif + #include "genptr.h" TCGv gen_read_reg(TCGv result, int num) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index fddbd99a197d7..146f4f02e4158 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -115,4 +115,5 @@ DEF_HELPER_2(greg_read, i32, env, i32) DEF_HELPER_2(greg_read_pair, i64, env, i32) DEF_HELPER_3(sreg_write, void, env, i32, i32) DEF_HELPER_3(sreg_write_pair, void, env, i32, i64) +DEF_HELPER_3(setprio, void, env, i32, i32) #endif diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 4c77fcf4db440..c7c8a53238b24 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -1363,6 +1363,7 @@ def parse_common_args(desc): parser.add_argument("semantics", help="semantics file") parser.add_argument("overrides", help="overrides file") parser.add_argument("overrides_vec", help="vector overrides file") + parser.add_argument("overrides_sys", help="system overrides file") parser.add_argument("out", help="output file") parser.add_argument("--idef-parser", help="file of instructions translated by idef-parser") @@ -1370,6 +1371,7 @@ def parse_common_args(desc): read_semantics_file(args.semantics) read_overrides_file(args.overrides) read_overrides_file(args.overrides_vec) + read_overrides_file(args.overrides_sys) if args.idef_parser: read_idef_parser_enabled_file(args.idef_parser) calculate_attribs() diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index bb4ebaae816ea..3ec53010fa020 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -20,6 +20,7 @@ hexagon_ss = ss.source_set() hex_common_py = 'hex_common.py' 
gen_tcg_h = meson.current_source_dir() / 'gen_tcg.h' gen_tcg_hvx_h = meson.current_source_dir() / 'gen_tcg_hvx.h' +gen_tcg_sys_h = meson.current_source_dir() / 'gen_tcg_sys.h' idef_parser_dir = meson.current_source_dir() / 'idef-parser' # @@ -249,6 +250,7 @@ hexagon_ss.add(files( 'cpu.c', 'translate.c', 'op_helper.c', + 'cpu_helper.c', 'gdbstub.c', 'genptr.c', 'reg_fields.c', @@ -346,12 +348,12 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs # Setup input and dependencies for the next step, this depends on whether or # not idef-parser is enabled helper_dep = [semantics_generated, idef_generated_tcg_c, idef_generated_tcg] - helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, '--idef-parser', idef_generated_list] + helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h, '--idef-parser', idef_generated_list] else # Setup input and dependencies for the next step, this depends on whether or # not idef-parser is enabled helper_dep = [semantics_generated] - helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h] + helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h] endif # @@ -365,7 +367,7 @@ helper_protos_generated = custom_target( 'helper_protos_generated.h.inc', output: 'helper_protos_generated.h.inc', depends: helper_dep, - depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h], command: [python, files('gen_helper_protos.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(helper_protos_generated) @@ -374,7 +376,7 @@ helper_funcs_generated = custom_target( 'helper_funcs_generated.c.inc', output: 'helper_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h], command: [python, files('gen_helper_funcs.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(helper_funcs_generated) @@ -383,7 +385,7 @@ tcg_funcs_generated = 
custom_target( 'tcg_funcs_generated.c.inc', output: 'tcg_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h], command: [python, files('gen_tcg_funcs.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(tcg_funcs_generated) @@ -392,7 +394,7 @@ analyze_funcs_generated = custom_target( 'analyze_funcs_generated.c.inc', output: 'analyze_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h], command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(analyze_funcs_generated) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 1aa5b32b1f732..865e8ebb3cae9 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -31,6 +31,7 @@ #include "mmvec/mmvec.h" #include "mmvec/macros.h" #include "op_helper.h" +#include "cpu_helper.h" #include "translate.h" #define SF_BIAS 127 @@ -1366,6 +1367,12 @@ uint64_t HELPER(greg_read_pair)(CPUHexagonState *env, uint32_t reg) { g_assert_not_reached(); } + +void HELPER(setprio)(CPUHexagonState *env, uint32_t thread, uint32_t prio) +{ + g_assert_not_reached(); +} + #endif From cd07070891d59c1b08c402c5f5652dab42624fd0 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 12 Sep 2024 07:05:56 -0700 Subject: [PATCH 026/126] target/hexagon: Add implicit attributes to sysemu macros Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_common.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index c7c8a53238b24..8b738ca5494ed 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -128,8 +128,13 @@ def calculate_attribs(): add_qemu_macro_attrib("fTRAP", "A_IMPLICIT_READS_PC") 
add_qemu_macro_attrib("fSET_OVERFLOW", "A_IMPLICIT_WRITES_USR") add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR") + add_qemu_macro_attrib("fLOAD_LOCKED", "A_LLSC") + add_qemu_macro_attrib("fSTORE_LOCKED", "A_LLSC") + add_qemu_macro_attrib("fCLEAR_RTE_EX", "A_IMPLICIT_WRITES_SSR") add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD") add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE") + add_qemu_macro_attrib("fSET_K0_LOCK", "A_IMPLICIT_READS_PC") + add_qemu_macro_attrib("fSET_TLB_LOCK", "A_IMPLICIT_READS_PC") add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0') add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0') add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0') From 7dba853286452b0e8164798f050514f5e1022f46 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 24 Jul 2024 20:04:46 -0700 Subject: [PATCH 027/126] target/hexagon: Add TCG overrides for int handler insts Define TCG overrides for {c,}swi {c,s}iad, iassign{r,w}, {s,g}etimask instructions. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_tcg_sys.h | 25 ++++++++++++++++++++++ target/hexagon/helper.h | 8 ++++++++ target/hexagon/op_helper.c | 40 ++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h index 362703ab45e8a..642ca3d3ff643 100644 --- a/target/hexagon/gen_tcg_sys.h +++ b/target/hexagon/gen_tcg_sys.h @@ -7,6 +7,31 @@ #ifndef HEXAGON_GEN_TCG_SYS_H #define HEXAGON_GEN_TCG_SYS_H +/* System mode instructions */ +#define fGEN_TCG_Y2_swi(SHORTCODE) \ + gen_helper_swi(tcg_env, RsV) + +#define fGEN_TCG_Y2_cswi(SHORTCODE) \ + gen_helper_cswi(tcg_env, RsV) + +#define fGEN_TCG_Y2_ciad(SHORTCODE) \ + gen_helper_ciad(tcg_env, RsV) + +#define fGEN_TCG_Y4_siad(SHORTCODE) \ + gen_helper_siad(tcg_env, RsV) + +#define fGEN_TCG_Y2_iassignw(SHORTCODE) \ + gen_helper_iassignw(tcg_env, RsV) + +#define fGEN_TCG_Y2_iassignr(SHORTCODE) \ + gen_helper_iassignr(RdV, tcg_env, RsV) + +#define fGEN_TCG_Y2_getimask(SHORTCODE) \ + gen_helper_getimask(RdV, tcg_env, RsV) + +#define fGEN_TCG_Y2_setimask(SHORTCODE) \ + gen_helper_setimask(tcg_env, PtV, RsV) + #define fGEN_TCG_Y2_setprio(SHORTCODE) \ gen_helper_setprio(tcg_env, PtV, RsV) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 146f4f02e4158..2fe4440ddc6e1 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -109,6 +109,14 @@ DEF_HELPER_2(probe_hvx_stores, void, env, int) DEF_HELPER_2(probe_pkt_scalar_hvx_stores, void, env, int) #if !defined(CONFIG_USER_ONLY) +DEF_HELPER_2(swi, void, env, i32) +DEF_HELPER_2(cswi, void, env, i32) +DEF_HELPER_2(ciad, void, env, i32) +DEF_HELPER_2(siad, void, env, i32) +DEF_HELPER_2(iassignw, void, env, i32) +DEF_HELPER_2(iassignr, i32, env, i32) +DEF_HELPER_2(getimask, i32, env, i32) +DEF_HELPER_3(setimask, void, env, i32, i32) DEF_HELPER_2(sreg_read, i32, env, i32) DEF_HELPER_2(sreg_read_pair, i64, env, i32) DEF_HELPER_2(greg_read, i32, 
env, i32) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 865e8ebb3cae9..575f3fb1635ff 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1337,6 +1337,46 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV) } #ifndef CONFIG_USER_ONLY +void HELPER(ciad)(CPUHexagonState *env, uint32_t mask) +{ + g_assert_not_reached(); +} + +void HELPER(siad)(CPUHexagonState *env, uint32_t mask) +{ + g_assert_not_reached(); +} + +void HELPER(swi)(CPUHexagonState *env, uint32_t mask) +{ + g_assert_not_reached(); +} + +void HELPER(cswi)(CPUHexagonState *env, uint32_t mask) +{ + g_assert_not_reached(); +} + +void HELPER(iassignw)(CPUHexagonState *env, uint32_t src) +{ + g_assert_not_reached(); +} + +uint32_t HELPER(iassignr)(CPUHexagonState *env, uint32_t src) +{ + g_assert_not_reached(); +} + +uint32_t HELPER(getimask)(CPUHexagonState *env, uint32_t tid) +{ + g_assert_not_reached(); +} + +void HELPER(setimask)(CPUHexagonState *env, uint32_t pred, uint32_t imask) +{ + g_assert_not_reached(); +} + void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val) { g_assert_not_reached(); From eac63524714950535b392d0e290930ed46904597 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 24 Jul 2024 20:27:34 -0700 Subject: [PATCH 028/126] target/hexagon: Add TCG overrides for thread ctl Define TCG overrides for start, stop, wait, resume instructions. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_tcg_sys.h | 18 ++++++++++++++++++ target/hexagon/helper.h | 4 ++++ target/hexagon/op_helper.c | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h index 642ca3d3ff643..942d07b401ee6 100644 --- a/target/hexagon/gen_tcg_sys.h +++ b/target/hexagon/gen_tcg_sys.h @@ -63,4 +63,22 @@ tcg_gen_extrh_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP1], tmp); \ } while (0) +#define fGEN_TCG_Y2_wait(SHORTCODE) \ + do { \ + RsV = RsV; \ + gen_helper_wait(tcg_env, tcg_constant_tl(ctx->pkt->pc)); \ + } while (0) + +#define fGEN_TCG_Y2_resume(SHORTCODE) \ + gen_helper_resume(tcg_env, RsV) + +#define fGEN_TCG_Y2_start(SHORTCODE) \ + gen_helper_start(tcg_env, RsV) + +#define fGEN_TCG_Y2_stop(SHORTCODE) \ + do { \ + RsV = RsV; \ + gen_helper_stop(tcg_env); \ + } while (0) + #endif diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 2fe4440ddc6e1..ada520bd52aed 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -124,4 +124,8 @@ DEF_HELPER_2(greg_read_pair, i64, env, i32) DEF_HELPER_3(sreg_write, void, env, i32, i32) DEF_HELPER_3(sreg_write_pair, void, env, i32, i64) DEF_HELPER_3(setprio, void, env, i32, i32) +DEF_HELPER_2(start, void, env, i32) +DEF_HELPER_1(stop, void, env) +DEF_HELPER_2(wait, void, env, i32) +DEF_HELPER_2(resume, void, env, i32) #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 575f3fb1635ff..09a52843298a2 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1367,6 +1367,26 @@ uint32_t HELPER(iassignr)(CPUHexagonState *env, uint32_t src) g_assert_not_reached(); } +void HELPER(start)(CPUHexagonState *env, uint32_t imask) +{ + g_assert_not_reached(); +} + +void HELPER(stop)(CPUHexagonState *env) +{ + g_assert_not_reached(); +} + +void HELPER(wait)(CPUHexagonState *env, target_ulong PC) +{ + g_assert_not_reached(); +} + +void 
HELPER(resume)(CPUHexagonState *env, uint32_t mask) +{ + g_assert_not_reached(); +} + uint32_t HELPER(getimask)(CPUHexagonState *env, uint32_t tid) { g_assert_not_reached(); From cd8e910d217ec4ebf2e1733c36618cc1e3cebe16 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 24 Jul 2024 20:34:02 -0700 Subject: [PATCH 029/126] target/hexagon: Add TCG overrides for rte, nmi Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_tcg_sys.h | 19 +++++++++++++++++++ target/hexagon/helper.h | 1 + target/hexagon/op_helper.c | 4 ++++ 3 files changed, 24 insertions(+) diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h index 942d07b401ee6..6d73a18db455b 100644 --- a/target/hexagon/gen_tcg_sys.h +++ b/target/hexagon/gen_tcg_sys.h @@ -81,4 +81,23 @@ gen_helper_stop(tcg_env); \ } while (0) +/* + * rte (return from exception) + * Clear the EX bit in SSR + * Jump to ELR + */ +#define fGEN_TCG_J2_rte(SHORTCODE) \ + do { \ + TCGv new_ssr = tcg_temp_new(); \ + tcg_gen_deposit_tl(new_ssr, hex_t_sreg[HEX_SREG_SSR], \ + tcg_constant_tl(0), \ + reg_field_info[SSR_EX].offset, \ + reg_field_info[SSR_EX].width); \ + gen_log_sreg_write(ctx, HEX_SREG_SSR, new_ssr); \ + gen_jumpr(ctx, hex_t_sreg[HEX_SREG_ELR]); \ + } while (0) + +#define fGEN_TCG_Y4_nmi(SHORTCODE) \ + gen_helper_nmi(tcg_env, RsV) + #endif diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index ada520bd52aed..730eaf8b9a0f9 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -128,4 +128,5 @@ DEF_HELPER_2(start, void, env, i32) DEF_HELPER_1(stop, void, env) DEF_HELPER_2(wait, void, env, i32) DEF_HELPER_2(resume, void, env, i32) +DEF_HELPER_2(nmi, void, env, i32) #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 09a52843298a2..139a0b5ab27cc 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1433,6 +1433,10 @@ void HELPER(setprio)(CPUHexagonState *env, uint32_t thread, uint32_t 
prio) g_assert_not_reached(); } +void HELPER(nmi)(CPUHexagonState *env, uint32_t thread_mask) +{ + g_assert_not_reached(); +} #endif From b306cbc059dba8985193320567dce78bab8f3d12 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 25 Jul 2024 17:17:14 -0700 Subject: [PATCH 030/126] target/hexagon: Add sreg_{read,write} helpers Co-authored-by: Sid Manning <sidneym@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 1 + target/hexagon/cpu_helper.c | 37 ++++++++++++ target/hexagon/cpu_helper.h | 8 +++ target/hexagon/op_helper.c | 114 ++++++++++++++++++++++++++++++++++-- 4 files changed, 156 insertions(+), 4 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 7c070f5123d9a..0ee06be20c274 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -322,6 +322,7 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) qemu_init_vcpu(cs); cpu_reset(cs); #ifndef CONFIG_USER_ONLY + CPUHexagonState *env = cpu_env(cs); if (cs->cpu_index == 0) { env->g_sreg = g_new0(target_ulong, NUM_SREGS); } else { diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 6e4bc85580e6b..00622531764f0 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -27,10 +27,47 @@ #ifndef CONFIG_USER_ONLY +uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index) +{ + g_assert_not_reached(); +} + uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg) { g_assert_not_reached(); } +uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env) +{ + g_assert_not_reached(); +} + +uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env) +{ + g_assert_not_reached(); +} + +uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env) +{ + g_assert_not_reached(); +} + +void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, + uint32_t cycles_hi) +{ + g_assert_not_reached(); +} + +void hexagon_set_sys_pcycle_count_low(CPUHexagonState 
*env, + uint32_t cycles_lo) +{ + g_assert_not_reached(); +} + +void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) +{ + g_assert_not_reached(); +} + #endif diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 194bcbf451379..5f5f15149ab7c 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -7,6 +7,14 @@ #ifndef HEXAGON_CPU_HELPER_H #define HEXAGON_CPU_HELPER_H +uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index); +uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env); +uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env); +uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env); +void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t); +void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t); +void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t); + static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) { diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 139a0b5ab27cc..76b2475d880ae 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -17,6 +17,7 @@ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/main-loop.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" @@ -1397,25 +1398,130 @@ void HELPER(setimask)(CPUHexagonState *env, uint32_t pred, uint32_t imask) g_assert_not_reached(); } -void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val) +static bool handle_pmu_sreg_write(CPUHexagonState *env, uint32_t reg, + uint32_t val) +{ + if (reg == HEX_SREG_PMUSTID0 || reg == HEX_SREG_PMUSTID1 + || reg == HEX_SREG_PMUCFG || reg == HEX_SREG_PMUEVTCFG + || reg == HEX_SREG_PMUEVTCFG1 + || (reg >= HEX_SREG_PMUCNT4 && reg <= HEX_SREG_PMUCNT3)) { + qemu_log_mask(LOG_UNIMP, "PMU registers not yet implemented"); + return true; + } + return false; +} + +static void 
modify_syscfg(CPUHexagonState *env, uint32_t val) { g_assert_not_reached(); } -void HELPER(sreg_write_pair)(CPUHexagonState *env, uint32_t reg, uint64_t val) +static void hexagon_set_vid(CPUHexagonState *env, uint32_t offset, int val) +{ + g_assert_not_reached(); +} +static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid) { g_assert_not_reached(); } +static void hexagon_read_timer(CPUHexagonState *env, uint32_t *low, + uint32_t *high) +{ + qemu_log_mask(LOG_UNIMP, "reading timer_hi/lo not yet supported\n"); +} + +static inline QEMU_ALWAYS_INLINE void sreg_write(CPUHexagonState *env, + uint32_t reg, uint32_t val) + +{ + g_assert(bql_locked()); + if ((reg == HEX_SREG_VID) || (reg == HEX_SREG_VID1)) { + hexagon_set_vid(env, (reg == HEX_SREG_VID) ? L2VIC_VID_0 : L2VIC_VID_1, + val); + arch_set_system_reg(env, reg, val); + } else if (reg == HEX_SREG_SYSCFG) { + modify_syscfg(env, val); + } else if (reg == HEX_SREG_IMASK) { + val = GET_FIELD(IMASK_MASK, val); + arch_set_system_reg(env, reg, val); + } else if (reg == HEX_SREG_PCYCLELO) { + hexagon_set_sys_pcycle_count_low(env, val); + } else if (reg == HEX_SREG_PCYCLEHI) { + hexagon_set_sys_pcycle_count_high(env, val); + } else if (!handle_pmu_sreg_write(env, reg, val)) { + if (reg >= HEX_SREG_GLB_START) { + arch_set_system_reg(env, reg, val); + } else { + arch_set_system_reg(env, reg, val); + } + } +} + +void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val) +{ + BQL_LOCK_GUARD(); + sreg_write(env, reg, val); +} + +void HELPER(sreg_write_pair)(CPUHexagonState *env, uint32_t reg, uint64_t val) +{ + BQL_LOCK_GUARD(); + sreg_write(env, reg, val & 0xFFFFFFFF); + sreg_write(env, reg + 1, val >> 32); +} + +static inline QEMU_ALWAYS_INLINE uint32_t sreg_read(CPUHexagonState *env, + uint32_t reg) +{ + g_assert(bql_locked()); + if (reg == HEX_SREG_PMUSTID0 || reg == HEX_SREG_PMUSTID1 + || reg == HEX_SREG_PMUCFG || reg == HEX_SREG_PMUEVTCFG + || reg == HEX_SREG_PMUEVTCFG1 + || (reg >= 
HEX_SREG_PMUCNT4 && reg <= HEX_SREG_PMUCNT3)) { + qemu_log_mask(LOG_UNIMP, "PMU registers not yet implemented"); + return 0; + } + if ((reg == HEX_SREG_VID) || (reg == HEX_SREG_VID1)) { + const uint32_t vid = hexagon_find_last_irq(env, reg); + arch_set_system_reg(env, reg, vid); + } else if ((reg == HEX_SREG_TIMERLO) || (reg == HEX_SREG_TIMERHI)) { + uint32_t low = 0; + uint32_t high = 0; + hexagon_read_timer(env, &low, &high); + arch_set_system_reg(env, HEX_SREG_TIMERLO, low); + arch_set_system_reg(env, HEX_SREG_TIMERHI, high); + } else if (reg == HEX_SREG_BADVA) { + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + if (GET_SSR_FIELD(SSR_BVS, ssr)) { + return arch_get_system_reg(env, HEX_SREG_BADVA1); + } + return arch_get_system_reg(env, HEX_SREG_BADVA0); + } + return arch_get_system_reg(env, reg); +} + uint32_t HELPER(sreg_read)(CPUHexagonState *env, uint32_t reg) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + return sreg_read(env, reg); } uint64_t HELPER(sreg_read_pair)(CPUHexagonState *env, uint32_t reg) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + if (reg == HEX_SREG_TIMERLO) { + uint32_t low = 0; + uint32_t high = 0; + hexagon_read_timer(env, &low, &high); + arch_set_system_reg(env, HEX_SREG_TIMERLO, low); + arch_set_system_reg(env, HEX_SREG_TIMERHI, high); + } else if (reg == HEX_SREG_PCYCLELO) { + return hexagon_get_sys_pcycle_count(env); + } + return (uint64_t)sreg_read(env, reg) | + (((uint64_t)sreg_read(env, reg + 1)) << 32); } uint32_t HELPER(greg_read)(CPUHexagonState *env, uint32_t reg) From 7d3e8193df11263669e70a28b90c6a74f9b343ac Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 9 Aug 2024 16:28:08 -0700 Subject: [PATCH 031/126] target/hexagon: Initialize htid, modectl regs Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 0ee06be20c274..c6b32f60249c0 100644 --- 
a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -26,6 +26,7 @@ #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" #include "exec/gdbstub.h" +#include "cpu_helper.h" static void hexagon_v66_cpu_init(Object *obj) { } static void hexagon_v67_cpu_init(Object *obj) { } @@ -289,11 +290,18 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) set_float_default_nan_pattern(0b11111111, &env->fp_status); #ifndef CONFIG_USER_ONLY + HexagonCPU *cpu = HEXAGON_CPU(cs); + if (cs->cpu_index == 0) { memset(env->g_sreg, 0, sizeof(target_ulong) * NUM_SREGS); } memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); + + if (cs->cpu_index == 0) { + arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); + } + arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); #endif } From a52116815ca963e3fac122e8a8a5a73e954ccb14 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 9 Aug 2024 19:24:45 -0700 Subject: [PATCH 032/126] target/hexagon: Add locks, id, next_PC to state Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 6 ++++++ target/hexagon/cpu.h | 37 +++++++++++++++++++++++++++++++++++-- target/hexagon/machine.c | 4 ++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index c6b32f60249c0..3fa15060e7277 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -302,6 +302,12 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); } arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); + memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); + memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); + env->threadId = cs->cpu_index; + env->tlb_lock_state = HEX_LOCK_UNLOCKED; + env->k0_lock_state = HEX_LOCK_UNLOCKED; + env->next_PC = 0; #endif } diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 5dde4f8e880cb..aeb17d80ae54a 
100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -44,9 +44,36 @@ #define PRED_WRITES_MAX 5 /* 4 insns + endloop */ #define VSTORES_MAX 2 -#define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU +#ifndef CONFIG_USER_ONLY +#define CPU_INTERRUPT_SWI CPU_INTERRUPT_TGT_INT_0 +#define CPU_INTERRUPT_K0_UNLOCK CPU_INTERRUPT_TGT_INT_1 +#define CPU_INTERRUPT_TLB_UNLOCK CPU_INTERRUPT_TGT_INT_2 + +#define HEX_CPU_MODE_USER 1 +#define HEX_CPU_MODE_GUEST 2 +#define HEX_CPU_MODE_MONITOR 3 + +#define HEX_EXE_MODE_OFF 1 +#define HEX_EXE_MODE_RUN 2 +#define HEX_EXE_MODE_WAIT 3 +#define HEX_EXE_MODE_DEBUG 4 +#endif -#define MMU_USER_IDX 0 +#define MMU_USER_IDX 0 +#ifndef CONFIG_USER_ONLY +#define MMU_GUEST_IDX 1 +#define MMU_KERNEL_IDX 2 + +typedef enum { + HEX_LOCK_UNLOCKED = 0, + HEX_LOCK_WAITING = 1, + HEX_LOCK_OWNER = 2, + HEX_LOCK_QUEUED = 3 +} hex_lock_state_t; +#endif + + +#define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU typedef struct { target_ulong va; @@ -93,6 +120,12 @@ typedef struct CPUArchState { target_ulong *g_sreg; target_ulong greg[NUM_GREGS]; + + /* This alias of CPUState.cpu_index is used by imported sources: */ + target_ulong threadId; + hex_lock_state_t tlb_lock_state; + hex_lock_state_t k0_lock_state; + target_ulong next_PC; #endif target_ulong new_value_usr; diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c index d9d71edf7718a..dc900422f4c63 100644 --- a/target/hexagon/machine.c +++ b/target/hexagon/machine.c @@ -19,6 +19,10 @@ const VMStateDescription vmstate_hexagon_cpu = { VMSTATE_UINTTL_ARRAY(env.pred, HexagonCPU, NUM_PREGS), VMSTATE_UINTTL_ARRAY(env.t_sreg, HexagonCPU, NUM_SREGS), VMSTATE_UINTTL_ARRAY(env.greg, HexagonCPU, NUM_GREGS), + VMSTATE_UINTTL(env.next_PC, HexagonCPU), + VMSTATE_UINTTL(env.tlb_lock_state, HexagonCPU), + VMSTATE_UINTTL(env.k0_lock_state, HexagonCPU), + VMSTATE_UINTTL(env.threadId, HexagonCPU), VMSTATE_END_OF_LIST() }, }; From 3441dfb7d62887caa295ec9604e127c6cb8ce127 Mon Sep 17 00:00:00 2001 From: Brian Cain 
<bcain@quicinc.com> Date: Fri, 9 Aug 2024 19:29:54 -0700 Subject: [PATCH 033/126] target/hexagon: Add a TLB count property Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 4 ++++ target/hexagon/cpu.h | 1 + target/hexagon/max.h | 26 ++++++++++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 target/hexagon/max.h diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 3fa15060e7277..4f7bdbd32af4f 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -27,6 +27,7 @@ #include "tcg/tcg.h" #include "exec/gdbstub.h" #include "cpu_helper.h" +#include "max.h" static void hexagon_v66_cpu_init(Object *obj) { } static void hexagon_v67_cpu_init(Object *obj) { } @@ -51,6 +52,9 @@ static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) } static const Property hexagon_cpu_properties[] = { +#if !defined(CONFIG_USER_ONLY) + DEFINE_PROP_UINT32("jtlb-entries", HexagonCPU, num_tlbs, MAX_TLB_ENTRIES), +#endif DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong), diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index aeb17d80ae54a..0113152e881b1 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -172,6 +172,7 @@ struct ArchCPU { bool lldb_compat; target_ulong lldb_stack_adjust; bool short_circuit; + uint32_t num_tlbs; }; #include "cpu_bits.h" diff --git a/target/hexagon/max.h b/target/hexagon/max.h new file mode 100644 index 0000000000000..0f595bcb736d7 --- /dev/null +++ b/target/hexagon/max.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_MAX_H +#define HEXAGON_MAX_H + +#define MAX_EXT_CONTEXTS 8 +#define MAX_L2_INTERLEAVES 2 +#define MAX_VFIFO_COUNT 4 + +#define SLOTS_MAX 4 + +#define REG_WRITES_MAX 32 +#define PRED_WRITES_MAX 5 +#define STORES_MAX 2 +#define LOADS_MAX 2 +#define MAX_PRED 4 + +#define PACKET_BYTES_MAX 16 +#define MAX_TLB_ENTRIES 1024 +#define DTLB_ENTRIES 16 +#define ITLB_ENTRIES 16 + +#endif /* HEXAGON_MAX_H */ From 53ef961f3c525ec75dbb4bf5653b248535d8ff8f Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 16 Aug 2024 09:03:28 -0700 Subject: [PATCH 034/126] target/hexagon: Add {TLB,k0}lock, cause code, wait_next_pc {TLB,k0}lock counts are used to represent the TLB, k0 locks among hardware threads. wait_next_pc represents the program counter to set when resuming from a wait-for-interrupts state. cause_code contains the precise exception cause. This will be used by subsequent commits. Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 4 ++++ target/hexagon/cpu.h | 4 ++++ target/hexagon/machine.c | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 4f7bdbd32af4f..73a26ae50f04f 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -311,7 +311,11 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) env->threadId = cs->cpu_index; env->tlb_lock_state = HEX_LOCK_UNLOCKED; env->k0_lock_state = HEX_LOCK_UNLOCKED; + env->tlb_lock_count = 0; + env->k0_lock_count = 0; env->next_PC = 0; + env->wait_next_pc = 0; + env->cause_code = -1; #endif } diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 0113152e881b1..0dd26cfbb184a 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -107,6 +107,7 @@ typedef struct { typedef struct CPUArchState { target_ulong gpr[TOTAL_PER_THREAD_REGS]; target_ulong pred[NUM_PREGS]; + target_ulong cause_code; /* For comparing with LLDB on target - see
adjust_stack_ptrs function */ target_ulong last_pc_dumped; @@ -120,11 +121,14 @@ typedef struct CPUArchState { target_ulong *g_sreg; target_ulong greg[NUM_GREGS]; + target_ulong wait_next_pc; /* This alias of CPUState.cpu_index is used by imported sources: */ target_ulong threadId; hex_lock_state_t tlb_lock_state; hex_lock_state_t k0_lock_state; + target_ulong tlb_lock_count; + target_ulong k0_lock_count; target_ulong next_PC; #endif target_ulong new_value_usr; diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c index dc900422f4c63..9fdafb4573ddf 100644 --- a/target/hexagon/machine.c +++ b/target/hexagon/machine.c @@ -22,7 +22,11 @@ const VMStateDescription vmstate_hexagon_cpu = { VMSTATE_UINTTL(env.next_PC, HexagonCPU), VMSTATE_UINTTL(env.tlb_lock_state, HexagonCPU), VMSTATE_UINTTL(env.k0_lock_state, HexagonCPU), + VMSTATE_UINTTL(env.tlb_lock_count, HexagonCPU), + VMSTATE_UINTTL(env.k0_lock_count, HexagonCPU), VMSTATE_UINTTL(env.threadId, HexagonCPU), + VMSTATE_UINTTL(env.cause_code, HexagonCPU), + VMSTATE_UINTTL(env.wait_next_pc, HexagonCPU), VMSTATE_END_OF_LIST() }, }; From 26931a4bea128f61ec3a4e1c6f15f6c5101dd17b Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Fri, 16 Aug 2024 09:03:59 -0700 Subject: [PATCH 035/126] target/hexagon: Add stubs for modify_ssr/get_exe_mode Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 8 ++++++++ target/hexagon/cpu_helper.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 00622531764f0..9f4fc716e33c6 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -69,5 +69,13 @@ void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) g_assert_not_reached(); } +void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old) +{ + g_assert_not_reached(); +} +int get_exe_mode(CPUHexagonState *env) +{ + g_assert_not_reached(); +} #endif diff --git 
a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 5f5f15149ab7c..e0c0c037a6eaf 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -14,6 +14,8 @@ uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env); void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t); void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t); void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t); +void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old); +int get_exe_mode(CPUHexagonState *env); static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) From d72a9492e2ad3208175900786346a8899b3b00ef Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 26 Aug 2024 20:44:28 -0700 Subject: [PATCH 036/126] target/hexagon: Add gdb support for sys regs Co-authored-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- gdb-xml/hexagon-sys.xml | 116 +++++++++++++++++++++++++++++++++++++ target/hexagon/cpu.c | 17 ++++++ target/hexagon/cpu.h | 6 ++ target/hexagon/gdbstub.c | 45 ++++++++++++++ target/hexagon/internal.h | 4 ++ target/hexagon/op_helper.c | 16 +++++ 6 files changed, 204 insertions(+) create mode 100644 gdb-xml/hexagon-sys.xml diff --git a/gdb-xml/hexagon-sys.xml b/gdb-xml/hexagon-sys.xml new file mode 100644 index 0000000000000..1d9c21172253f --- /dev/null +++ b/gdb-xml/hexagon-sys.xml @@ -0,0 +1,116 @@ +<?xml version="1.0"?> +<!-- + Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + + This work is licensed under the terms of the GNU GPL, version 2 or + (at your option) any later version. See the COPYING file in the + top-level directory. + + Note: this file is intended to be used with LLDB, so it contains fields + that may be unknown to GDB.
For more information on such fields, please + see: + https://github.com/llvm/llvm-project/blob/287aa6c4536408413b860e61fca0318a27214cf3/lldb/docs/lldb-gdb-remote.txt#L738-L860 + https://github.com/llvm/llvm-project/blob/287aa6c4536408413b860e61fca0318a27214cf3/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp#L4275-L4335 +--> + +<!DOCTYPE feature SYSTEM "gdb-target.dtd"> +<feature name="org.gnu.gdb.hexagon.sys"> + + <reg name="sgp0" bitsize="32" offset="4416" encoding="uint" format="hex" group="System Registers" dwarf_regnum="135" /> + <reg name="sgp1" bitsize="32" offset="4420" encoding="uint" format="hex" group="System Registers" dwarf_regnum="136" /> + <reg name="stid" bitsize="32" offset="4424" encoding="uint" format="hex" group="System Registers" dwarf_regnum="137" /> + <reg name="elr" bitsize="32" offset="4428" encoding="uint" format="hex" group="System Registers" dwarf_regnum="138" /> + <reg name="badva0" bitsize="32" offset="4432" encoding="uint" format="hex" group="System Registers" dwarf_regnum="139" /> + <reg name="badva1" bitsize="32" offset="4436" encoding="uint" format="hex" group="System Registers" dwarf_regnum="140" /> + <reg name="ssr" bitsize="32" offset="4440" encoding="uint" format="hex" group="System Registers" dwarf_regnum="141" /> + <reg name="ccr" bitsize="32" offset="4444" encoding="uint" format="hex" group="System Registers" dwarf_regnum="142" /> + <reg name="htid" bitsize="32" offset="4448" encoding="uint" format="hex" group="System Registers" dwarf_regnum="143" /> + <reg name="badva" bitsize="32" offset="4452" encoding="uint" format="hex" group="System Registers" dwarf_regnum="144" /> + <reg name="imask" bitsize="32" offset="4456" encoding="uint" format="hex" group="System Registers" dwarf_regnum="145" /> + <reg name="gevb" bitsize="32" offset="4460" encoding="uint" format="hex" group="System Registers" dwarf_regnum="146" /> + <reg name="rsv12" bitsize="32" offset="4464" encoding="uint" format="hex" group="System Registers" 
dwarf_regnum="147" /> + <reg name="rsv13" bitsize="32" offset="4468" encoding="uint" format="hex" group="System Registers" dwarf_regnum="148" /> + <reg name="rsv14" bitsize="32" offset="4472" encoding="uint" format="hex" group="System Registers" dwarf_regnum="149" /> + <reg name="rsv15" bitsize="32" offset="4476" encoding="uint" format="hex" group="System Registers" dwarf_regnum="150" /> + <reg name="evb" bitsize="32" offset="4480" encoding="uint" format="hex" group="System Registers" dwarf_regnum="151" /> + <reg name="modectl" bitsize="32" offset="4484" encoding="uint" format="hex" group="System Registers" dwarf_regnum="152" /> + <reg name="syscfg" bitsize="32" offset="4488" encoding="uint" format="hex" group="System Registers" dwarf_regnum="153" /> + <reg name="free19" bitsize="32" offset="4492" encoding="uint" format="hex" group="System Registers" dwarf_regnum="154" /> + <reg name="ipendad" bitsize="32" offset="4496" encoding="uint" format="hex" group="System Registers" dwarf_regnum="155" /> + <reg name="vid" bitsize="32" offset="4500" encoding="uint" format="hex" group="System Registers" dwarf_regnum="156" /> + <reg name="vid1" bitsize="32" offset="4504" encoding="uint" format="hex" group="System Registers" dwarf_regnum="157" /> + <reg name="bestwait" bitsize="32" offset="4508" encoding="uint" format="hex" group="System Registers" dwarf_regnum="158" /> + <reg name="free24" bitsize="32" offset="4512" encoding="uint" format="hex" group="System Registers" dwarf_regnum="159" /> + <reg name="schedcfg" bitsize="32" offset="4516" encoding="uint" format="hex" group="System Registers" dwarf_regnum="160" /> + <reg name="free26" bitsize="32" offset="4520" encoding="uint" format="hex" group="System Registers" dwarf_regnum="161" /> + <reg name="cfgbase" bitsize="32" offset="4524" encoding="uint" format="hex" group="System Registers" dwarf_regnum="162" /> + <reg name="diag" bitsize="32" offset="4528" encoding="uint" format="hex" group="System Registers" dwarf_regnum="163" /> 
+ <reg name="rev" bitsize="32" offset="4532" encoding="uint" format="hex" group="System Registers" dwarf_regnum="164" /> + <reg name="pcyclelo" bitsize="32" offset="4536" encoding="uint" format="hex" group="System Registers" dwarf_regnum="165" /> + <reg name="pcyclehi" bitsize="32" offset="4540" encoding="uint" format="hex" group="System Registers" dwarf_regnum="166" /> + <reg name="isdbst" bitsize="32" offset="4544" encoding="uint" format="hex" group="System Registers" dwarf_regnum="167" /> + <reg name="isdbcfg0" bitsize="32" offset="4548" encoding="uint" format="hex" group="System Registers" dwarf_regnum="168" /> + <reg name="isdbcfg1" bitsize="32" offset="4552" encoding="uint" format="hex" group="System Registers" dwarf_regnum="169" /> + <reg name="livelock" bitsize="32" offset="4556" encoding="uint" format="hex" group="System Registers" dwarf_regnum="170" /> + <reg name="brkptpc0" bitsize="32" offset="4560" encoding="uint" format="hex" group="System Registers" dwarf_regnum="171" /> + <reg name="brkptccfg0" bitsize="32" offset="4564" encoding="uint" format="hex" group="System Registers" dwarf_regnum="172" /> + <reg name="brkptpc1" bitsize="32" offset="4568" encoding="uint" format="hex" group="System Registers" dwarf_regnum="173" /> + <reg name="brkptcfg1" bitsize="32" offset="4572" encoding="uint" format="hex" group="System Registers" dwarf_regnum="174" /> + <reg name="isdbmbxin" bitsize="32" offset="4576" encoding="uint" format="hex" group="System Registers" dwarf_regnum="175" /> + <reg name="isdbmbxout" bitsize="32" offset="4580" encoding="uint" format="hex" group="System Registers" dwarf_regnum="176" /> + <reg name="isdben" bitsize="32" offset="4584" encoding="uint" format="hex" group="System Registers" dwarf_regnum="177" /> + <reg name="isdbgpr" bitsize="32" offset="4588" encoding="uint" format="hex" group="System Registers" dwarf_regnum="178" /> + <reg name="pmucnt4" bitsize="32" offset="4592" encoding="uint" format="hex" group="System Registers" 
dwarf_regnum="179" /> + <reg name="pmucnt5" bitsize="32" offset="4596" encoding="uint" format="hex" group="System Registers" dwarf_regnum="180" /> + <reg name="pmucnt6" bitsize="32" offset="4600" encoding="uint" format="hex" group="System Registers" dwarf_regnum="181" /> + <reg name="pmucnt7" bitsize="32" offset="4604" encoding="uint" format="hex" group="System Registers" dwarf_regnum="182" /> + <reg name="pmucnt0" bitsize="32" offset="4608" encoding="uint" format="hex" group="System Registers" dwarf_regnum="183" /> + <reg name="pmucnt1" bitsize="32" offset="4612" encoding="uint" format="hex" group="System Registers" dwarf_regnum="184" /> + <reg name="pmucnt2" bitsize="32" offset="4616" encoding="uint" format="hex" group="System Registers" dwarf_regnum="185" /> + <reg name="pmucnt3" bitsize="32" offset="4620" encoding="uint" format="hex" group="System Registers" dwarf_regnum="186" /> + <reg name="pmuevtcfg" bitsize="32" offset="4624" encoding="uint" format="hex" group="System Registers" dwarf_regnum="187" /> + <reg name="pmustid0" bitsize="32" offset="4628" encoding="uint" format="hex" group="System Registers" dwarf_regnum="188" /> + <reg name="pmuevtcfg1" bitsize="32" offset="4632" encoding="uint" format="hex" group="System Registers" dwarf_regnum="189" /> + <reg name="pmustid1" bitsize="32" offset="4636" encoding="uint" format="hex" group="System Registers" dwarf_regnum="190" /> + <reg name="timerlo" bitsize="32" offset="4640" encoding="uint" format="hex" group="System Registers" dwarf_regnum="191" /> + <reg name="timerhi" bitsize="32" offset="4644" encoding="uint" format="hex" group="System Registers" dwarf_regnum="192" /> + <reg name="pmucfg" bitsize="32" offset="4648" encoding="uint" format="hex" group="System Registers" dwarf_regnum="193" /> + <reg name="rsv59" bitsize="32" offset="4652" encoding="uint" format="hex" group="System Registers" dwarf_regnum="194" /> + <reg name="rsv60" bitsize="32" offset="4656" encoding="uint" format="hex" group="System 
Registers" dwarf_regnum="195" /> + <reg name="rsv61" bitsize="32" offset="4660" encoding="uint" format="hex" group="System Registers" dwarf_regnum="196" /> + <reg name="rsv62" bitsize="32" offset="4664" encoding="uint" format="hex" group="System Registers" dwarf_regnum="197" /> + <reg name="rsv63" bitsize="32" offset="4668" encoding="uint" format="hex" group="System Registers" dwarf_regnum="198" /> + <reg name="g0" bitsize="32" offset="4672" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="179" /> + <reg name="g1" bitsize="32" offset="4676" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="180" /> + <reg name="g2" bitsize="32" offset="4680" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="181" /> + <reg name="g3" bitsize="32" offset="4684" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="182" /> + <reg name="rsv4" bitsize="32" offset="4688" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="183" /> + <reg name="rsv5" bitsize="32" offset="4692" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="184" /> + <reg name="rsv6" bitsize="32" offset="4696" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="185" /> + <reg name="rsv7" bitsize="32" offset="4700" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="186" /> + <reg name="rsv8" bitsize="32" offset="4704" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="187" /> + <reg name="rsv9" bitsize="32" offset="4708" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="188" /> + <reg name="rsv10" bitsize="32" offset="4712" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="189" /> + <reg name="rsv11" bitsize="32" offset="4716" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="190" /> + <reg name="rsv12" bitsize="32" offset="4720" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="191" /> + <reg name="rsv13" 
bitsize="32" offset="4724" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="192" /> + <reg name="rsv14" bitsize="32" offset="4728" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="193" /> + <reg name="rsv15" bitsize="32" offset="4732" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="194" /> + <reg name="gpmucnt4" bitsize="32" offset="4736" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="195" /> + <reg name="gpmucnt5" bitsize="32" offset="4740" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="196" /> + <reg name="gpmucnt6" bitsize="32" offset="4744" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="197" /> + <reg name="gpmucnt7" bitsize="32" offset="4748" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="198" /> + <reg name="rsv20" bitsize="32" offset="4752" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="199" /> + <reg name="rsv21" bitsize="32" offset="4756" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="200" /> + <reg name="rsv22" bitsize="32" offset="4760" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="201" /> + <reg name="rsv23" bitsize="32" offset="4764" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="202" /> + <reg name="gpcyclelo" bitsize="32" offset="4768" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="203" /> + <reg name="gpcyclehi" bitsize="32" offset="4772" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="204" /> + <reg name="gpmucnt0" bitsize="32" offset="4776" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="205" /> + <reg name="gpmucnt1" bitsize="32" offset="4780" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="206" /> + <reg name="gpmucnt2" bitsize="32" offset="4784" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="207" /> + <reg name="gpmucnt3" bitsize="32" 
offset="4788" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="208" /> + <reg name="rsv30" bitsize="32" offset="4792" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="209" /> + <reg name="rsv31" bitsize="32" offset="4796" encoding="uint" format="hex" group="Guest Registers" dwarf_regnum="210" /> + +</feature> diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 73a26ae50f04f..e21a293725d8a 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -29,6 +29,10 @@ #include "cpu_helper.h" #include "max.h" +#ifndef CONFIG_USER_ONLY +#include "sys_macros.h" +#endif + static void hexagon_v66_cpu_init(Object *obj) { } static void hexagon_v67_cpu_init(Object *obj) { } static void hexagon_v68_cpu_init(Object *obj) { } @@ -341,6 +345,12 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) hexagon_hvx_gdb_write_register, gdb_find_static_feature("hexagon-hvx.xml"), 0); +#ifndef CONFIG_USER_ONLY + gdb_register_coprocessor(cs, hexagon_sys_gdb_read_register, + hexagon_sys_gdb_write_register, + gdb_find_static_feature("hexagon-sys.xml"), 0); +#endif + qemu_init_vcpu(cs); cpu_reset(cs); #ifndef CONFIG_USER_ONLY @@ -400,6 +410,13 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->tcg_ops = &hexagon_tcg_ops; } +#ifndef CONFIG_USER_ONLY +uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg) +{ + g_assert_not_reached(); +} +#endif + #define DEFINE_CPU(type_name, initfn) \ { \ .name = type_name, \ diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 0dd26cfbb184a..84de8226f66a1 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -187,6 +187,12 @@ G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc); +#ifndef CONFIG_USER_ONLY +uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg); +uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg); +void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t 
val); +#endif + static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, uint64_t *cs_base, uint32_t *flags) { diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c index 12d6b3bbcbb16..8476199b753ef 100644 --- a/target/hexagon/gdbstub.c +++ b/target/hexagon/gdbstub.c @@ -76,6 +76,53 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) g_assert_not_reached(); } +#ifndef CONFIG_USER_ONLY +/* + * Read callback for the "hexagon-sys.xml" gdb feature. Register numbers + * below NUM_SREGS are system registers, followed by NUM_GREGS guest registers. + * This is a read: it must never modify CPU state. + */ +int hexagon_sys_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) +{ + CPUHexagonState *env = cpu_env(cs); + + if (n < NUM_SREGS) { + return gdb_get_regl(mem_buf, hexagon_sreg_read(env, n)); + } + n -= NUM_SREGS; + + if (n < NUM_GREGS) { + return gdb_get_regl(mem_buf, hexagon_greg_read(env, n)); + } + n -= NUM_GREGS; + + g_assert_not_reached(); +} + +/* + * Write callback for the "hexagon-sys.xml" gdb feature, using the same + * register numbering as the read callback. Returns the number of bytes + * consumed from mem_buf. + */ +int hexagon_sys_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) +{ + CPUHexagonState *env = cpu_env(cs); + + if (n < NUM_SREGS) { + hexagon_gdb_sreg_write(env, n, ldtul_p(mem_buf)); + return sizeof(target_ulong); + } + n -= NUM_SREGS; + + if (n < NUM_GREGS) { + env->greg[n] = ldtul_p(mem_buf); + return sizeof(target_ulong); + } + n -= NUM_GREGS; + + g_assert_not_reached(); +} +#endif static int gdb_get_vreg(CPUHexagonState *env, GByteArray *mem_buf, int n) { int total = 0; diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 7cf7bcaa6cd8b..c24c36092161f 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -22,6 +22,10 @@ int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +#ifndef CONFIG_USER_ONLY +int hexagon_sys_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n); +int hexagon_sys_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n); +#endif int hexagon_hvx_gdb_read_register(CPUState *env, GByteArray
*mem_buf, int n); int hexagon_hvx_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n); diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 76b2475d880ae..fd9caafefc49f 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1465,6 +1465,17 @@ void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val) sreg_write(env, reg, val); } +void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val) +{ + BQL_LOCK_GUARD(); + sreg_write(env, reg, val); + /* + * The above is needed to run special logic for regs like syscfg, but it + * won't set read-only bits. This will: + */ + arch_set_system_reg(env, reg, val); +} + void HELPER(sreg_write_pair)(CPUHexagonState *env, uint32_t reg, uint64_t val) { BQL_LOCK_GUARD(); @@ -1508,6 +1519,11 @@ uint32_t HELPER(sreg_read)(CPUHexagonState *env, uint32_t reg) return sreg_read(env, reg); } +uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg) +{ + return sreg_read(env, reg); +} + uint64_t HELPER(sreg_read_pair)(CPUHexagonState *env, uint32_t reg) { BQL_LOCK_GUARD(); From ed96f45378ef0f193970eb227dfaf77f1bcde69f Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 26 Aug 2024 13:44:55 -0700 Subject: [PATCH 037/126] target/hexagon: Add initial MMU model Co-authored-by: Taylor Simpson <ltaylorsimpson@gmail.com> Co-authored-by: Michael Lambert <mlambert@quicinc.com> Co-authored-by: Sid Manning <sidneym@quicinc.com> Co-authored-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu-param.h | 4 + target/hexagon/cpu.c | 27 +- target/hexagon/cpu.h | 13 + target/hexagon/hex_mmu.c | 528 +++++++++++++++++++++++++++++++++++++ target/hexagon/hex_mmu.h | 30 +++ target/hexagon/internal.h | 3 + target/hexagon/machine.c | 30 +++ target/hexagon/meson.build | 3 +- target/hexagon/translate.c | 2 +- 9 files changed, 637 insertions(+), 3 deletions(-) create mode 
100644 target/hexagon/hex_mmu.c create mode 100644 target/hexagon/hex_mmu.h diff --git a/target/hexagon/cpu-param.h b/target/hexagon/cpu-param.h index ccaf6a9d28d6f..d414ca89d690b 100644 --- a/target/hexagon/cpu-param.h +++ b/target/hexagon/cpu-param.h @@ -18,7 +18,11 @@ #ifndef HEXAGON_CPU_PARAM_H #define HEXAGON_CPU_PARAM_H +#ifdef CONFIG_USER_ONLY #define TARGET_PAGE_BITS 16 /* 64K pages */ +#else +#define TARGET_PAGE_BITS 12 /* 4K pages */ +#endif #define TARGET_PHYS_ADDR_SPACE_BITS 36 #define TARGET_VIRT_ADDR_SPACE_BITS 32 diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index e21a293725d8a..5304052acf2c2 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -28,6 +28,7 @@ #include "exec/gdbstub.h" #include "cpu_helper.h" #include "max.h" +#include "hex_mmu.h" #ifndef CONFIG_USER_ONLY #include "sys_macros.h" @@ -282,6 +283,18 @@ static void hexagon_restore_state_to_opc(CPUState *cs, cpu_env(cs)->gpr[HEX_REG_PC] = data[0]; } + +#ifndef CONFIG_USER_ONLY +static void mmu_reset(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + if (cs->cpu_index == 0) { + memset(env->hex_tlb, 0, sizeof(*env->hex_tlb)); + } +} +#endif + + static void hexagon_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -309,6 +322,7 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) if (cs->cpu_index == 0) { arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); } + mmu_reset(env); arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); @@ -341,6 +355,14 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) return; } +#ifndef CONFIG_USER_ONLY + HexagonCPU *cpu = HEXAGON_CPU(cs); + if (cpu->num_tlbs > MAX_TLB_ENTRIES) { + error_setg(errp, "Number of TLBs selected is invalid"); + return; + } +#endif + gdb_register_coprocessor(cs, hexagon_hvx_gdb_read_register, hexagon_hvx_gdb_write_register, 
gdb_find_static_feature("hexagon-hvx.xml"), 0); @@ -352,9 +374,12 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) #endif qemu_init_vcpu(cs); - cpu_reset(cs); #ifndef CONFIG_USER_ONLY CPUHexagonState *env = cpu_env(cs); + hex_mmu_realize(env); +#endif + cpu_reset(cs); +#ifndef CONFIG_USER_ONLY if (cs->cpu_index == 0) { env->g_sreg = g_new0(target_ulong, NUM_SREGS); } else { diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 84de8226f66a1..0811ca00a8aef 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -35,6 +35,8 @@ #error "Hexagon does not support system emulation" #endif +typedef struct CPUHexagonTLBContext CPUHexagonTLBContext; + #define NUM_PREGS 4 #define TOTAL_PER_THREAD_REGS 64 @@ -130,6 +132,7 @@ typedef struct CPUArchState { target_ulong tlb_lock_count; target_ulong k0_lock_count; target_ulong next_PC; + CPUHexagonTLBContext *hex_tlb; #endif target_ulong new_value_usr; @@ -176,12 +179,15 @@ struct ArchCPU { bool lldb_compat; target_ulong lldb_stack_adjust; bool short_circuit; +#ifndef CONFIG_USER_ONLY uint32_t num_tlbs; +#endif }; #include "cpu_bits.h" FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) +FIELD(TB_FLAGS, MMU_INDEX, 1, 3) G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, @@ -193,6 +199,7 @@ uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg); void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val); #endif +#include "exec/cpu-all.h" static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, uint64_t *cs_base, uint32_t *flags) { @@ -206,6 +213,14 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, if (*pc & PCALIGN_MASK) { hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0); } +#ifndef CONFIG_USER_ONLY + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, + cpu_mmu_index(env_cpu(env), false)); +#else + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, MMU_USER_IDX); +#endif + /* Publish the TB flags now that MMU_INDEX has been deposited. */ + *flags = hex_flags; } typedef
HexagonCPU ArchCPU; diff --git a/target/hexagon/hex_mmu.c b/target/hexagon/hex_mmu.c new file mode 100644 index 0000000000000..54c4ba2dbf9a2 --- /dev/null +++ b/target/hexagon/hex_mmu.c @@ -0,0 +1,528 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/qemu-print.h" +#include "cpu.h" +#include "system/cpus.h" +#include "internal.h" +#include "exec/exec-all.h" +#include "hex_mmu.h" +#include "macros.h" +#include "sys_macros.h" +#include "reg_fields.h" + +#define GET_TLB_FIELD(ENTRY, FIELD) \ + ((uint64_t)fEXTRACTU_BITS(ENTRY, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset)) + +/* PPD (physical page descriptor) */ +static inline uint64_t GET_PPD(uint64_t entry) +{ + return GET_TLB_FIELD(entry, PTE_PPD) | + (GET_TLB_FIELD(entry, PTE_PA35) << reg_field_info[PTE_PPD].width); +} + +#define NO_ASID (1 << 8) + +typedef enum { + PGSIZE_4K, + PGSIZE_16K, + PGSIZE_64K, + PGSIZE_256K, + PGSIZE_1M, + PGSIZE_4M, + PGSIZE_16M, + PGSIZE_64M, + PGSIZE_256M, + PGSIZE_1G, + NUM_PGSIZE_TYPES +} tlb_pgsize_t; + +static const char *pgsize_str[NUM_PGSIZE_TYPES] = { + "4K", + "16K", + "64K", + "256K", + "1M", + "4M", + "16M", + "64M", + "256M", + "1G", +}; + +#define INVALID_MASK 0xffffffffLL + +static const uint64_t encmask_2_mask[] = { + 0x0fffLL, /* 4k, 0000 */ + 0x3fffLL, /* 16k, 0001 */ + 0xffffLL, /* 64k, 0010 */ + 0x3ffffLL, /* 256k, 0011 */ + 0xfffffLL, /* 1m, 0100 */ + 0x3fffffLL, /* 4m, 0101 */ + 0xffffffLL, /* 16m, 0110 */ + 0x3ffffffLL, /* 64m, 0111 */ + 0xfffffffLL, /* 256m, 1000 */ + 0x3fffffffLL, /* 1g, 1001 */ + INVALID_MASK, /* RSVD, 1010 */ +}; + +/* + * @return the page size type from @a entry.
+ */ +static inline tlb_pgsize_t hex_tlb_pgsize_type(uint64_t entry) +{ + if (entry == 0) { + qemu_log_mask(CPU_LOG_MMU, "%s: Supplied TLB entry was 0!\n", __func__); + return 0; + } + tlb_pgsize_t size = ctz64(entry); + g_assert(size < NUM_PGSIZE_TYPES); + return size; +} + +/* + * @return the page size of @a entry, in bytes. + */ +static inline uint64_t hex_tlb_page_size_bytes(uint64_t entry) +{ + return 1ull << (TARGET_PAGE_BITS + 2 * hex_tlb_pgsize_type(entry)); +} + +static inline uint64_t hex_tlb_phys_page_num(uint64_t entry) +{ + uint32_t ppd = GET_PPD(entry); + return ppd >> 1; +} + +static inline uint64_t hex_tlb_phys_addr(uint64_t entry) +{ + uint64_t pagemask = encmask_2_mask[hex_tlb_pgsize_type(entry)]; + uint64_t pagenum = hex_tlb_phys_page_num(entry); + uint64_t PA = (pagenum << TARGET_PAGE_BITS) & (~pagemask); + return PA; +} + +static inline uint64_t hex_tlb_virt_addr(uint64_t entry) +{ + return (uint64_t)GET_TLB_FIELD(entry, PTE_VPN) << TARGET_PAGE_BITS; +} + +static bool hex_dump_mmu_entry(FILE *f, uint64_t entry) +{ + if (GET_TLB_FIELD(entry, PTE_V)) { + fprintf(f, "0x%016" PRIx64 ": ", entry); + uint64_t PA = hex_tlb_phys_addr(entry); + uint64_t VA = hex_tlb_virt_addr(entry); + fprintf(f, "V:%" PRId64 " G:%" PRId64 " A1:%" PRId64 " A0:%" PRId64, + GET_TLB_FIELD(entry, PTE_V), GET_TLB_FIELD(entry, PTE_G), + GET_TLB_FIELD(entry, PTE_ATR1), GET_TLB_FIELD(entry, PTE_ATR0)); + fprintf(f, " ASID:0x%02" PRIx64 " VA:0x%08" PRIx64, + GET_TLB_FIELD(entry, PTE_ASID), VA); + fprintf(f, + " X:%" PRId64 " W:%" PRId64 " R:%" PRId64 " U:%" PRId64 + " C:%" PRId64, + GET_TLB_FIELD(entry, PTE_X), GET_TLB_FIELD(entry, PTE_W), + GET_TLB_FIELD(entry, PTE_R), GET_TLB_FIELD(entry, PTE_U), + GET_TLB_FIELD(entry, PTE_C)); + fprintf(f, " PA:0x%09" PRIx64 " SZ:%s (0x%" PRIx64 ")", PA, + pgsize_str[hex_tlb_pgsize_type(entry)], + hex_tlb_page_size_bytes(entry)); + fprintf(f, "\n"); + return true; + } + + /* Not valid */ + return false; +} + +void dump_mmu(CPUHexagonState 
*env) +{ + int i; + + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + uint64_t entry = env->hex_tlb->entries[i]; + if (GET_TLB_FIELD(entry, PTE_V)) { + qemu_printf("0x%016" PRIx64 ": ", entry); + uint64_t PA = hex_tlb_phys_addr(entry); + uint64_t VA = hex_tlb_virt_addr(entry); + qemu_printf( + "V:%" PRId64 " G:%" PRId64 " A1:%" PRId64 " A0:%" PRId64, + GET_TLB_FIELD(entry, PTE_V), GET_TLB_FIELD(entry, PTE_G), + GET_TLB_FIELD(entry, PTE_ATR1), GET_TLB_FIELD(entry, PTE_ATR0)); + qemu_printf(" ASID:0x%02" PRIx64 " VA:0x%08" PRIx64, + GET_TLB_FIELD(entry, PTE_ASID), VA); + qemu_printf( + " X:%" PRId64 " W:%" PRId64 " R:%" PRId64 " U:%" PRId64 + " C:%" PRId64, + GET_TLB_FIELD(entry, PTE_X), GET_TLB_FIELD(entry, PTE_W), + GET_TLB_FIELD(entry, PTE_R), GET_TLB_FIELD(entry, PTE_U), + GET_TLB_FIELD(entry, PTE_C)); + qemu_printf(" PA:0x%09" PRIx64 " SZ:%s (0x%" PRIx64 ")", PA, + pgsize_str[hex_tlb_pgsize_type(entry)], + hex_tlb_page_size_bytes(entry)); + qemu_printf("\n"); + } + } +} + +static inline void hex_log_tlbw(uint32_t index, uint64_t entry) +{ + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + if (qemu_log_enabled()) { + FILE *logfile = qemu_log_trylock(); + if (logfile) { + fprintf(logfile, "tlbw[%03d]: ", index); + if (!hex_dump_mmu_entry(logfile, entry)) { + fprintf(logfile, "invalid\n"); + } + qemu_log_unlock(logfile); + } + } + } +} + +void hex_tlbw(CPUHexagonState *env, uint32_t index, uint64_t value) +{ + uint32_t myidx = fTLB_NONPOW2WRAP(fTLB_IDXMASK(index)); + bool old_entry_valid = GET_TLB_FIELD(env->hex_tlb->entries[myidx], PTE_V); + if (old_entry_valid && hexagon_cpu_mmu_enabled(env)) { + CPUState *cs = env_cpu(env); + + tlb_flush(cs); + } + env->hex_tlb->entries[myidx] = (value); + hex_log_tlbw(myidx, value); +} + +void hex_mmu_realize(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + if (cs->cpu_index == 0) { + env->hex_tlb = g_malloc0(sizeof(CPUHexagonTLBContext)); + } else { + CPUState *cpu0_s = NULL; + CPUHexagonState 
*env0 = NULL; + CPU_FOREACH(cpu0_s) { + assert(cpu0_s->cpu_index == 0); + env0 = &(HEXAGON_CPU(cpu0_s)->env); + break; + } + env->hex_tlb = env0->hex_tlb; + } +} + +void hex_mmu_on(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + qemu_log_mask(CPU_LOG_MMU, "Hexagon MMU turned on!\n"); + tlb_flush(cs); +} + +void hex_mmu_off(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + qemu_log_mask(CPU_LOG_MMU, "Hexagon MMU turned off!\n"); + tlb_flush(cs); +} + +void hex_mmu_mode_change(CPUHexagonState *env) +{ + qemu_log_mask(CPU_LOG_MMU, "Hexagon mode change!\n"); + CPUState *cs = env_cpu(env); + tlb_flush(cs); +} + +static inline bool hex_tlb_entry_match_noperm(uint64_t entry, uint32_t asid, + uint64_t VA) +{ + if (GET_TLB_FIELD(entry, PTE_V)) { + if (GET_TLB_FIELD(entry, PTE_G)) { + /* Global entry - ignore ASID */ + } else if (asid != NO_ASID) { + uint32_t tlb_asid = GET_TLB_FIELD(entry, PTE_ASID); + if (tlb_asid != asid) { + return false; + } + } + + uint64_t page_size = hex_tlb_page_size_bytes(entry); + uint64_t page_start = + ROUND_DOWN(hex_tlb_virt_addr(entry), page_size); + if (page_start <= VA && VA < page_start + page_size) { + return true; + } + } + return false; +} + +static inline void hex_tlb_entry_get_perm(CPUHexagonState *env, uint64_t entry, + MMUAccessType access_type, + int mmu_idx, int *prot, + int32_t *excp) +{ + g_assert_not_reached(); +} + +static inline bool hex_tlb_entry_match(CPUHexagonState *env, uint64_t entry, + uint8_t asid, target_ulong VA, + MMUAccessType access_type, hwaddr *PA, + int *prot, int *size, int32_t *excp, + int mmu_idx) +{ + if (hex_tlb_entry_match_noperm(entry, asid, VA)) { + hex_tlb_entry_get_perm(env, entry, access_type, mmu_idx, prot, excp); + *PA = hex_tlb_phys_addr(entry); + *size = hex_tlb_page_size_bytes(entry); + return true; + } + return false; +} + +bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA, + MMUAccessType access_type, hwaddr *PA, int *prot, + int *size, int32_t *excp, int
mmu_idx) +{ + *PA = 0; + *prot = 0; + *size = 0; + *excp = 0; + uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR); + uint8_t asid = GET_SSR_FIELD(SSR_ASID, ssr); + int i; + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + uint64_t entry = env->hex_tlb->entries[i]; + if (hex_tlb_entry_match(env, entry, asid, VA, access_type, PA, prot, + size, excp, mmu_idx)) { + return true; + } + } + return false; +} + +static uint32_t hex_tlb_lookup_by_asid(CPUHexagonState *env, uint32_t asid, + uint32_t VA) +{ + g_assert_not_reached(); +} + +/* Called from tlbp instruction */ +uint32_t hex_tlb_lookup(CPUHexagonState *env, uint32_t ssr, uint32_t VA) +{ + return hex_tlb_lookup_by_asid(env, GET_SSR_FIELD(SSR_ASID, ssr), VA); +} + +static bool hex_tlb_is_match(CPUHexagonState *env, + uint64_t entry1, uint64_t entry2, + bool consider_gbit) +{ + bool valid1 = GET_TLB_FIELD(entry1, PTE_V); + bool valid2 = GET_TLB_FIELD(entry2, PTE_V); + uint64_t size1 = hex_tlb_page_size_bytes(entry1); + uint64_t vaddr1 = ROUND_DOWN(hex_tlb_virt_addr(entry1), size1); + uint64_t size2 = hex_tlb_page_size_bytes(entry2); + uint64_t vaddr2 = ROUND_DOWN(hex_tlb_virt_addr(entry2), size2); + int asid1 = GET_TLB_FIELD(entry1, PTE_ASID); + int asid2 = GET_TLB_FIELD(entry2, PTE_ASID); + bool gbit1 = GET_TLB_FIELD(entry1, PTE_G); + bool gbit2 = GET_TLB_FIELD(entry2, PTE_G); + + if (!valid1 || !valid2) { + return false; + } + + if (((vaddr1 <= vaddr2) && (vaddr2 < (vaddr1 + size1))) || + ((vaddr2 <= vaddr1) && (vaddr1 < (vaddr2 + size2)))) { + if (asid1 == asid2) { + return true; + } + if ((consider_gbit && gbit1) || gbit2) { + return true; + } + } + return false; +} + +/* + * Return codes: + * 0 or positive index of match + * -1 multiple matches + * -2 no match + */ +int hex_tlb_check_overlap(CPUHexagonState *env, uint64_t entry, uint64_t index) +{ + int matches = 0; + int last_match = 0; + int i; + + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + if 
(hex_tlb_is_match(env, entry, env->hex_tlb->entries[i], false)) { + matches++; + last_match = i; + } + } + + if (matches == 1) { + return last_match; + } + if (matches == 0) { + return -2; + } + return -1; +} + +static inline void print_thread(const char *str, CPUState *cs) +{ + g_assert(bql_locked()); + CPUHexagonState *thread = cpu_env(cs); + bool is_stopped = cpu_is_stopped(cs); + int exe_mode = get_exe_mode(thread); + hex_lock_state_t lock_state = thread->tlb_lock_state; + qemu_log_mask(CPU_LOG_MMU, + "%s: threadId = %d: %s, exe_mode = %s, tlb_lock_state = %s\n", + str, + thread->threadId, + is_stopped ? "stopped" : "running", + exe_mode == HEX_EXE_MODE_OFF ? "off" : + exe_mode == HEX_EXE_MODE_RUN ? "run" : + exe_mode == HEX_EXE_MODE_WAIT ? "wait" : + exe_mode == HEX_EXE_MODE_DEBUG ? "debug" : + "unknown", + lock_state == HEX_LOCK_UNLOCKED ? "unlocked" : + lock_state == HEX_LOCK_WAITING ? "waiting" : + lock_state == HEX_LOCK_OWNER ? "owner" : + "unknown"); +} + +static inline void print_thread_states(const char *str) +{ + CPUState *cs; + CPU_FOREACH(cs) { + print_thread(str, cs); + } +} + +void hex_tlb_lock(CPUHexagonState *env) +{ + qemu_log_mask(CPU_LOG_MMU, "hex_tlb_lock: %d\n", env->threadId); + BQL_LOCK_GUARD(); + g_assert((env->tlb_lock_count == 0) || (env->tlb_lock_count == 1)); + + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + uint8_t tlb_lock = GET_SYSCFG_FIELD(SYSCFG_TLBLOCK, syscfg); + if (tlb_lock) { + if (env->tlb_lock_state == HEX_LOCK_QUEUED) { + env->next_PC += 4; + env->tlb_lock_count++; + env->tlb_lock_state = HEX_LOCK_OWNER; + SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 1); + return; + } + if (env->tlb_lock_state == HEX_LOCK_OWNER) { + qemu_log_mask(CPU_LOG_MMU | LOG_GUEST_ERROR, + "Double tlblock at PC: 0x%x, thread may hang\n", + env->next_PC); + env->next_PC += 4; + CPUState *cs = env_cpu(env); + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + return; + } + env->tlb_lock_state = HEX_LOCK_WAITING; + CPUState *cs = env_cpu(env); + 
cpu_interrupt(cs, CPU_INTERRUPT_HALT); + } else { + env->next_PC += 4; + env->tlb_lock_count++; + env->tlb_lock_state = HEX_LOCK_OWNER; + SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 1); + } + + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + qemu_log_mask(CPU_LOG_MMU, "Threads after hex_tlb_lock:\n"); + print_thread_states("\tThread"); + } +} + +void hex_tlb_unlock(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + g_assert((env->tlb_lock_count == 0) || (env->tlb_lock_count == 1)); + + /* Nothing to do if the TLB isn't locked by this thread */ + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + uint8_t tlb_lock = GET_SYSCFG_FIELD(SYSCFG_TLBLOCK, syscfg); + if ((tlb_lock == 0) || + (env->tlb_lock_state != HEX_LOCK_OWNER)) { + qemu_log_mask(LOG_GUEST_ERROR, + "thread %d attempted to tlbunlock without having the " + "lock, tlb_lock state = %d\n", + env->threadId, env->tlb_lock_state); + g_assert(env->tlb_lock_state != HEX_LOCK_WAITING); + return; + } + + env->tlb_lock_count--; + env->tlb_lock_state = HEX_LOCK_UNLOCKED; + SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 0); + + /* Look for a thread to unlock */ + unsigned int this_threadId = env->threadId; + CPUHexagonState *unlock_thread = NULL; + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *thread = cpu_env(cs); + + /* + * The hardware implements round-robin fairness, so we look for threads + * starting at env->threadId + 1 and incrementing modulo the number of + * threads. + * + * To implement this, we check if thread is a earlier in the modulo + * sequence than unlock_thread. 
+ * if unlock thread is higher than this thread + * thread must be between this thread and unlock_thread + * else + * thread higher than this thread is ahead of unlock_thread + * thread must be lower than unlock thread + */ + if (thread->tlb_lock_state == HEX_LOCK_WAITING) { + if (!unlock_thread) { + unlock_thread = thread; + } else if (unlock_thread->threadId > this_threadId) { + if (this_threadId < thread->threadId && + thread->threadId < unlock_thread->threadId) { + unlock_thread = thread; + } + } else { + if (thread->threadId > this_threadId) { + unlock_thread = thread; + } + if (thread->threadId < unlock_thread->threadId) { + unlock_thread = thread; + } + } + } + } + if (unlock_thread) { + cs = env_cpu(unlock_thread); + print_thread("\tWaiting thread found", cs); + unlock_thread->tlb_lock_state = HEX_LOCK_QUEUED; + SET_SYSCFG_FIELD(unlock_thread, SYSCFG_TLBLOCK, 1); + cpu_interrupt(cs, CPU_INTERRUPT_TLB_UNLOCK); + } + + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + qemu_log_mask(CPU_LOG_MMU, "Threads after hex_tlb_unlock:\n"); + print_thread_states("\tThread"); + } + +} + diff --git a/target/hexagon/hex_mmu.h b/target/hexagon/hex_mmu.h new file mode 100644 index 0000000000000..fae8aefcac1db --- /dev/null +++ b/target/hexagon/hex_mmu.h @@ -0,0 +1,30 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_MMU_H +#define HEXAGON_MMU_H + +#include "max.h" + +struct CPUHexagonTLBContext { + uint64_t entries[MAX_TLB_ENTRIES]; +}; + +extern void hex_tlbw(CPUHexagonState *env, uint32_t index, uint64_t value); +extern uint32_t hex_tlb_lookup(CPUHexagonState *env, uint32_t ssr, uint32_t VA); +extern void hex_mmu_realize(CPUHexagonState *env); +extern void hex_mmu_on(CPUHexagonState *env); +extern void hex_mmu_off(CPUHexagonState *env); +extern void hex_mmu_mode_change(CPUHexagonState *env); +extern bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA, + MMUAccessType access_type, hwaddr *PA, int *prot, + int *size, int32_t *excp, int mmu_idx); +extern int hex_tlb_check_overlap(CPUHexagonState *env, uint64_t entry, + uint64_t index); +extern void hex_tlb_lock(CPUHexagonState *env); +extern void hex_tlb_unlock(CPUHexagonState *env); +void dump_mmu(CPUHexagonState *env); +#endif diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index c24c36092161f..120cfde7b9588 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -40,6 +40,9 @@ void G_NORETURN do_raise_exception(CPUHexagonState *env, target_ulong PC, uintptr_t retaddr); +#define hexagon_cpu_mmu_enabled(env) \ + GET_SYSCFG_FIELD(SYSCFG_MMUEN, arch_get_system_reg(env, HEX_SREG_SYSCFG)) + #ifndef CONFIG_USER_ONLY extern const VMStateDescription vmstate_hexagon_cpu; #endif diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c index 9fdafb4573ddf..fcdbacf9fd566 100644 --- a/target/hexagon/machine.c +++ b/target/hexagon/machine.c @@ -7,6 +7,33 @@ #include "qemu/osdep.h" #include "migration/cpu.h" #include "cpu.h" +#include "hex_mmu.h" + +static int get_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + CPUHexagonTLBContext *tlb = pv; + for (int i = 0; i < ARRAY_SIZE(tlb->entries); i++) { + tlb->entries[i] = qemu_get_be64(f); + } + return 0; +} + +static int 
put_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + CPUHexagonTLBContext *tlb = pv; + for (int i = 0; i < ARRAY_SIZE(tlb->entries); i++) { + qemu_put_be64(f, tlb->entries[i]); + } + return 0; +} + +const VMStateInfo vmstate_info_hex_tlb_ptr = { + .name = "hex_tlb_pointer", + .get = get_hex_tlb_ptr, + .put = put_hex_tlb_ptr, +}; const VMStateDescription vmstate_hexagon_cpu = { @@ -27,6 +54,9 @@ const VMStateDescription vmstate_hexagon_cpu = { VMSTATE_UINTTL(env.threadId, HexagonCPU), VMSTATE_UINTTL(env.cause_code, HexagonCPU), VMSTATE_UINTTL(env.wait_next_pc, HexagonCPU), + VMSTATE_POINTER(env.hex_tlb, HexagonCPU, 0, + vmstate_info_hex_tlb_ptr, CPUHexagonTLBContext *), + VMSTATE_END_OF_LIST() }, }; diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index 3ec53010fa020..aa729a3683f15 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -273,7 +273,8 @@ hexagon_ss.add(files( # idef-generated-enabled-instructions # idef_parser_enabled = get_option('hexagon_idef_parser') -if idef_parser_enabled and 'hexagon-linux-user' in target_dirs +if idef_parser_enabled and ('hexagon-linux-user' in target_dirs or + 'hexagon-softmmu' in target_dirs) idef_parser_input_generated = custom_target( 'idef_parser_input.h.inc', output: 'idef_parser_input.h.inc', diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 71c137be308fe..9119e42ff77fa 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -944,7 +944,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs)); uint32_t hex_flags = dcbase->tb->flags; - ctx->mem_idx = MMU_USER_IDX; + ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX); ctx->num_packets = 0; ctx->num_insns = 0; ctx->num_hvx_insns = 0; From 1c2076a53607c2ba0cf7433c5908aaa09c9bb5c9 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 
14:23:29 -0700 Subject: [PATCH 038/126] target/hexagon: Add IRQ events Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_bits.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index 5d26815eb9bcf..b559a7ba88091 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -26,6 +26,28 @@ enum hex_event { HEX_EVENT_NONE = -1, HEX_EVENT_TRAP0 = 0x008, + HEX_EVENT_FETCH_NO_UPAGE = 0x012, + HEX_EVENT_INVALID_PACKET = 0x015, + HEX_EVENT_INVALID_OPCODE = 0x015, + HEX_EVENT_PC_NOT_ALIGNED = 0x01e, + HEX_EVENT_PRIV_NO_UREAD = 0x024, + HEX_EVENT_PRIV_NO_UWRITE = 0x025, + HEX_EVENT_INT0 = 0x10, + HEX_EVENT_INT1 = 0x11, + HEX_EVENT_INT2 = 0x12, + HEX_EVENT_INT3 = 0x13, + HEX_EVENT_INT4 = 0x14, + HEX_EVENT_INT5 = 0x15, + HEX_EVENT_INT6 = 0x16, + HEX_EVENT_INT7 = 0x17, + HEX_EVENT_INT8 = 0x18, + HEX_EVENT_INT9 = 0x19, + HEX_EVENT_INTA = 0x1a, + HEX_EVENT_INTB = 0x1b, + HEX_EVENT_INTC = 0x1c, + HEX_EVENT_INTD = 0x1d, + HEX_EVENT_INTE = 0x1e, + HEX_EVENT_INTF = 0x1f, }; enum hex_cause { @@ -39,6 +61,18 @@ enum hex_cause { HEX_CAUSE_PRIV_NO_UWRITE = 0x025, HEX_CAUSE_PRIV_USER_NO_GINSN = 0x01a, HEX_CAUSE_PRIV_USER_NO_SINSN = 0x01b, + HEX_CAUSE_INT0 = 0x0c0, + HEX_CAUSE_INT1 = 0x0c1, + HEX_CAUSE_INT2 = 0x0c2, + HEX_CAUSE_INT3 = 0x0c3, + HEX_CAUSE_INT4 = 0x0c4, + HEX_CAUSE_INT5 = 0x0c5, + HEX_CAUSE_INT6 = 0x0c6, + HEX_CAUSE_INT7 = 0x0c7, + HEX_CAUSE_VIC0 = 0x0c2, + HEX_CAUSE_VIC1 = 0x0c3, + HEX_CAUSE_VIC2 = 0x0c4, + HEX_CAUSE_VIC3 = 0x0c5, }; enum data_cache_state { From 0423a9551be0d9b1e1b201c2fa5df943e5506fe3 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 14:23:48 -0700 Subject: [PATCH 039/126] target/hexagon: Add clear_wait_mode() definition Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 10 ++++++++++ target/hexagon/cpu_helper.h | 1 + 2 files changed, 11 insertions(+) 
diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 9f4fc716e33c6..9373e491d6f04 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -74,6 +74,16 @@ void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old) g_assert_not_reached(); } +void clear_wait_mode(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl); + thread_wait_mask &= ~(0x1 << env->threadId); + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask); +} + int get_exe_mode(CPUHexagonState *env) { g_assert_not_reached(); diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index e0c0c037a6eaf..6f0c6697ad13a 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -16,6 +16,7 @@ void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t); void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t); void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old); int get_exe_mode(CPUHexagonState *env); +void clear_wait_mode(CPUHexagonState *env); static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) From 113b9e24b76eebec39ff29857ae976c7c1ac70c0 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 14:24:13 -0700 Subject: [PATCH 040/126] target/hexagon: Define f{S,G}ET_FIELD macros Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/macros.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index b0e9610d98d55..afbbe8e265241 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -649,6 +649,16 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) reg_field_info[FIELD].width, \ reg_field_info[FIELD].offset) +#define fGET_FIELD(VAL, FIELD) \ + 
fEXTRACTU_BITS(VAL, \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset) +#define fSET_FIELD(VAL, FIELD, NEWVAL) \ + fINSERT_BITS(VAL, \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, \ + (NEWVAL)) + #ifdef QEMU_GENERATE #define fDCZEROA(REG) \ do { \ From f19a928adf2df8036d7b4a38253ad4d2752510a8 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 14:25:26 -0700 Subject: [PATCH 041/126] target/hexagon: Add hex_interrupts support Co-authored-by: Taylor Simpson <ltaylorsimpson@gmail.com> Co-authored-by: Sid Manning <sidneym@quicinc.com> Co-authored-by: Michael Lambert <mlambert@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 2 + target/hexagon/cpu.h | 1 + target/hexagon/hex_interrupts.c | 324 ++++++++++++++++++++++++++++++++ target/hexagon/hex_interrupts.h | 15 ++ 4 files changed, 342 insertions(+) create mode 100644 target/hexagon/hex_interrupts.c create mode 100644 target/hexagon/hex_interrupts.h diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 5304052acf2c2..87042492cab41 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -59,6 +59,8 @@ static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) static const Property hexagon_cpu_properties[] = { #if !defined(CONFIG_USER_ONLY) DEFINE_PROP_UINT32("jtlb-entries", HexagonCPU, num_tlbs, MAX_TLB_ENTRIES), + DEFINE_PROP_UINT32("l2vic-base-addr", HexagonCPU, l2vic_base_addr, + 0xffffffffULL), #endif DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 0811ca00a8aef..9a93b53336296 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -181,6 +181,7 @@ struct ArchCPU { bool short_circuit; #ifndef CONFIG_USER_ONLY uint32_t num_tlbs; + uint32_t l2vic_base_addr; #endif }; diff --git a/target/hexagon/hex_interrupts.c 
b/target/hexagon/hex_interrupts.c new file mode 100644 index 0000000000000..fd00bcfb9a573 --- /dev/null +++ b/target/hexagon/hex_interrupts.c @@ -0,0 +1,324 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "hex_interrupts.h" +#include "macros.h" +#include "sys_macros.h" +#include "system/cpus.h" + +static bool hex_is_qualified_for_int(CPUHexagonState *env, int int_num); + +static bool get_syscfg_gie(CPUHexagonState *env) +{ + target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + return GET_SYSCFG_FIELD(SYSCFG_GIE, syscfg); +} + +static bool get_ssr_ex(CPUHexagonState *env) +{ + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + return GET_SSR_FIELD(SSR_EX, ssr); +} + +static bool get_ssr_ie(CPUHexagonState *env) +{ + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + return GET_SSR_FIELD(SSR_IE, ssr); +} + +/* Do these together so we only have to call hexagon_modify_ssr once */ +static void set_ssr_ex_cause(CPUHexagonState *env, int ex, uint32_t cause) +{ + target_ulong old = arch_get_system_reg(env, HEX_SREG_SSR); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_EX, ex); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_CAUSE, cause); + target_ulong new = arch_get_system_reg(env, HEX_SREG_SSR); + hexagon_modify_ssr(env, new, old); +} + +static bool get_iad_bit(CPUHexagonState *env, int int_num) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong iad = GET_FIELD(IPENDAD_IAD, ipendad); + return extract32(iad, int_num, 1); +} + +static void set_iad_bit(CPUHexagonState *env, int int_num, int val) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong iad = GET_FIELD(IPENDAD_IAD, ipendad); + iad = deposit32(iad, int_num, 1, val); + fSET_FIELD(ipendad, IPENDAD_IAD, iad); + 
arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static uint32_t get_ipend(CPUHexagonState *env) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + return GET_FIELD(IPENDAD_IPEND, ipendad); +} + +static inline bool get_ipend_bit(CPUHexagonState *env, int int_num) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + return extract32(ipend, int_num, 1); +} + +static void clear_ipend(CPUHexagonState *env, uint32_t mask) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + ipend &= ~mask; + fSET_FIELD(ipendad, IPENDAD_IPEND, ipend); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static void set_ipend(CPUHexagonState *env, uint32_t mask) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + ipend |= mask; + fSET_FIELD(ipendad, IPENDAD_IPEND, ipend); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static void set_ipend_bit(CPUHexagonState *env, int int_num, int val) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + ipend = deposit32(ipend, int_num, 1, val); + fSET_FIELD(ipendad, IPENDAD_IPEND, ipend); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static bool get_imask_bit(CPUHexagonState *env, int int_num) +{ + target_ulong imask = arch_get_system_reg(env, HEX_SREG_IMASK); + return extract32(imask, int_num, 1); +} + +static uint32_t get_prio(CPUHexagonState *env) +{ + target_ulong stid = arch_get_system_reg(env, HEX_SREG_STID); + return extract32(stid, reg_field_info[STID_PRIO].offset, + reg_field_info[STID_PRIO].width); +} + +static void set_elr(CPUHexagonState *env, target_ulong val) +{ + arch_set_system_reg(env, HEX_SREG_ELR, val); +} + +static bool get_schedcfgen(CPUHexagonState 
*env) +{ + target_ulong schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG); + return extract32(schedcfg, reg_field_info[SCHEDCFG_EN].offset, + reg_field_info[SCHEDCFG_EN].width); +} + +static bool is_lowest_prio(CPUHexagonState *env, int int_num) +{ + uint32_t my_prio = get_prio(env); + CPUState *cs; + + CPU_FOREACH(cs) { + CPUHexagonState *hex_env = cpu_env(cs); + if (!hex_is_qualified_for_int(hex_env, int_num)) { + continue; + } + + /* Note that lower values indicate *higher* priority */ + if (my_prio < get_prio(hex_env)) { + return false; + } + } + return true; +} + +static bool hex_is_qualified_for_int(CPUHexagonState *env, int int_num) +{ + bool syscfg_gie = get_syscfg_gie(env); + bool iad = get_iad_bit(env, int_num); + bool ssr_ie = get_ssr_ie(env); + bool ssr_ex = get_ssr_ex(env); + bool imask = get_imask_bit(env, int_num); + + return syscfg_gie && !iad && ssr_ie && !ssr_ex && !imask; +} + +static void clear_pending_locks(CPUHexagonState *env) +{ + g_assert(bql_locked()); + if (env->k0_lock_state == HEX_LOCK_WAITING) { + env->k0_lock_state = HEX_LOCK_UNLOCKED; + } + if (env->tlb_lock_state == HEX_LOCK_WAITING) { + env->tlb_lock_state = HEX_LOCK_UNLOCKED; + } +} + +static bool should_not_exec(CPUHexagonState *env) +{ + return (get_exe_mode(env) == HEX_EXE_MODE_WAIT); +} + +static void restore_state(CPUHexagonState *env, bool int_accepted) +{ + CPUState *cs = env_cpu(env); + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI); + if (!int_accepted && should_not_exec(env)) { + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + } +} + +static void hex_accept_int(CPUHexagonState *env, int int_num) +{ + CPUState *cs = env_cpu(env); + target_ulong evb = arch_get_system_reg(env, HEX_SREG_EVB); + const int exe_mode = get_exe_mode(env); + const bool in_wait_mode = exe_mode == HEX_EXE_MODE_WAIT; + + set_ipend_bit(env, int_num, 0); + set_iad_bit(env, int_num, 1); + set_ssr_ex_cause(env, 1, HEX_CAUSE_INT0 | int_num); + cs->exception_index = HEX_EVENT_INT0 + int_num; 
+ env->cause_code = HEX_EVENT_INT0 + int_num; + clear_pending_locks(env); + if (in_wait_mode) { + qemu_log_mask(CPU_LOG_INT, + "%s: thread %d resuming, exiting WAIT mode\n", + __func__, env->threadId); + set_elr(env, env->wait_next_pc); + clear_wait_mode(env); + cs->halted = false; + } else if (env->k0_lock_state == HEX_LOCK_WAITING) { + g_assert_not_reached(); + } else { + set_elr(env, env->gpr[HEX_REG_PC]); + } + env->gpr[HEX_REG_PC] = evb | (cs->exception_index << 2); + if (get_ipend(env) == 0) { + restore_state(env, true); + } +} + + +bool hex_check_interrupts(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + bool int_handled = false; + bool ssr_ex = get_ssr_ex(env); + int max_ints = 32; + bool schedcfgen; + + /* Early exit if nothing pending */ + if (get_ipend(env) == 0) { + restore_state(env, false); + return false; + } + + BQL_LOCK_GUARD(); + /* Only check priorities when schedcfgen is set */ + schedcfgen = get_schedcfgen(env); + for (int i = 0; i < max_ints; i++) { + if (!get_iad_bit(env, i) && get_ipend_bit(env, i)) { + qemu_log_mask(CPU_LOG_INT, + "%s: thread[%d] pc = 0x%x found int %d\n", __func__, + env->threadId, env->gpr[HEX_REG_PC], i); + if (hex_is_qualified_for_int(env, i) && + (!schedcfgen || is_lowest_prio(env, i))) { + qemu_log_mask(CPU_LOG_INT, "%s: thread[%d] int %d handled_\n", + __func__, env->threadId, i); + hex_accept_int(env, i); + int_handled = true; + break; + } + bool syscfg_gie = get_syscfg_gie(env); + bool iad = get_iad_bit(env, i); + bool ssr_ie = get_ssr_ie(env); + bool imask = get_imask_bit(env, i); + + qemu_log_mask(CPU_LOG_INT, + "%s: thread[%d] int %d not handled, qualified: %d, " + "schedcfg_en: %d, low prio %d\n", + __func__, env->threadId, i, + hex_is_qualified_for_int(env, i), schedcfgen, + is_lowest_prio(env, i)); + + qemu_log_mask(CPU_LOG_INT, + "%s: thread[%d] int %d not handled, GIE %d, iad %d, " + "SSR:IE %d, SSR:EX: %d, imask bit %d\n", + __func__, env->threadId, i, syscfg_gie, iad, ssr_ie, + ssr_ex, imask); + 
} + } + + /* + * If we didn't handle the interrupt and it wasn't + * because we were in EX state, then we won't be able + * to execute the interrupt on this CPU unless something + * changes in the CPU state. Clear the interrupt_request bits + * while preserving the IPEND bits, and we can re-assert the + * interrupt_request bit(s) when we execute one of those instructions. + */ + if (!int_handled && !ssr_ex) { + restore_state(env, int_handled); + } else if (int_handled) { + assert(!cs->halted); + } + + return int_handled; +} + +void hex_clear_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type) +{ + if (mask == 0) { + return; + } + + /* + * Notify all CPUs that the interrupt has happened + */ + BQL_LOCK_GUARD(); + clear_ipend(env, mask); + hex_interrupt_update(env); +} + +void hex_raise_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type) +{ + g_assert(bql_locked()); + if (mask == 0) { + return; + } + + /* + * Notify all CPUs that the interrupt has happened + */ + set_ipend(env, mask); + hex_interrupt_update(env); +} + +void hex_interrupt_update(CPUHexagonState *env) +{ + CPUState *cs; + + g_assert(bql_locked()); + if (get_ipend(env) != 0) { + CPU_FOREACH(cs) { + CPUHexagonState *hex_env = cpu_env(cs); + const int exe_mode = get_exe_mode(hex_env); + if (exe_mode != HEX_EXE_MODE_OFF) { + cs->interrupt_request |= CPU_INTERRUPT_SWI; + cpu_resume(cs); + } + } + } +} diff --git a/target/hexagon/hex_interrupts.h b/target/hexagon/hex_interrupts.h new file mode 100644 index 0000000000000..17a243946ce2f --- /dev/null +++ b/target/hexagon/hex_interrupts.h @@ -0,0 +1,15 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEX_INTERRUPTS_H +#define HEX_INTERRUPTS_H + +bool hex_check_interrupts(CPUHexagonState *env); +void hex_clear_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type); +void hex_raise_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type); +void hex_interrupt_update(CPUHexagonState *env); + +#endif From 240f05781f79b6093d954e5f46a7f55b470cf6d7 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 19:16:02 -0700 Subject: [PATCH 042/126] target/hexagon: Implement ciad helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ciad is the clear interrupt auto disable instruction. This instruction is defined in the Qualcomm Hexagon V71 Programmer's Reference Manual - https://docs.qualcomm.com/bundle/publicresource/80-N2040-51_REV_AB_Hexagon_V71_ProgrammerS_Reference_Manual.pdf See §11.9.2 SYSTEM MONITOR. Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 39 ++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index fd9caafefc49f..b28a18adf6182 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -34,6 +34,11 @@ #include "op_helper.h" #include "cpu_helper.h" #include "translate.h" +#ifndef CONFIG_USER_ONLY +#include "hex_mmu.h" +#include "hw/intc/l2vic.h" +#include "hex_interrupts.h" +#endif #define SF_BIAS 127 #define SF_MANTBITS 23 @@ -1338,9 +1343,36 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV) } #ifndef CONFIG_USER_ONLY +static void hexagon_set_vid(CPUHexagonState *env, uint32_t offset, int val) +{ + g_assert((offset == L2VIC_VID_0) || (offset == L2VIC_VID_1)); + CPUState *cs = env_cpu(env); + HexagonCPU *cpu = HEXAGON_CPU(cs); + const hwaddr pend_mem = cpu->l2vic_base_addr + offset; + cpu_physical_memory_write(pend_mem, &val, 
sizeof(val)); +} + +static void hexagon_clear_last_irq(CPUHexagonState *env, uint32_t offset) +{ + /* + * Currently the l2vic is the only attached controller and it uses vid0; + * revisit the assert in hexagon_set_vid() if another is added + */ + hexagon_set_vid(env, offset, L2VIC_CIAD_INSTRUCTION); +} + void HELPER(ciad)(CPUHexagonState *env, uint32_t mask) { - g_assert_not_reached(); + uint32_t ipendad; + uint32_t iad; + + BQL_LOCK_GUARD(); + ipendad = READ_SREG(HEX_SREG_IPENDAD); + iad = fGET_FIELD(ipendad, IPENDAD_IAD); + fSET_FIELD(ipendad, IPENDAD_IAD, iad & ~(mask)); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); + hexagon_clear_last_irq(env, L2VIC_VID_0); + hex_interrupt_update(env); } void HELPER(siad)(CPUHexagonState *env, uint32_t mask) @@ -1416,11 +1448,6 @@ static void modify_syscfg(CPUHexagonState *env, uint32_t val) g_assert_not_reached(); } -static void hexagon_set_vid(CPUHexagonState *env, uint32_t offset, int val) -{ - g_assert_not_reached(); -} - static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid) { g_assert_not_reached(); From 5395f01a99eca5170518a4a979ea038a5252a7bf Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 19:19:43 -0700 Subject: [PATCH 043/126] target/hexagon: Implement {c,}swi helpers {c,}swi are the "software interrupt"/"Cancel pending interrupts" instructions.
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index b28a18adf6182..fed5cc2715cb8 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1382,12 +1382,14 @@ void HELPER(siad)(CPUHexagonState *env, uint32_t mask) void HELPER(swi)(CPUHexagonState *env, uint32_t mask) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + hex_raise_interrupts(env, mask, CPU_INTERRUPT_SWI); } void HELPER(cswi)(CPUHexagonState *env, uint32_t mask) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + hex_clear_interrupts(env, mask, CPU_INTERRUPT_SWI); } void HELPER(iassignw)(CPUHexagonState *env, uint32_t src) From aa88ca198e74f245fdb98cf760d93275210ab9d1 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 19:22:54 -0700 Subject: [PATCH 044/126] target/hexagon: Implement iassign{r,w} helpers iassign{r,w} are the "Interrupt to thread assignment {read,write}" instructions. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 48 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index fed5cc2715cb8..ded6c80d62373 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1394,12 +1394,56 @@ void HELPER(cswi)(CPUHexagonState *env, uint32_t mask) void HELPER(iassignw)(CPUHexagonState *env, uint32_t src) { - g_assert_not_reached(); + uint32_t modectl; + uint32_t thread_enabled_mask; + CPUState *cpu; + + BQL_LOCK_GUARD(); + modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + + CPU_FOREACH(cpu) { + CPUHexagonState *thread_env = &(HEXAGON_CPU(cpu)->env); + uint32_t thread_id_mask = 0x1 << thread_env->threadId; + if (thread_enabled_mask & thread_id_mask) { + uint32_t imask = arch_get_system_reg(thread_env, HEX_SREG_IMASK); + uint32_t intbitpos = (src >> 16) & 0xF; + uint32_t val = (src >> thread_env->threadId) & 0x1; + imask = deposit32(imask, intbitpos, 1, val); + arch_set_system_reg(thread_env, HEX_SREG_IMASK, imask); + + qemu_log_mask(CPU_LOG_INT, "%s: thread " TARGET_FMT_ld + ", new imask 0x%" PRIx32 "\n", __func__, + thread_env->threadId, imask); + } + } + hex_interrupt_update(env); } uint32_t HELPER(iassignr)(CPUHexagonState *env, uint32_t src) { - g_assert_not_reached(); + uint32_t modectl; + uint32_t thread_enabled_mask; + uint32_t intbitpos; + uint32_t dest_reg; + CPUState *cpu; + + BQL_LOCK_GUARD(); + modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + /* src fields are in same position as modectl, but mean different things */ + intbitpos = GET_FIELD(MODECTL_W, src); + dest_reg = 0; + CPU_FOREACH(cpu) { + CPUHexagonState *thread_env = &(HEXAGON_CPU(cpu)->env); + uint32_t thread_id_mask = 0x1 << thread_env->threadId; + if (thread_enabled_mask & thread_id_mask) 
{ + uint32_t imask = arch_get_system_reg(thread_env, HEX_SREG_IMASK); + dest_reg |= ((imask >> intbitpos) & 0x1) << thread_env->threadId; + } + } + + return dest_reg; } void HELPER(start)(CPUHexagonState *env, uint32_t imask) From 84162701bcc3c18827aa1b6578621d5c0d3e9bd4 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 20:39:01 -0700 Subject: [PATCH 045/126] target/hexagon: Implement start/stop helpers Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 14 +++++- target/hexagon/cpu.h | 3 ++ target/hexagon/cpu_bits.h | 1 + target/hexagon/cpu_helper.c | 94 +++++++++++++++++++++++++++++++++++++ target/hexagon/cpu_helper.h | 3 ++ target/hexagon/op_helper.c | 4 +- 6 files changed, 116 insertions(+), 3 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 87042492cab41..20387dcf72715 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -32,6 +32,7 @@ #ifndef CONFIG_USER_ONLY #include "sys_macros.h" +#include "qemu/main-loop.h" #endif static void hexagon_v66_cpu_init(Object *obj) { } @@ -61,6 +62,7 @@ static const Property hexagon_cpu_properties[] = { DEFINE_PROP_UINT32("jtlb-entries", HexagonCPU, num_tlbs, MAX_TLB_ENTRIES), DEFINE_PROP_UINT32("l2vic-base-addr", HexagonCPU, l2vic_base_addr, 0xffffffffULL), + DEFINE_PROP_UINT32("hvx-contexts", HexagonCPU, hvx_contexts, 0), #endif DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, @@ -294,8 +296,17 @@ static void mmu_reset(CPUHexagonState *env) memset(env->hex_tlb, 0, sizeof(*env->hex_tlb)); } } -#endif +void hexagon_cpu_soft_reset(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + arch_set_system_reg(env, HEX_SREG_SSR, 0); + hexagon_ssr_set_cause(env, HEX_CAUSE_RESET); + + target_ulong evb = arch_get_system_reg(env, HEX_SREG_EVB); + arch_set_thread_reg(env, HEX_REG_PC, evb); +} +#endif static void hexagon_cpu_reset_hold(Object *obj, 
ResetType type) { @@ -326,6 +337,7 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) } mmu_reset(env); arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); + hexagon_cpu_soft_reset(env); memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); env->threadId = cs->cpu_index; diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 9a93b53336296..0f30b2791f2bb 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -45,6 +45,7 @@ typedef struct CPUHexagonTLBContext CPUHexagonTLBContext; #define REG_WRITES_MAX 32 #define PRED_WRITES_MAX 5 /* 4 insns + endloop */ #define VSTORES_MAX 2 +#define VECTOR_UNIT_MAX 8 #ifndef CONFIG_USER_ONLY #define CPU_INTERRUPT_SWI CPU_INTERRUPT_TGT_INT_0 @@ -182,6 +183,7 @@ struct ArchCPU { #ifndef CONFIG_USER_ONLY uint32_t num_tlbs; uint32_t l2vic_base_addr; + uint32_t hvx_contexts; #endif }; @@ -198,6 +200,7 @@ G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg); uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg); void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val); +void hexagon_cpu_soft_reset(CPUHexagonState *env); #endif #include "exec/cpu-all.h" diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index b559a7ba88091..610094a759c57 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -52,6 +52,7 @@ enum hex_event { enum hex_cause { HEX_CAUSE_NONE = -1, + HEX_CAUSE_RESET = 0x000, HEX_CAUSE_TRAP0 = 0x172, HEX_CAUSE_FETCH_NO_UPAGE = 0x012, HEX_CAUSE_INVALID_PACKET = 0x015, diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 9373e491d6f04..e151c6335a6cc 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -84,8 +84,102 @@ void clear_wait_mode(CPUHexagonState *env) SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask); } +void 
hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause) +{ + g_assert(bql_locked()); + + const uint32_t old = arch_get_system_reg(env, HEX_SREG_SSR); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_EX, 1); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_CAUSE, cause); + const uint32_t new = arch_get_system_reg(env, HEX_SREG_SSR); + + hexagon_modify_ssr(env, new, old); +} + + int get_exe_mode(CPUHexagonState *env) { g_assert_not_reached(); } + +static void set_enable_mask(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + thread_enabled_mask |= 0x1 << env->threadId; + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_E, thread_enabled_mask); +} + +static uint32_t clear_enable_mask(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + thread_enabled_mask &= ~(0x1 << env->threadId); + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_E, thread_enabled_mask); + return thread_enabled_mask; +} +static void do_start_thread(CPUState *cs, run_on_cpu_data tbd) +{ + BQL_LOCK_GUARD(); + + CPUHexagonState *env = cpu_env(cs); + + hexagon_cpu_soft_reset(env); + + set_enable_mask(env); + + cs->halted = 0; + cs->exception_index = HEX_EVENT_NONE; + cpu_resume(cs); +} + +void hexagon_start_threads(CPUHexagonState *current_env, uint32_t mask) +{ + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *env = cpu_env(cs); + if (!(mask & (0x1 << env->threadId))) { + continue; + } + + if (current_env->threadId != env->threadId) { + async_safe_run_on_cpu(cs, do_start_thread, RUN_ON_CPU_NULL); + } + } +} + +/* + * When we have all threads stopped, the return + * value to the shell is register 2 from thread 0. 
+ */ +static target_ulong get_thread0_r2(void) +{ + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *thread = cpu_env(cs); + if (thread->threadId == 0) { + return thread->gpr[2]; + } + } + g_assert_not_reached(); +} + +void hexagon_stop_thread(CPUHexagonState *env) + +{ + BQL_LOCK_GUARD(); + + uint32_t thread_enabled_mask = clear_enable_mask(env); + CPUState *cs = env_cpu(env); + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + if (!thread_enabled_mask) { + /* All threads are stopped, exit */ + exit(get_thread0_r2()); + } +} + #endif diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 6f0c6697ad13a..95a0cc0788684 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -17,6 +17,9 @@ void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t); void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old); int get_exe_mode(CPUHexagonState *env); void clear_wait_mode(CPUHexagonState *env); +void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause); +void hexagon_start_threads(CPUHexagonState *env, uint32_t mask); +void hexagon_stop_thread(CPUHexagonState *env); static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index ded6c80d62373..9f79b1a20c6cf 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1448,12 +1448,12 @@ uint32_t HELPER(iassignr)(CPUHexagonState *env, uint32_t src) void HELPER(start)(CPUHexagonState *env, uint32_t imask) { - g_assert_not_reached(); + hexagon_start_threads(env, imask); } void HELPER(stop)(CPUHexagonState *env) { - g_assert_not_reached(); + hexagon_stop_thread(env); } void HELPER(wait)(CPUHexagonState *env, target_ulong PC) From 6043e6602e9917e9c7eafb6485d74bd9a758b7b8 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 21:09:08 -0700 Subject: [PATCH 046/126] target/hexagon: Implement modify SSR The per-vCPU 
System Status Register controls many modal behaviors of the system architecture. When the SSR is updated, we trigger the necessary effects for interrupts, privilege/MMU, and HVX context mapping. Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 100 +++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index e151c6335a6cc..3e2364a7b0787 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -14,6 +14,8 @@ #else #include "hw/boards.h" #include "hw/hexagon/hexagon.h" +#include "hex_interrupts.h" +#include "hex_mmu.h" #endif #include "exec/exec-all.h" #include "exec/cpu_ldst.h" @@ -69,9 +71,105 @@ void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) g_assert_not_reached(); } +static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS]; +static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS]; + +/* + * EXT_CONTEXTS + * SSR.XA 2 4 6 8 + * 000 HVX Context 0 HVX Context 0 HVX Context 0 HVX Context 0 + * 001 HVX Context 1 HVX Context 1 HVX Context 1 HVX Context 1 + * 010 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 2 + * 011 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 3 + * 100 HVX Context 0 HVX Context 0 HVX Context 4 HVX Context 4 + * 101 HVX Context 1 HVX Context 1 HVX Context 5 HVX Context 5 + * 110 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 6 + * 111 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 7 + */ +static int parse_context_idx(CPUHexagonState *env, uint8_t XA) +{ + int ret; + HexagonCPU *cpu = env_archcpu(env); + if (cpu->hvx_contexts == 6 && XA >= 6) { + ret = XA - 6 + 2; + } else { + ret = cpu->hvx_contexts ? XA % cpu->hvx_contexts : 0; + } + g_assert(ret >= 0 && ret < VECTOR_UNIT_MAX); + return ret; +} + +static void check_overcommitted_hvx(CPUHexagonState *env, uint32_t ssr) +{ + if (!GET_FIELD(SSR_XE, ssr)) { + return; + } + + uint8_t XA = GET_SSR_FIELD(SSR_XA, ssr); + + CPUState
*cs; + CPU_FOREACH(cs) { + CPUHexagonState *env_ = cpu_env(cs); + if (env_ == env) { + continue; + } + /* Check if another thread has the XE bit set and same XA */ + uint32_t ssr_ = arch_get_system_reg(env_, HEX_SREG_SSR); + if (GET_SSR_FIELD(SSR_XE2, ssr_) && GET_FIELD(SSR_XA, ssr_) == XA) { + qemu_log_mask(LOG_GUEST_ERROR, + "setting SSR.XA '%d' on thread %d but thread" + " %d has same extension active\n", XA, env->threadId, + env_->threadId); + } + } +} + void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old) { - g_assert_not_reached(); + g_assert(bql_locked()); + + bool old_EX = GET_SSR_FIELD(SSR_EX, old); + bool old_UM = GET_SSR_FIELD(SSR_UM, old); + bool old_GM = GET_SSR_FIELD(SSR_GM, old); + bool old_IE = GET_SSR_FIELD(SSR_IE, old); + uint8_t old_XA = GET_SSR_FIELD(SSR_XA, old); + bool new_EX = GET_SSR_FIELD(SSR_EX, new); + bool new_UM = GET_SSR_FIELD(SSR_UM, new); + bool new_GM = GET_SSR_FIELD(SSR_GM, new); + bool new_IE = GET_SSR_FIELD(SSR_IE, new); + uint8_t new_XA = GET_SSR_FIELD(SSR_XA, new); + + if ((old_EX != new_EX) || + (old_UM != new_UM) || + (old_GM != new_GM)) { + hex_mmu_mode_change(env); + } + + uint8_t old_asid = GET_SSR_FIELD(SSR_ASID, old); + uint8_t new_asid = GET_SSR_FIELD(SSR_ASID, new); + if (new_asid != old_asid) { + CPUState *cs = env_cpu(env); + tlb_flush(cs); + } + + if (old_XA != new_XA) { + int old_unit = parse_context_idx(env, old_XA); + int new_unit = parse_context_idx(env, new_XA); + + /* Ownership exchange */ + memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs)); + memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs)); + memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs)); + memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs)); + + check_overcommitted_hvx(env, new); + } + + /* See if the interrupts have been enabled or we have exited EX mode */ + if ((new_IE && !old_IE) || + (!new_EX && old_EX)) { + hex_interrupt_update(env); + } } void clear_wait_mode(CPUHexagonState *env) From 
28ba7129c920fc005cbdfcf2a2c845fa469ffee3 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 21:20:41 -0700 Subject: [PATCH 047/126] target/hexagon: Implement {g,s}etimask helpers Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 9f79b1a20c6cf..83088cfaa3fb3 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1468,12 +1468,39 @@ void HELPER(resume)(CPUHexagonState *env, uint32_t mask) uint32_t HELPER(getimask)(CPUHexagonState *env, uint32_t tid) { - g_assert_not_reached(); + CPUState *cs; + CPU_FOREACH(cs) { + HexagonCPU *found_cpu = HEXAGON_CPU(cs); + CPUHexagonState *found_env = &found_cpu->env; + if (found_env->threadId == tid) { + target_ulong imask = arch_get_system_reg(found_env, HEX_SREG_IMASK); + qemu_log_mask(CPU_LOG_INT, "%s: tid %d imask = 0x%x\n", + __func__, env->threadId, + (unsigned)GET_FIELD(IMASK_MASK, imask)); + return GET_FIELD(IMASK_MASK, imask); + } + } + return 0; } void HELPER(setimask)(CPUHexagonState *env, uint32_t pred, uint32_t imask) { - g_assert_not_reached(); + CPUState *cs; + + BQL_LOCK_GUARD(); + CPU_FOREACH(cs) { + HexagonCPU *found_cpu = HEXAGON_CPU(cs); + CPUHexagonState *found_env = &found_cpu->env; + + if (pred == found_env->threadId) { + SET_SYSTEM_FIELD(found_env, HEX_SREG_IMASK, IMASK_MASK, imask); + qemu_log_mask(CPU_LOG_INT, "%s: tid %d imask 0x%x\n", + __func__, found_env->threadId, imask); + hex_interrupt_update(env); + return; + } + } + hex_interrupt_update(env); } static bool handle_pmu_sreg_write(CPUHexagonState *env, uint32_t reg, From adc98cc5dae8a086e6c6c029581906a890e7999a Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 21:25:51 -0700 Subject: [PATCH 048/126] target/hexagon: Implement wait helper Signed-off-by: Brian Cain 
<brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 40 +++++++++++++++++++++++++++++++++++++ target/hexagon/cpu_helper.h | 1 + target/hexagon/op_helper.c | 6 +++++- 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 3e2364a7b0787..e64568b9fcf5c 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -71,6 +71,46 @@ void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) g_assert_not_reached(); } +static void set_wait_mode(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl); + thread_wait_mask |= 0x1 << env->threadId; + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask); +} + +void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC) +{ + g_assert(bql_locked()); + + if (qemu_loglevel_mask(LOG_GUEST_ERROR) && + (env->k0_lock_state != HEX_LOCK_UNLOCKED || + env->tlb_lock_state != HEX_LOCK_UNLOCKED)) { + qemu_log("WARNING: executing wait() with acquired lock" + "may lead to deadlock\n"); + } + g_assert(get_exe_mode(env) != HEX_EXE_MODE_WAIT); + + CPUState *cs = env_cpu(env); + /* + * The addition of cpu_has_work is borrowed from arm's wfi helper + * and is critical for our stability + */ + if ((cs->exception_index != HEX_EVENT_NONE) || + (cpu_has_work(cs))) { + qemu_log_mask(CPU_LOG_INT, + "%s: thread %d skipping WAIT mode, have some work\n", + __func__, env->threadId); + return; + } + set_wait_mode(env); + env->wait_next_pc = PC + 4; + + cpu_interrupt(cs, CPU_INTERRUPT_HALT); +} + static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS]; static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS]; diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 95a0cc0788684..e8d89d8526792 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -20,6 +20,7 @@ void
clear_wait_mode(CPUHexagonState *env); void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause); void hexagon_start_threads(CPUHexagonState *env, uint32_t mask); void hexagon_stop_thread(CPUHexagonState *env); +void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC); static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 83088cfaa3fb3..03bed11f6e833 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1458,7 +1458,11 @@ void HELPER(stop)(CPUHexagonState *env) void HELPER(wait)(CPUHexagonState *env, target_ulong PC) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + + if (!fIN_DEBUG_MODE(env->threadId)) { + hexagon_wait_thread(env, PC); + } } void HELPER(resume)(CPUHexagonState *env, uint32_t mask) From 5e21c6ad764893c6c36f505c5882d035327f13ff Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 21:35:29 -0700 Subject: [PATCH 049/126] target/hexagon: Implement get_exe_mode() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 24 ++++++++++++++++++++++++ target/hexagon/reg_fields_def.h.inc | 11 +++++++++++ 2 files changed, 35 insertions(+) diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index e64568b9fcf5c..e0dd120cd4d58 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -237,6 +237,30 @@ void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause) int get_exe_mode(CPUHexagonState *env) { + g_assert(bql_locked()); + + target_ulong modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + bool E_bit = thread_enabled_mask & (0x1 << env->threadId); + uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl); + bool W_bit = thread_wait_mask & (0x1 << env->threadId); + target_ulong isdbst = arch_get_system_reg(env, HEX_SREG_ISDBST); + uint32_t 
debugmode = GET_FIELD(ISDBST_DEBUGMODE, isdbst); + bool D_bit = debugmode & (0x1 << env->threadId); + + /* Figure 4-2 */ + if (!D_bit && !W_bit && !E_bit) { + return HEX_EXE_MODE_OFF; + } + if (!D_bit && !W_bit && E_bit) { + return HEX_EXE_MODE_RUN; + } + if (!D_bit && W_bit && E_bit) { + return HEX_EXE_MODE_WAIT; + } + if (D_bit && !W_bit && E_bit) { + return HEX_EXE_MODE_DEBUG; + } g_assert_not_reached(); } diff --git a/target/hexagon/reg_fields_def.h.inc b/target/hexagon/reg_fields_def.h.inc index 156a3514e77d0..50b8c26f8bfa4 100644 --- a/target/hexagon/reg_fields_def.h.inc +++ b/target/hexagon/reg_fields_def.h.inc @@ -135,3 +135,14 @@ DEF_REG_FIELD(CCR_GRE, 27, 1) DEF_REG_FIELD(CCR_VV1, 29, 1) DEF_REG_FIELD(CCR_VV2, 30, 1) DEF_REG_FIELD(CCR_VV3, 31, 1) + +/* ISDB ST fields */ +DEF_REG_FIELD(ISDBST_WAITRUN, 24, 8) +DEF_REG_FIELD(ISDBST_ONOFF, 16, 8) +DEF_REG_FIELD(ISDBST_DEBUGMODE, 8, 8) +DEF_REG_FIELD(ISDBST_STUFFSTATUS, 5, 1) +DEF_REG_FIELD(ISDBST_CMDSTATUS, 4, 1) +DEF_REG_FIELD(ISDBST_PROCMODE, 3, 1) +DEF_REG_FIELD(ISDBST_MBXINSTATUS, 2, 1) +DEF_REG_FIELD(ISDBST_MBXOUTSTATUS, 1, 1) +DEF_REG_FIELD(ISDBST_READY, 0, 1) From 8760976a11fce1e50611a117ca238e0b8f5dffc5 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 21:36:56 -0700 Subject: [PATCH 050/126] target/hexagon: Implement arch_get_system_reg() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index e0dd120cd4d58..0b0802bfb986f 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -36,7 +36,14 @@ uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index) uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg) { - g_assert_not_reached(); + if (reg == HEX_SREG_PCYCLELO) { + return hexagon_get_sys_pcycle_count_low(env); + } else if (reg == HEX_SREG_PCYCLEHI) { + 
return hexagon_get_sys_pcycle_count_high(env); + } + + g_assert(reg < NUM_SREGS); + return reg < HEX_SREG_GLB_START ? env->t_sreg[reg] : env->g_sreg[reg]; } uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env) From e46ac8a9f886bc91215e9e8766a6d9263f8322aa Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 27 Aug 2024 21:50:21 -0700 Subject: [PATCH 051/126] target/hexagon: Implement arch_{s,g}et_{thread,system}_reg() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu_helper.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index e8d89d8526792..1cdf4f8dd0ed6 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -26,20 +26,27 @@ static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) { g_assert(reg < TOTAL_PER_THREAD_REGS); - g_assert_not_reached(); + env->gpr[reg] = val; } static inline uint32_t arch_get_thread_reg(CPUHexagonState *env, uint32_t reg) { g_assert(reg < TOTAL_PER_THREAD_REGS); - g_assert_not_reached(); + return env->gpr[reg]; } +#ifndef CONFIG_USER_ONLY static inline void arch_set_system_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) { - g_assert_not_reached(); + g_assert(reg < NUM_SREGS); + if (reg < HEX_SREG_GLB_START) { + env->t_sreg[reg] = val; + } else { + env->g_sreg[reg] = val; + } } +#endif uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg); From a7c7934f8d70f68078b191f8f4fe23bfe684401d Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 28 Aug 2024 17:56:18 -0700 Subject: [PATCH 052/126] target/hexagon: Add representation to count cycles The PCYCLE register can be enabled to indicate accumulated clock cycles. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 3 +++ target/hexagon/cpu.h | 3 ++- target/hexagon/machine.c | 25 ++++++++++++++++++++++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 20387dcf72715..e09aa84ae469b 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -334,6 +334,7 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) if (cs->cpu_index == 0) { arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); + *(env->g_pcycle_base) = 0; } mmu_reset(env); arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); @@ -396,10 +397,12 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY if (cs->cpu_index == 0) { env->g_sreg = g_new0(target_ulong, NUM_SREGS); + env->g_pcycle_base = g_malloc0(sizeof(*env->g_pcycle_base)); } else { CPUState *cpu0 = qemu_get_cpu(0); CPUHexagonState *env0 = cpu_env(cpu0); env->g_sreg = env0->g_sreg; + env->g_pcycle_base = env0->g_pcycle_base; } #endif diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 0f30b2791f2bb..857fb5b6e37af 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -117,7 +117,8 @@ typedef struct CPUArchState { target_ulong stack_start; uint8_t slot_cancelled; - + uint64_t t_cycle_count; + uint64_t *g_pcycle_base; #ifndef CONFIG_USER_ONLY /* Some system registers are per thread and some are global. 
*/ target_ulong t_sreg[NUM_SREGS]; diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c index fcdbacf9fd566..4baa22d51f8e9 100644 --- a/target/hexagon/machine.c +++ b/target/hexagon/machine.c @@ -9,6 +9,27 @@ #include "cpu.h" #include "hex_mmu.h" +static int get_u64_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + uint64_t *p = pv; + *p = qemu_get_be64(f); + return 0; +} + +static int put_u64_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + qemu_put_be64(f, *((uint64_t *)pv)); + return 0; +} + +const VMStateInfo vmstate_info_uint64_ptr = { + .name = "uint64_t_pointer", + .get = get_u64_ptr, + .put = put_u64_ptr, +}; + static int get_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size, const VMStateField *field) { @@ -35,7 +56,6 @@ const VMStateInfo vmstate_info_hex_tlb_ptr = { .put = put_hex_tlb_ptr, }; - const VMStateDescription vmstate_hexagon_cpu = { .name = "cpu", .version_id = 0, @@ -56,6 +76,9 @@ const VMStateDescription vmstate_hexagon_cpu = { VMSTATE_UINTTL(env.wait_next_pc, HexagonCPU), VMSTATE_POINTER(env.hex_tlb, HexagonCPU, 0, vmstate_info_hex_tlb_ptr, CPUHexagonTLBContext *), + VMSTATE_UINT64(env.t_cycle_count, HexagonCPU), + VMSTATE_POINTER(env.g_pcycle_base, HexagonCPU, 0, + vmstate_info_uint64_ptr, uint64_t *), VMSTATE_END_OF_LIST() }, From de0bcfb8b4ff95f58b938dcd50ea278bfa975772 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 28 Aug 2024 17:58:06 -0700 Subject: [PATCH 053/126] target/hexagon: Add implementation of cycle counters Co-authored-by: Sid Manning <sidneym@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.h | 25 ++++++++++++++++++++++--- target/hexagon/cpu_helper.c | 12 +++++++++--- target/hexagon/translate.c | 27 +++++++++++++++++++++++++++ target/hexagon/translate.h | 2 ++ 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 
857fb5b6e37af..3d42e5fc12bdc 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -27,6 +27,7 @@ #include "cpu-qom.h" #include "exec/cpu-defs.h" +#include "exec/cpu-common.h" #include "hex_regs.h" #include "mmvec/mmvec.h" #include "hw/registerfields.h" @@ -35,7 +36,10 @@ #error "Hexagon does not support system emulation" #endif +#ifndef CONFIG_USER_ONLY +#include "reg_fields.h" typedef struct CPUHexagonTLBContext CPUHexagonTLBContext; +#endif #define NUM_PREGS 4 #define TOTAL_PER_THREAD_REGS 64 @@ -192,6 +196,7 @@ struct ArchCPU { FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) FIELD(TB_FLAGS, MMU_INDEX, 1, 3) +FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1) G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, @@ -205,6 +210,11 @@ void hexagon_cpu_soft_reset(CPUHexagonState *env); #endif #include "exec/cpu-all.h" + +#ifndef CONFIG_USER_ONLY +#include "cpu_helper.h" +#endif + static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, uint64_t *cs_base, uint32_t *flags) { @@ -214,16 +224,27 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, if (*pc == env->gpr[HEX_REG_SA0]) { hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1); } - *flags = hex_flags; if (*pc & PCALIGN_MASK) { hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0); } #ifndef CONFIG_USER_ONLY + target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + + bool pcycle_enabled = extract32(syscfg, + reg_field_info[SYSCFG_PCYCLEEN].offset, + reg_field_info[SYSCFG_PCYCLEEN].width); + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, cpu_mmu_index(env_cpu(env), false)); + + if (pcycle_enabled) { + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1); + } #else + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true); hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, MMU_USER_IDX); #endif + *flags = hex_flags; } typedef HexagonCPU ArchCPU; @@ -232,6 +253,4 @@ void hexagon_translate_init(void); 
void hexagon_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc); -#include "exec/cpu-all.h" - #endif /* HEXAGON_CPU_H */ diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 0b0802bfb986f..1d9b9f8befc70 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -48,17 +48,23 @@ uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg) uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env) { - g_assert_not_reached(); + uint64_t cycles = 0; + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *env_ = cpu_env(cs); + cycles += env_->t_cycle_count; + } + return *(env->g_pcycle_base) + cycles; } uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env) { - g_assert_not_reached(); + return hexagon_get_sys_pcycle_count(env) >> 32; } uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env) { - g_assert_not_reached(); + return extract64(hexagon_get_sys_pcycle_count(env), 0, 32); } void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 9119e42ff77fa..060df6e5eb629 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -57,6 +57,7 @@ TCGv_i64 hex_store_val64[STORES_MAX]; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; +TCGv_i64 hex_cycle_count; TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; @@ -125,6 +126,22 @@ static void gen_exception_raw(int excp) gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); } +#ifndef CONFIG_USER_ONLY +static inline void gen_precise_exception(int excp, target_ulong PC) +{ + tcg_gen_movi_tl(hex_cause_code, excp); + gen_exception(HEX_EVENT_PRECISE, PC); +} + +static inline void gen_pcycle_counters(DisasContext *ctx) +{ + if (ctx->pcycle_enabled) { + tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx->num_cycles); + ctx->num_cycles = 0; + } +} +#endif + 
static void gen_exec_counters(DisasContext *ctx) { tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT], @@ -133,6 +150,10 @@ static void gen_exec_counters(DisasContext *ctx) hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns); tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT], hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); + +#ifndef CONFIG_USER_ONLY + gen_pcycle_counters(ctx); +#endif } static bool use_goto_tb(DisasContext *ctx, target_ulong dest) @@ -785,6 +806,7 @@ static void gen_commit_hvx(DisasContext *ctx) } } +static const int PCYCLES_PER_PACKET = 3; static void update_exec_counters(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -804,6 +826,7 @@ static void update_exec_counters(DisasContext *ctx) } ctx->num_packets++; + ctx->num_cycles += PCYCLES_PER_PACKET; ctx->num_insns += num_real_insns; ctx->num_hvx_insns += num_hvx_insns; } @@ -946,11 +969,13 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX); ctx->num_packets = 0; + ctx->num_cycles = 0; ctx->num_insns = 0; ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); ctx->short_circuit = hex_cpu->short_circuit; + ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS, PCYCLE_ENABLED); } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -1077,6 +1102,8 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val), "llsc_val"); hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env, offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); + hex_cycle_count = tcg_global_mem_new_i64(tcg_env, + offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count"); for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(tcg_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 0eaa3db03e815..9bc4b3ce8b332 100644 --- a/target/hexagon/translate.h +++ 
b/target/hexagon/translate.h @@ -83,6 +83,8 @@ typedef struct DisasContext { TCGv new_pred_value[NUM_PREGS]; TCGv branch_taken; TCGv dczero_addr; + bool pcycle_enabled; + uint32_t num_cycles; } DisasContext; bool is_gather_store_insn(DisasContext *ctx); From 546882a06741193a44ddef3de4622f0a01fca318 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 28 Aug 2024 17:58:36 -0700 Subject: [PATCH 054/126] target/hexagon: Implement modify_syscfg() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 51 +++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 03bed11f6e833..42805d0f1d03f 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1522,7 +1522,56 @@ static bool handle_pmu_sreg_write(CPUHexagonState *env, uint32_t reg, static void modify_syscfg(CPUHexagonState *env, uint32_t val) { - g_assert_not_reached(); + g_assert(bql_locked()); + + uint32_t old; + uint32_t syscfg_read_only_mask = 0x80001c00; + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + + /* clear read-only bits if they are set in the new value. */ + val &= ~syscfg_read_only_mask; + /* if read-only are currently set in syscfg keep them set. 
*/ + val |= (syscfg & syscfg_read_only_mask); + + uint32_t tmp = val; + old = arch_get_system_reg(env, HEX_SREG_SYSCFG); + arch_set_system_reg(env, HEX_SREG_SYSCFG, tmp); + + /* Check for change in MMU enable */ + target_ulong old_mmu_enable = GET_SYSCFG_FIELD(SYSCFG_MMUEN, old); + uint8_t old_en = GET_SYSCFG_FIELD(SYSCFG_PCYCLEEN, old); + uint8_t old_gie = GET_SYSCFG_FIELD(SYSCFG_GIE, old); + target_ulong new_mmu_enable = + GET_SYSCFG_FIELD(SYSCFG_MMUEN, val); + if (new_mmu_enable && !old_mmu_enable) { + hex_mmu_on(env); + } else if (!new_mmu_enable && old_mmu_enable) { + hex_mmu_off(env); + } + + /* Changing pcycle enable from 0 to 1 resets the counters */ + uint8_t new_en = GET_SYSCFG_FIELD(SYSCFG_PCYCLEEN, val); + CPUState *cs; + if (old_en == 0 && new_en == 1) { + CPU_FOREACH(cs) { + CPUHexagonState *_env = cpu_env(cs); + _env->t_cycle_count = 0; + } + } + + /* See if global interrupts are turned on */ + uint8_t new_gie = GET_SYSCFG_FIELD(SYSCFG_GIE, val); + if (!old_gie && new_gie) { + qemu_log_mask(CPU_LOG_INT, "%s: global interrupts enabled\n", __func__); + hex_interrupt_update(env); + } + + if (qemu_loglevel_mask(LOG_UNIMP)) { + int new_v2x = GET_SYSCFG_FIELD(SYSCFG_V2X, val); + if (!new_v2x) { + qemu_log("HVX: 64 byte vector length is unsupported\n"); + } + } } static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid) From d6d14b97562e69ac1896ff7c1626fa94ae26f9c7 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 10:21:50 -0700 Subject: [PATCH 055/126] target/hexagon: Add system event, cause codes Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.h | 10 ++++++- target/hexagon/cpu_bits.h | 55 ++++++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 3d42e5fc12bdc..e1717dd303bcf 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -71,6 +71,15 @@ typedef struct 
CPUHexagonTLBContext CPUHexagonTLBContext; #define MMU_GUEST_IDX 1 #define MMU_KERNEL_IDX 2 +#define HEXAGON_CPU_IRQ_0 0 +#define HEXAGON_CPU_IRQ_1 1 +#define HEXAGON_CPU_IRQ_2 2 +#define HEXAGON_CPU_IRQ_3 3 +#define HEXAGON_CPU_IRQ_4 4 +#define HEXAGON_CPU_IRQ_5 5 +#define HEXAGON_CPU_IRQ_6 6 +#define HEXAGON_CPU_IRQ_7 7 + typedef enum { HEX_LOCK_UNLOCKED = 0, HEX_LOCK_WAITING = 1, @@ -79,7 +88,6 @@ typedef enum { } hex_lock_state_t; #endif - #define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU typedef struct { diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index 610094a759c57..c7cc426ec8888 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -24,14 +24,16 @@ #define PCALIGN_MASK (PCALIGN - 1) enum hex_event { - HEX_EVENT_NONE = -1, - HEX_EVENT_TRAP0 = 0x008, - HEX_EVENT_FETCH_NO_UPAGE = 0x012, - HEX_EVENT_INVALID_PACKET = 0x015, - HEX_EVENT_INVALID_OPCODE = 0x015, - HEX_EVENT_PC_NOT_ALIGNED = 0x01e, - HEX_EVENT_PRIV_NO_UREAD = 0x024, - HEX_EVENT_PRIV_NO_UWRITE = 0x025, + HEX_EVENT_NONE = -1, + HEX_EVENT_RESET = 0x0, + HEX_EVENT_IMPRECISE = 0x1, + HEX_EVENT_PRECISE = 0x2, + HEX_EVENT_TLB_MISS_X = 0x4, + HEX_EVENT_TLB_MISS_RW = 0x6, + HEX_EVENT_TRAP0 = 0x8, + HEX_EVENT_TRAP1 = 0x9, + HEX_EVENT_FPTRAP = 0xb, + HEX_EVENT_DEBUG = 0xc, HEX_EVENT_INT0 = 0x10, HEX_EVENT_INT1 = 0x11, HEX_EVENT_INT2 = 0x12, @@ -53,15 +55,38 @@ enum hex_event { enum hex_cause { HEX_CAUSE_NONE = -1, HEX_CAUSE_RESET = 0x000, - HEX_CAUSE_TRAP0 = 0x172, - HEX_CAUSE_FETCH_NO_UPAGE = 0x012, - HEX_CAUSE_INVALID_PACKET = 0x015, - HEX_CAUSE_INVALID_OPCODE = 0x015, - HEX_CAUSE_PC_NOT_ALIGNED = 0x01e, - HEX_CAUSE_PRIV_NO_UREAD = 0x024, - HEX_CAUSE_PRIV_NO_UWRITE = 0x025, + HEX_CAUSE_BIU_PRECISE = 0x001, + HEX_CAUSE_UNSUPORTED_HVX_64B = 0x002, /* QEMU-specific */ + HEX_CAUSE_DOUBLE_EXCEPT = 0x003, + HEX_CAUSE_TRAP0 = 0x008, + HEX_CAUSE_TRAP1 = 0x009, + HEX_CAUSE_FETCH_NO_XPAGE = 0x011, + HEX_CAUSE_FETCH_NO_UPAGE = 0x012, + HEX_CAUSE_INVALID_PACKET = 0x015, + 
HEX_CAUSE_INVALID_OPCODE = 0x015, + HEX_CAUSE_NO_COPROC_ENABLE = 0x016, + HEX_CAUSE_NO_COPROC2_ENABLE = 0x018, HEX_CAUSE_PRIV_USER_NO_GINSN = 0x01a, HEX_CAUSE_PRIV_USER_NO_SINSN = 0x01b, + HEX_CAUSE_REG_WRITE_CONFLICT = 0x01d, + HEX_CAUSE_PC_NOT_ALIGNED = 0x01e, + HEX_CAUSE_MISALIGNED_LOAD = 0x020, + HEX_CAUSE_MISALIGNED_STORE = 0x021, + HEX_CAUSE_PRIV_NO_READ = 0x022, + HEX_CAUSE_PRIV_NO_WRITE = 0x023, + HEX_CAUSE_PRIV_NO_UREAD = 0x024, + HEX_CAUSE_PRIV_NO_UWRITE = 0x025, + HEX_CAUSE_COPROC_LDST = 0x026, + HEX_CAUSE_STACK_LIMIT = 0x027, + HEX_CAUSE_VWCTRL_WINDOW_MISS = 0x029, + HEX_CAUSE_IMPRECISE_NMI = 0x043, + HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH = 0x044, + HEX_CAUSE_TLBMISSX_CAUSE_NORMAL = 0x060, + HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE = 0x061, + HEX_CAUSE_TLBMISSRW_CAUSE_READ = 0x070, + HEX_CAUSE_TLBMISSRW_CAUSE_WRITE = 0x071, + HEX_CAUSE_DEBUG_SINGLESTEP = 0x80, + HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT = 0x0bf, HEX_CAUSE_INT0 = 0x0c0, HEX_CAUSE_INT1 = 0x0c1, HEX_CAUSE_INT2 = 0x0c2, From 5c114b4ae02caa9bee5e2a6d0b0ed13e7e6b44b3 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 10:22:14 -0700 Subject: [PATCH 056/126] target/hexagon: Implement hex_tlb_entry_get_perm() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_mmu.c | 54 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/target/hexagon/hex_mmu.c b/target/hexagon/hex_mmu.c index 54c4ba2dbf9a2..d2297c036d606 100644 --- a/target/hexagon/hex_mmu.c +++ b/target/hexagon/hex_mmu.c @@ -267,7 +267,59 @@ static inline void hex_tlb_entry_get_perm(CPUHexagonState *env, uint64_t entry, int mmu_idx, int *prot, int32_t *excp) { - g_assert_not_reached(); + bool perm_x = GET_TLB_FIELD(entry, PTE_X); + bool perm_w = GET_TLB_FIELD(entry, PTE_W); + bool perm_r = GET_TLB_FIELD(entry, PTE_R); + bool perm_u = GET_TLB_FIELD(entry, PTE_U); + bool user_idx = mmu_idx == MMU_USER_IDX; + + if (mmu_idx == MMU_KERNEL_IDX) { + *prot = 
PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return; + } + + *prot = PAGE_VALID; + switch (access_type) { + case MMU_INST_FETCH: + if (user_idx && !perm_u) { + *excp = HEX_EVENT_PRECISE; + env->cause_code = HEX_CAUSE_FETCH_NO_UPAGE; + } else if (!perm_x) { + *excp = HEX_EVENT_PRECISE; + env->cause_code = HEX_CAUSE_FETCH_NO_XPAGE; + } + break; + case MMU_DATA_LOAD: + if (user_idx && !perm_u) { + *excp = HEX_EVENT_PRECISE; + env->cause_code = HEX_CAUSE_PRIV_NO_UREAD; + } else if (!perm_r) { + *excp = HEX_EVENT_PRECISE; + env->cause_code = HEX_CAUSE_PRIV_NO_READ; + } + break; + case MMU_DATA_STORE: + if (user_idx && !perm_u) { + *excp = HEX_EVENT_PRECISE; + env->cause_code = HEX_CAUSE_PRIV_NO_UWRITE; + } else if (!perm_w) { + *excp = HEX_EVENT_PRECISE; + env->cause_code = HEX_CAUSE_PRIV_NO_WRITE; + } + break; + } + + if (!user_idx || perm_u) { + if (perm_x) { + *prot |= PAGE_EXEC; + } + if (perm_r) { + *prot |= PAGE_READ; + } + if (perm_w) { + *prot |= PAGE_WRITE; + } + } } static inline bool hex_tlb_entry_match(CPUHexagonState *env, uint64_t entry, From fb8f1cfb247ab74d2e030608788397c8bee2e9f5 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 14:33:40 -0700 Subject: [PATCH 057/126] target/hexagon: Implement hex_tlb_lookup_by_asid() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_mmu.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/target/hexagon/hex_mmu.c b/target/hexagon/hex_mmu.c index d2297c036d606..07ad8e9616170 100644 --- a/target/hexagon/hex_mmu.c +++ b/target/hexagon/hex_mmu.c @@ -362,7 +362,31 @@ bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA, static uint32_t hex_tlb_lookup_by_asid(CPUHexagonState *env, uint32_t asid, uint32_t VA) { - g_assert_not_reached(); + uint32_t not_found = 0x80000000; + uint32_t idx = not_found; + int i; + + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + uint64_t entry = 
env->hex_tlb->entries[i]; + if (hex_tlb_entry_match_noperm(entry, asid, VA)) { + if (idx != not_found) { + env->cause_code = HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH; + break; + } + idx = i; + } + } + + if (idx == not_found) { + qemu_log_mask(CPU_LOG_MMU, "%s: 0x%x, 0x%08x => NOT FOUND\n", + __func__, asid, VA); + } else { + qemu_log_mask(CPU_LOG_MMU, "%s: 0x%x, 0x%08x => %d\n", + __func__, asid, VA, idx); + } + + return idx; } /* Called from tlbp instruction */ From af4d7677f29ee26b3f6b120711f0df10a5f8cf94 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 14:38:50 -0700 Subject: [PATCH 058/126] target/hexagon: Implement software interrupt Co-authored-by: Mike Lambert <mlambert@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 2 + target/hexagon/cpu.h | 1 - target/hexagon/hexswi.c | 258 +++++++++++++++++++++++++++++++++++++ target/hexagon/hexswi.h | 17 +++ target/hexagon/op_helper.c | 1 + 5 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 target/hexagon/hexswi.c create mode 100644 target/hexagon/hexswi.h diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index e09aa84ae469b..bc7f51c0a4849 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -33,6 +33,8 @@ #ifndef CONFIG_USER_ONLY #include "sys_macros.h" #include "qemu/main-loop.h" +#include "hex_interrupts.h" +#include "hexswi.h" #endif static void hexagon_v66_cpu_init(Object *obj) { } diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index e1717dd303bcf..4da1dcd7cb2fb 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -260,5 +260,4 @@ typedef HexagonCPU ArchCPU; void hexagon_translate_init(void); void hexagon_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc); - #endif /* HEXAGON_CPU_H */ diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c new file mode 100644 index 0000000000000..5fcf9b2be9330 --- /dev/null +++ 
b/target/hexagon/hexswi.c @@ -0,0 +1,258 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#ifdef CONFIG_USER_ONLY +#include "exec/helper-proto.h" +#include "qemu.h" +#endif +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "arch.h" +#include "internal.h" +#include "macros.h" +#include "sys_macros.h" +#include "tcg/tcg-op.h" +#ifndef CONFIG_USER_ONLY +#include "hex_mmu.h" +#include "hexswi.h" +#endif + +#ifndef CONFIG_USER_ONLY + + +static void set_addresses(CPUHexagonState *env, target_ulong pc_offset, + target_ulong exception_index) + +{ + arch_set_system_reg(env, HEX_SREG_ELR, + arch_get_thread_reg(env, HEX_REG_PC) + pc_offset); + arch_set_thread_reg(env, HEX_REG_PC, + arch_get_system_reg(env, HEX_SREG_EVB) | + (exception_index << 2)); +} + +static const char *event_name[] = { + [HEX_EVENT_RESET] = "HEX_EVENT_RESET", + [HEX_EVENT_IMPRECISE] = "HEX_EVENT_IMPRECISE", + [HEX_EVENT_TLB_MISS_X] = "HEX_EVENT_TLB_MISS_X", + [HEX_EVENT_TLB_MISS_RW] = "HEX_EVENT_TLB_MISS_RW", + [HEX_EVENT_TRAP0] = "HEX_EVENT_TRAP0", + [HEX_EVENT_TRAP1] = "HEX_EVENT_TRAP1", + [HEX_EVENT_FPTRAP] = "HEX_EVENT_FPTRAP", + [HEX_EVENT_DEBUG] = "HEX_EVENT_DEBUG", + [HEX_EVENT_INT0] = "HEX_EVENT_INT0", + [HEX_EVENT_INT1] = "HEX_EVENT_INT1", + [HEX_EVENT_INT2] = "HEX_EVENT_INT2", + [HEX_EVENT_INT3] = "HEX_EVENT_INT3", + [HEX_EVENT_INT4] = "HEX_EVENT_INT4", + [HEX_EVENT_INT5] = "HEX_EVENT_INT5", + [HEX_EVENT_INT6] = "HEX_EVENT_INT6", + [HEX_EVENT_INT7] = "HEX_EVENT_INT7", + [HEX_EVENT_INT8] = "HEX_EVENT_INT8", + [HEX_EVENT_INT9] = "HEX_EVENT_INT9", + [HEX_EVENT_INTA] = "HEX_EVENT_INTA", + [HEX_EVENT_INTB] = "HEX_EVENT_INTB", + [HEX_EVENT_INTC] = "HEX_EVENT_INTC", + [HEX_EVENT_INTD] = "HEX_EVENT_INTD", + [HEX_EVENT_INTE] = "HEX_EVENT_INTE", + [HEX_EVENT_INTF] = "HEX_EVENT_INTF" +}; + +void 
hexagon_cpu_do_interrupt(CPUState *cs) + +{ + CPUHexagonState *env = cpu_env(cs); + BQL_LOCK_GUARD(); + + qemu_log_mask(CPU_LOG_INT, "\t%s: event 0x%x:%s, cause 0x%x(%d)\n", + __func__, cs->exception_index, + event_name[cs->exception_index], env->cause_code, + env->cause_code); + + env->llsc_addr = ~0; + + uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR); + if (GET_SSR_FIELD(SSR_EX, ssr) == 1) { + arch_set_system_reg(env, HEX_SREG_DIAG, env->cause_code); + env->cause_code = HEX_CAUSE_DOUBLE_EXCEPT; + cs->exception_index = HEX_EVENT_PRECISE; + } + + switch (cs->exception_index) { + case HEX_EVENT_TRAP0: + if (env->cause_code == 0) { + qemu_log_mask(LOG_UNIMP, + "trap0 is unhandled, no semihosting available\n"); + } + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 4, cs->exception_index); + break; + + case HEX_EVENT_TRAP1: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 4, cs->exception_index); + break; + + case HEX_EVENT_TLB_MISS_X: + switch (env->cause_code) { + case HEX_CAUSE_TLBMISSX_CAUSE_NORMAL: + case HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE: + qemu_log_mask(CPU_LOG_MMU, + "TLB miss EX exception (0x%x) caught: " + "Cause code (0x%x) " + "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32 + ", BADVA = 0x%" PRIx32 "\n", + cs->exception_index, env->cause_code, env->threadId, + arch_get_thread_reg(env, HEX_REG_PC), + arch_get_system_reg(env, HEX_SREG_BADVA)); + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + default: + cpu_abort(cs, + "1:Hexagon exception %d/0x%x: " + "Unknown cause code %d/0x%x\n", + cs->exception_index, cs->exception_index, env->cause_code, + env->cause_code); + break; + } + break; + + case HEX_EVENT_TLB_MISS_RW: + switch (env->cause_code) { + case HEX_CAUSE_TLBMISSRW_CAUSE_READ: + case HEX_CAUSE_TLBMISSRW_CAUSE_WRITE: + qemu_log_mask(CPU_LOG_MMU, + "TLB miss RW exception (0x%x) caught: " + "Cause code (0x%x) " + "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32 + ", BADVA 
= 0x%" PRIx32 "\n", + cs->exception_index, env->cause_code, env->threadId, + env->gpr[HEX_REG_PC], + arch_get_system_reg(env, HEX_SREG_BADVA)); + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + /* env->sreg[HEX_SREG_BADVA] is set when the exception is raised */ + break; + + default: + cpu_abort(cs, + "2:Hexagon exception %d/0x%x: " + "Unknown cause code %d/0x%x\n", + cs->exception_index, cs->exception_index, env->cause_code, + env->cause_code); + break; + } + break; + + case HEX_EVENT_FPTRAP: + hexagon_ssr_set_cause(env, env->cause_code); + arch_set_thread_reg(env, HEX_REG_PC, + arch_get_system_reg(env, HEX_SREG_EVB) | + (cs->exception_index << 2)); + break; + + case HEX_EVENT_DEBUG: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + qemu_log_mask(LOG_UNIMP, "single-step exception is not handled\n"); + break; + + case HEX_EVENT_PRECISE: + switch (env->cause_code) { + case HEX_CAUSE_FETCH_NO_XPAGE: + case HEX_CAUSE_FETCH_NO_UPAGE: + case HEX_CAUSE_PRIV_NO_READ: + case HEX_CAUSE_PRIV_NO_UREAD: + case HEX_CAUSE_PRIV_NO_WRITE: + case HEX_CAUSE_PRIV_NO_UWRITE: + case HEX_CAUSE_MISALIGNED_LOAD: + case HEX_CAUSE_MISALIGNED_STORE: + case HEX_CAUSE_PC_NOT_ALIGNED: + qemu_log_mask(CPU_LOG_MMU, + "MMU permission exception (0x%x) caught: " + "Cause code (0x%x) " + "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32 + ", BADVA = 0x%" PRIx32 "\n", + cs->exception_index, env->cause_code, env->threadId, + env->gpr[HEX_REG_PC], + arch_get_system_reg(env, HEX_SREG_BADVA)); + + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + /* env->sreg[HEX_SREG_BADVA] is set when the exception is raised */ + break; + + case HEX_CAUSE_DOUBLE_EXCEPT: + case HEX_CAUSE_PRIV_USER_NO_SINSN: + case HEX_CAUSE_PRIV_USER_NO_GINSN: + case HEX_CAUSE_INVALID_OPCODE: + case HEX_CAUSE_NO_COPROC_ENABLE: + case HEX_CAUSE_NO_COPROC2_ENABLE: + case HEX_CAUSE_UNSUPORTED_HVX_64B: + case 
HEX_CAUSE_REG_WRITE_CONFLICT: + case HEX_CAUSE_VWCTRL_WINDOW_MISS: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + case HEX_CAUSE_COPROC_LDST: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + case HEX_CAUSE_STACK_LIMIT: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + default: + cpu_abort(cs, + "3:Hexagon exception %d/0x%x: " + "Unknown cause code %d/0x%x\n", + cs->exception_index, cs->exception_index, env->cause_code, + env->cause_code); + break; + } + break; + + case HEX_EVENT_IMPRECISE: + qemu_log_mask(LOG_UNIMP, + "Imprecise exception: this case is not yet handled"); + break; + + default: + qemu_log_mask(LOG_UNIMP, + "Hexagon Unsupported exception 0x%x/0x%x\n", + cs->exception_index, env->cause_code); + break; + } + + cs->exception_index = HEX_EVENT_NONE; +} + +void register_trap_exception(CPUHexagonState *env, int traptype, int imm, + target_ulong PC) +{ + CPUState *cs = env_cpu(env); + + cs->exception_index = (traptype == 0) ? HEX_EVENT_TRAP0 : HEX_EVENT_TRAP1; + ASSERT_DIRECT_TO_GUEST_UNSET(env, cs->exception_index); + + env->cause_code = imm; + env->gpr[HEX_REG_PC] = PC; + cpu_loop_exit(cs); +} +#endif diff --git a/target/hexagon/hexswi.h b/target/hexagon/hexswi.h new file mode 100644 index 0000000000000..5d232cb06cb05 --- /dev/null +++ b/target/hexagon/hexswi.h @@ -0,0 +1,17 @@ +/* + * Copyright(c) 2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXSWI_H +#define HEXSWI_H + + +#include "cpu.h" + +void hexagon_cpu_do_interrupt(CPUState *cpu); +void register_trap_exception(CPUHexagonState *env, int type, int imm, + target_ulong PC); + +#endif /* HEXSWI_H */ diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 42805d0f1d03f..687e7f45c27a0 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -38,6 +38,7 @@ #include "hex_mmu.h" #include "hw/intc/l2vic.h" #include "hex_interrupts.h" +#include "hexswi.h" #endif #define SF_BIAS 127 From f36fecf76865bc121140adbd7583be0256f6c4a1 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 17:59:42 -0700 Subject: [PATCH 059/126] target/hexagon: Implement exec_interrupt, set_irq Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 73 ++++++++++++++++++++++++++++++++++++++++++++ target/hexagon/cpu.h | 5 +++ 2 files changed, 78 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index bc7f51c0a4849..52d9fb50606a5 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -31,6 +31,7 @@ #include "hex_mmu.h" #ifndef CONFIG_USER_ONLY +#include "macros.h" #include "sys_macros.h" #include "qemu/main-loop.h" #include "hex_interrupts.h" @@ -277,9 +278,28 @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs, cpu_env(cs)->gpr[HEX_REG_PC] = tb->pc; } +#ifndef CONFIG_USER_ONLY +bool hexagon_thread_is_enabled(CPUHexagonState *env) +{ + target_ulong modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + bool E_bit = thread_enabled_mask & (0x1 << env->threadId); + + return E_bit; +} +#endif + static bool hexagon_cpu_has_work(CPUState *cs) { +#ifndef CONFIG_USER_ONLY + CPUHexagonState *env = cpu_env(cs); + + return hexagon_thread_is_enabled(env) && + (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI + | 
CPU_INTERRUPT_K0_UNLOCK | CPU_INTERRUPT_TLB_UNLOCK)); +#else return true; +#endif } static void hexagon_restore_state_to_opc(CPUState *cs, @@ -411,19 +431,72 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) mcc->parent_realize(dev, errp); } +#if !defined(CONFIG_USER_ONLY) +static void hexagon_cpu_set_irq(void *opaque, int irq, int level) +{ + HexagonCPU *cpu = HEXAGON_CPU(opaque); + CPUState *cs = CPU(cpu); + CPUHexagonState *env = cpu_env(cs); + + switch (irq) { + case HEXAGON_CPU_IRQ_0 ... HEXAGON_CPU_IRQ_7: + qemu_log_mask(CPU_LOG_INT, "%s: irq %d, level %d\n", + __func__, irq, level); + if (level) { + hex_raise_interrupts(env, 1 << irq, CPU_INTERRUPT_HARD); + } + break; + default: + g_assert_not_reached(); + } +} +#endif + + static void hexagon_cpu_init(Object *obj) { +#if !defined(CONFIG_USER_ONLY) + HexagonCPU *cpu = HEXAGON_CPU(obj); + qdev_init_gpio_in(DEVICE(cpu), hexagon_cpu_set_irq, 8); +#endif } #include "accel/tcg/cpu-ops.h" +#ifndef CONFIG_USER_ONLY + +static bool hexagon_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUHexagonState *env = cpu_env(cs); + if (interrupt_request & CPU_INTERRUPT_TLB_UNLOCK) { + cs->halted = false; + cpu_reset_interrupt(cs, CPU_INTERRUPT_TLB_UNLOCK); + return true; + } + if (interrupt_request & CPU_INTERRUPT_K0_UNLOCK) { + cs->halted = false; + cpu_reset_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK); + return true; + } + if (interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI)) { + return hex_check_interrupts(env); + } + return false; +} + +#endif + static const TCGCPUOps hexagon_tcg_ops = { .initialize = hexagon_translate_init, .translate_code = hexagon_translate_code, .synchronize_from_tb = hexagon_cpu_synchronize_from_tb, .restore_state_to_opc = hexagon_restore_state_to_opc, +#if !defined(CONFIG_USER_ONLY) + .cpu_exec_interrupt = hexagon_cpu_exec_interrupt, +#endif /* !CONFIG_USER_ONLY */ }; + static void hexagon_cpu_class_init(ObjectClass *c, void *data) { HexagonCPUClass *mcc = 
HEXAGON_CPU_CLASS(c); diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 4da1dcd7cb2fb..545fad0e295eb 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -211,6 +211,11 @@ G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uintptr_t pc); #ifndef CONFIG_USER_ONLY +/* + * @return true if the @a thread_env hardware thread is + * not stopped. + */ +bool hexagon_thread_is_enabled(CPUHexagonState *thread_env); uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg); uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg); void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val); From 0dc8c11dd9d4c7c12417a2bbda2ee1373b3b0fa8 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 18:36:52 -0700 Subject: [PATCH 060/126] target/hexagon: Implement hexagon_tlb_fill() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 133 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 52d9fb50606a5..d29e6a04e83bd 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -463,7 +463,137 @@ static void hexagon_cpu_init(Object *obj) #include "accel/tcg/cpu-ops.h" -#ifndef CONFIG_USER_ONLY +#if !defined(CONFIG_USER_ONLY) +static bool get_physical_address(CPUHexagonState *env, hwaddr *phys, int *prot, + int *size, int32_t *excp, target_ulong address, + MMUAccessType access_type, int mmu_idx) + +{ + if (hexagon_cpu_mmu_enabled(env)) { + return hex_tlb_find_match(env, address, access_type, phys, prot, size, + excp, mmu_idx); + } else { + *phys = address & 0xFFFFFFFF; + *prot = PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC; + *size = TARGET_PAGE_SIZE; + return true; + } +} + +/* qemu seems to only want to know about TARGET_PAGE_SIZE pages */ +static void find_qemu_subpage(vaddr *addr, hwaddr *phys, int page_size) +{ + vaddr page_start = *addr & 
~((vaddr)(page_size - 1)); + vaddr offset = ((*addr - page_start) / TARGET_PAGE_SIZE) * TARGET_PAGE_SIZE; + *addr = page_start + offset; + *phys += offset; +} + + +#define INVALID_BADVA 0xbadabada + +static void set_badva_regs(CPUHexagonState *env, target_ulong VA, int slot, + MMUAccessType access_type) +{ + arch_set_system_reg(env, HEX_SREG_BADVA, VA); + + if (access_type == MMU_INST_FETCH || slot == 0) { + arch_set_system_reg(env, HEX_SREG_BADVA0, VA); + arch_set_system_reg(env, HEX_SREG_BADVA1, INVALID_BADVA); + SET_SSR_FIELD(env, SSR_V0, 1); + SET_SSR_FIELD(env, SSR_V1, 0); + SET_SSR_FIELD(env, SSR_BVS, 0); + } else if (slot == 1) { + arch_set_system_reg(env, HEX_SREG_BADVA0, INVALID_BADVA); + arch_set_system_reg(env, HEX_SREG_BADVA1, VA); + SET_SSR_FIELD(env, SSR_V0, 0); + SET_SSR_FIELD(env, SSR_V1, 1); + SET_SSR_FIELD(env, SSR_BVS, 1); + } else { + g_assert_not_reached(); + } +} + +static void raise_tlbmiss_exception(CPUState *cs, target_ulong VA, int slot, + MMUAccessType access_type) +{ + CPUHexagonState *env = cpu_env(cs); + + set_badva_regs(env, VA, slot, access_type); + + switch (access_type) { + case MMU_INST_FETCH: + cs->exception_index = HEX_EVENT_TLB_MISS_X; + if ((VA & ~TARGET_PAGE_MASK) == 0) { + env->cause_code = HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE; + } else { + env->cause_code = HEX_CAUSE_TLBMISSX_CAUSE_NORMAL; + } + break; + case MMU_DATA_LOAD: + cs->exception_index = HEX_EVENT_TLB_MISS_RW; + env->cause_code = HEX_CAUSE_TLBMISSRW_CAUSE_READ; + break; + case MMU_DATA_STORE: + cs->exception_index = HEX_EVENT_TLB_MISS_RW; + env->cause_code = HEX_CAUSE_TLBMISSRW_CAUSE_WRITE; + break; + } +} + +static void raise_perm_exception(CPUState *cs, target_ulong VA, int slot, + MMUAccessType access_type, int32_t excp) +{ + CPUHexagonState *env = cpu_env(cs); + + set_badva_regs(env, VA, slot, access_type); + cs->exception_index = excp; +} + +static const char *access_type_names[] = { "MMU_DATA_LOAD ", "MMU_DATA_STORE", + "MMU_INST_FETCH" }; + +static const char 
*mmu_idx_names[] = { "MMU_USER_IDX", "MMU_GUEST_IDX", + "MMU_KERNEL_IDX" }; + +static bool hexagon_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, bool probe, + uintptr_t retaddr) +{ + CPUHexagonState *env = cpu_env(cs); + static int slot = 0 /* This is always zero for now */; + hwaddr phys; + int prot = 0; + int page_size = 0; + int32_t excp = 0; + bool ret = 0; + + qemu_log_mask( + CPU_LOG_MMU, + "%s: tid = 0x%x, pc = 0x%08" PRIx32 ", vaddr = 0x%08" VADDR_PRIx + ", size = %d, %s,\tprobe = %d, %s\n", + __func__, env->threadId, env->gpr[HEX_REG_PC], address, size, + access_type_names[access_type], probe, mmu_idx_names[mmu_idx]); + ret = get_physical_address(env, &phys, &prot, &page_size, &excp, address, + access_type, mmu_idx); + if (ret) { + if (!excp) { + find_qemu_subpage(&address, &phys, page_size); + tlb_set_page(cs, address, phys, prot, mmu_idx, TARGET_PAGE_SIZE); + return ret; + } else { + raise_perm_exception(cs, address, slot, access_type, excp); + do_raise_exception(env, cs->exception_index, env->gpr[HEX_REG_PC], + retaddr); + } + } + if (probe) { + return false; + } + raise_tlbmiss_exception(cs, address, slot, access_type); + do_raise_exception(env, cs->exception_index, env->gpr[HEX_REG_PC], retaddr); +} + static bool hexagon_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { @@ -493,6 +623,7 @@ static const TCGCPUOps hexagon_tcg_ops = { .restore_state_to_opc = hexagon_restore_state_to_opc, #if !defined(CONFIG_USER_ONLY) .cpu_exec_interrupt = hexagon_cpu_exec_interrupt, + .tlb_fill = hexagon_tlb_fill, #endif /* !CONFIG_USER_ONLY */ }; From 6d0dbf532b187d0dc50a5de1e054aa5a37ce2d41 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 18:42:58 -0700 Subject: [PATCH 061/126] target/hexagon: Implement siad inst siad is the 'Set interrupt auto disable' instruction. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 687e7f45c27a0..118f112487a18 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1378,7 +1378,15 @@ void HELPER(ciad)(CPUHexagonState *env, uint32_t mask) void HELPER(siad)(CPUHexagonState *env, uint32_t mask) { - g_assert_not_reached(); + uint32_t ipendad; + uint32_t iad; + + BQL_LOCK_GUARD(); + ipendad = READ_SREG(HEX_SREG_IPENDAD); + iad = fGET_FIELD(ipendad, IPENDAD_IAD); + fSET_FIELD(ipendad, IPENDAD_IAD, iad | mask); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); + hex_interrupt_update(env); } void HELPER(swi)(CPUHexagonState *env, uint32_t mask) From b8068a144e98859c5db3ed7ddc9ae6f8f615d4b4 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 18:51:44 -0700 Subject: [PATCH 062/126] target/hexagon: Implement hexagon_resume_threads() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.h | 1 + target/hexagon/cpu_helper.c | 37 +++++++++++++++++++++++++++++++++++++ target/hexagon/cpu_helper.h | 1 + target/hexagon/op_helper.c | 3 ++- 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 545fad0e295eb..02f148054cd11 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -49,6 +49,7 @@ typedef struct CPUHexagonTLBContext CPUHexagonTLBContext; #define REG_WRITES_MAX 32 #define PRED_WRITES_MAX 5 /* 4 insns + endloop */ #define VSTORES_MAX 2 +#define THREADS_MAX 8 #define VECTOR_UNIT_MAX 8 #ifndef CONFIG_USER_ONLY diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 1d9b9f8befc70..cc1a896542b34 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -124,6 +124,43 @@ void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC) cpu_interrupt(cs, 
CPU_INTERRUPT_HALT); } +static void hexagon_resume_thread(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + clear_wait_mode(env); + /* + * The wait instruction keeps the PC pointing to itself + * so that it has an opportunity to check for interrupts. + * + * When we come out of wait mode, adjust the PC to the + * next executable instruction. + */ + env->gpr[HEX_REG_PC] = env->wait_next_pc; + cs = env_cpu(env); + ASSERT_DIRECT_TO_GUEST_UNSET(env, cs->exception_index); + cs->halted = false; + cs->exception_index = HEX_EVENT_NONE; + qemu_cpu_kick(cs); +} + +void hexagon_resume_threads(CPUHexagonState *current_env, uint32_t mask) +{ + CPUState *cs; + CPUHexagonState *env; + + g_assert(bql_locked()); + CPU_FOREACH(cs) { + env = cpu_env(cs); + g_assert(env->threadId < THREADS_MAX); + if ((mask & (0x1 << env->threadId))) { + if (get_exe_mode(env) == HEX_EXE_MODE_WAIT) { + hexagon_resume_thread(env); + } + } + } +} + + static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS]; static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS]; diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 1cdf4f8dd0ed6..0723485e79b0b 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -21,6 +21,7 @@ void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause); void hexagon_start_threads(CPUHexagonState *env, uint32_t mask); void hexagon_stop_thread(CPUHexagonState *env); void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC); +void hexagon_resume_threads(CPUHexagonState *env, uint32_t mask); static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg, uint32_t val) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 118f112487a18..0dce133d3a6eb 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1476,7 +1476,8 @@ void HELPER(wait)(CPUHexagonState *env, target_ulong PC) void HELPER(resume)(CPUHexagonState *env, uint32_t mask) { - g_assert_not_reached(); + BQL_LOCK_GUARD(); + 
hexagon_resume_threads(env, mask); } uint32_t HELPER(getimask)(CPUHexagonState *env, uint32_t tid) From c299d689301b11f1e97bf9704426326f65d9fb36 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 18:54:12 -0700 Subject: [PATCH 063/126] target/hexagon: Implement setprio, resched MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware-assisted scheduler helps manage tasks on the run queue and interrupt steering. This instruction is defined in the Qualcomm Hexagon V71 Programmer's Reference Manual - https://docs.qualcomm.com/bundle/publicresource/80-N2040-51_REV_AB_Hexagon_V71_ProgrammerS_Reference_Manual.pdf See §11.9.2 SYSTEM MONITOR. Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 0dce133d3a6eb..d0dc4afac7e9f 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1465,6 +1465,57 @@ void HELPER(stop)(CPUHexagonState *env) hexagon_stop_thread(env); } +static inline QEMU_ALWAYS_INLINE void resched(CPUHexagonState *env) +{ + uint32_t schedcfg; + uint32_t schedcfg_en; + int int_number; + CPUState *cs; + uint32_t lowest_th_prio = 0; /* 0 is highest prio */ + uint32_t bestwait_reg; + uint32_t best_prio; + + BQL_LOCK_GUARD(); + qemu_log_mask(CPU_LOG_INT, "%s: check resched\n", __func__); + schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG); + schedcfg_en = GET_FIELD(SCHEDCFG_EN, schedcfg); + int_number = GET_FIELD(SCHEDCFG_INTNO, schedcfg); + + if (!schedcfg_en) { + return; + } + + CPU_FOREACH(cs) { + HexagonCPU *thread = HEXAGON_CPU(cs); + CPUHexagonState *thread_env = &(thread->env); + uint32_t th_prio = GET_FIELD( + STID_PRIO, arch_get_system_reg(thread_env, HEX_SREG_STID)); + if (!hexagon_thread_is_enabled(thread_env)) { + continue; + } + + lowest_th_prio = (lowest_th_prio > 
th_prio) + ? lowest_th_prio + : th_prio; + } + + bestwait_reg = arch_get_system_reg(env, HEX_SREG_BESTWAIT); + best_prio = GET_FIELD(BESTWAIT_PRIO, bestwait_reg); + + /* + * If the lowest priority thread is lower priority than the + * value in the BESTWAIT register, we must raise the reschedule + * interrupt on the lowest priority thread. + */ + if (lowest_th_prio > best_prio) { + qemu_log_mask(CPU_LOG_INT, + "%s: raising resched int %d, cur PC 0x" TARGET_FMT_lx "\n", + __func__, int_number, arch_get_thread_reg(env, HEX_REG_PC)); + SET_SYSTEM_FIELD(env, HEX_SREG_BESTWAIT, BESTWAIT_PRIO, 0x1ff); + hex_raise_interrupts(env, 1 << int_number, CPU_INTERRUPT_SWI); + } +} + void HELPER(wait)(CPUHexagonState *env, target_ulong PC) { BQL_LOCK_GUARD(); @@ -1715,6 +1766,20 @@ uint64_t HELPER(greg_read_pair)(CPUHexagonState *env, uint32_t reg) void HELPER(setprio)(CPUHexagonState *env, uint32_t thread, uint32_t prio) { + CPUState *cs; + + BQL_LOCK_GUARD(); + CPU_FOREACH(cs) { + HexagonCPU *found_cpu = HEXAGON_CPU(cs); + CPUHexagonState *found_env = &found_cpu->env; + if (thread == found_env->threadId) { + SET_SYSTEM_FIELD(found_env, HEX_SREG_STID, STID_PRIO, prio); + qemu_log_mask(CPU_LOG_INT, "%s: tid %d prio = 0x%x\n", + __func__, found_env->threadId, prio); + resched(env); + return; + } + } g_assert_not_reached(); } From 0340554842ff0887aff5ef0080d007decd0cf5d1 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 20:27:35 -0700 Subject: [PATCH 064/126] target/hexagon: Add sysemu_ops, cpu_get_phys_page_debug() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index d29e6a04e83bd..8612ee76201d3 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -489,6 +489,24 @@ static void find_qemu_subpage(vaddr *addr, hwaddr *phys, int page_size) *phys += offset; } +static hwaddr 
hexagon_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +{ + CPUHexagonState *env = cpu_env(cs); + hwaddr phys_addr; + int prot; + int page_size = 0; + int32_t excp = 0; + int mmu_idx = MMU_KERNEL_IDX; + + if (get_physical_address(env, &phys_addr, &prot, &page_size, &excp, + addr, 0, mmu_idx)) { + find_qemu_subpage(&addr, &phys_addr, page_size); + return phys_addr; + } + + return -1; +} + #define INVALID_BADVA 0xbadabada @@ -595,6 +613,12 @@ static bool hexagon_tlb_fill(CPUState *cs, vaddr address, int size, } +#include "hw/core/sysemu-cpu-ops.h" + +static const struct SysemuCPUOps hexagon_sysemu_ops = { + .get_phys_page_debug = hexagon_cpu_get_phys_page_debug, +}; + static bool hexagon_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { CPUHexagonState *env = cpu_env(cs); @@ -624,6 +648,8 @@ static const TCGCPUOps hexagon_tcg_ops = { #if !defined(CONFIG_USER_ONLY) .cpu_exec_interrupt = hexagon_cpu_exec_interrupt, .tlb_fill = hexagon_tlb_fill, + .cpu_exec_halt = hexagon_cpu_has_work, + .do_interrupt = hexagon_cpu_do_interrupt, #endif /* !CONFIG_USER_ONLY */ }; @@ -653,9 +679,12 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->gdb_core_xml_file = "hexagon-core.xml"; cc->disas_set_info = hexagon_cpu_disas_set_info; #ifndef CONFIG_USER_ONLY + cc->sysemu_ops = &hexagon_sysemu_ops; dc->vmsd = &vmstate_hexagon_cpu; #endif +#ifdef CONFIG_TCG cc->tcg_ops = &hexagon_tcg_ops; +#endif } #ifndef CONFIG_USER_ONLY From fc9159e9ddcb4e3f11e97e43bc4df7f54f15a610 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 20:33:40 -0700 Subject: [PATCH 065/126] target/hexagon: Add exec-start-addr prop Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 7 ++----- target/hexagon/cpu.h | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 8612ee76201d3..52498f74b5139 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -66,6 
+66,7 @@ static const Property hexagon_cpu_properties[] = { DEFINE_PROP_UINT32("l2vic-base-addr", HexagonCPU, l2vic_base_addr, 0xffffffffULL), DEFINE_PROP_UINT32("hvx-contexts", HexagonCPU, hvx_contexts, 0), + DEFINE_PROP_UINT32("exec-start-addr", HexagonCPU, boot_addr, 0xffffffffULL), #endif DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, @@ -361,8 +362,6 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) mmu_reset(env); arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); hexagon_cpu_soft_reset(env); - memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); - memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); env->threadId = cs->cpu_index; env->tlb_lock_state = HEX_LOCK_UNLOCKED; env->k0_lock_state = HEX_LOCK_UNLOCKED; @@ -371,6 +370,7 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) env->next_PC = 0; env->wait_next_pc = 0; env->cause_code = -1; + arch_set_thread_reg(env, HEX_REG_PC, cpu->boot_addr); #endif } @@ -414,9 +414,6 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY CPUHexagonState *env = cpu_env(cs); hex_mmu_realize(env); -#endif - cpu_reset(cs); -#ifndef CONFIG_USER_ONLY if (cs->cpu_index == 0) { env->g_sreg = g_new0(target_ulong, NUM_SREGS); env->g_pcycle_base = g_malloc0(sizeof(*env->g_pcycle_base)); diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 02f148054cd11..9a5a0bb78af75 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -198,6 +198,7 @@ struct ArchCPU { uint32_t num_tlbs; uint32_t l2vic_base_addr; uint32_t hvx_contexts; + uint32_t boot_addr; #endif }; From db48cf246a584dd4b05f048bcea5385022a1838f Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 4 Sep 2024 21:18:54 -0700 Subject: [PATCH 066/126] target/hexagon: Add hexagon_cpu_mmu_index() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c 
| 23 +++++++++++++++++++++ target/hexagon/cpu_helper.c | 41 +++++++++++++++++++++++++++++++++++++ target/hexagon/cpu_helper.h | 1 + 3 files changed, 65 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 52498f74b5139..76a8cd5b2de8f 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -650,6 +650,28 @@ static const TCGCPUOps hexagon_tcg_ops = { #endif /* !CONFIG_USER_ONLY */ }; +static int hexagon_cpu_mmu_index(CPUState *cs, bool ifetch) +{ +#ifndef CONFIG_USER_ONLY + BQL_LOCK_GUARD(); + CPUHexagonState *env = cpu_env(cs); + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + uint8_t mmuen = GET_SYSCFG_FIELD(SYSCFG_MMUEN, syscfg); + if (!mmuen) { + return MMU_KERNEL_IDX; + } + + int cpu_mode = get_cpu_mode(env); + if (cpu_mode == HEX_CPU_MODE_MONITOR) { + return MMU_KERNEL_IDX; + } else if (cpu_mode == HEX_CPU_MODE_GUEST) { + return MMU_GUEST_IDX; + } +#endif + + return MMU_USER_IDX; +} + static void hexagon_cpu_class_init(ObjectClass *c, void *data) { @@ -667,6 +689,7 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->class_by_name = hexagon_cpu_class_by_name; cc->has_work = hexagon_cpu_has_work; + cc->mmu_index = hexagon_cpu_mmu_index; cc->dump_state = hexagon_dump_state; cc->set_pc = hexagon_cpu_set_pc; cc->get_pc = hexagon_cpu_get_pc; diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index cc1a896542b34..9c44cb79505a5 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -394,4 +394,45 @@ void hexagon_stop_thread(CPUHexagonState *env) } } +static int sys_in_monitor_mode_ssr(uint32_t ssr) +{ + if ((GET_SSR_FIELD(SSR_EX, ssr) != 0) || + ((GET_SSR_FIELD(SSR_EX, ssr) == 0) && (GET_SSR_FIELD(SSR_UM, ssr) == 0))) + return 1; + return 0; +} + +static int sys_in_guest_mode_ssr(uint32_t ssr) +{ + if ((GET_SSR_FIELD(SSR_EX, ssr) == 0) && + (GET_SSR_FIELD(SSR_UM, ssr) != 0) && + (GET_SSR_FIELD(SSR_GM, ssr) != 0)) + return 1; + return 0; +} + +static int 
sys_in_user_mode_ssr(uint32_t ssr) +{ + if ((GET_SSR_FIELD(SSR_EX, ssr) == 0) && + (GET_SSR_FIELD(SSR_UM, ssr) != 0) && + (GET_SSR_FIELD(SSR_GM, ssr) == 0)) + return 1; + return 0; +} + +int get_cpu_mode(CPUHexagonState *env) + +{ + uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR); + + if (sys_in_monitor_mode_ssr(ssr)) { + return HEX_CPU_MODE_MONITOR; + } else if (sys_in_guest_mode_ssr(ssr)) { + return HEX_CPU_MODE_GUEST; + } else if (sys_in_user_mode_ssr(ssr)) { + return HEX_CPU_MODE_USER; + } + return HEX_CPU_MODE_MONITOR; +} + #endif diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 0723485e79b0b..0a5134204f3dc 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -15,6 +15,7 @@ void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t); void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t); void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t); void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old); +int get_cpu_mode(CPUHexagonState *env); int get_exe_mode(CPUHexagonState *env); void clear_wait_mode(CPUHexagonState *env); void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause); From 310f6589294193e933c53c739063b93fa36c21f4 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 5 Sep 2024 18:27:04 -0700 Subject: [PATCH 067/126] target/hexagon: Decode trap1, rte as COF Also: handle rte instructions at the end of the packet. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/decode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 23deba2426f84..5d0beeeaf2f1f 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -193,6 +193,8 @@ static bool decode_opcode_can_jump(int opcode) if ((GET_ATTRIB(opcode, A_JUMP)) || (GET_ATTRIB(opcode, A_CALL)) || (opcode == J2_trap0) || + (opcode == J2_trap1) || + (opcode == J2_rte) || (opcode == J2_pause)) { /* Exception to A_JUMP attribute */ if (opcode == J4_hintjumpr) { @@ -371,6 +373,18 @@ static void decode_shuffle_for_execution(Packet *packet) break; } } + /* + * And at the very very very end, move any RTE's, since they update + * user/supervisor mode. + */ +#if !defined(CONFIG_USER_ONLY) + for (i = 0; i < last_insn; i++) { + if (packet->insn[i].opcode == J2_rte) { + decode_send_insn_to(packet, i, last_insn); + break; + } + } +#endif } static void From 3ba37304292bef915d135a9cbe6d1044737da8dc Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 8 Sep 2024 11:56:26 -0700 Subject: [PATCH 068/126] target/hexagon: Implement hexagon_find_last_irq() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index d0dc4afac7e9f..f3ffac81b68cc 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1637,7 +1637,13 @@ static void modify_syscfg(CPUHexagonState *env, uint32_t val) static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid) { - g_assert_not_reached(); + int offset = (vid == HEX_SREG_VID) ? 
L2VIC_VID_0 : L2VIC_VID_1; + CPUState *cs = env_cpu(env); + HexagonCPU *cpu = HEXAGON_CPU(cs); + const hwaddr pend_mem = cpu->l2vic_base_addr + offset; + uint32_t irq; + cpu_physical_memory_read(pend_mem, &irq, sizeof(irq)); + return irq; } static void hexagon_read_timer(CPUHexagonState *env, uint32_t *low, From fef696c03ddcdc976c046731829e10ec20eb6aaa Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 9 Sep 2024 09:34:18 -0700 Subject: [PATCH 069/126] target/hexagon: Implement modify_ssr, resched, pending_interrupt Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/helper.h | 3 +++ target/hexagon/op_helper.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 730eaf8b9a0f9..3df663baeb057 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -129,4 +129,7 @@ DEF_HELPER_1(stop, void, env) DEF_HELPER_2(wait, void, env, i32) DEF_HELPER_2(resume, void, env, i32) DEF_HELPER_2(nmi, void, env, i32) +DEF_HELPER_1(resched, void, env) +DEF_HELPER_3(modify_ssr, void, env, i32, i32) +DEF_HELPER_1(pending_interrupt, void, env) #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index f3ffac81b68cc..702c3dd3c6438 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1178,6 +1178,15 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV, return RxxV; } +#ifndef CONFIG_USER_ONLY +void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old) +{ + BQL_LOCK_GUARD(); + hexagon_modify_ssr(env, new, old); +} +#endif + + /* Histogram instructions */ void HELPER(vhist)(CPUHexagonState *env) @@ -1516,6 +1525,11 @@ static inline QEMU_ALWAYS_INLINE void resched(CPUHexagonState *env) } } +void HELPER(resched)(CPUHexagonState *env) +{ + resched(env); +} + void HELPER(wait)(CPUHexagonState *env, target_ulong PC) { BQL_LOCK_GUARD(); @@ -1793,6 +1807,12 @@ void HELPER(nmi)(CPUHexagonState *env, 
uint32_t thread_mask) { g_assert_not_reached(); } + +void HELPER(pending_interrupt)(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + hex_interrupt_update(env); +} #endif From 1c32cfb3da41e85f41fec812c1a7c5c23d829138 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 9 Sep 2024 10:07:30 -0700 Subject: [PATCH 070/126] target/hexagon: Add pkt_ends_tb to translation Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/translate.c | 99 +++++++++++++++++++++++++++++++++++++- target/hexagon/translate.h | 1 + 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 060df6e5eb629..475726388a3f2 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -259,6 +259,18 @@ static bool check_for_attrib(Packet *pkt, int attrib) return false; } +#ifndef CONFIG_USER_ONLY +static bool check_for_opcode(Packet *pkt, uint16_t opcode) +{ + for (int i = 0; i < pkt->num_insns; i++) { + if (pkt->insn[i].opcode == opcode) { + return true; + } + } + return false; +} +#endif + static bool need_slot_cancelled(Packet *pkt) { /* We only need slot_cancelled for conditional store instructions */ @@ -272,6 +284,90 @@ static bool need_slot_cancelled(Packet *pkt) return false; } +#ifndef CONFIG_USER_ONLY +static bool sreg_write_to_global(int reg_num) +{ + return reg_num == HEX_SREG_SSR || + reg_num == HEX_SREG_STID || + reg_num == HEX_SREG_IMASK || + reg_num == HEX_SREG_IPENDAD || + reg_num == HEX_SREG_BESTWAIT || + reg_num == HEX_SREG_SCHEDCFG; +} + +static bool has_sreg_write_to_global(Packet const *pkt) +{ + for (int i = 0; i < pkt->num_insns; i++) { + Insn const *insn = &pkt->insn[i]; + uint16_t opcode = insn->opcode; + if (opcode == Y2_tfrsrcr) { + /* Write to a single sreg */ + int reg_num = insn->regno[0]; + if (sreg_write_to_global(reg_num)) { + return true; + } + } else if (opcode == Y4_tfrspcp) { + /* Write to a sreg pair */ + int reg_num = insn->regno[0]; + if 
(sreg_write_to_global(reg_num)) { + return true; + } + if (sreg_write_to_global(reg_num + 1)) { + return true; + } + } + } + return false; +} +#endif + +static bool pkt_ends_tb(Packet *pkt) +{ + if (pkt->pkt_has_cof) { + return true; + } +#ifndef CONFIG_USER_ONLY + /* System mode instructions that end the TB */ + if (check_for_opcode(pkt, Y2_swi) || + check_for_opcode(pkt, Y2_cswi) || + check_for_opcode(pkt, Y2_ciad) || + check_for_opcode(pkt, Y4_siad) || + check_for_opcode(pkt, Y2_wait) || + check_for_opcode(pkt, Y2_resume) || + check_for_opcode(pkt, Y2_iassignw) || + check_for_opcode(pkt, Y2_setimask) || + check_for_opcode(pkt, Y4_nmi) || + check_for_opcode(pkt, Y2_setprio) || + check_for_opcode(pkt, Y2_start) || + check_for_opcode(pkt, Y2_stop) || + check_for_opcode(pkt, Y2_k0lock) || + check_for_opcode(pkt, Y2_k0unlock) || + check_for_opcode(pkt, Y2_tlblock) || + check_for_opcode(pkt, Y2_tlbunlock) || + check_for_opcode(pkt, Y2_break) || + check_for_opcode(pkt, Y2_isync) || + check_for_opcode(pkt, Y2_syncht) || + check_for_opcode(pkt, Y2_tlbp) || + check_for_opcode(pkt, Y2_tlbw) || + check_for_opcode(pkt, Y5_ctlbw) || + check_for_opcode(pkt, Y5_tlbasidi)) { + return true; + } + + /* + * Check for sreg writes that would end the TB + */ + if (check_for_attrib(pkt, A_IMPLICIT_WRITES_SSR)) { + return true; + } + if (has_sreg_write_to_global(pkt)) { + return true; + } +#endif + return false; +} + + static bool need_next_PC(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -473,6 +569,7 @@ static void gen_start_packet(DisasContext *ctx) tcg_gen_movi_tl(hex_slot_cancelled, 0); } ctx->branch_taken = NULL; + ctx->pkt_ends_tb = pkt_ends_tb(pkt); if (pkt->pkt_has_cof) { ctx->branch_taken = tcg_temp_new(); if (pkt->pkt_has_multi_cof) { @@ -927,7 +1024,7 @@ static void gen_commit_packet(DisasContext *ctx) pkt->vhist_insn->generate(ctx); } - if (pkt->pkt_has_cof) { + if (ctx->pkt_ends_tb || ctx->base.is_jmp == DISAS_NORETURN) { gen_end_tb(ctx); } } diff --git
a/target/hexagon/translate.h b/target/hexagon/translate.h index 9bc4b3ce8b332..c9533fee1f5cc 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -84,6 +84,7 @@ typedef struct DisasContext { TCGv branch_taken; TCGv dczero_addr; bool pcycle_enabled; + bool pkt_ends_tb; uint32_t num_cycles; } DisasContext; From 45df6e9d3040564e5b25d3622d72ebe34971ba72 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 9 Sep 2024 09:35:18 -0700 Subject: [PATCH 071/126] target/hexagon: Add next_PC, {s,g}reg writes Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.h | 2 +- target/hexagon/genptr.c | 7 +- target/hexagon/translate.c | 142 ++++++++++++++++++++++++++++++++----- target/hexagon/translate.h | 2 + 4 files changed, 132 insertions(+), 21 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 9a5a0bb78af75..340e0a83a5baa 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -146,9 +146,9 @@ typedef struct CPUArchState { hex_lock_state_t k0_lock_state; target_ulong tlb_lock_count; target_ulong k0_lock_count; - target_ulong next_PC; CPUHexagonTLBContext *hex_tlb; #endif + target_ulong next_PC; target_ulong new_value_usr; MemLog mem_log_stores[STORES_MAX]; diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 5554c9515c4db..afc7e5f3a5ab4 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -634,14 +634,15 @@ static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr, tcg_gen_brcondi_tl(cond, pred, 0, pred_false); } + TCGv PC_wr = ctx->need_next_pc ? 
hex_next_PC : hex_gpr[HEX_REG_PC]; if (ctx->pkt->pkt_has_multi_cof) { /* If there are multiple branches in a packet, ignore the second one */ - tcg_gen_movcond_tl(TCG_COND_NE, hex_gpr[HEX_REG_PC], + tcg_gen_movcond_tl(TCG_COND_NE, PC_wr, ctx->branch_taken, tcg_constant_tl(0), - hex_gpr[HEX_REG_PC], addr); + PC_wr, addr); tcg_gen_movi_tl(ctx->branch_taken, 1); } else { - tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], addr); + tcg_gen_mov_tl(PC_wr, addr); } if (cond != TCG_COND_ALWAYS) { diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 475726388a3f2..d4b22acb729f6 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -49,6 +49,7 @@ static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_pred[NUM_PREGS]; TCGv hex_slot_cancelled; +TCGv hex_next_PC; TCGv hex_new_value_usr; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; @@ -61,12 +62,14 @@ TCGv_i64 hex_cycle_count; TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; +static bool need_next_PC(DisasContext *ctx); #ifndef CONFIG_USER_ONLY TCGv hex_greg[NUM_GREGS]; TCGv hex_t_sreg[NUM_SREGS]; TCGv_ptr hex_g_sreg_ptr; TCGv hex_g_sreg[NUM_SREGS]; +TCGv hex_cause_code; #endif static const char * const hexagon_prednames[] = { @@ -184,6 +187,9 @@ static void gen_end_tb(DisasContext *ctx) gen_exec_counters(ctx); + if (ctx->need_next_pc) { + tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC); + } if (ctx->branch_cond != TCG_COND_NEVER) { if (ctx->branch_cond != TCG_COND_ALWAYS) { TCGLabel *skip = gen_new_label(); @@ -371,18 +377,24 @@ static bool pkt_ends_tb(Packet *pkt) static bool need_next_PC(DisasContext *ctx) { Packet *pkt = ctx->pkt; - - /* Check for conditional control flow or HW loop end */ - for (int i = 0; i < pkt->num_insns; i++) { - uint16_t opcode = pkt->insn[i].opcode; - if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) { - return true; - } - if 
(GET_ATTRIB(opcode, A_HWLOOP0_END) || - GET_ATTRIB(opcode, A_HWLOOP1_END)) { - return true; + if (pkt->pkt_has_cof || ctx->pkt_ends_tb) { + for (int i = 0; i < pkt->num_insns; i++) { + uint16_t opcode = pkt->insn[i].opcode; + if ((GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) || + GET_ATTRIB(opcode, A_HWLOOP0_END) || + GET_ATTRIB(opcode, A_HWLOOP1_END)) { + return true; + } } } + /* + * We end the TB on some instructions that do not change the flow (for + * other reasons). In these cases, we must set pc too, as the insn won't + * do it themselves. + */ + if (ctx->pkt_ends_tb && !check_for_attrib(pkt, A_COF)) { + return true; + } return false; } @@ -523,7 +535,14 @@ static void analyze_packet(DisasContext *ctx) static void gen_start_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; +#ifndef CONFIG_USER_ONLY + target_ulong next_PC = (check_for_opcode(pkt, Y2_k0lock) || + check_for_opcode(pkt, Y2_tlblock)) ? + ctx->base.pc_next : + ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; +#else target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; +#endif int i; /* Clear out the disassembly context */ @@ -531,6 +550,10 @@ static void gen_start_packet(DisasContext *ctx) ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); +#ifndef CONFIG_USER_ONLY + ctx->greg_log_idx = 0; + ctx->sreg_log_idx = 0; +#endif ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); ctx->future_vregs_idx = 0; @@ -563,21 +586,41 @@ static void gen_start_packet(DisasContext *ctx) * gen phase, so clear it again. 
*/ bitmap_zero(ctx->pregs_written, NUM_PREGS); +#ifndef CONFIG_USER_ONLY + for (i = 0; i < NUM_SREGS; i++) { + ctx->t_sreg_new_value[i] = NULL; + } + for (i = 0; i < ctx->sreg_log_idx; i++) { + int reg_num = ctx->sreg_log[i]; + if (reg_num < HEX_SREG_GLB_START) { + ctx->t_sreg_new_value[reg_num] = tcg_temp_new(); + tcg_gen_mov_tl(ctx->t_sreg_new_value[reg_num], hex_t_sreg[reg_num]); + } + } + for (i = 0; i < NUM_GREGS; i++) { + ctx->greg_new_value[i] = NULL; + } + for (i = 0; i < ctx->greg_log_idx; i++) { + int reg_num = ctx->greg_log[i]; + ctx->greg_new_value[reg_num] = tcg_temp_new(); + } +#endif /* Initialize the runtime state for packet semantics */ if (need_slot_cancelled(pkt)) { tcg_gen_movi_tl(hex_slot_cancelled, 0); } ctx->branch_taken = NULL; - ctx->pkt_ends_tb = pkt_ends_tb(pkt); if (pkt->pkt_has_cof) { ctx->branch_taken = tcg_temp_new(); - if (pkt->pkt_has_multi_cof) { - tcg_gen_movi_tl(ctx->branch_taken, 0); - } - if (need_next_PC(ctx)) { - tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); - } + } + if (pkt->pkt_has_multi_cof) { + tcg_gen_movi_tl(ctx->branch_taken, 0); + } + ctx->pkt_ends_tb = pkt_ends_tb(pkt); + ctx->need_next_pc = need_next_PC(ctx); + if (ctx->need_next_pc) { + tcg_gen_movi_tl(hex_next_PC, next_PC); } /* Preload the predicated registers into get_result_gpr(ctx, i) */ @@ -713,6 +756,59 @@ static void gen_reg_writes(DisasContext *ctx) } } +#ifndef CONFIG_USER_ONLY +static void gen_greg_writes(DisasContext *ctx) +{ + int i; + + for (i = 0; i < ctx->greg_log_idx; i++) { + int reg_num = ctx->greg_log[i]; + + tcg_gen_mov_tl(hex_greg[reg_num], ctx->greg_new_value[reg_num]); + } +} + + +static void gen_sreg_writes(DisasContext *ctx) +{ + int i; + + TCGv old_reg = tcg_temp_new(); + for (i = 0; i < ctx->sreg_log_idx; i++) { + int reg_num = ctx->sreg_log[i]; + + if (reg_num == HEX_SREG_SSR) { + tcg_gen_mov_tl(old_reg, hex_t_sreg[reg_num]); + tcg_gen_mov_tl(hex_t_sreg[reg_num], ctx->t_sreg_new_value[reg_num]); + gen_helper_modify_ssr(tcg_env, 
ctx->t_sreg_new_value[reg_num], + old_reg); + /* This can change processor state, so end the TB */ + ctx->base.is_jmp = DISAS_NORETURN; + } else if ((reg_num == HEX_SREG_STID) || + (reg_num == HEX_SREG_IMASK) || + (reg_num == HEX_SREG_IPENDAD)) { + if (reg_num < HEX_SREG_GLB_START) { + tcg_gen_mov_tl(old_reg, hex_t_sreg[reg_num]); + tcg_gen_mov_tl(hex_t_sreg[reg_num], + ctx->t_sreg_new_value[reg_num]); + } + /* This can change the interrupt state, so end the TB */ + gen_helper_pending_interrupt(tcg_env); + ctx->base.is_jmp = DISAS_NORETURN; + } else if ((reg_num == HEX_SREG_BESTWAIT) || + (reg_num == HEX_SREG_SCHEDCFG)) { + /* This can trigger resched interrupt, so end the TB */ + gen_helper_resched(tcg_env); + ctx->base.is_jmp = DISAS_NORETURN; + } + + if (reg_num < HEX_SREG_GLB_START) { + tcg_gen_mov_tl(hex_t_sreg[reg_num], ctx->t_sreg_new_value[reg_num]); + } + } +} +#endif + static void gen_pred_writes(DisasContext *ctx) { /* Early exit if not needed or the log is empty */ @@ -1012,6 +1108,10 @@ static void gen_commit_packet(DisasContext *ctx) process_store_log(ctx); gen_reg_writes(ctx); +#if !defined(CONFIG_USER_ONLY) + gen_greg_writes(ctx); + gen_sreg_writes(ctx); +#endif gen_pred_writes(ctx); if (pkt->pkt_has_hvx) { gen_commit_hvx(ctx); @@ -1073,6 +1173,7 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); ctx->short_circuit = hex_cpu->short_circuit; ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS, PCYCLE_ENABLED); + ctx->need_next_pc = false; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -1201,6 +1302,13 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); hex_cycle_count = tcg_global_mem_new_i64(tcg_env, offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count"); +#ifndef CONFIG_USER_ONLY + hex_cause_code = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, cause_code), "cause_code"); 
+#endif + hex_next_PC = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, next_PC), "next_PC"); + for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(tcg_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index c9533fee1f5cc..ad1a2f4045347 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -85,6 +85,7 @@ typedef struct DisasContext { TCGv dczero_addr; bool pcycle_enabled; bool pkt_ends_tb; + bool need_next_pc; uint32_t num_cycles; } DisasContext; @@ -306,6 +307,7 @@ static inline void ctx_log_qreg_read(DisasContext *ctx, } extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; +extern TCGv hex_next_PC; extern TCGv hex_pred[NUM_PREGS]; extern TCGv hex_slot_cancelled; extern TCGv hex_new_value_usr; From 64c21be0938ede910d385c07ce22a6931b2b31f0 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 9 Sep 2024 10:48:04 -0700 Subject: [PATCH 072/126] target/hexagon: Add implicit sysreg writes Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/translate.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index d4b22acb729f6..ff881d10602bb 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -426,6 +426,16 @@ static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) } } +#ifndef CONFIG_USER_ONLY +static void mark_implicit_sreg_write(DisasContext *ctx, int attrib, int snum) +{ + uint16_t opcode = ctx->insn->opcode; + if (GET_ATTRIB(opcode, attrib)) { + ctx_log_sreg_write(ctx, snum); + } +} +#endif + static void mark_implicit_reg_writes(DisasContext *ctx) { mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); @@ -437,6 +447,12 @@ static void mark_implicit_reg_writes(DisasContext *ctx) mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); mark_implicit_reg_write(ctx, 
A_IMPLICIT_WRITES_USR, HEX_REG_USR); mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); + +#ifndef CONFIG_USER_ONLY + mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SGP0, HEX_SREG_SGP0); + mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SGP1, HEX_SREG_SGP1); + mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SSR, HEX_SREG_SSR); +#endif } static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) From 9e5aa94e17945f5c60daa51ac6ecdd9455c4a9f7 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 11 Sep 2024 14:05:59 -0700 Subject: [PATCH 073/126] target/hexagon: Define system, guest reg names Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 29 +++++++++++++++++++++++++++++ target/hexagon/internal.h | 2 ++ 2 files changed, 31 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 76a8cd5b2de8f..80f388e51c81a 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -85,6 +85,35 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "c24", "c25", "c26", "c27", "c28", "c29", "c30", "c31", }; +#ifndef CONFIG_USER_ONLY +const char * const hexagon_sregnames[] = { + "sgp0", "sgp1", "stid", "elr", "badva0", + "badva1", "ssr", "ccr", "htid", "badva", + "imask", "gevb", "vwctrl", "s13", "s14", + "s15", "evb", "modectl", "syscfg", "segment", + "ipendad", "vid", "vid1", "bestwait", "s24", + "schedcfg", "s26", "cfgbase", "diag", "rev", + "pcyclelo", "pcyclehi", "isdbst", "isdbcfg0", "isdbcfg1", + "livelock", "brkptpc0", "brkptcfg0", "brkptpc1", "brkptcfg1", + "isdbmbxin", "isdbmbxout", "isdben", "isdbgpr", "pmucnt4", + "pmucnt5", "pmucnt6", "pmucnt7", "pmucnt0", "pmucnt1", + "pmucnt2", "pmucnt3", "pmuevtcfg", "pmustid0", "pmuevtcfg1", + "pmustid1", "timerlo", "timerhi", "pmucfg", "rgdr2", + "rgdr", "turkey", "duck", "chicken", +}; + +G_STATIC_ASSERT(NUM_SREGS == ARRAY_SIZE(hexagon_sregnames)); + +const char * const hexagon_gregnames[] = { + "gelr", "gsr", "gosp", 
"gbadva", "gcommit1t", + "gcommit2t", "gcommit3t", "gcommit4t", "gcommit5t", "gcommit6t", + "gpcycle1t", "gpcycle2t", "gpcycle3t", "gpcycle4t", "gpcycle5t", + "gpcycle6t", "gpmucnt4", "gpmucnt5", "gpmucnt6", "gpmucnt7", + "gcommit7t", "gcommit8t", "gpcycle7t", "gpcycle8t", "gpcyclelo", + "gpcyclehi", "gpmucnt0", "gpmucnt1", "gpmucnt2", "gpmucnt3", + "g30", "g31", +}; +#endif /* * One of the main debugging techniques is to use "-d cpu" and compare against * LLDB output when single stepping. However, the target and qemu put the diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 120cfde7b9588..fd2397b9ef0eb 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -34,6 +34,8 @@ void hexagon_debug_qreg(CPUHexagonState *env, int regnum); void hexagon_debug(CPUHexagonState *env); extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS]; +extern const char * const hexagon_sregnames[]; +extern const char * const hexagon_gregnames[]; void G_NORETURN do_raise_exception(CPUHexagonState *env, uint32_t exception, From 59b249f492ad3ad0be8d2949caa7032716463819 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 11 Sep 2024 14:06:15 -0700 Subject: [PATCH 074/126] target/hexagon: initialize sys/guest reg TCGvs Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/translate.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index ff881d10602bb..248ed60f29868 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -1295,6 +1295,26 @@ void hexagon_translate_init(void) opcode_init(); +#ifndef CONFIG_USER_ONLY + for (i = 0; i < NUM_GREGS; i++) { + hex_greg[i] = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, greg[i]), + hexagon_gregnames[i]); + } + hex_g_sreg_ptr = tcg_global_mem_new_ptr(tcg_env, + offsetof(CPUHexagonState, g_sreg), "hex_g_sreg_ptr"); + for (i = 0; i < NUM_SREGS; i++) { + if 
(i < HEX_SREG_GLB_START) { + hex_t_sreg[i] = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, t_sreg[i]), + hexagon_sregnames[i]); + } else { + hex_g_sreg[i] = tcg_global_mem_new(hex_g_sreg_ptr, + i * sizeof(target_ulong), + hexagon_sregnames[i]); + } + } +#endif for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { hex_gpr[i] = tcg_global_mem_new(tcg_env, offsetof(CPUHexagonState, gpr[i]), From bca90008a0413972e8d0f9a28847f32301c3b150 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 12 Sep 2024 07:22:51 -0700 Subject: [PATCH 075/126] target/hexagon: Add TLB, k0 {un,}lock Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 104 ++++++++++++++++++++++++++++++++++++ target/hexagon/sys_macros.h | 8 +-- 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 702c3dd3c6438..f3b14fbf58f68 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1184,6 +1184,110 @@ void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old) BQL_LOCK_GUARD(); hexagon_modify_ssr(env, new, old); } + +static void hex_k0_lock(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1)); + + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) { + if (env->k0_lock_state == HEX_LOCK_QUEUED) { + env->next_PC += 4; + env->k0_lock_count++; + env->k0_lock_state = HEX_LOCK_OWNER; + SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1); + return; + } + if (env->k0_lock_state == HEX_LOCK_OWNER) { + qemu_log_mask(LOG_GUEST_ERROR, + "Double k0lock at PC: 0x%x, thread may hang\n", + env->next_PC); + env->next_PC += 4; + CPUState *cs = env_cpu(env); + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + return; + } + env->k0_lock_state = HEX_LOCK_WAITING; + CPUState *cs = env_cpu(env); + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + } else { + env->next_PC += 4; + 
env->k0_lock_count++; + env->k0_lock_state = HEX_LOCK_OWNER; + SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1); + } + +} + +static void hex_k0_unlock(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1)); + + /* Nothing to do if the k0 isn't locked by this thread */ + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) || + (env->k0_lock_state != HEX_LOCK_OWNER)) { + qemu_log_mask(LOG_GUEST_ERROR, + "thread %d attempted to unlock k0 without having the " + "lock, k0_lock state = %d, syscfg:k0 = %d\n", + env->threadId, env->k0_lock_state, + GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)); + g_assert(env->k0_lock_state != HEX_LOCK_WAITING); + return; + } + + env->k0_lock_count--; + env->k0_lock_state = HEX_LOCK_UNLOCKED; + SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0); + + /* Look for a thread to unlock */ + unsigned int this_threadId = env->threadId; + CPUHexagonState *unlock_thread = NULL; + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *thread = cpu_env(cs); + + /* + * The hardware implements round-robin fairness, so we look for threads + * starting at env->threadId + 1 and incrementing modulo the number of + * threads. + * + * To implement this, we check if thread is earlier in the modulo + * sequence than unlock_thread.
+ * if unlock thread is higher than this thread + * thread must be between this thread and unlock_thread + * else + * thread higher than this thread is ahead of unlock_thread + * thread must be lower than unlock thread + */ + if (thread->k0_lock_state == HEX_LOCK_WAITING) { + if (!unlock_thread) { + unlock_thread = thread; + } else if (unlock_thread->threadId > this_threadId) { + if (this_threadId < thread->threadId && + thread->threadId < unlock_thread->threadId) { + unlock_thread = thread; + } + } else { + if (thread->threadId > this_threadId) { + unlock_thread = thread; + } + if (thread->threadId < unlock_thread->threadId) { + unlock_thread = thread; + } + } + } + } + if (unlock_thread) { + cs = env_cpu(unlock_thread); + unlock_thread->k0_lock_state = HEX_LOCK_QUEUED; + SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1); + cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK); + } + +} #endif diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h index 3c4c3c7aa5ece..e5dc1ce0ab9fd 100644 --- a/target/hexagon/sys_macros.h +++ b/target/hexagon/sys_macros.h @@ -143,11 +143,11 @@ #define fDCINVIDX(REG) #define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */ -#define fSET_TLB_LOCK() g_assert_not_reached() -#define fCLEAR_TLB_LOCK() g_assert_not_reached() +#define fSET_TLB_LOCK() hex_tlb_lock(env); +#define fCLEAR_TLB_LOCK() hex_tlb_unlock(env); -#define fSET_K0_LOCK() g_assert_not_reached() -#define fCLEAR_K0_LOCK() g_assert_not_reached() +#define fSET_K0_LOCK() hex_k0_lock(env); +#define fCLEAR_K0_LOCK() hex_k0_unlock(env); #define fTLB_IDXMASK(INDEX) \ ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1)) From d6574eb4ddafaad367b7fdd706cf35d1a7181778 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Thu, 12 Sep 2024 14:10:04 -0700 Subject: [PATCH 076/126] target/hexagon: Define gen_precise_exception() Add PC to raise_exception helper Replace the fGEN_TCG_J2_trap0 macro override with the fTRAP()-generated
system helper instead. Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_tcg.h | 7 ------- target/hexagon/helper.h | 2 +- target/hexagon/op_helper.c | 10 ++++------ target/hexagon/translate.c | 13 ++++++++----- 4 files changed, 13 insertions(+), 19 deletions(-) diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 71f8a0e2d0848..146aadc737643 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1370,13 +1370,6 @@ #define fGEN_TCG_S2_storew_rl_st_vi(SHORTCODE) SHORTCODE #define fGEN_TCG_S4_stored_rl_st_vi(SHORTCODE) SHORTCODE -#define fGEN_TCG_J2_trap0(SHORTCODE) \ - do { \ - uiV = uiV; \ - tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->pkt->pc); \ - TCGv excp = tcg_constant_tl(HEX_EVENT_TRAP0); \ - gen_helper_raise_exception(tcg_env, excp); \ - } while (0) #endif #define fGEN_TCG_A2_nop(SHORTCODE) do { } while (0) diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 3df663baeb057..5bcb2f48097cf 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -18,7 +18,7 @@ #include "internal.h" #include "helper_protos_generated.h.inc" -DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) +DEF_HELPER_FLAGS_3(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32, i32) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_3(gather_store, void, env, i32, int) DEF_HELPER_1(commit_hvx_stores, void, env) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index f3b14fbf58f68..3bd4e2a872339 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -69,15 +69,13 @@ G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc) { - CPUState *cs = env_cpu(env); - qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); - cs->exception_index = exception; - cpu_loop_exit_restore(cs, pc); + do_raise_exception(env, exception, pc, 0); } -G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, 
uint32_t excp) +G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp, + target_ulong PC) { - hexagon_raise_exception_err(env, excp, 0); + hexagon_raise_exception_err(env, excp, PC); } void log_store32(CPUHexagonState *env, target_ulong addr, diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index 248ed60f29868..f4133a10490ee 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -124,9 +124,10 @@ intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, return offset; } -static void gen_exception_raw(int excp) +static void gen_exception(int excp, target_ulong PC) { - gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); + gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp), + tcg_constant_tl(PC)); } #ifndef CONFIG_USER_ONLY @@ -221,9 +222,11 @@ static void gen_end_tb(DisasContext *ctx) void hex_gen_exception_end_tb(DisasContext *ctx, int excp) { - gen_exec_counters(ctx); - tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC); - gen_exception_raw(excp); +#ifdef CONFIG_USER_ONLY + gen_exception(excp, ctx->pkt->pc); +#else + gen_precise_exception(excp, ctx->pkt->pc); +#endif ctx->base.is_jmp = DISAS_NORETURN; } From 78b5d547b2ef61abeb2c371de6cbb294e5b8054f Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 17 Sep 2024 18:15:07 -0700 Subject: [PATCH 077/126] target/hexagon: Add TCG overrides for transfer insts Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/gen_tcg_sys.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h index 6d73a18db455b..e56553462fb06 100644 --- a/target/hexagon/gen_tcg_sys.h +++ b/target/hexagon/gen_tcg_sys.h @@ -81,6 +81,31 @@ gen_helper_stop(tcg_env); \ } while (0) +#define fGEN_TCG_Y2_tfrscrr(SHORTCODE) \ + tcg_gen_mov_tl(RdV, SsV) + +#define fGEN_TCG_Y2_tfrsrcr(SHORTCODE) \ + tcg_gen_mov_tl(SdV, RsV) + +#define 
fGEN_TCG_Y4_tfrscpp(SHORTCODE) \ + tcg_gen_mov_i64(RddV, SssV) + +#define fGEN_TCG_Y4_tfrspcp(SHORTCODE) \ + tcg_gen_mov_i64(SddV, RssV) + +#define fGEN_TCG_G4_tfrgcrr(SHORTCODE) \ + tcg_gen_mov_tl(RdV, GsV) + +#define fGEN_TCG_G4_tfrgrcr(SHORTCODE) \ + tcg_gen_mov_tl(GdV, RsV) + +#define fGEN_TCG_G4_tfrgcpp(SHORTCODE) \ + tcg_gen_mov_i64(RddV, GssV) + +#define fGEN_TCG_G4_tfrgpcp(SHORTCODE) \ + tcg_gen_mov_i64(GddV, RssV) + + /* * rte (return from exception) * Clear the EX bit in SSR From 279648899a98452d38824db46e217f1be825193d Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Wed, 18 Sep 2024 09:33:45 -0700 Subject: [PATCH 078/126] target/hexagon: Add support for loadw_phys Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/hex_common.py | 3 +++ target/hexagon/imported/encode_pp.def | 1 + target/hexagon/imported/ldst.idef | 3 +++ 3 files changed, 7 insertions(+) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 8b738ca5494ed..6e4dd8d1c8aa2 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -266,6 +266,9 @@ def need_slot(tag): and "A_CVI_GATHER" not in attribdict[tag] and ("A_STORE" in attribdict[tag] or "A_LOAD" in attribdict[tag]) + and tag != "L4_loadw_phys" + and tag != "L6_memcpy" + and tag != "Y6_dmlink" ): return 1 else: diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 37faf62b1b7ea..41e4ab9e3a268 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -388,6 +388,7 @@ DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd") /** Load Acquire Store Release Encoding **/ +DEF_ENC32(L4_loadw_phys, ICLASS_LD" 001 0 000 sssss PP1ttttt -00ddddd") DEF_ENC32(L2_loadw_locked, ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") DEF_ENC32(L4_loadd_locked, ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") diff --git a/target/hexagon/imported/ldst.idef 
b/target/hexagon/imported/ldst.idef index 53198176a994e..4e1e5d5326ddb 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -203,6 +203,9 @@ Q6INSN(S2_storew_locked,"memw_locked(Rs32,Pd4)=Rt32", ATTRIBS(A_REGWRSIZE_4B,A_M Q6INSN(L4_loadd_locked,"Rdd32=memd_locked(Rs32)", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK), "Load double with lock", { fEA_REG(RsV); fLOAD_LOCKED(1,8,u,EA,RddV) }) +Q6INSN(L4_loadw_phys,"Rd32=memw_phys(Rs32,Rt32)", ATTRIBS(A_REGWRSIZE_4B,A_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_PRIV,A_MEMSIZE_4B,A_LOAD,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Load word from physical address", +{ fLOAD_PHYS(1,4,u,RsV,RtV,RdV); }) + Q6INSN(S4_stored_locked,"memd_locked(Rs32,Pd4)=Rtt32", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_LATEPRED,A_NOTE_LATEPRED), "Store word with lock", { fEA_REG(RsV); fSTORE_LOCKED(1,8,EA,RttV,PdV) }) From 546bd235ec2ef7c31317f5544f67367d93041684 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Fri, 6 Dec 2024 10:30:26 -0300 Subject: [PATCH 079/126] target/hexagon: Add guest reg reading functionality Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu.c | 19 ++++++++++++++++++- target/hexagon/op_helper.c | 19 +++++++++++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 80f388e51c81a..2913d7e8264a7 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -739,7 +739,24 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) #ifndef CONFIG_USER_ONLY uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg) { - g_assert_not_reached(); + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + int ssr_ce = GET_SSR_FIELD(SSR_CE, ssr); + + if (reg <= HEX_GREG_G3) { + return env->greg[reg]; + } + switch (reg) { + 
case HEX_GREG_GPCYCLELO: + return ssr_ce ? hexagon_get_sys_pcycle_count_low(env) : 0; + + case HEX_GREG_GPCYCLEHI: + return ssr_ce ? hexagon_get_sys_pcycle_count_high(env) : 0; + + default: + qemu_log_mask(LOG_UNIMP, "reading greg %" PRId32 + " not yet supported.\n", reg); + return 0; + } } #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 3bd4e2a872339..28b555e87375b 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -1877,13 +1877,28 @@ uint64_t HELPER(sreg_read_pair)(CPUHexagonState *env, uint32_t reg) } uint32_t HELPER(greg_read)(CPUHexagonState *env, uint32_t reg) + { - g_assert_not_reached(); + return hexagon_greg_read(env, reg); } uint64_t HELPER(greg_read_pair)(CPUHexagonState *env, uint32_t reg) + { - g_assert_not_reached(); + if (reg == HEX_GREG_G0 || reg == HEX_GREG_G2) { + return (uint64_t)(env->greg[reg]) | + (((uint64_t)(env->greg[reg + 1])) << 32); + } + switch (reg) { + case HEX_GREG_GPCYCLELO: { + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + int ssr_ce = GET_SSR_FIELD(SSR_CE, ssr); + return ssr_ce ? 
hexagon_get_sys_pcycle_count(env) : 0; + } + default: + return (uint64_t)hexagon_greg_read(env, reg) | + ((uint64_t)(hexagon_greg_read(env, reg + 1)) << 32); + } } void HELPER(setprio)(CPUHexagonState *env, uint32_t thread, uint32_t prio) From b231c250b38655043ebccdf55d3a2b040237f612 Mon Sep 17 00:00:00 2001 From: Brian Cain <brian.cain@oss.qualcomm.com> Date: Wed, 11 Dec 2024 14:08:53 -0300 Subject: [PATCH 080/126] target/hexagon: Add pcycle setting functionality Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu.c | 10 +++++++--- target/hexagon/cpu_helper.c | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 2913d7e8264a7..05b358dd501ca 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -440,19 +440,23 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) #endif qemu_init_vcpu(cs); -#ifndef CONFIG_USER_ONLY CPUHexagonState *env = cpu_env(cs); +#ifndef CONFIG_USER_ONLY hex_mmu_realize(env); if (cs->cpu_index == 0) { env->g_sreg = g_new0(target_ulong, NUM_SREGS); - env->g_pcycle_base = g_malloc0(sizeof(*env->g_pcycle_base)); } else { CPUState *cpu0 = qemu_get_cpu(0); CPUHexagonState *env0 = cpu_env(cpu0); env->g_sreg = env0->g_sreg; - env->g_pcycle_base = env0->g_pcycle_base; } #endif + if (cs->cpu_index == 0) { + env->g_pcycle_base = g_malloc0(sizeof(*env->g_pcycle_base)); + } else { + CPUState *cpu0 = qemu_get_cpu(0); + env->g_pcycle_base = cpu_env(cpu0)->g_pcycle_base; + } mcc->parent_realize(dev, errp); } diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 9c44cb79505a5..08c749e9fa9b8 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -70,18 +70,29 @@ uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env) void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t cycles_hi) { - 
g_assert_not_reached(); + uint64_t cur_cycles = hexagon_get_sys_pcycle_count(env); + uint64_t cycles = + ((uint64_t)cycles_hi << 32) | extract64(cur_cycles, 0, 32); + hexagon_set_sys_pcycle_count(env, cycles); } void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t cycles_lo) { - g_assert_not_reached(); + uint64_t cur_cycles = hexagon_get_sys_pcycle_count(env); + uint64_t cycles = (extract64(cur_cycles, 32, 32) << 32) | cycles_lo; + hexagon_set_sys_pcycle_count(env, cycles); } void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) { - g_assert_not_reached(); + *(env->g_pcycle_base) = cycles; + + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *env_ = cpu_env(cs); + env_->t_cycle_count = 0; + } } static void set_wait_mode(CPUHexagonState *env) From 5b5d143f56eedbc338ffc6ecfeb84ad8e225254b Mon Sep 17 00:00:00 2001 From: Sid Manning <sidneym@quicinc.com> Date: Tue, 7 Nov 2023 19:44:05 -0800 Subject: [PATCH 081/126] hw/intc: Add l2vic interrupt controller Co-authored-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Co-authored-by: Damien Hedde <damien.hedde@dahe.fr> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- MAINTAINERS | 2 + docs/devel/hexagon-l2vic.rst | 59 +++++ docs/devel/index-internals.rst | 1 + hw/intc/Kconfig | 3 + hw/intc/l2vic.c | 417 +++++++++++++++++++++++++++++++++ hw/intc/meson.build | 2 + hw/intc/trace-events | 4 + include/hw/intc/l2vic.h | 37 +++ 8 files changed, 525 insertions(+) create mode 100644 docs/devel/hexagon-l2vic.rst create mode 100644 hw/intc/l2vic.c create mode 100644 include/hw/intc/l2vic.h diff --git a/MAINTAINERS b/MAINTAINERS index c343ef6c13e09..65c24f6086823 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -232,6 +232,7 @@ Hexagon TCG CPUs M: Brian Cain <brian.cain@oss.qualcomm.com> S: Supported F: target/hexagon/ +F: hw/intc/l2vic.[ch] X: target/hexagon/idef-parser/ X: target/hexagon/gen_idef_parser_funcs.py F: linux-user/hexagon/ @@ -242,6 +243,7 @@ F: 
docker/dockerfiles/debian-hexagon-cross.docker F: gdb-xml/hexagon*.xml F: docs/system/target-hexagon.rst F: docs/devel/hexagon-sys.rst +F: docs/devel/hexagon-l2vic.rst T: git https://github.com/quic/qemu.git hex-next Hexagon idef-parser diff --git a/docs/devel/hexagon-l2vic.rst b/docs/devel/hexagon-l2vic.rst new file mode 100644 index 0000000000000..088563627445d --- /dev/null +++ b/docs/devel/hexagon-l2vic.rst @@ -0,0 +1,59 @@ +Hexagon L2 Vectored Interrupt Controller +======================================== + + +.. code-block:: none + + +-------+ + | | +----------------+ + | l2vic | | hexagon core | + | | | | + | +-----| | | + ------> |VID0 >------------->irq2 -\ | + ------> | | | | | + ... > | | | | | + ------> | | | <int steering> | + | +-----| | / | | \ | + | ... | | | | | | | + | +-----| | t0 t1 t2 t3 ...| + ------> |VIDN | | | + ------> | | | | + ------> | | | | + ------> | | | | + | +-----| | | + | | |Global SREG File| + | State | | | + | [ ]|<============|=>[VID ] | + | [ ]|<============|=>[VID1] | + | [ ]| | | + | [ ]| | | + | | | | + +-------+ +----------------+ + +L2VIC/Core Integration +---------------------- + +* hexagon core supports 8 external interrupt sources +* l2vic supports 1024 input interrupts mapped among 4 output interrupts +* l2vic has four output signals: { VID0, VID1, VID2, VID3 } +* l2vic device has a bank of registers per-VID that can be used to query + the status or assert new interrupts. +* Interrupts are 'steered' to threads based on { thread priority, 'EX' state, + thread interrupt mask, thread interrupt enable, global interrupt enable, + etc. }. +* Any hardware thread could conceivably handle any input interrupt, dependent + on state. +* The system register transfer instruction can read the VID0-VID3 values from + the l2vic when reading from hexagon core system registers "VID" and "VID1". +* When l2vic VID0 has multiple active interrupts, it pulses the VID0 output + IRQ and stores the IRQ number for the VID0 register field. 
Only after this + interrupt is cleared can the l2vic pulse the VID0 output IRQ again and provide + the next interrupt number on the VID0 register. +* The ``ciad`` instruction clears the l2vic input interrupt and un-disables the + core interrupt. If another l2vic VID0 interrupt is pending when this occurs, + the next interrupt should fire and any subsequent reads of the VID register + should reflect the newly raised interrupt. +* In QEMU, on an external interrupt or an unmasked-pending interrupt, + all vCPUs are triggered (has_work==true) and each will grab the IO lock + while considering the steering logic to determine whether they're the thread + that must handle the interrupt. diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index 0471db80645f7..6620497595abe 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -15,6 +15,7 @@ Details about QEMU's various subsystems including how to add features to them. clocks ebpf_rss hexagon-sys + hexagon-l2vic migration/index multi-process reset diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index 7547528f2c27b..a5b136e2fa72f 100644 --- a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -8,6 +8,9 @@ config I8259 config PL190 bool +config L2VIC + bool + config IOAPIC bool select I8259 diff --git a/hw/intc/l2vic.c b/hw/intc/l2vic.c new file mode 100644 index 0000000000000..9df657521407d --- /dev/null +++ b/hw/intc/l2vic.c @@ -0,0 +1,417 @@ +/* + * QEMU L2VIC Interrupt Controller + * + * Arm PrimeCell PL190 Vector Interrupt Controller was used as a reference. + * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "migration/vmstate.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/intc/l2vic.h" +#include "trace.h" + +#define L2VICA(s, n) (s[(n) >> 2]) + +#define TYPE_L2VIC "l2vic" +#define L2VIC(obj) OBJECT_CHECK(L2VICState, (obj), TYPE_L2VIC) + +#define SLICE_MAX (L2VIC_INTERRUPT_MAX / 32) + +typedef struct L2VICState { + SysBusDevice parent_obj; + + QemuMutex active; + MemoryRegion iomem; + MemoryRegion fast_iomem; + uint32_t level; + /* + * offset 0:vid group 0 etc, 10 bits in each group + * are used: + */ + uint32_t vid_group[4]; + uint32_t vid0; + /* Clear Status of Active Edge interrupt, not used: */ + uint32_t int_clear[SLICE_MAX] QEMU_ALIGNED(16); + /* Enable interrupt source */ + uint32_t int_enable[SLICE_MAX] QEMU_ALIGNED(16); + /* Clear (set to 0) corresponding bit in int_enable */ + uint32_t int_enable_clear; + /* Set (to 1) corresponding bit in int_enable */ + uint32_t int_enable_set; + /* Present for debugging, not used */ + uint32_t int_pending[SLICE_MAX] QEMU_ALIGNED(16); + /* Generate an interrupt */ + uint32_t int_soft; + /* Which enabled interrupt is active */ + uint32_t int_status[SLICE_MAX] QEMU_ALIGNED(16); + /* Edge or Level interrupt */ + uint32_t int_type[SLICE_MAX] QEMU_ALIGNED(16); + /* L2 interrupt group 0-3 0x600-0x7FF */ + uint32_t int_group_n0[SLICE_MAX] QEMU_ALIGNED(16); + uint32_t int_group_n1[SLICE_MAX] QEMU_ALIGNED(16); + uint32_t int_group_n2[SLICE_MAX] QEMU_ALIGNED(16); + uint32_t int_group_n3[SLICE_MAX] QEMU_ALIGNED(16); + qemu_irq irq[8]; +} L2VICState; + + +/* + * Find out if this irq is associated with a group other than + * the default group + */ +static uint32_t *get_int_group(L2VICState *s, int irq) +{ + int n = irq & 0x1f; + if (n < 8) { + return s->int_group_n0; + } + if (n < 16) { + return s->int_group_n1; + } + if (n < 24) { + return s->int_group_n2; + } + return s->int_group_n3; 
+} + +static int find_slice(int irq) +{ + return irq / 32; +} + +static int get_vid(L2VICState *s, int irq) +{ + uint32_t *group = get_int_group(s, irq); + uint32_t slice = group[find_slice(irq)]; + /* Bit 3 is GRP:EN; mask to the four groups vid_group[] can index */ + uint32_t val = slice >> ((irq & 0x7) * 4); + if (val & 0x8) { + return val & 0x3; + } else { + return 0; + } +} + +static inline bool vid_active(L2VICState *s) + +{ + /* scan all 1024 bits in int_status array */ + const int size = sizeof(s->int_status) * CHAR_BIT; + const int active_irq = find_first_bit((unsigned long *)s->int_status, size); + return ((active_irq != size)) ? true : false; +} + +static bool l2vic_update(L2VICState *s, int irq) +{ + if (vid_active(s)) { + return true; + } + + bool pending = test_bit(irq, (unsigned long *)s->int_pending); + bool enable = test_bit(irq, (unsigned long *)s->int_enable); + if (pending && enable) { + int vid = get_vid(s, irq); + set_bit(irq, (unsigned long *)s->int_status); + clear_bit(irq, (unsigned long *)s->int_pending); + clear_bit(irq, (unsigned long *)s->int_enable); + /* ensure the irq line goes low after going high */ + s->vid0 = irq; + s->vid_group[vid] = irq; + + /* already low: now call pulse */ + /* pulse: calls qemu_upper() and then qemu_lower()) */ + qemu_irq_pulse(s->irq[vid + 2]); + trace_l2vic_delivered(irq, vid); + return true; + } + return false; +} + +static void l2vic_update_all(L2VICState *s) +{ + for (int i = 0; i < L2VIC_INTERRUPT_MAX; i++) { + if (l2vic_update(s, i) == true) { + /* once vid is active, no-one else can set it until ciad */ + return; + } + } +} + +static void l2vic_set_irq(void *opaque, int irq, int level) +{ + L2VICState *s = (L2VICState *)opaque; + if (level) { + qemu_mutex_lock(&s->active); + set_bit(irq, (unsigned long *)s->int_pending); + qemu_mutex_unlock(&s->active); + } + l2vic_update(s, irq); +} + +static void l2vic_write(void *opaque, hwaddr offset, uint64_t val, + unsigned size) +{ + L2VICState *s = (L2VICState 
*)opaque; + qemu_mutex_lock(&s->active); + trace_l2vic_reg_write((unsigned)offset, (uint32_t)val); + + if (offset == L2VIC_VID_0) { + if ((int)val != L2VIC_CIAD_INSTRUCTION) { + s->vid0 = val; + } else { + /* ciad issued: clear int_status */ + clear_bit(s->vid0, (unsigned long *)s->int_status); + } + } else if (offset >= L2VIC_INT_ENABLEn && + offset < (L2VIC_INT_ENABLE_CLEARn)) { + L2VICA(s->int_enable, offset - L2VIC_INT_ENABLEn) = val; + } else if (offset >= L2VIC_INT_ENABLE_CLEARn && + offset < L2VIC_INT_ENABLE_SETn) { + L2VICA(s->int_enable, offset - L2VIC_INT_ENABLE_CLEARn) &= ~val; + } else if (offset >= L2VIC_INT_ENABLE_SETn && offset < L2VIC_INT_TYPEn) { + L2VICA(s->int_enable, offset - L2VIC_INT_ENABLE_SETn) |= val; + } else if (offset >= L2VIC_INT_TYPEn && offset < L2VIC_INT_TYPEn + 0x80) { + L2VICA(s->int_type, offset - L2VIC_INT_TYPEn) = val; + } else if (offset >= L2VIC_INT_STATUSn && offset < L2VIC_INT_CLEARn) { + L2VICA(s->int_status, offset - L2VIC_INT_STATUSn) = val; + } else if (offset >= L2VIC_INT_CLEARn && offset < L2VIC_SOFT_INTn) { + L2VICA(s->int_clear, offset - L2VIC_INT_CLEARn) = val; + } else if (offset >= L2VIC_INT_PENDINGn && + offset < L2VIC_INT_PENDINGn + 0x80) { + L2VICA(s->int_pending, offset - L2VIC_INT_PENDINGn) = val; + } else if (offset >= L2VIC_SOFT_INTn && offset < L2VIC_INT_PENDINGn) { + L2VICA(s->int_enable, offset - L2VIC_SOFT_INTn) |= val; + /* + * Need to reverse engineer the actual irq number. 
+ */ + int irq = find_first_bit((unsigned long *)&val, + sizeof(s->int_enable[0]) * CHAR_BIT); + hwaddr byteoffset = offset - L2VIC_SOFT_INTn; + g_assert(irq != sizeof(s->int_enable[0]) * CHAR_BIT); + irq += byteoffset * 8; + + /* The soft-int interface only works with edge-triggered interrupts */ + if (test_bit(irq, (unsigned long *)s->int_type)) { + qemu_mutex_unlock(&s->active); + l2vic_set_irq(opaque, irq, 1); + qemu_mutex_lock(&s->active); + } + } else if (offset >= L2VIC_INT_GRPn_0 && offset < L2VIC_INT_GRPn_1) { + L2VICA(s->int_group_n0, offset - L2VIC_INT_GRPn_0) = val; + } else if (offset >= L2VIC_INT_GRPn_1 && offset < L2VIC_INT_GRPn_2) { + L2VICA(s->int_group_n1, offset - L2VIC_INT_GRPn_1) = val; + } else if (offset >= L2VIC_INT_GRPn_2 && offset < L2VIC_INT_GRPn_3) { + L2VICA(s->int_group_n2, offset - L2VIC_INT_GRPn_2) = val; + } else if (offset >= L2VIC_INT_GRPn_3 && offset < L2VIC_INT_GRPn_3 + 0x80) { + L2VICA(s->int_group_n3, offset - L2VIC_INT_GRPn_3) = val; + } else { + qemu_log_mask(LOG_UNIMP, "%s: offset %x unimplemented\n", __func__, + (int)offset); + } + l2vic_update_all(s); + qemu_mutex_unlock(&s->active); + return; +} + +static uint64_t l2vic_read(void *opaque, hwaddr offset, unsigned size) +{ + uint64_t value; + L2VICState *s = (L2VICState *)opaque; + qemu_mutex_lock(&s->active); + + if (offset == L2VIC_VID_GRP_0) { + value = s->vid_group[0]; + } else if (offset == L2VIC_VID_GRP_1) { + value = s->vid_group[1]; + } else if (offset == L2VIC_VID_GRP_2) { + value = s->vid_group[2]; + } else if (offset == L2VIC_VID_GRP_3) { + value = s->vid_group[3]; + } else if (offset == L2VIC_VID_0) { + value = s->vid0; + } else if (offset >= L2VIC_INT_ENABLEn && + offset < L2VIC_INT_ENABLE_CLEARn) { + value = L2VICA(s->int_enable, offset - L2VIC_INT_ENABLEn); + } else if (offset >= L2VIC_INT_ENABLE_CLEARn && + offset < L2VIC_INT_ENABLE_SETn) { + value = 0; + } else if (offset >= L2VIC_INT_ENABLE_SETn && offset < L2VIC_INT_TYPEn) { + value = 0; + } else if 
(offset >= L2VIC_INT_TYPEn && offset < L2VIC_INT_TYPEn + 0x80) { + value = L2VICA(s->int_type, offset - L2VIC_INT_TYPEn); + } else if (offset >= L2VIC_INT_STATUSn && offset < L2VIC_INT_CLEARn) { + value = L2VICA(s->int_status, offset - L2VIC_INT_STATUSn); + } else if (offset >= L2VIC_INT_CLEARn && offset < L2VIC_SOFT_INTn) { + value = L2VICA(s->int_clear, offset - L2VIC_INT_CLEARn); + } else if (offset >= L2VIC_SOFT_INTn && offset < L2VIC_INT_PENDINGn) { + value = 0; + } else if (offset >= L2VIC_INT_PENDINGn && + offset < L2VIC_INT_PENDINGn + 0x80) { + value = L2VICA(s->int_pending, offset - L2VIC_INT_PENDINGn); + } else if (offset >= L2VIC_INT_GRPn_0 && offset < L2VIC_INT_GRPn_1) { + value = L2VICA(s->int_group_n0, offset - L2VIC_INT_GRPn_0); + } else if (offset >= L2VIC_INT_GRPn_1 && offset < L2VIC_INT_GRPn_2) { + value = L2VICA(s->int_group_n1, offset - L2VIC_INT_GRPn_1); + } else if (offset >= L2VIC_INT_GRPn_2 && offset < L2VIC_INT_GRPn_3) { + value = L2VICA(s->int_group_n2, offset - L2VIC_INT_GRPn_2); + } else if (offset >= L2VIC_INT_GRPn_3 && offset < L2VIC_INT_GRPn_3 + 0x80) { + value = L2VICA(s->int_group_n3, offset - L2VIC_INT_GRPn_3); + } else { + value = 0; + qemu_log_mask(LOG_GUEST_ERROR, "L2VIC: %s: offset 0x%x\n", __func__, + (int)offset); + } + + trace_l2vic_reg_read((unsigned)offset, value); + qemu_mutex_unlock(&s->active); + + return value; +} + +static const MemoryRegionOps l2vic_ops = { + .read = l2vic_read, + .write = l2vic_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .valid.unaligned = false, +}; + +#define FASTL2VIC_ENABLE 0x0 +#define FASTL2VIC_DISABLE 0x1 +#define FASTL2VIC_INT 0x2 + +static void fastl2vic_write(void *opaque, hwaddr offset, uint64_t val, + unsigned size) +{ + if (offset == 0) { + uint32_t cmd = (val >> 16) & 0x3; + uint32_t irq = val & 0x3ff; + uint32_t slice = (irq / 32) * 4; + val = 1 << (irq % 32); + + if (cmd == FASTL2VIC_ENABLE) { + l2vic_write(opaque, 
L2VIC_INT_ENABLE_SETn + slice, val, size); + } else if (cmd == FASTL2VIC_DISABLE) { + l2vic_write(opaque, L2VIC_INT_ENABLE_CLEARn + slice, val, size); + } else if (cmd == FASTL2VIC_INT) { + l2vic_write(opaque, L2VIC_SOFT_INTn + slice, val, size); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write cmd %" PRId32 + "\n", __func__, cmd); + } + return; + } + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write offset 0x%08" HWADDR_PRIx + "\n", __func__, offset); +} + +static const MemoryRegionOps fastl2vic_ops = { + .write = fastl2vic_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .valid.unaligned = false, +}; + +static void l2vic_reset_hold(Object *obj, G_GNUC_UNUSED ResetType res_type) +{ + L2VICState *s = L2VIC(obj); + memset(s->int_clear, 0, sizeof(s->int_clear)); + memset(s->int_enable, 0, sizeof(s->int_enable)); + memset(s->int_pending, 0, sizeof(s->int_pending)); + memset(s->int_status, 0, sizeof(s->int_status)); + memset(s->int_type, 0, sizeof(s->int_type)); + memset(s->int_group_n0, 0, sizeof(s->int_group_n0)); + memset(s->int_group_n1, 0, sizeof(s->int_group_n1)); + memset(s->int_group_n2, 0, sizeof(s->int_group_n2)); + memset(s->int_group_n3, 0, sizeof(s->int_group_n3)); + s->int_soft = 0; + s->vid0 = 0; + + l2vic_update_all(s); +} + + +static void reset_irq_handler(void *opaque, int irq, int level) +{ + L2VICState *s = (L2VICState *)opaque; + Object *obj = OBJECT(opaque); + if (level) { + l2vic_reset_hold(obj, RESET_TYPE_COLD); + } + l2vic_update_all(s); +} + +static void l2vic_init(Object *obj) +{ + DeviceState *dev = DEVICE(obj); + L2VICState *s = L2VIC(obj); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + int i; + + memory_region_init_io(&s->iomem, obj, &l2vic_ops, s, "l2vic", 0x1000); + sysbus_init_mmio(sbd, &s->iomem); + memory_region_init_io(&s->fast_iomem, obj, &fastl2vic_ops, s, "fast", + 0x10000); + sysbus_init_mmio(sbd, &s->fast_iomem); + + qdev_init_gpio_in(dev, l2vic_set_irq, 
L2VIC_INTERRUPT_MAX); + qdev_init_gpio_in_named(dev, reset_irq_handler, "reset", 1); + for (i = 0; i < 8; i++) { + sysbus_init_irq(sbd, &s->irq[i]); + } + qemu_mutex_init(&s->active); /* TODO: Remove this is an experiment */ +} + +static const VMStateDescription vmstate_l2vic = { + .name = "l2vic", + .version_id = 1, + .minimum_version_id = 1, + .fields = + (VMStateField[]){ + VMSTATE_UINT32(level, L2VICState), + VMSTATE_UINT32_ARRAY(vid_group, L2VICState, 4), + VMSTATE_UINT32(vid0, L2VICState), + VMSTATE_UINT32_ARRAY(int_enable, L2VICState, SLICE_MAX), + VMSTATE_UINT32(int_enable_clear, L2VICState), + VMSTATE_UINT32(int_enable_set, L2VICState), + VMSTATE_UINT32_ARRAY(int_type, L2VICState, SLICE_MAX), + VMSTATE_UINT32_ARRAY(int_status, L2VICState, SLICE_MAX), + VMSTATE_UINT32_ARRAY(int_clear, L2VICState, SLICE_MAX), + VMSTATE_UINT32(int_soft, L2VICState), + VMSTATE_UINT32_ARRAY(int_pending, L2VICState, SLICE_MAX), + VMSTATE_UINT32_ARRAY(int_group_n0, L2VICState, SLICE_MAX), + VMSTATE_UINT32_ARRAY(int_group_n1, L2VICState, SLICE_MAX), + VMSTATE_UINT32_ARRAY(int_group_n2, L2VICState, SLICE_MAX), + VMSTATE_UINT32_ARRAY(int_group_n3, L2VICState, SLICE_MAX), + VMSTATE_END_OF_LIST() } +}; + +static void l2vic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + dc->vmsd = &vmstate_l2vic; + rc->phases.hold = l2vic_reset_hold; +} + +static const TypeInfo l2vic_info = { + .name = TYPE_L2VIC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(L2VICState), + .instance_init = l2vic_init, + .class_init = l2vic_class_init, +}; + +static void l2vic_register_types(void) +{ + type_register_static(&l2vic_info); +} + +type_init(l2vic_register_types) diff --git a/hw/intc/meson.build b/hw/intc/meson.build index 602da304b02d5..35f4a7bad5efe 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -67,6 +67,8 @@ specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('xics_spapr.c', 
'spapr_xi specific_ss.add(when: 'CONFIG_XIVE', if_true: files('xive.c')) specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XIVE'], if_true: files('spapr_xive_kvm.c')) + +specific_ss.add(when: 'CONFIG_L2VIC', if_true: files('l2vic.c')) specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c')) specific_ss.add(when: 'CONFIG_LOONGSON_IPI_COMMON', if_true: files('loongson_ipi_common.c')) specific_ss.add(when: 'CONFIG_LOONGSON_IPI', if_true: files('loongson_ipi.c')) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 3dcf147198337..bc66260fc0cb4 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -303,6 +303,10 @@ sh_intc_register(const char *s, int id, unsigned short v, int c, int m) "%s %u - sh_intc_read(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " -> 0x%lx" sh_intc_write(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " <- 0x%lx" sh_intc_set(int id, int enable) "setting interrupt group %d to %d" +# l2vic.c +l2vic_reg_write(unsigned int addr, uint32_t value) "addr: 0x%03x value: 0x%08"PRIx32 +l2vic_reg_read(unsigned int addr, uint32_t value) "addr: 0x%03x value: 0x%08"PRIx32 +l2vic_delivered(int irq, int vid) "l2vic: delivered %d (vid %d)" # loongson_ipi.c loongson_ipi_read(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%"PRIx64 diff --git a/include/hw/intc/l2vic.h b/include/hw/intc/l2vic.h new file mode 100644 index 0000000000000..ed8ccf33b1f8c --- /dev/null +++ b/include/hw/intc/l2vic.h @@ -0,0 +1,37 @@ +/* + * QEMU L2VIC Interrupt Controller + * + * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#define L2VIC_VID_GRP_0 0x0 /* Read */ +#define L2VIC_VID_GRP_1 0x4 /* Read */ +#define L2VIC_VID_GRP_2 0x8 /* Read */ +#define L2VIC_VID_GRP_3 0xC /* Read */ +#define L2VIC_VID_0 0x10 /* Read SOFTWARE DEFINED */ +#define L2VIC_VID_1 0x14 /* Read SOFTWARE DEFINED NOT YET USED */ +#define L2VIC_INT_ENABLEn 0x100 /* Read/Write */ +#define L2VIC_INT_ENABLE_CLEARn 0x180 /* Write */ +#define L2VIC_INT_ENABLE_SETn 0x200 /* Write */ +#define L2VIC_INT_TYPEn 0x280 /* Read/Write */ +#define L2VIC_INT_STATUSn 0x380 /* Read */ +#define L2VIC_INT_CLEARn 0x400 /* Write */ +#define L2VIC_SOFT_INTn 0x480 /* Write */ +#define L2VIC_INT_PENDINGn 0x500 /* Read */ +#define L2VIC_INT_GRPn_0 0x600 /* Read/Write */ +#define L2VIC_INT_GRPn_1 0x680 /* Read/Write */ +#define L2VIC_INT_GRPn_2 0x700 /* Read/Write */ +#define L2VIC_INT_GRPn_3 0x780 /* Read/Write */ + +#define L2VIC_INTERRUPT_MAX 1024 +#define L2VIC_CIAD_INSTRUCTION -1 +/* + * Note about l2vic groups: + * Each interrupt to L2VIC can be configured to associate with one of + * four groups. 
+ * Group 0 interrupts go to IRQ2 via VID 0 (SSR: 0xC2, the default) + * Group 1 interrupts go to IRQ3 via VID 1 (SSR: 0xC3) + * Group 2 interrupts go to IRQ4 via VID 2 (SSR: 0xC4) + * Group 3 interrupts go to IRQ5 via VID 3 (SSR: 0xC5) + */ From 10c6ab91b728dccf8efced1ecf74ba6dd7eda35e Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sat, 2 Dec 2023 10:09:53 -0800 Subject: [PATCH 082/126] hw/hexagon: Add machine configs for sysemu Co-authored-by: Mike Lambert <mlambert@quicinc.com> Co-authored-by: Sid Manning <sidneym@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- MAINTAINERS | 2 + hw/Kconfig | 1 + hw/hexagon/Kconfig | 6 + hw/hexagon/hexagon_dsp.c | 177 +++++++++++++++++++++++++ hw/hexagon/machine_cfg_v66g_1024.h.inc | 64 +++++++++ hw/hexagon/meson.build | 5 + hw/meson.build | 1 + include/hw/hexagon/hexagon.h | 151 +++++++++++++++++++++ qapi/machine.json | 2 +- target/hexagon/machine.c | 1 - 10 files changed, 408 insertions(+), 2 deletions(-) create mode 100644 hw/hexagon/Kconfig create mode 100644 hw/hexagon/hexagon_dsp.c create mode 100644 hw/hexagon/machine_cfg_v66g_1024.h.inc create mode 100644 hw/hexagon/meson.build create mode 100644 include/hw/hexagon/hexagon.h diff --git a/MAINTAINERS b/MAINTAINERS index 65c24f6086823..3b970a24071a0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -233,6 +233,8 @@ M: Brian Cain <brian.cain@oss.qualcomm.com> S: Supported F: target/hexagon/ F: hw/intc/l2vic.[ch] +F: hw/hexagon/ +F: include/hw/hexagon/ X: target/hexagon/idef-parser/ X: target/hexagon/gen_idef_parser_funcs.py F: linux-user/hexagon/ diff --git a/hw/Kconfig b/hw/Kconfig index 9a86a6a28a643..4dc7914c13f52 100644 --- a/hw/Kconfig +++ b/hw/Kconfig @@ -67,6 +67,7 @@ source sparc/Kconfig source sparc64/Kconfig source tricore/Kconfig source xtensa/Kconfig +source hexagon/Kconfig # Symbols used by multiple targets config TEST_DEVICES diff --git a/hw/hexagon/Kconfig b/hw/hexagon/Kconfig new file mode 100644 index 
0000000000000..3fc14756e6df0 --- /dev/null +++ b/hw/hexagon/Kconfig @@ -0,0 +1,6 @@ +config HEX_DSP + bool + default y + depends on HEXAGON && TCG + imply PTIMER + select L2VIC # Vector PIC diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c new file mode 100644 index 0000000000000..9f18cb6e3ad1a --- /dev/null +++ b/hw/hexagon/hexagon_dsp.c @@ -0,0 +1,177 @@ +/* + * Hexagon DSP Subsystem emulation. This represents a generic DSP + * subsystem with few peripherals, like the Compute DSP. + * + * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "exec/address-spaces.h" +#include "hw/hw.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/hexagon/hexagon.h" +#include "hw/loader.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "elf.h" +#include "cpu.h" +#include "include/migration/cpu.h" +#include "include/system/system.h" +#include "target/hexagon/internal.h" +#include "system/reset.h" + +#include "machine_cfg_v66g_1024.h.inc" + +static void hex_symbol_callback(const char *st_name, int st_info, + uint64_t st_value, uint64_t st_size) +{ +} + +/* Board init. 
*/ +static struct hexagon_board_boot_info hexagon_binfo; + +static void hexagon_load_kernel(HexagonCPU *cpu) +{ + uint64_t pentry; + long kernel_size; + + kernel_size = load_elf_ram_sym(hexagon_binfo.kernel_filename, NULL, NULL, + NULL, &pentry, NULL, NULL, + &hexagon_binfo.kernel_elf_flags, 0, EM_HEXAGON, 0, 0, + &address_space_memory, false, hex_symbol_callback); + + if (kernel_size <= 0) { + error_report("no kernel file '%s'", + hexagon_binfo.kernel_filename); + exit(1); + } + + qdev_prop_set_uint32(DEVICE(cpu), "exec-start-addr", pentry); +} + +static void hexagon_init_bootstrap(MachineState *machine, HexagonCPU *cpu) +{ + if (machine->kernel_filename) { + hexagon_load_kernel(cpu); + } +} + +static void do_cpu_reset(void *opaque) +{ + HexagonCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + cpu_reset(cs); +} + +static void hexagon_common_init(MachineState *machine, Rev_t rev, + hexagon_machine_config *m_cfg) +{ + memset(&hexagon_binfo, 0, sizeof(hexagon_binfo)); + if (machine->kernel_filename) { + hexagon_binfo.ram_size = machine->ram_size; + hexagon_binfo.kernel_filename = machine->kernel_filename; + } + + machine->enable_graphics = 0; + + MemoryRegion *address_space = get_system_memory(); + + MemoryRegion *sram = g_new(MemoryRegion, 1); + memory_region_init_ram(sram, NULL, "ddr.ram", + machine->ram_size, &error_fatal); + memory_region_add_subregion(address_space, 0x0, sram); + + Error **errp = &error_fatal; + + for (int i = 0; i < machine->smp.cpus; i++) { + HexagonCPU *cpu = HEXAGON_CPU(object_new(machine->cpu_type)); + qemu_register_reset(do_cpu_reset, cpu); + + /* + * CPU #0 is the only CPU running at boot, others must be + * explicitly enabled via start instruction. 
+ */ + qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0)); + qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base); + qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase); + qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts", + m_cfg->cfgtable.ext_contexts); + qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries", + m_cfg->cfgtable.jtlb_size_entries); + + + if (i == 0) { + hexagon_init_bootstrap(machine, cpu); + if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) { + return; + } + DeviceState *l2vic_dev; + l2vic_dev = sysbus_create_varargs("l2vic", m_cfg->l2vic_base, + /* IRQ#, Evnt#,CauseCode */ + qdev_get_gpio_in(DEVICE(cpu), 0), + qdev_get_gpio_in(DEVICE(cpu), 1), + qdev_get_gpio_in(DEVICE(cpu), 2), + qdev_get_gpio_in(DEVICE(cpu), 3), + qdev_get_gpio_in(DEVICE(cpu), 4), + qdev_get_gpio_in(DEVICE(cpu), 5), + qdev_get_gpio_in(DEVICE(cpu), 6), + qdev_get_gpio_in(DEVICE(cpu), 7), + NULL); + sysbus_mmio_map(SYS_BUS_DEVICE(l2vic_dev), 1, + m_cfg->cfgtable.fastl2vic_base << 16); + } else if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) { + return; + } + + } +} + +static void init_mc(MachineClass *mc) +{ + mc->block_default_type = IF_SD; + mc->default_ram_size = 4 * GiB; + mc->no_parallel = 1; + mc->no_floppy = 1; + mc->no_cdrom = 1; + mc->no_serial = 1; + mc->is_default = false; + mc->max_cpus = 8; +} + +/* ----------------------------------------------------------------- */ +/* Core-specific configuration settings are defined below this line. 
*/ +/* Config table values defined in machine_configs.h.inc */ +/* ----------------------------------------------------------------- */ + +static void v66g_1024_config_init(MachineState *machine) +{ + hexagon_common_init(machine, v66_rev, &v66g_1024); +} + +static void v66g_1024_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "Hexagon V66G_1024"; + mc->init = v66g_1024_config_init; + init_mc(mc); + mc->is_default = true; + mc->default_cpu_type = TYPE_HEXAGON_CPU_V66; + mc->default_cpus = 4; +} + +static const TypeInfo hexagon_machine_types[] = { + { + .name = MACHINE_TYPE_NAME("V66G_1024"), + .parent = TYPE_MACHINE, + .class_init = v66g_1024_init, + }, +}; + +DEFINE_TYPES(hexagon_machine_types) diff --git a/hw/hexagon/machine_cfg_v66g_1024.h.inc b/hw/hexagon/machine_cfg_v66g_1024.h.inc new file mode 100644 index 0000000000000..604cc7777cbf0 --- /dev/null +++ b/hw/hexagon/machine_cfg_v66g_1024.h.inc @@ -0,0 +1,64 @@ + +static hexagon_machine_config v66g_1024 = { + .cfgbase = 0xd8180000, + .l2tcm_size = 0x00000000, + .l2vic_base = 0xfc910000, + .l2vic_size = 0x00001000, + .csr_base = 0xfc900000, + .qtmr_rg0 = 0xfc921000, + .qtmr_rg1 = 0xfc922000, + .cfgtable = { + .l2tcm_base = 0x0000d800, + .reserved0 = 0x0000d400, + .subsystem_base = 0x0000fc90, + .etm_base = 0x0000d805, + .l2cfg_base = 0x0000d81a, + .reserved1 = 0x00000000, + .l1s0_base = 0x0000d820, + .axi2_lowaddr = 0x00003000, + .streamer_base = 0x00000000, + .reserved2 = 0x0000d819, + .fastl2vic_base = 0x0000d81e, + .jtlb_size_entries = 0x00000080, + .coproc_present = 0x00000001, + .ext_contexts = 0x00000004, + .vtcm_base = 0x0000d820, + .vtcm_size_kb = 0x00000100, + .l2tag_size = 0x00000400, + .l2ecomem_size = 0x00000400, + .thread_enable_mask = 0x0000000f, + .eccreg_base = 0x0000d81f, + .l2line_size = 0x00000080, + .tiny_core = 0x00000000, + .l2itcm_size = 0x00000000, + .l2itcm_base = 0x0000d820, + .reserved3 = 0x00000000, + .dtm_present = 0x00000000, + 
.dma_version = 0x00000000,
+        .hvx_vec_log_length = 0x00000080,
+        .core_id = 0x00000000,
+        .core_count = 0x00000000,
+        .coproc2_reg0 = 0x00000000,
+        .coproc2_reg1 = 0x00000000,
+        .v2x_mode = 0x00000000,
+        .coproc2_reg2 = 0x00000000,
+        .coproc2_reg3 = 0x00000000,
+        .coproc2_reg4 = 0x00000000,
+        .coproc2_reg5 = 0x00000000,
+        .coproc2_reg6 = 0x00000000,
+        .coproc2_reg7 = 0x00000000,
+        .acd_preset = 0x00000000,
+        .mnd_preset = 0x00000000,
+        .l1d_size_kb = 0x00000000,
+        .l1i_size_kb = 0x00000000,
+        .l1d_write_policy = 0x00000000,
+        .vtcm_bank_width = 0x00000000,
+        .reserved4 = 0x00000000,
+        .reserved5 = 0x00000000,
+        .reserved6 = 0x00000000,
+        .coproc2_cvt_mpy_size = 0x00000000,
+        .consistency_domain = 0x00000000,
+        .capacity_domain = 0x00000000,
+        .axi3_lowaddr = 0x00000000,
+    },
+};
diff --git a/hw/hexagon/meson.build b/hw/hexagon/meson.build
new file mode 100644
index 0000000000000..2ef3dbcd3492d
--- /dev/null
+++ b/hw/hexagon/meson.build
@@ -0,0 +1,5 @@
+hexagon_ss = ss.source_set()
+hexagon_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('hexagon_dsp.c',))
+
+hw_arch += {'hexagon': hexagon_ss}
+
diff --git a/hw/meson.build b/hw/meson.build
index b91f761fe08aa..6aaf469f95e47 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -66,3 +66,4 @@ subdir('sparc')
 subdir('sparc64')
 subdir('tricore')
 subdir('xtensa')
+subdir('hexagon')
diff --git a/include/hw/hexagon/hexagon.h b/include/hw/hexagon/hexagon.h
new file mode 100644
index 0000000000000..0afaac3b1f85a
--- /dev/null
+++ b/include/hw/hexagon/hexagon.h
@@ -0,0 +1,151 @@
+/*
+ * Hexagon Baseboard System emulation.
+ *
+ * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#ifndef HW_HEXAGON_H +#define HW_HEXAGON_H + +#include "exec/memory.h" + +struct hexagon_board_boot_info { + uint64_t ram_size; + const char *kernel_filename; + uint32_t kernel_elf_flags; +}; + +typedef enum { + unknown_rev = 0, + v66_rev = 0xa666, + v67_rev = 0x2667, + v68_rev = 0x8d68, + v69_rev = 0x8c69, + v71_rev = 0x8c71, + v73_rev = 0x8c73, + v73m_rev = 0xcc73, +} Rev_t; +#define HEXAGON_LATEST_REV v73 +#define HEXAGON_LATEST_REV_UPPER V73 + +/* + * Config table address bases represent bits [35:16]. + */ +#define HEXAGON_CFG_ADDR_BASE(addr) (((addr) >> 16) & 0x0fffff) + +#define HEXAGON_CFGSPACE_ENTRIES (128) + +typedef union { + struct { + /* Base address of L2TCM space */ + uint32_t l2tcm_base; + uint32_t reserved0; + /* Base address of subsystem space */ + uint32_t subsystem_base; + /* Base address of ETM space */ + uint32_t etm_base; + /* Base address of L2 configuration space */ + uint32_t l2cfg_base; + uint32_t reserved1; + /* Base address of L1S */ + uint32_t l1s0_base; + /* Base address of AXI2 */ + uint32_t axi2_lowaddr; + /* Base address of streamer base */ + uint32_t streamer_base; + uint32_t reserved2; + /* Base address of fast L2VIC */ + uint32_t fastl2vic_base; + /* Number of entries in JTLB */ + uint32_t jtlb_size_entries; + /* Coprocessor type */ + uint32_t coproc_present; + /* Number of extension execution contexts available */ + uint32_t ext_contexts; + /* Base address of Hexagon Vector Tightly Coupled Memory (VTCM) */ + uint32_t vtcm_base; + /* Size of VTCM (in KB) */ + uint32_t vtcm_size_kb; + /* L2 tag size */ + uint32_t l2tag_size; + /* Amount of physical L2 memory in released version */ + uint32_t l2ecomem_size; + /* Hardware threads available on the core */ + uint32_t thread_enable_mask; + /* Base address of the ECC registers */ + uint32_t eccreg_base; + /* L2 line size */ + uint32_t l2line_size; + /* Small Core processor (also implies audio extension) */ + uint32_t tiny_core; + 
/* Size of L2TCM */ + uint32_t l2itcm_size; + /* Base address of L2-ITCM */ + uint32_t l2itcm_base; + uint32_t reserved3; + /* DTM is present */ + uint32_t dtm_present; + /* Version of the DMA */ + uint32_t dma_version; + /* Native HVX vector length in log of bytes */ + uint32_t hvx_vec_log_length; + /* Core ID of the multi-core */ + uint32_t core_id; + /* Number of multi-core cores */ + uint32_t core_count; + uint32_t coproc2_reg0; + uint32_t coproc2_reg1; + /* Supported HVX vector length */ + uint32_t v2x_mode; + uint32_t coproc2_reg2; + uint32_t coproc2_reg3; + uint32_t coproc2_reg4; + uint32_t coproc2_reg5; + uint32_t coproc2_reg6; + uint32_t coproc2_reg7; + /* Voltage droop mitigation technique parameter */ + uint32_t acd_preset; + /* Voltage droop mitigation technique parameter */ + uint32_t mnd_preset; + /* L1 data cache size (in KB) */ + uint32_t l1d_size_kb; + /* L1 instruction cache size in (KB) */ + uint32_t l1i_size_kb; + /* L1 data cache write policy: see HexagonL1WritePolicy */ + uint32_t l1d_write_policy; + /* VTCM bank width */ + uint32_t vtcm_bank_width; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; + uint32_t coproc2_cvt_mpy_size; + uint32_t consistency_domain; + uint32_t capacity_domain; + uint32_t axi3_lowaddr; + uint32_t coproc2_int8_subcolumns; + uint32_t corecfg_present; + uint32_t coproc2_fp16_acc_exp; + uint32_t AXIM2_secondary_base; + }; + uint32_t raw[HEXAGON_CFGSPACE_ENTRIES]; +} hexagon_config_table; + +typedef struct { + /* Base address of config table */ + uint32_t cfgbase; + /* Size of L2 TCM */ + uint32_t l2tcm_size; + /* Base address of L2VIC */ + uint32_t l2vic_base; + /* Size of L2VIC region */ + uint32_t l2vic_size; + /* QTimer csr base */ + uint32_t csr_base; + uint32_t qtmr_rg0; + uint32_t qtmr_rg1; + hexagon_config_table cfgtable; +} hexagon_machine_config; + +#endif diff --git a/qapi/machine.json b/qapi/machine.json index a6b8795b09edc..a7070bad4d522 100644 --- a/qapi/machine.json +++ b/qapi/machine.json 
@@ -33,7 +33,7 @@ # Since: 3.0 ## { 'enum' : 'SysEmuTarget', - 'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'hppa', 'i386', + 'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'hexagon', 'hppa', 'i386', 'loongarch64', 'm68k', 'microblaze', 'microblazeel', 'mips', 'mips64', 'mips64el', 'mipsel', 'or1k', 'ppc', 'ppc64', 'riscv32', 'riscv64', 'rx', 's390x', 'sh4', diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c index 4baa22d51f8e9..79e9b7effa5e7 100644 --- a/target/hexagon/machine.c +++ b/target/hexagon/machine.c @@ -79,7 +79,6 @@ const VMStateDescription vmstate_hexagon_cpu = { VMSTATE_UINT64(env.t_cycle_count, HexagonCPU), VMSTATE_POINTER(env.g_pcycle_base, HexagonCPU, 0, vmstate_info_uint64_ptr, uint64_t *), - VMSTATE_END_OF_LIST() }, }; From 8891ea77b7e442d18972a8d2fc999a69d205ebea Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 15 Oct 2024 18:56:30 -0700 Subject: [PATCH 083/126] hw/hexagon: Add v68, sa8775-cdsp0 defs Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc | 64 ++++++++++++++++++++++ hw/hexagon/machine_cfg_v68n_1024.h.inc | 65 +++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc create mode 100644 hw/hexagon/machine_cfg_v68n_1024.h.inc diff --git a/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc new file mode 100644 index 0000000000000..d8fa961f6d091 --- /dev/null +++ b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc @@ -0,0 +1,64 @@ + +static hexagon_machine_config SA8775P_cdsp0 = { + .cfgbase = 0x24000000 + 0x180000, + .l2tcm_size = 0x00000000, + .l2vic_base = 0x26300000 + 0x90000, + .l2vic_size = 0x00001000, + .csr_base = 0x26300000, + .qtmr_rg0 = 0x26300000 + 0xA1000, + .qtmr_rg1 = 0x26300000 + 0xA2000, + .cfgtable = { + .l2tcm_base = 0x00002400, + .reserved0 = 0x00000000, + .subsystem_base = 0x00002638, + .etm_base = 0x00002419, + .l2cfg_base = 0x0000241a, + 
.reserved1 = 0x0000241b,
+        .l1s0_base = 0x00002500,
+        .axi2_lowaddr = 0x00000000,
+        .streamer_base = 0x00000000,
+        .reserved2 = 0x00000000,
+        .fastl2vic_base = 0x0000241e,
+        .jtlb_size_entries = 0x00000080,
+        .coproc_present = 0x00000001,
+        .ext_contexts = 0x00000004,
+        .vtcm_base = 0x00002500,
+        .vtcm_size_kb = 0x00002000,
+        .l2tag_size = 0x00000400,
+        .l2ecomem_size = 0x00000000,
+        .thread_enable_mask = 0x0000003f,
+        .eccreg_base = 0x0000241f,
+        .l2line_size = 0x00000080,
+        .tiny_core = 0x00000000,
+        .l2itcm_size = 0x00000000,
+        .l2itcm_base = 0x00002400,
+        .reserved3 = 0x00000000,
+        .dtm_present = 0x00000000,
+        .dma_version = 0x00000003,
+        .hvx_vec_log_length = 0x00000007,
+        .core_id = 0x00000000,
+        .core_count = 0x00000000,
+        .coproc2_reg0 = 0x00000040,
+        .coproc2_reg1 = 0x00000020,
+        .v2x_mode = 0x00000001,
+        .coproc2_reg2 = 0x00000008,
+        .coproc2_reg3 = 0x00000020,
+        .coproc2_reg4 = 0x00000000,
+        .coproc2_reg5 = 0x00000002,
+        .coproc2_reg6 = 0x00000016,
+        .coproc2_reg7 = 0x00000006,
+        .acd_preset = 0x00000001,
+        .mnd_preset = 0x00000000,
+        .l1d_size_kb = 0x00000010,
+        .l1i_size_kb = 0x00000020,
+        .l1d_write_policy = 0x00000002,
+        .vtcm_bank_width = 0x00000080,
+        .reserved4 = 0x00000001,
+        .reserved5 = 0x00000000,
+        .reserved6 = 0x00000003,
+        .coproc2_cvt_mpy_size = 0x0000000a,
+        .consistency_domain = 0x000000e0,
+        .capacity_domain = 0x00000080,
+        .axi3_lowaddr = 0x00000000,
+    },
+};
diff --git a/hw/hexagon/machine_cfg_v68n_1024.h.inc b/hw/hexagon/machine_cfg_v68n_1024.h.inc
new file mode 100644
index 0000000000000..60eb112a1199c
--- /dev/null
+++ b/hw/hexagon/machine_cfg_v68n_1024.h.inc
@@ -0,0 +1,65 @@
+
+static hexagon_machine_config v68n_1024 = {
+    .cfgbase = 0xde000000,
+    .l2tcm_size = 0x00000000,
+    .l2vic_base = 0xfc910000,
+    .l2vic_size = 0x00001000,
+    .csr_base = 0xfc900000,
+    .qtmr_rg0 = 0xfc921000,
+    .qtmr_rg1 = 0xfc922000,
+    .cfgtable = {
+        .l2tcm_base = 0x0000d800,
+        .reserved0 = 0x00000000,
+        .subsystem_base = 0x0000fc90,
+        .etm_base = 0x0000d819,
+        .l2cfg_base = 0x0000d81a,
+        .reserved1 = 0x00000000,
+        .l1s0_base = 0x0000d840,
+        .axi2_lowaddr = 0x00003000,
+        .streamer_base = 0x0000d81c,
+        .reserved2 = 0x0000d81d,
+        .fastl2vic_base = 0x0000d81e,
+        .jtlb_size_entries = 0x00000080,
+        .coproc_present = 0x00000001,
+        .ext_contexts = 0x00000004,
+        .vtcm_base = 0x0000d840,
+        .vtcm_size_kb = 0x00001000,
+        .l2tag_size = 0x00000400,
+        .l2ecomem_size = 0x00000400,
+        .thread_enable_mask = 0x0000003f,
+        .eccreg_base = 0x0000d81f,
+        .l2line_size = 0x00000080,
+        .tiny_core = 0x00000000,
+        .l2itcm_size = 0x00000000,
+        .l2itcm_base = 0x0000d820,
+        .reserved3 = 0x00000000,
+        .dtm_present = 0x00000000,
+        .dma_version = 0x00000001,
+        .hvx_vec_log_length = 0x00000007,
+        .core_id = 0x00000000,
+        .core_count = 0x00000000,
+        .coproc2_reg0 = 0x00000040,
+        .coproc2_reg1 = 0x00000020,
+        .v2x_mode = 0x1f1f1f1f,
+        .coproc2_reg2 = 0x1f1f1f1f,
+        .coproc2_reg3 = 0x1f1f1f1f,
+        .coproc2_reg4 = 0x1f1f1f1f,
+        .coproc2_reg5 = 0x1f1f1f1f,
+        .coproc2_reg6 = 0x1f1f1f1f,
+        .coproc2_reg7 = 0x1f1f1f1f,
+        .acd_preset = 0x1f1f1f1f,
+        .mnd_preset = 0x1f1f1f1f,
+        .l1d_size_kb = 0x1f1f1f1f,
+        .l1i_size_kb = 0x1f1f1f1f,
+        .l1d_write_policy = 0x1f1f1f1f,
+        .vtcm_bank_width = 0x1f1f1f1f,
+        .reserved4 = 0x1f1f1f1f,
+        .reserved5 = 0x1f1f1f1f,
+        .reserved6 = 0x1f1f1f1f,
+        .coproc2_cvt_mpy_size = 0x1f1f1f1f,
+        .consistency_domain = 0x1f1f1f1f,
+        .capacity_domain = 0x1f1f1f1f,
+        .axi3_lowaddr = 0x1f1f1f1f,
+    },
+};
+
From 230947a1c04fe881276515abea0079463e3b2225 Mon Sep 17 00:00:00 2001
From: Sid Manning <sidneym@quicinc.com>
Date: Wed, 18 Dec 2024 09:23:33 -0800
Subject: [PATCH 084/126] hw/hexagon: Add support for cfgbase

Signed-off-by: Sid Manning <sidneym@quicinc.com>
---
 hw/hexagon/hexagon_dsp.c | 10 ++++++++++
 target/hexagon/cpu.c | 6 ++++++
 target/hexagon/cpu.h | 1 +
 3 files changed, 17 insertions(+)

diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c
index 9f18cb6e3ad1a..c4962a98bc1c8 100644
--- a/hw/hexagon/hexagon_dsp.c
+++
b/hw/hexagon/hexagon_dsp.c @@ -82,6 +82,12 @@ static void hexagon_common_init(MachineState *machine, Rev_t rev, MemoryRegion *address_space = get_system_memory(); + MemoryRegion *config_table_rom = g_new(MemoryRegion, 1); + memory_region_init_rom(config_table_rom, NULL, "config_table.rom", + sizeof(m_cfg->cfgtable), &error_fatal); + memory_region_add_subregion(address_space, m_cfg->cfgbase, + config_table_rom); + MemoryRegion *sram = g_new(MemoryRegion, 1); memory_region_init_ram(sram, NULL, "ddr.ram", machine->ram_size, &error_fatal); @@ -130,6 +136,10 @@ static void hexagon_common_init(MachineState *machine, Rev_t rev, } } + + rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable, + sizeof(m_cfg->cfgtable), m_cfg->cfgbase, + &address_space_memory); } static void init_mc(MachineClass *mc) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 05b358dd501ca..7ebb53a5da8ee 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -67,6 +67,8 @@ static const Property hexagon_cpu_properties[] = { 0xffffffffULL), DEFINE_PROP_UINT32("hvx-contexts", HexagonCPU, hvx_contexts, 0), DEFINE_PROP_UINT32("exec-start-addr", HexagonCPU, boot_addr, 0xffffffffULL), + DEFINE_PROP_UINT64("config-table-addr", HexagonCPU, config_table_addr, + 0xffffffffULL), #endif DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, @@ -360,6 +362,8 @@ void hexagon_cpu_soft_reset(CPUHexagonState *env) } #endif + +#define HEXAGON_CFG_ADDR_BASE(addr) (((addr) >> 16) & 0x0fffff) static void hexagon_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -400,6 +404,8 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) env->wait_next_pc = 0; env->cause_code = -1; arch_set_thread_reg(env, HEX_REG_PC, cpu->boot_addr); + arch_set_system_reg(env, HEX_SREG_CFGBASE, + HEXAGON_CFG_ADDR_BASE(cpu->config_table_addr)); #endif } diff --git a/target/hexagon/cpu.h 
b/target/hexagon/cpu.h index 340e0a83a5baa..f5b92d33a1560 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -199,6 +199,7 @@ struct ArchCPU { uint32_t l2vic_base_addr; uint32_t hvx_contexts; uint32_t boot_addr; + uint64_t config_table_addr; #endif }; From 55849d3937ca8f870c39e82dda5a7e589d072607 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Tue, 22 Oct 2024 13:50:07 -0700 Subject: [PATCH 085/126] hw/hexagon: Modify "Standalone" symbols These symbols are used by Hexagon Standalone OS to indicate whether the program should halt and wait for interrupts at startup. For QEMU, we want these programs to just continue crt0 startup through to the user program's main(). Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- hw/hexagon/hexagon_dsp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c index c4962a98bc1c8..34bbe98149cfc 100644 --- a/hw/hexagon/hexagon_dsp.c +++ b/hw/hexagon/hexagon_dsp.c @@ -28,9 +28,17 @@ #include "machine_cfg_v66g_1024.h.inc" +static hwaddr isdb_secure_flag; +static hwaddr isdb_trusted_flag; static void hex_symbol_callback(const char *st_name, int st_info, uint64_t st_value, uint64_t st_size) { + if (!g_strcmp0("isdb_secure_flag", st_name)) { + isdb_secure_flag = st_value; + } + if (!g_strcmp0("isdb_trusted_flag", st_name)) { + isdb_trusted_flag = st_value; + } } /* Board init. 
*/ @@ -59,6 +67,13 @@ static void hexagon_init_bootstrap(MachineState *machine, HexagonCPU *cpu) { if (machine->kernel_filename) { hexagon_load_kernel(cpu); + uint32_t mem = 1; + if (isdb_secure_flag) { + cpu_physical_memory_write(isdb_secure_flag, &mem, sizeof(mem)); + } + if (isdb_trusted_flag) { + cpu_physical_memory_write(isdb_trusted_flag, &mem, sizeof(mem)); + } } } From c93d9c09061ef1f08282fd9f58a8315472b8bfa1 Mon Sep 17 00:00:00 2001 From: Brian Cain <brian.cain@oss.qualcomm.com> Date: Sat, 2 Dec 2023 10:09:53 -0800 Subject: [PATCH 086/126] target/hexagon: add build config for softmmu Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- MAINTAINERS | 1 + configs/devices/hexagon-softmmu/default.mak | 7 +++++++ configs/targets/hexagon-softmmu.mak | 6 ++++++ target/Kconfig | 1 + target/hexagon/Kconfig | 2 ++ target/hexagon/cpu.h | 4 ---- target/hexagon/meson.build | 9 +++++++++ 7 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 configs/devices/hexagon-softmmu/default.mak create mode 100644 configs/targets/hexagon-softmmu.mak create mode 100644 target/hexagon/Kconfig diff --git a/MAINTAINERS b/MAINTAINERS index 3b970a24071a0..58497567dc83d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -241,6 +241,7 @@ F: linux-user/hexagon/ F: tests/tcg/hexagon/ F: disas/hexagon.c F: configs/targets/hexagon-linux-user/default.mak +F: configs/devices/hexagon-softmmu/default.mak F: docker/dockerfiles/debian-hexagon-cross.docker F: gdb-xml/hexagon*.xml F: docs/system/target-hexagon.rst diff --git a/configs/devices/hexagon-softmmu/default.mak b/configs/devices/hexagon-softmmu/default.mak new file mode 100644 index 0000000000000..08e709aea72fb --- /dev/null +++ b/configs/devices/hexagon-softmmu/default.mak @@ -0,0 +1,7 @@ +# Default configuration for hexagon-softmmu + +# Uncomment the following lines to disable these optional devices: + +# Boards are selected by default, uncomment to keep out of the build. 
+# CONFIG_HEX_DSP=y +# CONFIG_L2VIC=y diff --git a/configs/targets/hexagon-softmmu.mak b/configs/targets/hexagon-softmmu.mak new file mode 100644 index 0000000000000..8c208bf46884c --- /dev/null +++ b/configs/targets/hexagon-softmmu.mak @@ -0,0 +1,6 @@ +# Default configuration for hexagon-softmmu + +TARGET_ARCH=hexagon +TARGET_SUPPORTS_MTTCG=y +TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml gdb-xml/hexagon-sys.xml +TARGET_LONG_BITS=32 diff --git a/target/Kconfig b/target/Kconfig index d0c7b59d9c718..37781146b9bbb 100644 --- a/target/Kconfig +++ b/target/Kconfig @@ -16,6 +16,7 @@ source sh4/Kconfig source sparc/Kconfig source tricore/Kconfig source xtensa/Kconfig +source hexagon/Kconfig config TARGET_BIG_ENDIAN bool diff --git a/target/hexagon/Kconfig b/target/hexagon/Kconfig new file mode 100644 index 0000000000000..7e556f350633c --- /dev/null +++ b/target/hexagon/Kconfig @@ -0,0 +1,2 @@ +config HEXAGON + bool diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index f5b92d33a1560..0608d3265cd12 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -32,10 +32,6 @@ #include "mmvec/mmvec.h" #include "hw/registerfields.h" -#ifndef CONFIG_USER_ONLY -#error "Hexagon does not support system emulation" -#endif - #ifndef CONFIG_USER_ONLY #include "reg_fields.h" typedef struct CPUHexagonTLBContext CPUHexagonTLBContext; diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index aa729a3683f15..d2b56b9e65e54 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -245,6 +245,7 @@ decodetree_trans_funcs_generated = custom_target( command: [python, files('gen_trans_funcs.py'), semantics_generated, '@OUTPUT@'], ) hexagon_ss.add(decodetree_trans_funcs_generated) +hexagon_softmmu_ss = ss.source_set() hexagon_ss.add(files( 'cpu.c', @@ -264,6 +265,13 @@ hexagon_ss.add(files( 'mmvec/system_ext_mmvec.c', )) +hexagon_softmmu_ss.add(files( + 'hex_mmu.c', + 'hex_interrupts.c', + 'hexswi.c', + 'machine.c', +)) + # # Step 
4.5 # We use flex/bison based idef-parser to generate TCG code for a lot @@ -401,3 +409,4 @@ analyze_funcs_generated = custom_target( hexagon_ss.add(analyze_funcs_generated) target_arch += {'hexagon': hexagon_ss} +target_system_arch += {'hexagon': hexagon_softmmu_ss} From 479e2fd35bc9174428cffa7363d8d49922bb28f7 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Sun, 28 Jul 2024 22:13:33 -0700 Subject: [PATCH 087/126] hw/hexagon: Define hexagon "virt" machine Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- configs/devices/hexagon-softmmu/default.mak | 1 + configs/targets/hexagon-softmmu.mak | 1 + hw/hexagon/Kconfig | 8 + hw/hexagon/meson.build | 2 + hw/hexagon/virt.c | 393 ++++++++++++++++++++ include/hw/hexagon/virt.h | 41 ++ target/hexagon/cpu.c | 2 +- 7 files changed, 447 insertions(+), 1 deletion(-) create mode 100644 hw/hexagon/virt.c create mode 100644 include/hw/hexagon/virt.h diff --git a/configs/devices/hexagon-softmmu/default.mak b/configs/devices/hexagon-softmmu/default.mak index 08e709aea72fb..37b4f9f3237aa 100644 --- a/configs/devices/hexagon-softmmu/default.mak +++ b/configs/devices/hexagon-softmmu/default.mak @@ -3,5 +3,6 @@ # Uncomment the following lines to disable these optional devices: # Boards are selected by default, uncomment to keep out of the build. 
+# CONFIG_HEX_VIRT=y # CONFIG_HEX_DSP=y # CONFIG_L2VIC=y diff --git a/configs/targets/hexagon-softmmu.mak b/configs/targets/hexagon-softmmu.mak index 8c208bf46884c..9f8fca1dc162e 100644 --- a/configs/targets/hexagon-softmmu.mak +++ b/configs/targets/hexagon-softmmu.mak @@ -4,3 +4,4 @@ TARGET_ARCH=hexagon TARGET_SUPPORTS_MTTCG=y TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml gdb-xml/hexagon-sys.xml TARGET_LONG_BITS=32 +TARGET_NEED_FDT=y diff --git a/hw/hexagon/Kconfig b/hw/hexagon/Kconfig index 3fc14756e6df0..f3f011573105e 100644 --- a/hw/hexagon/Kconfig +++ b/hw/hexagon/Kconfig @@ -4,3 +4,11 @@ config HEX_DSP depends on HEXAGON && TCG imply PTIMER select L2VIC # Vector PIC + +config HEX_VIRT + bool + default y + depends on HEX_DSP && FDT + select DEVICE_TREE + select VIRTIO_MMIO + select PL011 diff --git a/hw/hexagon/meson.build b/hw/hexagon/meson.build index 2ef3dbcd3492d..649ad6dc02b3f 100644 --- a/hw/hexagon/meson.build +++ b/hw/hexagon/meson.build @@ -3,3 +3,5 @@ hexagon_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('hexagon_dsp.c',)) hw_arch += {'hexagon': hexagon_ss} +hexagon_ss.add(when: 'CONFIG_HEX_VIRT', if_true: files('virt.c',)) + diff --git a/hw/hexagon/virt.c b/hw/hexagon/virt.c new file mode 100644 index 0000000000000..1999745598635 --- /dev/null +++ b/hw/hexagon/virt.c @@ -0,0 +1,393 @@ +/* + * Hexagon virt emulation + * + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "exec/address-spaces.h" +#include "hw/char/pl011.h" +#include "hw/core/sysbus-fdt.h" +#include "hw/hexagon/hexagon.h" +#include "hw/hexagon/virt.h" +#include "hw/loader.h" +#include "hw/qdev-properties.h" +#include "hw/register.h" +#include "qemu/error-report.h" +#include "qemu/guest-random.h" +#include "qemu/units.h" +#include "elf.h" +#include "machine_cfg_v68n_1024.h.inc" +#include "system/device_tree.h" +#include "system/reset.h" +#include "system/system.h" +#include <libfdt.h> + +static const int VIRTIO_DEV_COUNT = 2; + +static const MemMapEntry base_memmap[] = { + [VIRT_UART0] = { 0x10000000, 0x00000200 }, + [VIRT_MMIO] = { 0x11000000, 0x1000000, }, + [VIRT_GPT] = { 0xab000000, 0x00001000 }, + [VIRT_FDT] = { 0x99900000, 0x00000200 }, +}; + +static const int irqmap[] = { + [VIRT_MMIO] = 18, /* ...to 18 + VIRTIO_DEV_COUNT - 1 */ + [VIRT_GPT] = 12, + [VIRT_UART0] = 15, + [VIRT_QTMR0] = 2, + [VIRT_QTMR1] = 4, +}; + + +static void create_fdt(HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + void *fdt = create_device_tree(&vms->fdt_size); + + if (!fdt) { + error_report("create_device_tree() failed"); + exit(1); + } + + ms->fdt = fdt; + + qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,hexagon-virt"); + qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2); + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x1); + qemu_fdt_setprop_string(fdt, "/", "model", "linux,hexagon-virt"); + + qemu_fdt_setprop_string(fdt, "/", "model", "hexagon-virt,qemu"); + qemu_fdt_setprop_string(fdt, "/", "compatible", "qcom,sm8150"); + + qemu_fdt_add_subnode(fdt, "/soc"); + qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2); + qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x1); + qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0); + + qemu_fdt_add_subnode(fdt, "/chosen"); + + uint8_t rng_seed[32]; + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + 
qemu_fdt_setprop(fdt, "/chosen", "rng-seed", rng_seed, sizeof(rng_seed)); +} + +static void fdt_add_hvx(HexagonVirtMachineState *vms, + const hexagon_machine_config *m_cfg, Error **errp) +{ + const MachineState *ms = MACHINE(vms); + uint32_t vtcm_size_bytes = m_cfg->cfgtable.vtcm_size_kb * 1024; + if (vtcm_size_bytes > 0) { + memory_region_init_ram(&vms->vtcm, NULL, "vtcm.ram", vtcm_size_bytes, + errp); + memory_region_add_subregion(vms->sys, m_cfg->cfgtable.vtcm_base << 16, + &vms->vtcm); + + qemu_fdt_add_subnode(ms->fdt, "/soc/vtcm"); + qemu_fdt_setprop_string(ms->fdt, "/soc/vtcm", "compatible", + "qcom,hexagon_vtcm"); + + assert(sizeof(m_cfg->cfgtable.vtcm_base) == sizeof(uint32_t)); + qemu_fdt_setprop_cells(ms->fdt, "/soc/vtcm", "reg", 0, + m_cfg->cfgtable.vtcm_base << 16, + vtcm_size_bytes); + } + + if (m_cfg->cfgtable.ext_contexts > 0) { + qemu_fdt_add_subnode(ms->fdt, "/soc/hvx"); + qemu_fdt_setprop_string(ms->fdt, "/soc/hvx", "compatible", + "qcom,hexagon-hvx"); + qemu_fdt_setprop_cells(ms->fdt, "/soc/hvx", "qcom,hvx-max-ctxts", + m_cfg->cfgtable.ext_contexts); + qemu_fdt_setprop_cells(ms->fdt, "/soc/hvx", "qcom,hvx-vlength", + m_cfg->cfgtable.hvx_vec_log_length); + } +} + +static int32_t irq_hvm_ic_phandle = -1; +static void fdt_add_hvm_pic_node(HexagonVirtMachineState *vms, + const hexagon_machine_config *m_cfg) +{ + MachineState *ms = MACHINE(vms); + irq_hvm_ic_phandle = qemu_fdt_alloc_phandle(ms->fdt); + + qemu_fdt_setprop_cell(ms->fdt, "/soc", "interrupt-parent", + irq_hvm_ic_phandle); + + qemu_fdt_add_subnode(ms->fdt, "/soc/interrupt-controller"); + qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller", + "#address-cells", 2); + qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller", + "#interrupt-cells", 2); + qemu_fdt_setprop_string(ms->fdt, "/soc/interrupt-controller", "compatible", + "qcom,h2-pic,hvm-pic"); + qemu_fdt_setprop(ms->fdt, "/soc/interrupt-controller", + "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(ms->fdt, 
"/soc/interrupt-controller", "phandle", + irq_hvm_ic_phandle); + + sysbus_mmio_map(SYS_BUS_DEVICE(vms->l2vic), 1, + m_cfg->cfgtable.fastl2vic_base << 16); +} + + +static void fdt_add_gpt_node(HexagonVirtMachineState *vms) +{ + g_autofree char *name = NULL; + MachineState *ms = MACHINE(vms); + + name = g_strdup_printf("/soc/gpt@%" PRIx64, + (int64_t)base_memmap[VIRT_GPT].base); + qemu_fdt_add_subnode(ms->fdt, name); + qemu_fdt_setprop_string(ms->fdt, name, "compatible", + "qcom,h2-timer,hvm-timer"); + qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", irqmap[VIRT_GPT], 0); + qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0x0, + base_memmap[VIRT_GPT].base, + base_memmap[VIRT_GPT].size); +} + +static int32_t clock_phandle = -1; +static void fdt_add_clocks(const HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + clock_phandle = qemu_fdt_alloc_phandle(ms->fdt); + qemu_fdt_add_subnode(ms->fdt, "/apb-pclk"); + qemu_fdt_setprop_string(ms->fdt, "/apb-pclk", "compatible", "fixed-clock"); + qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "#clock-cells", 0x0); + qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "clock-frequency", 24000000); + qemu_fdt_setprop_string(ms->fdt, "/apb-pclk", "clock-output-names", + "clk24mhz"); + qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "phandle", clock_phandle); +} + +static void fdt_add_uart(const HexagonVirtMachineState *vms, int uart) +{ + char *nodename; + hwaddr base = base_memmap[uart].base; + hwaddr size = base_memmap[uart].size; + assert(uart == 0); + int irq = irqmap[VIRT_UART0 + uart]; + const char compat[] = "arm,pl011\0arm,primecell"; + const char clocknames[] = "uartclk\0apb_pclk"; + MachineState *ms = MACHINE(vms); + + pl011_create(base, qdev_get_gpio_in(vms->l2vic, irq), serial_hd(0)); + + nodename = g_strdup_printf("/pl011@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); + + /* Note that we can't use setprop_string because of the embedded NUL */ + qemu_fdt_setprop(ms->fdt, nodename, "compatible", compat, 
sizeof(compat));
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0, base, size);
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", 32 + irq, 0);
+    qemu_fdt_setprop_cells(ms->fdt, nodename, "clocks", clock_phandle,
+                           clock_phandle);
+    qemu_fdt_setprop(ms->fdt, nodename, "clock-names", clocknames,
+                     sizeof(clocknames));
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent",
+                          irq_hvm_ic_phandle);
+
+    qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename);
+    qemu_fdt_add_subnode(ms->fdt, "/aliases");
+    qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", nodename);
+
+    g_free(nodename);
+}
+
+/*
+ * Add the "/cpus" container node and one "cpu@N" child per configured CPU.
+ * Each child gets device_type "cpu", its index as "reg" and a freshly
+ * allocated phandle.  Children are added from the highest index down.
+ */
+static void fdt_add_cpu_nodes(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    qemu_fdt_add_subnode(ms->fdt, "/cpus");
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1);
+    qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
+
+    /* cpu nodes */
+    for (int num = ms->smp.cpus - 1; num >= 0; num--) {
+        char *nodename = g_strdup_printf("/cpus/cpu@%d", num);
+        qemu_fdt_add_subnode(ms->fdt, nodename);
+        qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu");
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
+                              qemu_fdt_alloc_phandle(ms->fdt));
+        g_free(nodename);
+    }
+}
+
+
+/*
+ * Create VIRTIO_DEV_COUNT virtio-mmio transports.  Transport i lives at
+ * base_memmap[VIRT_MMIO].base + i * size and is wired to L2VIC input
+ * irqmap[VIRT_MMIO] + i; a matching "virtio,mmio" node is added to the
+ * device tree for each of them.
+ */
+static void fdt_add_virtio_devices(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    /* VirtIO MMIO devices */
+    for (int i = 0; i < VIRTIO_DEV_COUNT; i++) {
+        char *nodename;
+        int irq = irqmap[VIRT_MMIO] + i;
+        size_t size = base_memmap[VIRT_MMIO].size;
+        hwaddr base = base_memmap[VIRT_MMIO].base + i * size;
+
+        nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
+        qemu_fdt_add_subnode(ms->fdt, nodename);
+        qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "virtio,mmio");
+        qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 1,
+                                     size);
+        qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent",
+                              irq_hvm_ic_phandle);
+
+        /* Same interrupt number as advertised in the node above. */
+        sysbus_create_simple("virtio-mmio", base,
+                             qdev_get_gpio_in(vms->l2vic, irq));
+
+        g_free(nodename);
+    }
+}
+
+static void virt_instance_init(Object *obj)
+{
+    HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(obj);
+
+    /* Build the base device tree as soon as the machine object exists. */
+    create_fdt(vms);
+}
+
+/*
+ * Register the machine's device tree (vms->fdt_size bytes of ms->fdt) as a
+ * ROM blob at the VIRT_FDT base address, and register
+ * qemu_fdt_randomize_seeds as a reset handler operating on the loaded copy.
+ */
+void hexagon_load_fdt(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    hwaddr fdt_addr = base_memmap[VIRT_FDT].base;
+    uint32_t fdtsize = vms->fdt_size;
+
+    /* copy in the device tree */
+    rom_add_blob_fixed_as("fdt", ms->fdt, fdtsize, fdt_addr,
+                          &address_space_memory);
+    qemu_register_reset_nosnapshotload(
+        qemu_fdt_randomize_seeds,
+        rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize));
+}
+
+/*
+ * Load the ELF image named by ms->kernel_filename into guest memory.
+ * Returns its entry point; reports an error and exits if loading fails.
+ */
+static uint64_t load_kernel(const HexagonVirtMachineState *vms)
+{
+    MachineState *ms = MACHINE(vms);
+    uint64_t entry = 0;
+    if (load_elf_ram_sym(ms->kernel_filename, NULL, NULL, NULL, NULL, &entry,
+                         NULL, NULL, 0, EM_HEXAGON, 0, 0, &address_space_memory,
+                         false, NULL) > 0) {
+        return entry;
+    }
+    error_report("error loading '%s'", ms->kernel_filename);
+    exit(1);
+}
+
+/* Reset handler registered once per CPU; opaque is the HexagonCPU. */
+static void do_cpu_reset(void *opaque)
+{
+    HexagonCPU *cpu = opaque;
+    CPUState *cs = CPU(cpu);
+    cpu_reset(cs);
+}
+
+/*
+ * Machine init: publish the kernel command line in the device tree, create
+ * RAM, the optional TCM and the config-table ROM, instantiate and realize
+ * the CPUs (only CPU 0 starts powered on and, if a kernel was given, starts
+ * at its ELF entry point), wire the L2VIC to CPU 0, add the remaining
+ * device-tree nodes and finally load the config table and FDT blobs.
+ */
+static void virt_init(MachineState *ms)
+{
+    HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(ms);
+    /*
+     * NOTE(review): a NULL errp makes the callees below drop their errors;
+     * &error_fatal is the usual choice for board init code - confirm.
+     */
+    Error **errp = NULL;
+    const hexagon_machine_config *m_cfg = &v68n_1024;
+
+    /* kernel_cmdline may be unset; never hand NULL to the FDT helper. */
+    if (ms->kernel_cmdline) {
+        qemu_fdt_setprop_string(ms->fdt, "/chosen", "bootargs",
+                                ms->kernel_cmdline);
+    }
+
+    vms->sys = get_system_memory();
+
+    memory_region_init_ram(&vms->ram, NULL, "ddr.ram", ms->ram_size, errp);
+    memory_region_add_subregion(vms->sys, 0x0, &vms->ram);
+
+    if (m_cfg->l2tcm_size) {
+        memory_region_init_ram(&vms->tcm, NULL, "tcm.ram", m_cfg->l2tcm_size,
+                               errp);
+        memory_region_add_subregion(vms->sys, m_cfg->cfgtable.l2tcm_base << 16,
+                                    &vms->tcm);
+    }
+
+    memory_region_init_rom(&vms->cfgtable, NULL, "config_table.rom",
+                           sizeof(m_cfg->cfgtable), errp);
+    memory_region_add_subregion(vms->sys, m_cfg->cfgbase, &vms->cfgtable);
+    fdt_add_hvx(vms, m_cfg, errp);
+    const char *cpu_model = ms->cpu_type;
+
+    if (!cpu_model) {
+        cpu_model = HEXAGON_CPU_TYPE_NAME("v73");
+    }
+
+    HexagonCPU *cpu_0 = NULL;
+    for (int i = 0; i < ms->smp.cpus; i++) {
+        /* Use the model selected above so the fallback actually applies. */
+        HexagonCPU *cpu = HEXAGON_CPU(object_new(cpu_model));
+        qemu_register_reset(do_cpu_reset, cpu);
+
+        if (i == 0) {
+            cpu_0 = cpu;
+            if (ms->kernel_filename) {
+                uint64_t entry = load_kernel(vms);
+
+                qdev_prop_set_uint32(DEVICE(cpu_0), "exec-start-addr", entry);
+            }
+        }
+        qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0));
+        qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts",
+                             m_cfg->cfgtable.ext_contexts);
+        qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase);
+        qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base);
+        qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries",
+                             m_cfg->cfgtable.jtlb_size_entries);
+
+        if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) {
+            /* Do not carry on with a partially constructed machine. */
+            error_report("failed to realize CPU %d", i);
+            exit(1);
+        }
+    }
+    vms->l2vic = sysbus_create_varargs(
+        "l2vic", m_cfg->l2vic_base, qdev_get_gpio_in(DEVICE(cpu_0), 0),
+        qdev_get_gpio_in(DEVICE(cpu_0), 1), qdev_get_gpio_in(DEVICE(cpu_0), 2),
+        qdev_get_gpio_in(DEVICE(cpu_0), 3), qdev_get_gpio_in(DEVICE(cpu_0), 4),
+        qdev_get_gpio_in(DEVICE(cpu_0), 5), qdev_get_gpio_in(DEVICE(cpu_0), 6),
+        qdev_get_gpio_in(DEVICE(cpu_0), 7), NULL);
+
+    fdt_add_hvm_pic_node(vms, m_cfg);
+    fdt_add_virtio_devices(vms);
+    fdt_add_cpu_nodes(vms);
+    fdt_add_clocks(vms);
+    fdt_add_uart(vms, VIRT_UART0);
+    fdt_add_gpt_node(vms);
+
+    rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable,
+                          sizeof(m_cfg->cfgtable), m_cfg->cfgbase,
+                          &address_space_memory);
+
+
+    hexagon_load_fdt(vms);
+}
+
+
+/* Class defaults for the board: v73 CPU, 4 GiB of RAM, 8 CPUs (also the max). */
+static void virt_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->init = virt_init;
+    mc->default_cpu_type = HEXAGON_CPU_TYPE_NAME("v73");
+    mc->default_ram_size = 4 * GiB;
+    mc->max_cpus = 8;
+    mc->default_cpus = 8;
+    mc->is_default = false;
+    mc->default_kernel_irqchip_split = false;
+    mc->block_default_type = IF_VIRTIO;
+    mc->default_boot_order = NULL;
+    mc->no_cdrom = 1;
+    mc->numa_mem_supported = false;
+    /* NOTE(review): this names a bus type rather than a NIC model - confirm. */
+    mc->default_nic = "virtio-mmio-bus";
+}
+
+
+static const TypeInfo virt_machine_types[] = { {
+    .name = TYPE_HEXAGON_VIRT_MACHINE,
+    .parent = TYPE_MACHINE,
+    .instance_size = sizeof(HexagonVirtMachineState),
+    .class_init = virt_class_init,
+    .instance_init = virt_instance_init,
+} };
+
+DEFINE_TYPES(virt_machine_types)
diff --git a/include/hw/hexagon/virt.h b/include/hw/hexagon/virt.h
new file mode 100644
index 0000000000000..0c165a786d305
--- /dev/null
+++ b/include/hw/hexagon/virt.h
@@ -0,0 +1,41 @@
+/*
+ * Definitions for hexagon virt board.
+ *
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_HEXAGONVIRT_H
+#define HW_HEXAGONVIRT_H
+
+#include "hw/boards.h"
+#include "target/hexagon/cpu.h"
+
+struct HexagonVirtMachineState {
+    /*< private >*/
+    MachineState parent_obj;
+
+    int fdt_size;           /* byte count of the FDT blob that gets loaded */
+    MemoryRegion *sys;      /* system memory container */
+    MemoryRegion cfgtable;  /* "config_table.rom" */
+    MemoryRegion ram;       /* "ddr.ram", mapped at address 0 */
+    MemoryRegion tcm;       /* "tcm.ram", present when l2tcm_size != 0 */
+    MemoryRegion vtcm;
+    DeviceState *l2vic;     /* interrupt controller, fed into CPU 0 */
+};
+
+void hexagon_load_fdt(const struct HexagonVirtMachineState *vms);
+
+enum { /* indices into the board's base_memmap[] and irqmap[] tables */
+    VIRT_UART0,
+    VIRT_QTMR0,
+    VIRT_QTMR1,
+    VIRT_GPT,
+    VIRT_MMIO,
+    VIRT_FDT,
+};
+
+#define TYPE_HEXAGON_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
+OBJECT_DECLARE_SIMPLE_TYPE(HexagonVirtMachineState, HEXAGON_VIRT_MACHINE)
+
+#endif /* HW_HEXAGONVIRT_H */
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index 7ebb53a5da8ee..798da24b8d336 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
From e47cd0ad45d01f7b0237ec6ed7b3b12e31ebf648 Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Fri, 25 Oct 2024 20:49:14 -0700
Subject: [PATCH 088/126] tests/functional: Add a hexagon minivm test

Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 MAINTAINERS                             |  1 +
 tests/functional/meson.build            |  8 +++++
 tests/functional/test_hexagon_minivm.py | 48 +++++++++++++++++++++++++
 3 files changed, 57 insertions(+)
 create mode 100755 tests/functional/test_hexagon_minivm.py

diff --git a/MAINTAINERS b/MAINTAINERS
index 58497567dc83d..fea1425198636 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -247,6 +247,7 @@ F: gdb-xml/hexagon*.xml
 F: docs/system/target-hexagon.rst
 F: docs/devel/hexagon-sys.rst
 F: docs/devel/hexagon-l2vic.rst
+F: tests/functional/test_hexagon_minivm.py
 T: git https://github.com/quic/qemu.git hex-next
 
 Hexagon idef-parser
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index 3fd2652c0782e..7e361c68dd904 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -140,6 +140,14 @@ tests_i386_system_quick = [
   'migration',
 ]
 
+test_timeouts += {
+  'hexagon_minivm': 180,
+}
+
+tests_hexagon_system_quick = [
+  'hexagon_minivm',
+]
+
 tests_i386_system_thorough = [
   'i386_tuxrun',
 ]
diff --git a/tests/functional/test_hexagon_minivm.py b/tests/functional/test_hexagon_minivm.py
new file mode 100755
index 0000000000000..2ba92bcce383a
--- /dev/null
+++ b/tests/functional/test_hexagon_minivm.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+#
+# Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os
+from glob import glob
+from qemu_test import QemuSystemTest, Asset
+from qemu_test import wait_for_console_pattern
+
+class MiniVMTest(QemuSystemTest):
+    """Run the test_* binaries from the archive with minivm as -kernel."""
+
+    timeout = 180
+    GUEST_ENTRY = 0xc0000000
+
+    REPO = 'https://artifacts.codelinaro.org/artifactory'
+    ASSET_TARBALL = \
+        Asset(f'{REPO}/codelinaro-toolchain-for-hexagon/'
+               '19.1.5/hexagon_minivm_2024_Dec_15.tar.gz',
+        'd7920b5ff14bed5a10b23ada7d4eb927ede08635281f25067e0d5711feee2c2a')
+
+    def test_minivm(self):
+        """Launch one VM per boot/test_* binary; each must exit with 0."""
+        self.set_machine('virt')
+        self.archive_extract(self.ASSET_TARBALL)
+        rootfs_path = f'{self.workdir}/hexagon-unknown-linux-musl-rootfs'
+        kernel_path = f'{rootfs_path}/boot/minivm'
+
+        # unittest assertions are not stripped when Python runs with -O.
+        self.assertTrue(os.path.exists(kernel_path))
+        test_bins = sorted(glob(f'{rootfs_path}/boot/test_*'))
+        # An archive without test binaries must not pass vacuously.
+        self.assertTrue(test_bins)
+        for test_bin_path in test_bins:
+            print(f'# Testing "{os.path.basename(test_bin_path)}"')
+
+            vm = self.get_vm()
+            vm.add_args('-kernel', kernel_path,
+                        '-device',
+                        f'loader,addr={hex(self.GUEST_ENTRY)},file={test_bin_path}')
+            vm.launch()
+            vm.wait()
+            self.assertEqual(vm.exitcode(), 0)
+
+if __name__ == '__main__':
+    QemuSystemTest.main()
From 4b4a6146dbd62df14a106af2ff1a5d43b6021ae6 Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Mon, 9 Sep 2024 09:42:54 -0700
Subject: [PATCH 089/126] target/hexagon: s/pkt_has_store/pkt_has_scalar_store

To remove any confusion with HVX or other potential store instructions,
we'll qualify this context var with "scalar".
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/decode.c | 4 ++-- target/hexagon/gen_helper_funcs.py | 2 +- target/hexagon/genptr.c | 3 ++- target/hexagon/idef-parser/README.rst | 2 +- target/hexagon/idef-parser/parser-helpers.c | 4 ++-- target/hexagon/insn.h | 4 ++-- target/hexagon/macros.h | 8 ++++---- target/hexagon/op_helper.c | 4 ++-- target/hexagon/translate.c | 9 +++++---- 9 files changed, 21 insertions(+), 19 deletions(-) diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index 5d0beeeaf2f1f..41bf03c9b5133 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -238,9 +238,9 @@ static void decode_set_insn_attr_fields(Packet *pkt) if (GET_ATTRIB(opcode, A_SCALAR_STORE) && !GET_ATTRIB(opcode, A_MEMSIZE_0B)) { if (pkt->insn[i].slot == 0) { - pkt->pkt_has_store_s0 = true; + pkt->pkt_has_scalar_store_s0 = true; } else { - pkt->pkt_has_store_s1 = true; + pkt->pkt_has_scalar_store_s1 = true; } } } diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index dd8ab60598557..32e3bac746251 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -69,7 +69,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): if hex_common.need_slot(tag): if "A_LOAD" in hex_common.attribdict[tag]: f.write(hex_common.code_fmt(f"""\ - bool pkt_has_store_s1 = slotval & 0x1; + bool pkt_has_scalar_store_s1 = slotval & 0x1; """)) f.write(hex_common.code_fmt(f"""\ uint32_t slot = slotval >> 1; diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index afc7e5f3a5ab4..f38968271b172 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -558,7 +558,8 @@ static inline void gen_store_conditional8(DisasContext *ctx, #ifndef CONFIG_HEXAGON_IDEF_PARSER static TCGv gen_slotval(DisasContext *ctx) { - int slotval = (ctx->pkt->pkt_has_store_s1 & 1) | (ctx->insn->slot << 1); + int slotval = + (ctx->pkt->pkt_has_scalar_store_s1 & 1) | (ctx->insn->slot << 1); 
return tcg_constant_tl(slotval); } #endif diff --git a/target/hexagon/idef-parser/README.rst b/target/hexagon/idef-parser/README.rst index 7199177ee33e6..235e3debee3c3 100644 --- a/target/hexagon/idef-parser/README.rst +++ b/target/hexagon/idef-parser/README.rst @@ -637,7 +637,7 @@ tinycode for the Hexagon ``add`` instruction :: ---- 00021094 - mov_i32 pkt_has_store_s1,$0x0 + mov_i32 pkt_has_scalar_store_s1,$0x0 add_i32 tmp0,r2,r2 mov_i32 loc2,tmp0 mov_i32 new_r1,loc2 diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index a7dcd85fe43d9..3316c230f8a5f 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1725,7 +1725,7 @@ void gen_cancel(Context *c, YYLTYPE *locp) void gen_load_cancel(Context *c, YYLTYPE *locp) { - OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n"); + OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_scalar_store_s1) {\n"); OUT(c, locp, "ctx->s1_store_processed = false;\n"); OUT(c, locp, "process_store(ctx, 1);\n"); OUT(c, locp, "}\n"); @@ -1750,7 +1750,7 @@ void gen_load(Context *c, YYLTYPE *locp, HexValue *width, /* Lookup the effective address EA */ find_variable(c, locp, ea, ea); - OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n"); + OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_scalar_store_s1) {\n"); OUT(c, locp, "probe_noshuf_load(", ea, ", ", width, ", ctx->mem_idx);\n"); OUT(c, locp, "process_store(ctx, 1);\n"); OUT(c, locp, "}\n"); diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h index 24dcf7fe9f385..5d59430da9e12 100644 --- a/target/hexagon/insn.h +++ b/target/hexagon/insn.h @@ -66,8 +66,8 @@ struct Packet { bool pkt_has_dczeroa; - bool pkt_has_store_s0; - bool pkt_has_store_s1; + bool pkt_has_scalar_store_s0; + bool pkt_has_scalar_store_s1; bool pkt_has_hvx; Insn *vhist_insn; diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index afbbe8e265241..06c1dd2f407e6 100644 --- 
a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -82,7 +82,7 @@ */ #define CHECK_NOSHUF(VA, SIZE) \ do { \ - if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \ + if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \ probe_noshuf_load(VA, SIZE, ctx->mem_idx); \ process_store(ctx, 1); \ } \ @@ -93,11 +93,11 @@ TCGLabel *noshuf_label = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, PRED, 0, noshuf_label); \ GET_EA; \ - if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \ + if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \ probe_noshuf_load(EA, SIZE, ctx->mem_idx); \ } \ gen_set_label(noshuf_label); \ - if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \ + if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \ process_store(ctx, 1); \ } \ } while (0) @@ -524,7 +524,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fLOAD(NUM, SIZE, SIGN, EA, DST) \ do { \ - check_noshuf(env, pkt_has_store_s1, slot, EA, SIZE, GETPC()); \ + check_noshuf(env, pkt_has_scalar_store_s1, slot, EA, SIZE, GETPC()); \ DST = (size##SIZE##SIGN##_t)MEM_LOAD##SIZE(env, EA, GETPC()); \ } while (0) #endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 28b555e87375b..dd26a0e3e0a12 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -490,11 +490,11 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask) * If the load is in slot 0 and there is a store in slot1 (that * wasn't cancelled), we have to do the store first. 
*/ -static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1, +static void check_noshuf(CPUHexagonState *env, bool pkt_has_scalar_store_s1, uint32_t slot, target_ulong vaddr, int size, uintptr_t ra) { - if (slot == 0 && pkt_has_store_s1 && + if (slot == 0 && pkt_has_scalar_store_s1 && ((env->slot_cancelled & (1 << 1)) == 0)) { probe_read(env, vaddr, size, MMU_USER_IDX, ra); commit_store(env, 1, ra); diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index f4133a10490ee..35765d48ba11a 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -932,11 +932,11 @@ static void process_store_log(DisasContext *ctx) * the memory accesses overlap. */ Packet *pkt = ctx->pkt; - if (pkt->pkt_has_store_s1) { + if (pkt->pkt_has_scalar_store_s1) { g_assert(!pkt->pkt_has_dczeroa); process_store(ctx, 1); } - if (pkt->pkt_has_store_s0) { + if (pkt->pkt_has_scalar_store_s0) { g_assert(!pkt->pkt_has_dczeroa); process_store(ctx, 0); } @@ -1063,8 +1063,9 @@ static void gen_commit_packet(DisasContext *ctx) * involved in committing the packet. */ Packet *pkt = ctx->pkt; - bool has_store_s0 = pkt->pkt_has_store_s0; - bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); + bool has_store_s0 = pkt->pkt_has_scalar_store_s0; + bool has_store_s1 = + (pkt->pkt_has_scalar_store_s1 && !ctx->s1_store_processed); bool has_hvx_store = pkt_has_hvx_store(pkt); if (pkt->pkt_has_dczeroa) { /* From a839fa38b74153a32e0776ebcb53afd04e12af85 Mon Sep 17 00:00:00 2001 From: Brian Cain <brian.cain@oss.qualcomm.com> Date: Thu, 2 Jan 2025 19:47:54 -0800 Subject: [PATCH 090/126] target/hexagon: Add a QTimer address prop This property will be used by a future commit. 
Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/cpu.c | 2 ++ target/hexagon/cpu.h | 1 + 2 files changed, 3 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 798da24b8d336..471a1ef6c8e59 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -65,6 +65,8 @@ static const Property hexagon_cpu_properties[] = { DEFINE_PROP_UINT32("jtlb-entries", HexagonCPU, num_tlbs, MAX_TLB_ENTRIES), DEFINE_PROP_UINT32("l2vic-base-addr", HexagonCPU, l2vic_base_addr, 0xffffffffULL), + DEFINE_PROP_UINT32("qtimer-base-addr", HexagonCPU, qtimer_base_addr, + 0xffffffffULL), DEFINE_PROP_UINT32("hvx-contexts", HexagonCPU, hvx_contexts, 0), DEFINE_PROP_UINT32("exec-start-addr", HexagonCPU, boot_addr, 0xffffffffULL), DEFINE_PROP_UINT64("config-table-addr", HexagonCPU, config_table_addr, diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 0608d3265cd12..50265da40dc90 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -193,6 +193,7 @@ struct ArchCPU { #ifndef CONFIG_USER_ONLY uint32_t num_tlbs; uint32_t l2vic_base_addr; + uint32_t qtimer_base_addr; uint32_t hvx_contexts; uint32_t boot_addr; uint64_t config_table_addr; From 0f571a1d18d46a0d2992804e183633326c72b945 Mon Sep 17 00:00:00 2001 From: Sid Manning <sidneym@quicinc.com> Date: Tue, 7 Nov 2023 17:01:28 -0800 Subject: [PATCH 091/126] hw/timer: Add QTimer device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note: QTimer was implemented before ARM SSE Timer was upstreamed, there may be opportunity to use that device instead. 
Co-authored-by: Damien Hedde <damien.hedde@dahe.fr> Co-authored-by: Tobias Röhmel <quic_trohmel@quicinc.com> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- MAINTAINERS | 2 + hw/hexagon/hexagon_dsp.c | 1 - hw/hexagon/virt.c | 22 ++ hw/timer/meson.build | 2 + hw/timer/qct-qtimer.c | 519 ++++++++++++++++++++++++++++++++++ include/hw/timer/qct-qtimer.h | 85 ++++++ 6 files changed, 630 insertions(+), 1 deletion(-) create mode 100644 hw/timer/qct-qtimer.c create mode 100644 include/hw/timer/qct-qtimer.h diff --git a/MAINTAINERS b/MAINTAINERS index fea1425198636..babcc4bf11f6c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -234,7 +234,9 @@ S: Supported F: target/hexagon/ F: hw/intc/l2vic.[ch] F: hw/hexagon/ +F: hw/timer/qct-qtimer.c F: include/hw/hexagon/ +F: include/hw/timer/qct-qtimer.h X: target/hexagon/idef-parser/ X: target/hexagon/gen_idef_parser_funcs.py F: linux-user/hexagon/ diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c index 34bbe98149cfc..198f983993366 100644 --- a/hw/hexagon/hexagon_dsp.c +++ b/hw/hexagon/hexagon_dsp.c @@ -3,7 +3,6 @@ * subsystem with few peripherals, like the Compute DSP. * * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 
- *
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
 
diff --git a/hw/hexagon/virt.c b/hw/hexagon/virt.c
index 1999745598635..13ecca466821e 100644
--- a/hw/hexagon/virt.c
+++ b/hw/hexagon/virt.c
@@ -14,6 +14,7 @@
 #include "hw/loader.h"
 #include "hw/qdev-properties.h"
 #include "hw/register.h"
+#include "hw/timer/qct-qtimer.h"
 #include "qemu/error-report.h"
 #include "qemu/guest-random.h"
 #include "qemu/units.h"
@@ -244,6 +245,38 @@ static void fdt_add_virtio_devices(const HexagonVirtMachineState *vms)
     }
 }
 
+/*
+ * Create the QTimer with two frames of one view each.  MMIO region 0 (the
+ * access-control frame) is mapped at qtmr_rg1 and region 1 (the timer views)
+ * at qtmr_rg0; the two frame interrupts are routed to the L2VIC.
+ */
+static void create_qtimer(HexagonVirtMachineState *vms,
+                          const hexagon_machine_config *m_cfg)
+{
+    Error **errp = NULL;
+    QCTQtimerState *qtimer = QCT_QTIMER(qdev_new(TYPE_QCT_QTIMER));
+    SysBusDevice *sbd = SYS_BUS_DEVICE(qtimer);
+
+    /*
+     * Mapping MMIO regions or connecting IRQs on a device that failed to
+     * realize is invalid, so treat any configuration failure as fatal.
+     */
+    if (!object_property_set_uint(OBJECT(qtimer), "nr_frames", 2, errp) ||
+        !object_property_set_uint(OBJECT(qtimer), "nr_views", 1, errp) ||
+        !object_property_set_uint(OBJECT(qtimer), "cnttid", 0x111, errp) ||
+        !sysbus_realize_and_unref(sbd, errp)) {
+        error_report("failed to create the QTimer device");
+        exit(1);
+    }
+
+    sysbus_mmio_map(sbd, 0, m_cfg->qtmr_rg1);
+    sysbus_mmio_map(sbd, 1, m_cfg->qtmr_rg0);
+    sysbus_connect_irq(sbd, 0,
+                       qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR0]));
+    sysbus_connect_irq(sbd, 1,
+                       qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR1]));
+}
+
 static void virt_instance_init(Object *obj)
 {
     HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(obj);
@@ -353,6 +386,7 @@ static void virt_init(MachineState *ms)
     fdt_add_clocks(vms);
     fdt_add_uart(vms, VIRT_UART0);
     fdt_add_gpt_node(vms);
+    create_qtimer(vms, m_cfg);
 
     rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable,
                           sizeof(m_cfg->cfgtable), m_cfg->cfgbase,
diff --git a/hw/timer/meson.build b/hw/timer/meson.build
index f5f9eed2d0a9f..6c30bf602226f 100644
--- a/hw/timer/meson.build
+++ b/hw/timer/meson.build
@@ -34,3 +34,5 @@ specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_timer.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_PWM', if_true: files('sifive_pwm.c'))
 
 specific_ss.add(when:
'CONFIG_AVR_TIMER16', if_true: files('avr_timer16.c'))
+
+specific_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('qct-qtimer.c'))
diff --git a/hw/timer/qct-qtimer.c b/hw/timer/qct-qtimer.c
new file mode 100644
index 0000000000000..413f7249eef00
--- /dev/null
+++ b/hw/timer/qct-qtimer.c
@@ -0,0 +1,531 @@
+/*
+ * Qualcomm QCT QTimer
+ *
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/timer/qct-qtimer.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+
+/* Common timer implementation. */
+
+#define QTIMER_MEM_SIZE_BYTES 0x1000
+#define QTIMER_MEM_REGION_SIZE_BYTES 0x1000
+#define QTIMER_DEFAULT_FREQ_HZ 19200000ULL
+#define QTMR_TIMER_INDEX_MASK (0xf000)
+/* Macro arguments are parenthesized so that any expression can be passed. */
+#define HIGH_32(val) (0x0ffffffffULL & ((val) >> 32))
+#define LOW_32(val) (0x0ffffffffULL & (val))
+
+/*
+ * QTimer version reg:
+ *
+ *    3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Major |         Minor         |             Step              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+static const unsigned int TIMER_VERSION = 0x20020000;
+
+/*
+ * Read handler for the access-control register frame (MMIO region 0).
+ *
+ * Serves CNTFRQ, CNTSR, CNTTID, the per-frame CNTACR registers and the
+ * version register.  Any other offset, and any CNTACR slot at or beyond
+ * nr_frames, is logged as a guest error and reads as zero.
+ */
+static uint64_t qct_qtimer_read(void *opaque, hwaddr offset, unsigned size)
+{
+    QCTQtimerState *s = (QCTQtimerState *)opaque;
+    uint32_t frame = 0;
+
+    switch (offset) {
+    case QCT_QTIMER_AC_CNTFRQ:
+        return s->freq;
+    case QCT_QTIMER_AC_CNTSR:
+        return s->secure;
+    case QCT_QTIMER_AC_CNTTID:
+        return s->cnttid;
+    case QCT_QTIMER_AC_CNTACR_START ... QCT_QTIMER_AC_CNTACR_END:
+        /* One 32-bit CNTACR per frame; same decode as qct_qtimer_write(). */
+        frame = (offset - QCT_QTIMER_AC_CNTACR_START) / 0x4;
+        if (frame >= s->nr_frames) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n", __func__,
+                          (int)offset);
+            return 0x0;
+        }
+        return s->timer[frame].cnt_ctrl;
+    case QCT_QTIMER_VERSION:
+        return TIMER_VERSION;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n",
+                      __func__, (int)offset);
+        break;
+    }
+
+    return 0x0;
+}
+
+/*
+ * Write handler for the access-control register frame (MMIO region 0).
+ *
+ * CNTFRQ, CNTSR (values up to 0xFF only) and the per-frame CNTACR registers
+ * are writable; every other offset is logged as a guest error and ignored.
+ */
+static void qct_qtimer_write(void *opaque, hwaddr offset, uint64_t value,
+                             unsigned size)
+{
+    QCTQtimerState *s = (QCTQtimerState *)opaque;
+    uint32_t frame = 0;
+
+    if (offset >= 0x1000) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+                      (int)offset);
+        return;
+    }
+
+    switch (offset) {
+    case QCT_QTIMER_AC_CNTFRQ:
+        /* Updates the device-level value only, not the per-frame freq. */
+        s->freq = value;
+        return;
+    case QCT_QTIMER_AC_CNTSR:
+        if (value > 0xFF) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: QCT_QTIMER_AC_CNTSR: Bad value %x\n", __func__,
+                          (int)value);
+        } else {
+            s->secure = value;
+        }
+        return;
+    case QCT_QTIMER_AC_CNTACR_START ... QCT_QTIMER_AC_CNTACR_END:
+        frame = (offset - QCT_QTIMER_AC_CNTACR_START) / 0x4;
+        if (frame >= s->nr_frames) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n", __func__,
+                          (int)offset);
+            return;
+        }
+        s->timer[frame].cnt_ctrl = value;
+        return;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n",
+                      __func__, (int)offset);
+        return;
+    }
+}
+
+static const MemoryRegionOps qct_qtimer_ops = {
+    .read = qct_qtimer_read,
+    .write = qct_qtimer_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/* Device-level state is not migrated; frames use vmstate_hex_timer. */
+static const VMStateDescription vmstate_qct_qtimer = {
+    .name = "qct-qtimer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]){ VMSTATE_END_OF_LIST() }
+};
+
+/* Expose the 'secure' and 'frame_id' fields as read-only QOM properties. */
+static void qct_qtimer_init(Object *obj)
+{
+    QCTQtimerState *s = QCT_QTIMER(obj);
+
+    object_property_add_uint32_ptr(obj, "secure", &s->secure,
+                                   OBJ_PROP_FLAG_READ);
+    object_property_add_uint32_ptr(obj, "frame_id", &s->frame_id,
+                                   OBJ_PROP_FLAG_READ);
+}
+
+/* Update the IRQ line: high only while pending and the timer is enabled. */
+static void hex_timer_update(QCTHextimerState *s)
+{
+    int level = s->int_level && (s->control & QCT_QTIMER_CNTP_CTL_ENABLE);
+    qemu_set_irq(s->irq, level);
+}
+
+/* Current physical count: cntpct plus the live ptimer count. */
+static uint64_t hex_timer_count(QCTHextimerState *s)
+{
+    return s->cntpct + ptimer_get_count(s->timer);
+}
+
+/* Whether the CNTP_* registers of @s may be accessed through @view. */
+static bool hex_timer_cntp_access_ok(const QCTHextimerState *s, uint32_t view)
+{
+    if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) {
+        return false;
+    }
+    return !view || (s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN);
+}
+
+/* Whether the physical counter of @s may be read through @view. */
+static bool hex_timer_cntpct_access_ok(const QCTHextimerState *s,
+                                       uint32_t view)
+{
+    if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RPCT)) {
+        return false;
+    }
+    return !view || (s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN);
+}
+
+/*
+ * Read handler for the timer views (MMIO region 1).
+ *
+ * The region is a sequence of 4KiB slots; slot N addresses view
+ * (N % nr_views) of frame (N / nr_views).  MEMTX_ACCESS_ERROR is returned
+ * when the frame does not exist, when the frame's CNTACR or CNTPL0ACR bits
+ * do not grant access to the register, or for an unknown register offset.
+ */
+static MemTxResult hex_timer_read(void *opaque, hwaddr offset, uint64_t *data,
+                                  unsigned size, MemTxAttrs attrs)
+{
+    QCTQtimerState *qct_s = (QCTQtimerState *)opaque;
+    uint32_t slot_nr = (offset & QTMR_TIMER_INDEX_MASK) >> 12;
+    uint32_t reg_offset = offset & 0xFFF;
+    uint32_t view = slot_nr % qct_s->nr_views;
+    uint32_t frame = slot_nr / qct_s->nr_views;
+    QCTHextimerState *s;
+
+    if (frame >= qct_s->nr_frames) {
+        *data = 0;
+        return MEMTX_ACCESS_ERROR;
+    }
+    s = &qct_s->timer[frame];
+
+    /*
+     * This is the case where we have 2 views, but the second one is not
+     * implemented.
+     */
+    if (view && !(qct_s->cnttid & (0x4 << (frame * 4)))) {
+        *data = 0;
+        return MEMTX_OK;
+    }
+
+    switch (reg_offset) {
+    case QCT_QTIMER_CNT_FREQ: /* Ticks/Second */
+        if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RFRQ)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        if (view && !(s->cntpl0acr & (QCT_QTIMER_CNTPL0ACR_PL0PCTEN |
+                                      QCT_QTIMER_CNTPL0ACR_PL0VCTEN))) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        *data = s->freq;
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTP_CVAL_LO: /* TimerLoad */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        *data = LOW_32(s->cntval);
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTP_CVAL_HI: /* TimerLoad */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        *data = HIGH_32(s->cntval);
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTPCT_LO:
+        if (!hex_timer_cntpct_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        *data = LOW_32(hex_timer_count(s));
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTPCT_HI:
+        if (!hex_timer_cntpct_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        *data = HIGH_32(hex_timer_count(s));
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTP_TVAL: /* CVAL - CNTP */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        /* Subtract the full 64-bit count, not the sum of its two halves. */
+        *data = s->cntval - hex_timer_count(s);
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTP_CTL: /* TimerMIS */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        *data = s->int_level;
+        return MEMTX_OK;
+    case QCT_QTIMER_CNTPL0ACR:
+        *data = view ? 0 : s->cntpl0acr;
+        return MEMTX_OK;
+    case QCT_QTIMER_VERSION:
+        *data = TIMER_VERSION;
+        return MEMTX_OK;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+                      (int)offset);
+        *data = 0;
+        return MEMTX_ACCESS_ERROR;
+    }
+}
+
+/*
+ * Reset the timer limit after settings have changed.
+ * May only be called from inside a ptimer transaction block.
+ */
+static void hex_timer_recalibrate(QCTHextimerState *s, int reload)
+{
+    /* Periodic. */
+    ptimer_set_limit(s->timer, s->limit, reload);
+}
+
+/*
+ * Write handler for the timer views (MMIO region 1); slot decoding and
+ * access checks mirror hex_timer_read().
+ *
+ * NOTE(review): writes to CNTP_CVAL_HI are accepted but discarded, and a
+ * CNTP_CVAL_LO write replaces the whole compare value, so the upper 32 bits
+ * of CVAL cannot be programmed through this path - confirm this is intended.
+ */
+static MemTxResult hex_timer_write(void *opaque, hwaddr offset, uint64_t value,
+                                   unsigned size, MemTxAttrs attrs)
+{
+    QCTQtimerState *qct_s = (QCTQtimerState *)opaque;
+    uint32_t slot_nr = (offset & QTMR_TIMER_INDEX_MASK) >> 12;
+    uint32_t reg_offset = offset & 0xFFF;
+    uint32_t view = slot_nr % qct_s->nr_views;
+    uint32_t frame = slot_nr / qct_s->nr_views;
+    QCTHextimerState *s;
+
+    if (frame >= qct_s->nr_frames) {
+        return MEMTX_ACCESS_ERROR;
+    }
+    s = &qct_s->timer[frame];
+
+    /*
+     * This is the case where we have 2 views, but the second one is not
+     * implemented.
+     */
+    if (view && !(qct_s->cnttid & (0x4 << (frame * 4)))) {
+        return MEMTX_OK;
+    }
+
+    switch (reg_offset) {
+    case QCT_QTIMER_CNTP_CVAL_LO: /* TimerLoad */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        s->int_level = 0;
+        s->cntval = value;
+        ptimer_transaction_begin(s->timer);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            /*
+             * Pause the timer if it is running.  This may cause some
+             * inaccuracy due to rounding, but avoids other issues.
+             */
+            ptimer_stop(s->timer);
+        }
+        hex_timer_recalibrate(s, 1);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            ptimer_run(s->timer, 0);
+        }
+        ptimer_transaction_commit(s->timer);
+        break;
+    case QCT_QTIMER_CNTP_CVAL_HI:
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        break;
+    case QCT_QTIMER_CNTP_CTL: /* Timer control register */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        ptimer_transaction_begin(s->timer);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            /*
+             * Pause the timer if it is running.  This may cause some
+             * inaccuracy due to rounding, but avoids other issues.
+             */
+            ptimer_stop(s->timer);
+        }
+        s->control = value;
+        hex_timer_recalibrate(s, s->control & QCT_QTIMER_CNTP_CTL_ENABLE);
+        ptimer_set_freq(s->timer, s->freq);
+        ptimer_set_period(s->timer, 1);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            ptimer_run(s->timer, 0);
+        }
+        ptimer_transaction_commit(s->timer);
+        break;
+    case QCT_QTIMER_CNTP_TVAL: /* CVAL - CNTP */
+        if (!hex_timer_cntp_access_ok(s, view)) {
+            return MEMTX_ACCESS_ERROR;
+        }
+        ptimer_transaction_begin(s->timer);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            /*
+             * Pause the timer if it is running.  This may cause some
+             * inaccuracy due to rounding, but avoids other issues.
+             */
+            ptimer_stop(s->timer);
+        }
+        s->cntval = s->cntpct + value;
+        ptimer_set_freq(s->timer, s->freq);
+        ptimer_set_period(s->timer, 1);
+        if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) {
+            ptimer_run(s->timer, 0);
+        }
+        ptimer_transaction_commit(s->timer);
+        break;
+    case QCT_QTIMER_CNTPL0ACR:
+        /* Only view 0 may program the PL0 access-control bits. */
+        if (!view) {
+            s->cntpl0acr = value;
+        }
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+                      (int)offset);
+        return MEMTX_ACCESS_ERROR;
+    }
+
+    hex_timer_update(s);
+    return MEMTX_OK;
+}
+
+/*
+ * ptimer callback.  Latch and raise the interrupt once the counter has
+ * reached the compare value; otherwise advance the counter by the limit.
+ */
+static void hex_timer_tick(void *opaque)
+{
+    QCTHextimerState *s = (QCTHextimerState *)opaque;
+
+    if ((s->cntpct >= s->cntval) && (s->int_level != 1)) {
+        s->int_level = 1;
+        hex_timer_update(s);
+        return;
+    }
+    s->cntpct += s->limit;
+}
+
+static const MemoryRegionOps hex_timer_ops = {
+    .read_with_attrs = hex_timer_read,
+    .write_with_attrs = hex_timer_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/* NOTE(review): cntpl0acr and freq are not part of the migrated state. */
+static const VMStateDescription vmstate_hex_timer = {
+    .name = "hex_timer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (const VMStateField[]){
+        VMSTATE_UINT32(control, QCTHextimerState),
+        VMSTATE_UINT32(cnt_ctrl, QCTHextimerState),
+        VMSTATE_UINT64(cntpct, QCTHextimerState),
+        VMSTATE_UINT64(cntval, QCTHextimerState),
+        VMSTATE_UINT64(limit, QCTHextimerState),
+        VMSTATE_UINT32(int_level, QCTHextimerState),
+        VMSTATE_PTIMER(timer, QCTHextimerState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+/*
+ * Validate the frame/view configuration, create the two MMIO regions
+ * (access-control frame first, timer views second) and set up one ptimer
+ * and one IRQ line per frame.
+ */
+static void qct_qtimer_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    QCTQtimerState *s = QCT_QTIMER(dev);
+    unsigned int i;
+
+    if (s->nr_frames > QCT_QTIMER_TIMER_FRAME_ELTS) {
+        error_setg(errp, "nr_frames too high");
+        return;
+    }
+
+    /* nr_views is the divisor used to decode view-region offsets. */
+    if (s->nr_views == 0) {
+        error_setg(errp, "nr_views must not be zero");
+        return;
+    }
+
+    if (s->nr_views > QCT_QTIMER_TIMER_VIEW_ELTS) {
+        error_setg(errp, "nr_views too high");
+        return;
+    }
+
+    memory_region_init_io(&s->iomem, OBJECT(sbd), &qct_qtimer_ops, s, "qutimer",
+                          QTIMER_MEM_SIZE_BYTES);
+    sysbus_init_mmio(sbd, &s->iomem);
+
+    memory_region_init_io(&s->view_iomem, OBJECT(sbd), &hex_timer_ops, s,
+                          "qutimer_views",
+                          QTIMER_MEM_SIZE_BYTES * s->nr_frames * s->nr_views);
+    sysbus_init_mmio(sbd, &s->view_iomem);
+
+    for (i = 0; i < s->nr_frames; i++) {
+        s->timer[i].limit = 1;
+        s->timer[i].control = QCT_QTIMER_CNTP_CTL_ENABLE;
+        s->timer[i].cnt_ctrl =
+            (QCT_QTIMER_AC_CNTACR_RWPT | QCT_QTIMER_AC_CNTACR_RWVT |
+             QCT_QTIMER_AC_CNTACR_RVOFF | QCT_QTIMER_AC_CNTACR_RFRQ |
+             QCT_QTIMER_AC_CNTACR_RPVCT | QCT_QTIMER_AC_CNTACR_RPCT);
+        s->timer[i].qtimer = s;
+        s->timer[i].freq = QTIMER_DEFAULT_FREQ_HZ;
+
+        s->secure |= (1 << i);
+
+        sysbus_init_irq(sbd, &(s->timer[i].irq));
+
+        (s->timer[i]).timer =
+            ptimer_init(hex_timer_tick, &s->timer[i], PTIMER_POLICY_LEGACY);
+        vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_hex_timer,
+                         &s->timer[i]);
+    }
+}
+
+static const Property qct_qtimer_properties[] = {
+    DEFINE_PROP_UINT32("freq", QCTQtimerState, freq, QTIMER_DEFAULT_FREQ_HZ),
+    DEFINE_PROP_UINT32("nr_frames", QCTQtimerState, nr_frames, 2),
+    DEFINE_PROP_UINT32("nr_views", QCTQtimerState, nr_views, 1),
+    DEFINE_PROP_UINT32("cnttid", QCTQtimerState, cnttid, 0x11),
+};
+
+static void qct_qtimer_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *k = DEVICE_CLASS(klass);
+
+    device_class_set_props(k, qct_qtimer_properties);
+    k->realize = qct_qtimer_realize;
+    k->vmsd = &vmstate_qct_qtimer;
+}
+
+static const TypeInfo qct_qtimer_info = {
+    .name = TYPE_QCT_QTIMER,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(QCTQtimerState),
+    .instance_init = qct_qtimer_init,
+    .class_init = qct_qtimer_class_init,
+};
+
+static void qct_qtimer_register_types(void)
+{
+    type_register_static(&qct_qtimer_info);
+}
+
+type_init(qct_qtimer_register_types)
diff --git a/include/hw/timer/qct-qtimer.h b/include/hw/timer/qct-qtimer.h
new file mode 100644
index 0000000000000..90f7981ccf8df
--- /dev/null
+++ b/include/hw/timer/qct-qtimer.h
@@ -0,0 +1,85 @@
+/*
+ * Qualcomm QCT QTimer
+ *
+ *
Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef TIMER_QCT_QTIMER_H +#define TIMER_QCT_QTIMER_H + +#include "hw/ptimer.h" +#include "hw/sysbus.h" + +#define TYPE_QCT_QTIMER "qct-qtimer" +#define TYPE_QCT_HEXTIMER "qct-hextimer" +OBJECT_DECLARE_SIMPLE_TYPE(QCTQtimerState, QCT_QTIMER) +OBJECT_DECLARE_SIMPLE_TYPE(QCTHextimerState, QCT_HEXTIMER) + +struct QCTHextimerState { + QCTQtimerState *qtimer; + ptimer_state *timer; + uint64_t cntval; /* + * Physical timer compare value; interrupt when cntpct >= + * cntval + */ + uint64_t cntpct; /* Physical counter */ + uint32_t control; + uint32_t cnt_ctrl; + uint32_t cntpl0acr; + uint64_t limit; + uint32_t freq; + uint32_t int_level; + qemu_irq irq; +}; + +#define QCT_QTIMER_TIMER_FRAME_ELTS (8) +#define QCT_QTIMER_TIMER_VIEW_ELTS (2) +struct QCTQtimerState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + MemoryRegion view_iomem; + uint32_t secure; + struct QCTHextimerState timer[QCT_QTIMER_TIMER_FRAME_ELTS]; + uint32_t frame_id; + uint32_t freq; + uint32_t nr_frames; + uint32_t nr_views; + uint32_t cnttid; +}; + +#define QCT_QTIMER_AC_CNTFRQ (0x000) +#define QCT_QTIMER_AC_CNTSR (0x004) +#define QCT_QTIMER_AC_CNTSR_NSN_1 (1 << 0) +#define QCT_QTIMER_AC_CNTSR_NSN_2 (1 << 1) +#define QCT_QTIMER_AC_CNTSR_NSN_3 (1 << 2) +#define QCT_QTIMER_AC_CNTTID (0x08) +#define QCT_QTIMER_AC_CNTACR_0 (0x40) +#define QCT_QTIMER_AC_CNTACR_1 (0x44) +#define QCT_QTIMER_AC_CNTACR_2 (0x48) +#define QCT_QTIMER_AC_CNTACR_RWPT (1 << 5) /* R/W of CNTP_* regs */ +#define QCT_QTIMER_AC_CNTACR_RWVT (1 << 4) /* R/W of CNTV_* regs */ +#define QCT_QTIMER_AC_CNTACR_RVOFF (1 << 3) /* R/W of CNTVOFF register */ +#define QCT_QTIMER_AC_CNTACR_RFRQ (1 << 2) /* R/W of CNTFRQ register */ +#define QCT_QTIMER_AC_CNTACR_RPVCT (1 << 1) /* R/W of CNTVCT register */ +#define QCT_QTIMER_AC_CNTACR_RPCT (1 << 0) /* R/W of CNTPCT register */ +#define QCT_QTIMER_VERSION (0x0fd0) +#define 
QCT_QTIMER_CNTPCT_LO (0x000) +#define QCT_QTIMER_CNTPCT_HI (0x004) +#define QCT_QTIMER_CNT_FREQ (0x010) +#define QCT_QTIMER_CNTPL0ACR (0x014) +#define QCT_QTIMER_CNTPL0ACR_PL0CTEN (1 << 9) +#define QCT_QTIMER_CNTPL0ACR_PL0TVEN (1 << 8) +#define QCT_QTIMER_CNTPL0ACR_PL0VCTEN (1 << 1) +#define QCT_QTIMER_CNTPL0ACR_PL0PCTEN (1 << 0) +#define QCT_QTIMER_CNTP_CVAL_LO (0x020) +#define QCT_QTIMER_CNTP_CVAL_HI (0x024) +#define QCT_QTIMER_CNTP_TVAL (0x028) +#define QCT_QTIMER_CNTP_CTL (0x02c) +#define QCT_QTIMER_CNTP_CTL_ISTAT (1 << 2) +#define QCT_QTIMER_CNTP_CTL_INTEN (1 << 1) +#define QCT_QTIMER_CNTP_CTL_ENABLE (1 << 0) +#define QCT_QTIMER_AC_CNTACR_START 0x40 +#define QCT_QTIMER_AC_CNTACR_END 0x5C + +#endif /* TIMER_QCT_QTIMER_H */ From 44dd5ac384e4cfd72e51c7d14497374555770002 Mon Sep 17 00:00:00 2001 From: Brian Cain <bcain@quicinc.com> Date: Mon, 8 Jul 2024 18:51:06 -0700 Subject: [PATCH 092/126] docs: Add hexagon VM info Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- MAINTAINERS | 1 + docs/devel/hexagon-vm.rst | 190 +++++++++++++++++++++++++++++++++ docs/devel/index-internals.rst | 1 + docs/system/target-hexagon.rst | 11 ++ 4 files changed, 203 insertions(+) create mode 100644 docs/devel/hexagon-vm.rst diff --git a/MAINTAINERS b/MAINTAINERS index babcc4bf11f6c..5d7a5753b593a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -250,6 +250,7 @@ F: docs/system/target-hexagon.rst F: docs/devel/hexagon-sys.rst F: docs/devel/hexagon-l2vic.rst F: tests/functional/test_hexagon_minivm.py +F: docs/devel/hexagon-vm.rst T: git https://github.com/quic/qemu.git hex-next Hexagon idef-parser diff --git a/docs/devel/hexagon-vm.rst b/docs/devel/hexagon-vm.rst new file mode 100644 index 0000000000000..fb16d56d59def --- /dev/null +++ b/docs/devel/hexagon-vm.rst @@ -0,0 +1,190 @@ +Hexagon Virtual Machine +======================= + +The hexagon virtual machine is a hypervisor that can partition a single +Hexagon DSP among multiple guest operating systems, and abstracts the 
+specific details of a DSP architectural revision for the sake of consistency +among generations. + +Events +------ + +The guest operating system should register the Guest Event Vector Base +via the ``vmsetvec`` virtual instruction at system startup. The vector table +and handlers are determined by the guest OS. + +Guests return from event handlers with ``vmrte``. This instruction will restore +the mode (user versus guest), interrupt enable state, PC, SP. + +.. list-table:: Event types + :header-rows: 1 + + * - Number + - Name + - Description + - Maskable + - Detail + * - 0 + - Reserved + - + - + - + * - 1 + - Machine check event + - unrecoverable VM state + - No + - execution terminates if unhandled + * - 2 + - General exception + - internal hardware or software exception + - No + - + * - 3-4 + - Reserved + - + - + - + * - 5 + - ``trap0`` + - ``trap0`` instruction + - No + - + * - 6 + - Reserved + - + - + - + * - 7 + - Interrupt + - external interrupts + - Yes + - increasing interrupt numbers have descending priority + +Startup +------- +In order to transition to user-mode, the guest OS must set the ``UM`` bit in +the guest status register and specify the address to start executing in +user mode in the guest event link register. + +Virtual Instructions +-------------------- + +.. 
list-table:: Virtual Instructions + :header-rows: 1 + + * - Instruction + - Behavior + - Operand + - Input + - Output + * - vmversion + - returns the VM version + - 0x0 + - requested VM version + - provided VM version + * - vmrte + - return from event + - 0x1 + - Event info in g3:0 + - N/A + * - vmsetvec + - set event vector + - 0x2 + - r0 is set to vector table addr + - r0 is 0 on success, 1 otherwise + * - vmsetie + - set interrupt enabled + - 0x3 + - r0 is set to 1 to enable, 0 to disable + - previous IE bit is stored as LSB of r0 + * - vmgetie + - get interrupt enabled + - 0x4 + - N/A + - current IE bit is stored as LSB of r0 + * - vmintop + - interrupt operation + - 0x5 + - r0 = Interrupt Op, r1-r4: Depends on Op + - r0 - value depends on operation + * - vmclrmap + - clear virtual memory map + - 0xa + - r0 = Interrupt Op, r1-r4: Depends on Op + - r0 - value depends on operation + * - vmnewmap + - set new virtual memory map + - 0xb + - + r0 contains logical address of new segment table + + r1 = type of translations: 0 indicates a logical address of a zero-terminated linear list, 1 indicates a set of page tables. + - r0 contains 0 on success, otherwise negative error code + * - vmcache + - VM cache control: not modeled + - 0xd + - + r0 contains the operation to be performed + + r1 = Starting virtual address + + r2 contains the length in bytes + - r0 contains 0 on success, otherwise -1. Cache behavior is not modeled so this operation always succeeds. 
+ * - vmgettime + - Get virtual machine time + - 0xe + - N/A + - r0 contains the least significant 32 bits of timestamp, r1 contains the most significant 32 bits of timestamp + * - vmsettime + - Set virtual machine time + - 0xf + - r0 contains the least significant 32 bits of timestamp, r1 contains the most significant 32 bits of timestamp + - N/A + * - vmwait + - wait for interrupt + - 0x10 + - N/A + - r0 contains the interrupt number of the interrupt waking the guest + * - vmyield + - voluntarily yield VM task + - 0x11 + - N/A + - N/A + * - vmstart + - Create new virtual processor instance + - 0x12 + - r0 contains the starting execution address, r1 contains the starting stack pointer + - r0 contains the Virtual processor number of new virtual processor on success, otherwise -1 + * - vmstop + - terminate current virtual processor instance + - 0x13 + - N/A + - N/A + * - vmvpid + - get the virtual processor ID + - 0x14 + - N/A + - r0 contains the virtual processor number of virtual processor executing the instruction + * - vmsetregs + - Set guest registers + - 0x15 + - r0-3 hold g0-3 values + - N/A + * - vmgetregs + - Get guest registers + - 0x16 + - N/A + - r0-3 hold g0-3 values + * - vmtimerop + - perform an operation on a system timer + - 0x18 + - + getfreq = 0 + + getres = 1 + + gettime = 2 + + gettimeout = 3 + + settimeout = 4 + + deltatimeout = 5 + - r0 contains result of the timer operation call + * - vmgetinfo + - Get system info + - 0x1a + - Index of the system info parameter: + + + build_id = 0 + + info_boot_flags = 1 + - value of the indicated system info parameter diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index 6620497595abe..82f788682bb4a 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -16,6 +16,7 @@ Details about QEMU's various subsystems including how to add features to them. 
ebpf_rss hexagon-sys hexagon-l2vic + hexagon-vm migration/index multi-process reset diff --git a/docs/system/target-hexagon.rst b/docs/system/target-hexagon.rst index b2ffee91eb02e..894337a533cde 100644 --- a/docs/system/target-hexagon.rst +++ b/docs/system/target-hexagon.rst @@ -92,6 +92,17 @@ The ``trap0`` instruction can activate these semihosting calls so that the guest software can access the host console and filesystem. Semihosting is not yet implemented in QEMU hexagon. +Hexagon Virtual Machine +----------------------- + +The hexagon virtual machine is a hypervisor that can partition a single +Hexagon DSP among multiple guest operating systems, and abstracts the +specific details of a DSP architectural revision for the sake of consistency +among generations. + +`minivm <https://github.com/quic/hexagonMVM>`_ is a reference implementation +of this VM interface. + Hexagon Features ================ From 7c37ce51c661014dc18b52c4e4153bdb55e08262 Mon Sep 17 00:00:00 2001 From: Brian Cain <brian.cain@oss.qualcomm.com> Date: Thu, 2 Jan 2025 19:53:06 -0800 Subject: [PATCH 093/126] target/hexagon: Implement hexagon_read_timer() Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com> --- target/hexagon/op_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index dd26a0e3e0a12..8eacb3b041156 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -37,6 +37,7 @@ #ifndef CONFIG_USER_ONLY #include "hex_mmu.h" #include "hw/intc/l2vic.h" +#include "hw/timer/qct-qtimer.h" #include "hex_interrupts.h" #include "hexswi.h" #endif @@ -1765,7 +1766,13 @@ static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid) static void hexagon_read_timer(CPUHexagonState *env, uint32_t *low, uint32_t *high) { - qemu_log_mask(LOG_UNIMP, "reading timer_hi/lo not yet supported\n"); + CPUState *cs = env_cpu(env); + HexagonCPU *cpu = HEXAGON_CPU(cs); + const hwaddr low_addr = 
cpu->qtimer_base_addr + QCT_QTIMER_CNTPCT_LO; + const hwaddr high_addr = cpu->qtimer_base_addr + QCT_QTIMER_CNTPCT_HI; + + cpu_physical_memory_read(low_addr, low, sizeof(*low)); + cpu_physical_memory_read(high_addr, high, sizeof(*high)); } static inline QEMU_ALWAYS_INLINE void sreg_write(CPUHexagonState *env, From 21174cf572b1b2ef3f0c02bfe0bed92ae86cbdba Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Tue, 28 Jan 2025 11:17:06 -0800 Subject: [PATCH 094/126] semihosting: add the "usefs" feature This will be used by Hexagon semihosting. Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- include/semihosting/semihost.h | 6 ++++++ qemu-options.hx | 7 ++++++- semihosting/config.c | 11 +++++++++++ semihosting/syscalls.c | 13 +++++++++++-- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/include/semihosting/semihost.h b/include/semihosting/semihost.h index 97d2a2ba996d1..6e07766106516 100644 --- a/include/semihosting/semihost.h +++ b/include/semihosting/semihost.h @@ -51,6 +51,11 @@ static inline const char *semihosting_get_cmdline(void) { return NULL; } + +static inline const char *semihosting_get_usefs(void) +{ + return NULL; +} #else /* !CONFIG_USER_ONLY */ /** * semihosting_enabled: @@ -63,6 +68,7 @@ SemihostingTarget semihosting_get_target(void); const char *semihosting_get_arg(int i); int semihosting_get_argc(void); const char *semihosting_get_cmdline(void); +const char *semihosting_get_usefs(void); void semihosting_arg_fallback(const char *file, const char *cmd); /* for vl.c hooks */ void qemu_semihosting_enable(void); diff --git a/qemu-options.hx b/qemu-options.hx index dc694a99a30a7..9aee48a725728 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5128,7 +5128,7 @@ DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config, QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | QEMU_ARCH_MIPS | QEMU_ARCH_RISCV) SRST -``-semihosting-config 
[enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]`` +``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,usefs=<path>][,arg=str[,...]]`` Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V only). @@ -5152,6 +5152,11 @@ SRST only be used if all guest code is trusted (for example, in bare-metal test case code). + ``usefs=<path>`` + Sets a fallback directory to be used by the open semihosting call. If + the requested file is not found QEMU will search again at the given + path. + ``arg=str1,arg=str2,...`` Allows the user to pass input arguments, and can be used multiple times to build up a list. The old-style diff --git a/semihosting/config.c b/semihosting/config.c index 56283b5c3c38a..a64a8dfd27da5 100644 --- a/semihosting/config.c +++ b/semihosting/config.c @@ -46,6 +46,9 @@ QemuOptsList qemu_semihosting_config_opts = { }, { .name = "arg", .type = QEMU_OPT_STRING, + }, { + .name = "usefs", + .type = QEMU_OPT_STRING, }, { /* end of list */ } }, @@ -58,6 +61,7 @@ typedef struct SemihostingConfig { char **argv; int argc; const char *cmdline; /* concatenated argv */ + const char *usefs; } SemihostingConfig; static SemihostingConfig semihosting; @@ -94,6 +98,11 @@ const char *semihosting_get_cmdline(void) return semihosting.cmdline; } +const char *semihosting_get_usefs(void) +{ + return semihosting.usefs; +} + static int add_semihosting_arg(void *opaque, const char *name, const char *val, Error **errp) @@ -144,6 +153,8 @@ int qemu_semihosting_config_options(const char *optstr) true); semihosting.userspace_enabled = qemu_opt_get_bool(opts, "userspace", false); + semihosting.usefs = qemu_opt_get(opts, "usefs"); + const char *target = qemu_opt_get(opts, "target"); /* setup of chardev is deferred until they are initialised */ semihost_chardev = qemu_opt_get(opts, "chardev"); diff --git a/semihosting/syscalls.c b/semihosting/syscalls.c index f6451d9bb0e65..ef717912a8663 
100644 --- a/semihosting/syscalls.c +++ b/semihosting/syscalls.c @@ -261,7 +261,8 @@ static void host_open(CPUState *cs, gdb_syscall_complete_cb complete, { CPUArchState *env G_GNUC_UNUSED = cpu_env(cs); char *p; - int ret, host_flags = O_BINARY; + int ret, err, host_flags = O_BINARY; + const char *usefs = semihosting_get_usefs(); ret = validate_lock_user_string(&p, cs, fname, fname_len); if (ret < 0) { @@ -287,9 +288,17 @@ static void host_open(CPUState *cs, gdb_syscall_complete_cb complete, } ret = open(p, host_flags, mode); + err = errno; + if (ret < 0 && err == ENOENT && usefs) { + g_autoptr(GString) usefs_fname = g_string_new(NULL); + g_string_append_printf(usefs_fname, "%s/%s", usefs, p); + ret = open(usefs_fname->str, host_flags, mode); + err = errno; + } + if (ret < 0) { qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to open %s\n", __func__, p); - complete(cs, -1, errno); + complete(cs, -1, err); } else { int guestfd = alloc_guestfd(); associate_guestfd(guestfd, ret); From ad40781b2254dceacedf946690936aff9fd9dcf2 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 06:22:26 -0800 Subject: [PATCH 095/126] semihosting: add option for extended open() modes These modes are defined and will be used by hexagon semihosting. 
Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- semihosting/arm-compat-semi.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c index 86e5260e504be..e26bccb25eaf9 100644 --- a/semihosting/arm-compat-semi.c +++ b/semihosting/arm-compat-semi.c @@ -85,7 +85,15 @@ #define O_BINARY 0 #endif -static int gdb_open_modeflags[12] = { +#include "common-semi-target.h" + +#ifdef SEMIHOSTING_EXT_OPEN_MODES +#define GDB_OPEN_MODES_NR 14 +#else +#define GDB_OPEN_MODES_NR 12 +#endif + +static int gdb_open_modeflags[GDB_OPEN_MODES_NR] = { GDB_O_RDONLY, GDB_O_RDONLY, GDB_O_RDWR, @@ -98,6 +106,10 @@ static int gdb_open_modeflags[12] = { GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND, GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND, GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND, +#ifdef SEMIHOSTING_EXT_OPEN_MODES + GDB_O_RDWR | GDB_O_CREAT, + GDB_O_RDWR | GDB_O_CREAT | GDB_O_EXCL, +#endif }; #ifndef CONFIG_USER_ONLY @@ -386,7 +398,7 @@ void do_common_semihosting(CPUState *cs) if (!s) { goto do_fault; } - if (arg1 >= 12) { + if (arg1 >= GDB_OPEN_MODES_NR) { unlock_user(s, arg0, 0); common_semi_cb(cs, -1, EINVAL); break; From 4e7a90bb918110442d3fc5c2d705dae587d09630 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 06:25:16 -0800 Subject: [PATCH 096/126] semihosting: extract GET_ARG() to its own function This allows for future archs implementing semihosting to define their custom "GET_ARG". In particular, this will be used for Hexagon. 
Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- semihosting/arm-compat-semi.c | 15 ++++----------- target/arm/common-semi-target.h | 11 +++++++++++ target/riscv/common-semi-target.h | 11 +++++++++++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c index e26bccb25eaf9..925683efe5bfc 100644 --- a/semihosting/arm-compat-semi.c +++ b/semihosting/arm-compat-semi.c @@ -192,17 +192,10 @@ static LayoutInfo common_semi_find_bases(CPUState *cs) * error indication (0 on success, non-0 for error) which the caller * should check. */ - -#define GET_ARG(n) do { \ - if (is_64bit_semihosting(env)) { \ - if (get_user_u64(arg ## n, args + (n) * 8)) { \ - goto do_fault; \ - } \ - } else { \ - if (get_user_u32(arg ## n, args + (n) * 4)) { \ - goto do_fault; \ - } \ - } \ +#define GET_ARG(n) do { \ + if (common_semi_read_arg_word(env, &arg ## n, args, n)) { \ + goto do_fault; \ + } \ } while (0) #define SET_ARG(n, val) \ diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h index da51f2d7f540d..69429a45c6526 100644 --- a/target/arm/common-semi-target.h +++ b/target/arm/common-semi-target.h @@ -12,6 +12,17 @@ #include "target/arm/cpu-qom.h" +static inline bool common_semi_read_arg_word(CPUArchState *env, + target_ulong *save_to, + target_ulong args_addr, + int arg_num) +{ + if (is_64bit_semihosting(env)) { + return get_user_u64(*save_to, args_addr + (arg_num) * 8); + } + return get_user_u32(*save_to, args_addr + (arg_num) * 4); +} + static inline target_ulong common_semi_arg(CPUState *cs, int argno) { ARMCPU *cpu = ARM_CPU(cs); diff --git a/target/riscv/common-semi-target.h b/target/riscv/common-semi-target.h index 7c8a59e0cc3cd..ef6929bdfc5a2 100644 --- a/target/riscv/common-semi-target.h +++ b/target/riscv/common-semi-target.h @@ -11,6 +11,17 @@ #ifndef TARGET_RISCV_COMMON_SEMI_TARGET_H #define TARGET_RISCV_COMMON_SEMI_TARGET_H +static inline bool 
common_semi_read_arg_word(CPUArchState *env, + target_ulong *save_to, + target_ulong args_addr, + int arg_num) +{ + if (is_64bit_semihosting(env)) { + return get_user_u64(*save_to, args_addr + (arg_num) * 8); + } + return get_user_u32(*save_to, args_addr + (arg_num) * 4); +} + static inline target_ulong common_semi_arg(CPUState *cs, int argno) { RISCVCPU *cpu = RISCV_CPU(cs); From 1a60139d38daef84736ac68563015ea153a69543 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 06:26:35 -0800 Subject: [PATCH 097/126] semihosting: add optional callbacks To be used by Hexagon. Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- semihosting/arm-compat-semi.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c index 925683efe5bfc..0990dd704ee00 100644 --- a/semihosting/arm-compat-semi.c +++ b/semihosting/arm-compat-semi.c @@ -85,6 +85,21 @@ #define O_BINARY 0 #endif +struct semihosting_opt_callbacks { + void (*set_err)(CPUState *cs, target_ulong err); + void (*prepare_for_read)(CPUState *cs, target_ulong fd, target_ulong buf, + target_ulong len); +} opt_callbacks; + +#define SEMIHOSTING_REGISTER_OPT_CALLBACKS(callbacks) \ + struct semihosting_opt_callbacks opt_callbacks = callbacks; + +#define CALL_OPT_CALLBACK(FN, ARGS...) 
do { \ + if (opt_callbacks.FN) { \ + opt_callbacks.FN(ARGS); \ + } \ +} while (0) + #include "common-semi-target.h" #ifdef SEMIHOSTING_EXT_OPEN_MODES @@ -236,6 +251,7 @@ static void common_semi_cb(CPUState *cs, uint64_t ret, int err) ts->swi_errno = err; #else syscall_err = err; + CALL_OPT_CALLBACK(set_err, cs, err); #endif } common_semi_set_ret(cs, ret); @@ -471,6 +487,7 @@ void do_common_semihosting(CPUState *cs) GET_ARG(0); GET_ARG(1); GET_ARG(2); + CALL_OPT_CALLBACK(prepare_for_read, cs, arg0, arg1, arg2); semihost_sys_read(cs, common_semi_rw_cb, arg0, arg1, arg2); break; From 45429b9fc72195d7a7fe5764b3c54272150288c0 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 06:37:17 -0800 Subject: [PATCH 098/126] semihosting: add config opt to use stdio To be used by Hexagon semihosting. Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- semihosting/guestfd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/semihosting/guestfd.c b/semihosting/guestfd.c index d3241434c516f..4d846f4e5d10c 100644 --- a/semihosting/guestfd.c +++ b/semihosting/guestfd.c @@ -23,6 +23,18 @@ GuestFD console_in_gf; GuestFD console_out_gf; #endif +static void semihosting_use_stdio(void) +{ + console_in_gf.type = GuestFDHost; + console_in_gf.hostfd = 0; + console_out_gf.type = GuestFDHost; + console_out_gf.hostfd = 1; + guestfd_array = g_array_set_size(guestfd_array, 3); + associate_guestfd(0, 0); + associate_guestfd(1, 1); + associate_guestfd(2, 2); +} + void qemu_semihosting_guestfd_init(void) { /* New entries zero-initialized, i.e. type GuestFDUnused */ @@ -36,8 +48,12 @@ void qemu_semihosting_guestfd_init(void) console_out_gf.type = GuestFDGDB; console_out_gf.hostfd = 2; } else { +#ifdef CONFIG_SEMIHOSTING_USE_STDIO + semihosting_use_stdio(); +#else console_in_gf.type = GuestFDConsole; console_out_gf.type = GuestFDConsole; +#endif } #else /* Otherwise, the stdio file descriptors apply. 
*/ From 6df47fddf61e0350a2369309eb217e3d11813ea4 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 06:52:40 -0800 Subject: [PATCH 099/126] Hexagon: add aux functions for guest mem load/store Will be used for semihosting. Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu_helper.c | 129 ++++++++++++++++++++++++++++++++++++ target/hexagon/cpu_helper.h | 6 ++ 2 files changed, 135 insertions(+) diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index 08c749e9fa9b8..d9e19a0491433 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -29,6 +29,135 @@ #ifndef CONFIG_USER_ONLY +static bool hexagon_read_memory_small(CPUHexagonState *env, target_ulong addr, + int byte_count, unsigned char *dstbuf, + int mmu_idx, uintptr_t retaddr) + { + /* handle small sizes */ + switch (byte_count) { + case 1: + *dstbuf = cpu_ldub_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + + case 2: + if (QEMU_IS_ALIGNED(addr, 2)) { + *(unsigned short *)dstbuf = + cpu_lduw_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + } + break; + + case 4: + if (QEMU_IS_ALIGNED(addr, 4)) { + *(uint32_t *)dstbuf = + cpu_ldl_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + } + break; + + case 8: + if (QEMU_IS_ALIGNED(addr, 8)) { + *(uint64_t *)dstbuf = + cpu_ldq_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + } + break; + + default: + /* larger request, handle elsewhere */ + return false; + } + + /* not aligned, copy bytes */ + for (int i = 0; i < byte_count; ++i) { + *dstbuf++ = cpu_ldub_mmuidx_ra(env, addr++, mmu_idx, retaddr); + } + return true; +} + +void hexagon_read_memory(CPUHexagonState *env, target_ulong vaddr, int size, + void *retptr, uintptr_t retaddr) +{ + BQL_LOCK_GUARD(); + CPUState *cs = env_cpu(env); + unsigned mmu_idx = cpu_mmu_index(cs, false); + if (!hexagon_read_memory_small(env, vaddr, size, retptr, mmu_idx, retaddr)) { 
+ cpu_abort(cs, "%s: ERROR: bad size = %d!\n", __func__, size); + } +} + +static bool hexagon_write_memory_small(CPUHexagonState *env, target_ulong addr, + int byte_count, unsigned char *srcbuf, + int mmu_idx, uintptr_t retaddr) +{ + /* handle small sizes */ + switch (byte_count) { + case 1: + cpu_stb_mmuidx_ra(env, addr, *srcbuf, mmu_idx, retaddr); + return true; + + case 2: + if (QEMU_IS_ALIGNED(addr, 2)) { + cpu_stw_mmuidx_ra(env, addr, *(uint16_t *)srcbuf, mmu_idx, retaddr); + return true; + } + break; + + case 4: + if (QEMU_IS_ALIGNED(addr, 4)) { + cpu_stl_mmuidx_ra(env, addr, *(uint32_t *)srcbuf, mmu_idx, retaddr); + return true; + } + break; + + case 8: + if (QEMU_IS_ALIGNED(addr, 8)) { + cpu_stq_mmuidx_ra(env, addr, *(uint64_t *)srcbuf, mmu_idx, retaddr); + return true; + } + break; + + default: + /* larger request, handle elsewhere */ + return false; + } + + /* not aligned, copy bytes */ + for (int i = 0; i < byte_count; ++i) { + cpu_stb_mmuidx_ra(env, addr++, *srcbuf++, mmu_idx, retaddr); + } + + return true; +} + +void hexagon_write_memory(CPUHexagonState *env, target_ulong vaddr, + int size, uint64_t data, uintptr_t retaddr) +{ + CPUState *cs = env_cpu(env); + unsigned mmu_idx = cpu_mmu_index(cs, false); + if (!hexagon_write_memory_small(env, vaddr, size, (unsigned char *)&data, + mmu_idx, retaddr)) { + cpu_abort(cs, "%s: ERROR: bad size = %d!\n", __func__, size); + } +} + +static inline uint32_t page_start(uint32_t addr) +{ + uint32_t page_align = ~(TARGET_PAGE_SIZE - 1); + return addr & page_align; +} + +void hexagon_touch_memory(CPUHexagonState *env, uint32_t start_addr, + uint32_t length, uintptr_t retaddr) +{ + unsigned int warm; + uint32_t first = page_start(start_addr); + uint32_t last = page_start(start_addr + length - 1); + for (uint32_t page = first; page <= last; page += TARGET_PAGE_SIZE) { + hexagon_read_memory(env, page, 1, &warm, retaddr); + } +} + uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index) { 
g_assert_not_reached(); diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h index 0a5134204f3dc..f86f5e744fd46 100644 --- a/target/hexagon/cpu_helper.h +++ b/target/hexagon/cpu_helper.h @@ -7,6 +7,12 @@ #ifndef HEXAGON_CPU_HELPER_H #define HEXAGON_CPU_HELPER_H +void hexagon_read_memory(CPUHexagonState *env, target_ulong vaddr, int size, + void *retptr, uintptr_t retaddr); +void hexagon_write_memory(CPUHexagonState *env, target_ulong vaddr, + int size, uint64_t data, uintptr_t retaddr); +void hexagon_touch_memory(CPUHexagonState *env, uint32_t start_addr, + uint32_t length, uintptr_t retaddr); uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index); uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env); uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env); From 58b87fbd412474e8a003dfff04de85bbf759384a Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 06:49:40 -0800 Subject: [PATCH 100/126] Hexagon: add semihosting support Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- configs/targets/hexagon-softmmu.mak | 3 + hw/hexagon/Kconfig | 1 + hw/hexagon/hexagon_dsp.c | 2 + qemu-options.hx | 8 +-- target/hexagon/common-semi-target.h | 87 +++++++++++++++++++++++++++++ target/hexagon/hexswi.c | 18 +++++- 6 files changed, 113 insertions(+), 6 deletions(-) create mode 100644 target/hexagon/common-semi-target.h diff --git a/configs/targets/hexagon-softmmu.mak b/configs/targets/hexagon-softmmu.mak index 9f8fca1dc162e..03cf1306a3484 100644 --- a/configs/targets/hexagon-softmmu.mak +++ b/configs/targets/hexagon-softmmu.mak @@ -5,3 +5,6 @@ TARGET_SUPPORTS_MTTCG=y TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml gdb-xml/hexagon-sys.xml TARGET_LONG_BITS=32 TARGET_NEED_FDT=y +CONFIG_SEMIHOSTING=y +CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +CONFIG_SEMIHOSTING_USE_STDIO=y diff --git a/hw/hexagon/Kconfig b/hw/hexagon/Kconfig index 
f3f011573105e..9a2369974e097 100644 --- a/hw/hexagon/Kconfig +++ b/hw/hexagon/Kconfig @@ -4,6 +4,7 @@ config HEX_DSP depends on HEXAGON && TCG imply PTIMER select L2VIC # Vector PIC + select ARM_COMPATIBLE_SEMIHOSTING config HEX_VIRT bool diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c index 198f983993366..eca310c0e3452 100644 --- a/hw/hexagon/hexagon_dsp.c +++ b/hw/hexagon/hexagon_dsp.c @@ -24,6 +24,7 @@ #include "include/system/system.h" #include "target/hexagon/internal.h" #include "system/reset.h" +#include "include/semihosting/semihost.h" #include "machine_cfg_v66g_1024.h.inc" @@ -166,6 +167,7 @@ static void init_mc(MachineClass *mc) mc->no_serial = 1; mc->is_default = false; mc->max_cpus = 8; + qemu_semihosting_enable(); } /* ----------------------------------------------------------------- */ diff --git a/qemu-options.hx b/qemu-options.hx index 9aee48a725728..888b3092bef78 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5110,7 +5110,7 @@ ERST DEF("semihosting", 0, QEMU_OPTION_semihosting, "-semihosting semihosting mode\n", QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | - QEMU_ARCH_MIPS | QEMU_ARCH_RISCV) + QEMU_ARCH_MIPS | QEMU_ARCH_RISCV | QEMU_ARCH_HEXAGON) SRST ``-semihosting`` Enable :ref:`Semihosting` mode (ARM, M68K, Xtensa, MIPS, RISC-V only). @@ -5126,11 +5126,11 @@ DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config, "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]\n" \ " semihosting configuration\n", QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | -QEMU_ARCH_MIPS | QEMU_ARCH_RISCV) +QEMU_ARCH_MIPS | QEMU_ARCH_RISCV | QEMU_ARCH_HEXAGON) SRST ``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,usefs=<path>][,arg=str[,...]]`` - Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V - only). + Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V, + Hexagon only). .. 
warning:: Note that this allows guest direct access to the host filesystem, so diff --git a/target/hexagon/common-semi-target.h b/target/hexagon/common-semi-target.h new file mode 100644 index 0000000000000..759aaeba905fa --- /dev/null +++ b/target/hexagon/common-semi-target.h @@ -0,0 +1,87 @@ +/* + * Target-specific parts of semihosting/arm-compat-semi.c. + * + * Copyright(c) 2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TARGET_HEXAGON_COMMON_SEMI_TARGET_H +#define TARGET_HEXAGON_COMMON_SEMI_TARGET_H + +#include "cpu.h" +#include "cpu_helper.h" +#include "qemu/log.h" +#include "semihosting/uaccess.h" + +static inline bool common_semi_read_arg_word(CPUArchState *env, + target_ulong *save_to, + target_ulong args_addr, + int arg_num) +{ + hexagon_read_memory(env, args_addr + (arg_num) * 4, 4, save_to, 0); + return false; +} + +static inline target_ulong common_semi_arg(CPUState *cs, int argno) +{ + CPUHexagonState *env = cpu_env(cs); + return arch_get_thread_reg(env, HEX_REG_R00 + argno); +} + +static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) +{ + CPUHexagonState *env = cpu_env(cs); + arch_set_thread_reg(env, HEX_REG_R00, ret); +} + +static inline void hex_semi_set_err(CPUState *cs, target_ulong err) +{ + CPUHexagonState *env = cpu_env(cs); + arch_set_thread_reg(env, HEX_REG_R01, err); +} + +static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr) +{ + return false; +} + +static inline bool is_64bit_semihosting(CPUArchState *env) +{ + return false; +} + +static inline target_ulong common_semi_stack_bottom(CPUState *cs) +{ + CPUHexagonState *env = cpu_env(cs); + return arch_get_thread_reg(env, HEX_REG_SP); +} + +static inline bool common_semi_has_synccache(CPUArchState *env) +{ + return false; +} + +static inline void hex_prepare_for_read(CPUState *cs, target_ulong fd, + target_ulong buf, target_ulong len) +{ + CPUHexagonState *env = cpu_env(cs); + /* + * 
Need to make sure the page we are going to write to is available. + * The file pointer advances with the read. If the write to bufaddr + * faults the swi function will be restarted but the file pointer + * will be wrong. + */ + hexagon_touch_memory(env, buf, len, 0); +} + +const struct semihosting_opt_callbacks hex_opt_callbacks = { + .prepare_for_read = hex_prepare_for_read, + .set_err = hex_semi_set_err, +}; + +SEMIHOSTING_REGISTER_OPT_CALLBACKS(hex_opt_callbacks) + +#define SEMIHOSTING_EXT_OPEN_MODES + +#endif diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c index 5fcf9b2be9330..daa9f965145e6 100644 --- a/target/hexagon/hexswi.c +++ b/target/hexagon/hexswi.c @@ -22,10 +22,25 @@ #ifndef CONFIG_USER_ONLY #include "hex_mmu.h" #include "hexswi.h" +#include "semihosting/common-semi.h" #endif #ifndef CONFIG_USER_ONLY +#define HEX_SYS_EXCEPTION 0x18 + +static void sim_handle_trap0(CPUHexagonState *env) +{ + g_assert(bql_locked()); + target_ulong what_swi = arch_get_thread_reg(env, HEX_REG_R00); + + if (what_swi == HEX_SYS_EXCEPTION) { + arch_set_system_reg(env, HEX_SREG_MODECTL, 0); + exit(arch_get_thread_reg(env, HEX_REG_R02)); + } + + do_common_semihosting(cs); +} static void set_addresses(CPUHexagonState *env, target_ulong pc_offset, target_ulong exception_index) @@ -88,8 +103,7 @@ void hexagon_cpu_do_interrupt(CPUState *cs) switch (cs->exception_index) { case HEX_EVENT_TRAP0: if (env->cause_code == 0) { - qemu_log_mask(LOG_UNIMP, - "trap0 is unhandled, no semihosting available\n"); + sim_handle_trap0(env); } hexagon_ssr_set_cause(env, env->cause_code); From bcbccd1c5bbfc191d4c043ac94a13707f5806eb3 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 07:29:55 -0800 Subject: [PATCH 101/126] Hexagon: add main arch-specific semihosting operations Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- include/semihosting/common-semi.h | 1 + include/semihosting/syscalls.h | 2 + 
semihosting/arm-compat-semi.c | 2 +- semihosting/syscalls.c | 27 ++++ target/hexagon/hexswi.c | 253 +++++++++++++++++++++++++++++- 5 files changed, 281 insertions(+), 4 deletions(-) diff --git a/include/semihosting/common-semi.h b/include/semihosting/common-semi.h index 0a91db7c4149a..58dfb99d7a5b4 100644 --- a/include/semihosting/common-semi.h +++ b/include/semihosting/common-semi.h @@ -34,6 +34,7 @@ #ifndef COMMON_SEMI_H #define COMMON_SEMI_H +void common_semi_cb(CPUState *cs, uint64_t ret, int err); void do_common_semihosting(CPUState *cs); #endif /* COMMON_SEMI_H */ diff --git a/include/semihosting/syscalls.h b/include/semihosting/syscalls.h index 6627c45fb281a..dec2ee0ad4acd 100644 --- a/include/semihosting/syscalls.h +++ b/include/semihosting/syscalls.h @@ -75,4 +75,6 @@ void semihost_sys_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete, void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete, int fd, GIOCondition cond, int timeout); +void semihost_sys_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete, + int fd, off_t len); #endif /* SEMIHOSTING_SYSCALLS_H */ diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c index 0990dd704ee00..e4825a8667182 100644 --- a/semihosting/arm-compat-semi.c +++ b/semihosting/arm-compat-semi.c @@ -243,7 +243,7 @@ static inline uint32_t get_swi_errno(CPUState *cs) #endif } -static void common_semi_cb(CPUState *cs, uint64_t ret, int err) +void common_semi_cb(CPUState *cs, uint64_t ret, int err) { if (err) { #ifdef CONFIG_USER_ONLY diff --git a/semihosting/syscalls.c b/semihosting/syscalls.c index ef717912a8663..e790c79efe858 100644 --- a/semihosting/syscalls.c +++ b/semihosting/syscalls.c @@ -13,6 +13,7 @@ #include "semihosting/guestfd.h" #include "semihosting/syscalls.h" #include "semihosting/console.h" +#include "semihosting/semihost.h" #ifdef CONFIG_USER_ONLY #include "qemu.h" #else @@ -551,6 +552,13 @@ static void host_poll_one(CPUState *cs, gdb_syscall_complete_cb 
complete, } #endif +static void host_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete, + GuestFD *gf, off_t len) +{ + int err = ftruncate(gf->hostfd, len); + complete(cs, err, err < 0 ? errno : 0); +} + /* * Static file semihosting syscall implementations. */ @@ -992,3 +1000,22 @@ void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete, } } #endif + +void semihost_sys_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete, + int fd, off_t len) +{ + GuestFD *gf = get_guestfd(fd); + if (!gf) { + complete(cs, -1, EBADF); + return; + } + + switch (gf->type) { + case GuestFDHost: + host_ftruncate(cs, complete, gf, len); + break; + default: + fprintf(stderr, "ftruncate call not implemented for this semihosting mode.\n"); + g_assert_not_reached(); + } +} diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c index daa9f965145e6..b67996eb7aef6 100644 --- a/target/hexagon/hexswi.c +++ b/target/hexagon/hexswi.c @@ -23,23 +23,270 @@ #include "hex_mmu.h" #include "hexswi.h" #include "semihosting/common-semi.h" +#include "semihosting/syscalls.h" +#include "semihosting/guestfd.h" #endif #ifndef CONFIG_USER_ONLY -#define HEX_SYS_EXCEPTION 0x18 +/* non-arm-compatible semihosting calls */ +#define HEXAGON_SPECIFIC_SWI_FLAGS \ + DEF_SWI_FLAG(EXCEPTION, 0x18) \ + DEF_SWI_FLAG(READ_CYCLES, 0x40) \ + DEF_SWI_FLAG(PROF_ON, 0x41) \ + DEF_SWI_FLAG(PROF_OFF, 0x42) \ + DEF_SWI_FLAG(WRITECREG, 0x43) \ + DEF_SWI_FLAG(READ_TCYCLES, 0x44) \ + DEF_SWI_FLAG(READ_ICOUNT, 0x47) \ + DEF_SWI_FLAG(PROF_STATSRESET, 0x48) \ + DEF_SWI_FLAG(DUMP_PMU_STATS, 0x4a) \ + DEF_SWI_FLAG(READ_PCYCLES, 0x52) \ + DEF_SWI_FLAG(FTELL, 0x100) \ + DEF_SWI_FLAG(FSTAT, 0x101) \ + DEF_SWI_FLAG(STAT, 0x103) \ + DEF_SWI_FLAG(GETCWD, 0x104) \ + DEF_SWI_FLAG(ACCESS, 0x105) \ + DEF_SWI_FLAG(EXEC, 0x185) \ + DEF_SWI_FLAG(FTRUNC, 0x186) + +#define DEF_SWI_FLAG(name, val) HEX_SYS_ ##name = val, +enum hex_swi_flag { + HEXAGON_SPECIFIC_SWI_FLAGS +}; +#undef DEF_SWI_FLAG + +#define DEF_SWI_FLAG(_, 
val) case val: +static inline bool is_hexagon_specific_swi_flag(enum hex_swi_flag what_swi) +{ + switch (what_swi) { + HEXAGON_SPECIFIC_SWI_FLAGS + return true; + } + return false; +} +#undef DEF_SWI_FLAG + +/* We start from 1 as 0 is used to signal an error from opendir() */ +static const int DIR_INDEX_OFFSET = 1; + +static void common_semi_ftell_cb(CPUState *cs, uint64_t ret, int err) +{ + if (err) { + ret = -1; + } + common_semi_cb(cs, ret, err); +} static void sim_handle_trap0(CPUHexagonState *env) { g_assert(bql_locked()); target_ulong what_swi = arch_get_thread_reg(env, HEX_REG_R00); + target_ulong swi_info = arch_get_thread_reg(env, HEX_REG_R01); + uintptr_t retaddr = 0; + CPUState *cs = env_cpu(env); + + if (!is_hexagon_specific_swi_flag(what_swi)) { + do_common_semihosting(cs); + return; + } + + switch (what_swi) { - if (what_swi == HEX_SYS_EXCEPTION) { + case HEX_SYS_EXCEPTION: arch_set_system_reg(env, HEX_SREG_MODECTL, 0); exit(arch_get_thread_reg(env, HEX_REG_R02)); + break; + + case HEX_SYS_WRITECREG: + fprintf(stdout, "%c", swi_info); + fflush(stdout); + common_semi_cb(cs, 0, 0); + break; + + case HEX_SYS_STAT: + case HEX_SYS_FSTAT: + { + /* + * This must match the caller's definition, it would be in the + * caller's angel.h or equivalent header. 
+ */ + struct __SYS_STAT { + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint64_t rdev; + uint32_t size; + uint32_t __pad1; + uint32_t atime; + uint32_t mtime; + uint32_t ctime; + uint32_t __pad2; + } sys_stat = { 0 }; /* copied to the guest even on failure */ + struct stat st_buf; + uint8_t *st_bufptr = (uint8_t *)&sys_stat; + int rc, err = 0; + char filename[BUFSIZ] = { 0 }; + target_ulong physicalFilenameAddr; + target_ulong statBufferAddr; + hexagon_read_memory(env, swi_info, 4, &physicalFilenameAddr, retaddr); + + if (what_swi == HEX_SYS_STAT) { + int i = 0; + do { + hexagon_read_memory(env, physicalFilenameAddr + i, 1, + &filename[i], retaddr); + i++; + } while ((i < BUFSIZ - 1) && filename[i - 1]); /* keep final NUL */ + rc = stat(filename, &st_buf); + err = rc ? errno : 0; + } else{ + GuestFD *gf = get_guestfd(physicalFilenameAddr); + if (!gf || gf->type != GuestFDHost) { + rc = -1; + err = EBADF; /* unknown or non-host descriptor */ + } else { + rc = fstat(gf->hostfd, &st_buf); + err = rc ? errno : 0; + } + } + if (rc == 0) { + sys_stat.dev = st_buf.st_dev; + sys_stat.ino = st_buf.st_ino; + sys_stat.mode = st_buf.st_mode; + sys_stat.nlink = (uint32_t) st_buf.st_nlink; + sys_stat.rdev = st_buf.st_rdev; + sys_stat.size = (uint32_t) st_buf.st_size; +#if defined(__linux__) + sys_stat.atime = (uint32_t) st_buf.st_atim.tv_sec; + sys_stat.mtime = (uint32_t) st_buf.st_mtim.tv_sec; + sys_stat.ctime = (uint32_t) st_buf.st_ctim.tv_sec; +#elif defined(_WIN32) + sys_stat.atime = st_buf.st_atime; + sys_stat.mtime = st_buf.st_mtime; + sys_stat.ctime = st_buf.st_ctime; +#endif + } + hexagon_read_memory(env, swi_info + 4, 4, &statBufferAddr, retaddr); + + for (int i = 0; i < sizeof(sys_stat); i++) { + hexagon_write_memory(env, statBufferAddr + i, 1, st_bufptr[i], + retaddr); + } + common_semi_cb(cs, rc, err); + } + break; + + case HEX_SYS_FTRUNC: + { + int fd; + off_t size_limit; + hexagon_read_memory(env, swi_info, 4, &fd, retaddr); + hexagon_read_memory(env, swi_info + 4, 8, &size_limit, retaddr); +
semihost_sys_ftruncate(cs, common_semi_cb, fd, size_limit); } + break; - do_common_semihosting(cs); + case HEX_SYS_ACCESS: + { + char filename[BUFSIZ]; + uint32_t FileNameAddr; + uint32_t BufferMode; + int rc, err = 0; + + int i = 0; + + hexagon_read_memory(env, swi_info, 4, &FileNameAddr, retaddr); + do { + hexagon_read_memory(env, FileNameAddr + i, 1, &filename[i], retaddr); + i++; + } while ((i < BUFSIZ - 1) && (filename[i - 1])); /* room for NUL */ + filename[i] = 0; + + hexagon_read_memory(env, swi_info + 4, 4, &BufferMode, retaddr); + + rc = access(filename, BufferMode); + if (rc != 0) { + err = errno; + } + common_semi_cb(cs, rc, err); + } + break; + + case HEX_SYS_GETCWD: + { + char cwdPtr[PATH_MAX]; + uint32_t BufferAddr; + uint32_t BufferSize; + uint32_t rc = 0, err = 0; + + hexagon_read_memory(env, swi_info, 4, &BufferAddr, retaddr); + hexagon_read_memory(env, swi_info + 4, 4, &BufferSize, retaddr); + + if (!getcwd(cwdPtr, PATH_MAX)) { + err = errno; + } else { + size_t cwd_size = strlen(cwdPtr); + if (cwd_size >= BufferSize) { /* need room for the NUL too */ + err = ERANGE; + } else { + for (int i = 0; i <= cwd_size; i++) { /* includes the NUL */ + hexagon_write_memory(env, BufferAddr + i, 1, + (uint64_t)cwdPtr[i], retaddr); + } + rc = BufferAddr; + } + } + common_semi_cb(cs, rc, err); + break; + } + + case HEX_SYS_EXEC: + { + qemu_log_mask(LOG_UNIMP, "SYS_EXEC is deprecated\n"); + common_semi_cb(cs, -1, ENOSYS); + } + break; + + case HEX_SYS_FTELL: + { + int fd; + hexagon_read_memory(env, swi_info, 4, &fd, retaddr); + semihost_sys_lseek(cs, common_semi_ftell_cb, fd, 0, GDB_SEEK_CUR); + } + break; + + case HEX_SYS_READ_CYCLES: + case HEX_SYS_READ_TCYCLES: + case HEX_SYS_READ_ICOUNT: + { + arch_set_thread_reg(env, HEX_REG_R00, 0); + arch_set_thread_reg(env, HEX_REG_R01, 0); + break; + } + + case HEX_SYS_READ_PCYCLES: + { + arch_set_thread_reg(env, HEX_REG_R00, + arch_get_system_reg(env, HEX_SREG_PCYCLELO)); + arch_set_thread_reg(env, HEX_REG_R01, + arch_get_system_reg(env, HEX_SREG_PCYCLEHI)); + break; + } + + case HEX_SYS_PROF_ON:
+ case HEX_SYS_PROF_OFF: + case HEX_SYS_PROF_STATSRESET: + case HEX_SYS_DUMP_PMU_STATS: + common_semi_cb(cs, -1, ENOSYS); + qemu_log_mask(LOG_UNIMP, "SWI call %x is unimplemented in QEMU\n", + what_swi); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "error: unknown swi call 0x%x\n", what_swi); + cpu_abort(cs, "Hexagon Unsupported swi call 0x%x\n", what_swi); + } } static void set_addresses(CPUHexagonState *env, target_ulong pc_offset, From febff91c1bd732d751c6c99a6789e6939b1b5516 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 07:30:53 -0800 Subject: [PATCH 102/126] Hexagon: add COREDUMP semihosting operation Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu.c | 2 +- target/hexagon/hexswi.c | 131 ++++++++++++++++++++++++++++++++++++++ target/hexagon/internal.h | 1 + 3 files changed, 133 insertions(+), 1 deletion(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 471a1ef6c8e59..438e160504275 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -227,7 +227,7 @@ void hexagon_debug_qreg(CPUHexagonState *env, int regnum) print_qreg(stdout, env, regnum, false); } -static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) +void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) { HexagonCPU *cpu = env_archcpu(env); diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c index b67996eb7aef6..611725e6b165c 100644 --- a/target/hexagon/hexswi.c +++ b/target/hexagon/hexswi.c @@ -41,6 +41,7 @@ DEF_SWI_FLAG(PROF_STATSRESET, 0x48) \ DEF_SWI_FLAG(DUMP_PMU_STATS, 0x4a) \ DEF_SWI_FLAG(READ_PCYCLES, 0x52) \ + DEF_SWI_FLAG(COREDUMP, 0xCD) \ DEF_SWI_FLAG(FTELL, 0x100) \ DEF_SWI_FLAG(FSTAT, 0x101) \ DEF_SWI_FLAG(STAT, 0x103) \ @@ -77,6 +78,132 @@ static void common_semi_ftell_cb(CPUState *cs, uint64_t ret, int err) common_semi_cb(cs, ret, err); } +static void coredump(CPUHexagonState *env) +{ + uint32_t ssr = 
arch_get_system_reg(env, HEX_SREG_SSR); + printf("CRASH!\n"); + printf("I think the exception was: "); + switch (GET_SSR_FIELD(SSR_CAUSE, ssr)) { + case 0x43: + printf("0x43, NMI"); + break; + case 0x42: + printf("0x42, Data abort"); + break; + case 0x44: + printf("0x44, Multi TLB match"); + break; + case HEX_CAUSE_BIU_PRECISE: + printf("0x%x, Bus Error (Precise BIU error)", + HEX_CAUSE_BIU_PRECISE); + break; + case HEX_CAUSE_DOUBLE_EXCEPT: + printf("0x%x, Exception observed when EX = 1 (double exception)", + HEX_CAUSE_DOUBLE_EXCEPT); + break; + case HEX_CAUSE_FETCH_NO_XPAGE: + printf("0x%x, Privilege violation: User/Guest mode execute" + " to page with no execute permissions", + HEX_CAUSE_FETCH_NO_XPAGE); + break; + case HEX_CAUSE_FETCH_NO_UPAGE: + printf("0x%x, Privilege violation: " + "User mode exececute to page with no user permissions", + HEX_CAUSE_FETCH_NO_UPAGE); + break; + case HEX_CAUSE_INVALID_PACKET: + printf("0x%x, Invalid packet", + HEX_CAUSE_INVALID_PACKET); + break; + case HEX_CAUSE_PRIV_USER_NO_GINSN: + printf("0x%x, Privilege violation: guest mode insn in user mode", + HEX_CAUSE_PRIV_USER_NO_GINSN); + break; + case HEX_CAUSE_PRIV_USER_NO_SINSN: + printf("0x%x, Privilege violation: " + "monitor mode insn ins user/guest mode", + HEX_CAUSE_PRIV_USER_NO_SINSN); + break; + case HEX_CAUSE_REG_WRITE_CONFLICT: + printf("0x%x, Multiple writes to same register", + HEX_CAUSE_REG_WRITE_CONFLICT); + break; + case HEX_CAUSE_PC_NOT_ALIGNED: + printf("0x%x, PC not aligned", + HEX_CAUSE_PC_NOT_ALIGNED); + break; + case HEX_CAUSE_MISALIGNED_LOAD: + printf("0x%x, Misaligned Load @ 0x%x", + HEX_CAUSE_MISALIGNED_LOAD, + arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_MISALIGNED_STORE: + printf("0x%x, Misaligned Store @ 0x%x", + HEX_CAUSE_MISALIGNED_STORE, + arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_PRIV_NO_READ: + printf("0x%x, Privilege violation: " + "user/guest read permission @ 0x%x", + HEX_CAUSE_PRIV_NO_READ, + 
arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_PRIV_NO_WRITE: + printf("0x%x, Privilege violation: " + "user/guest write permission @ 0x%x", + HEX_CAUSE_PRIV_NO_WRITE, + arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_PRIV_NO_UREAD: + printf("0x%x, Privilege violation: user read permission @ 0x%x", + HEX_CAUSE_PRIV_NO_UREAD, + arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_PRIV_NO_UWRITE: + printf("0x%x, Privilege violation: user write permission @ 0x%x", + HEX_CAUSE_PRIV_NO_UWRITE, + arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_COPROC_LDST: + printf("0x%x, Coprocessor VMEM address error. @ 0x%x", + HEX_CAUSE_COPROC_LDST, + arch_get_system_reg(env, HEX_SREG_BADVA)); + break; + case HEX_CAUSE_STACK_LIMIT: + printf("0x%x, Stack limit check error", HEX_CAUSE_STACK_LIMIT); + break; + case HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT: + printf("0x%X, Floating-Point: Execution of Floating-Point " + "instruction resulted in exception", + HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT); + break; + case HEX_CAUSE_NO_COPROC_ENABLE: + printf("0x%x, Illegal Execution of Coprocessor Instruction", + HEX_CAUSE_NO_COPROC_ENABLE); + break; + case HEX_CAUSE_NO_COPROC2_ENABLE: + printf("0x%x, " + "Illegal Execution of Secondary Coprocessor Instruction", + HEX_CAUSE_NO_COPROC2_ENABLE); + break; + case HEX_CAUSE_UNSUPORTED_HVX_64B: + printf("0x%x, " + "Unsuported Execution of Coprocessor Instruction with 64bits Mode On", + HEX_CAUSE_UNSUPORTED_HVX_64B); + break; + case HEX_CAUSE_VWCTRL_WINDOW_MISS: + printf("0x%x, " + "Thread accessing a region outside VWCTRL window", + HEX_CAUSE_VWCTRL_WINDOW_MISS); + break; + default: + printf("Don't know"); + break; + } + printf("\nRegister Dump:\n"); + hexagon_dump(env, stdout, 0); +} + static void sim_handle_trap0(CPUHexagonState *env) { g_assert(bql_locked()); @@ -248,6 +375,10 @@ static void sim_handle_trap0(CPUHexagonState *env) } break; + case HEX_SYS_COREDUMP: + coredump(env); + 
break; + case HEX_SYS_FTELL: { int fd; diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index fd2397b9ef0eb..ff89c9cda43ff 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -32,6 +32,7 @@ int hexagon_hvx_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n); void hexagon_debug_vreg(CPUHexagonState *env, int regnum); void hexagon_debug_qreg(CPUHexagonState *env, int regnum); void hexagon_debug(CPUHexagonState *env); +void hexagon_dump(CPUHexagonState *env, FILE *f, int flags); extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS]; extern const char * const hexagon_sregnames[]; From 81f98b46cea67b93bed5bc3f21200216cfb07814 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 07:31:58 -0800 Subject: [PATCH 103/126] Hexagon: add {OPEN|READ|CLOSE}_DIR semihosting operations Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- hw/hexagon/hexagon_dsp.c | 2 ++ target/hexagon/cpu.h | 1 + target/hexagon/hexswi.c | 78 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c index eca310c0e3452..348977a542fa4 100644 --- a/hw/hexagon/hexagon_dsp.c +++ b/hw/hexagon/hexagon_dsp.c @@ -112,6 +112,7 @@ static void hexagon_common_init(MachineState *machine, Rev_t rev, for (int i = 0; i < machine->smp.cpus; i++) { HexagonCPU *cpu = HEXAGON_CPU(object_new(machine->cpu_type)); + CPUHexagonState *env = &cpu->env; qemu_register_reset(do_cpu_reset, cpu); /* @@ -147,6 +148,7 @@ static void hexagon_common_init(MachineState *machine, Rev_t rev, sysbus_mmio_map(SYS_BUS_DEVICE(l2vic_dev), 1, m_cfg->cfgtable.fastl2vic_base << 16); } else if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) { + env->dir_list = NULL; return; } diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 50265da40dc90..ea618802a9290 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ 
-143,6 +143,7 @@ typedef struct CPUArchState { target_ulong tlb_lock_count; target_ulong k0_lock_count; CPUHexagonTLBContext *hex_tlb; + GList *dir_list; #endif target_ulong next_PC; target_ulong new_value_usr; diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c index 611725e6b165c..a08d7f68917c1 100644 --- a/target/hexagon/hexswi.c +++ b/target/hexagon/hexswi.c @@ -47,6 +47,9 @@ DEF_SWI_FLAG(STAT, 0x103) \ DEF_SWI_FLAG(GETCWD, 0x104) \ DEF_SWI_FLAG(ACCESS, 0x105) \ + DEF_SWI_FLAG(OPENDIR, 0x180) \ + DEF_SWI_FLAG(CLOSEDIR, 0x181) \ + DEF_SWI_FLAG(READDIR, 0x182) \ DEF_SWI_FLAG(EXEC, 0x185) \ DEF_SWI_FLAG(FTRUNC, 0x186) @@ -375,6 +378,81 @@ static void sim_handle_trap0(CPUHexagonState *env) } break; + case HEX_SYS_OPENDIR: + { + DIR *dir; + char buf[BUFSIZ] = { 0 }; + int rc = 0, err = 0; + + int i = 0; + do { + hexagon_read_memory(env, swi_info + i, 1, &buf[i], retaddr); + i++; + } while ((i < BUFSIZ - 1) && buf[i - 1]); /* bounded; final NUL kept */ + + dir = opendir(buf); + if (dir != NULL) { + env->dir_list = g_list_append(env->dir_list, dir); + rc = g_list_index(env->dir_list, dir) + DIR_INDEX_OFFSET; + } else { + err = errno; + } + common_semi_cb(cs, rc, err); + break; + } + + case HEX_SYS_READDIR: + { + struct dirent *host_dir_entry = NULL; + int dir_index = swi_info - DIR_INDEX_OFFSET; + DIR *dir = g_list_nth_data(env->dir_list, dir_index); + uint32_t rc = 0, err = 0; + + if (dir) { + errno = 0; + host_dir_entry = readdir(dir); + if (host_dir_entry == NULL) { + err = errno; + } + } else { + err = EBADF; + } + + if (host_dir_entry) { + uint32_t guest_dir_entry = arch_get_thread_reg(env, HEX_REG_R02); + hexagon_write_memory(env, guest_dir_entry, 4, host_dir_entry->d_ino, + retaddr); + for (int i = 0; i < sizeof(host_dir_entry->d_name); i++) { + hexagon_write_memory(env, guest_dir_entry + 4 + i, 1, + host_dir_entry->d_name[i], retaddr); + if (!host_dir_entry->d_name[i]) { + break; + } + } + rc = guest_dir_entry; + } + common_semi_cb(cs, rc, err); + break; + } + + case HEX_SYS_CLOSEDIR: + { + DIR *dir; + int
ret = -1, err = 0; + GList *node = g_list_nth(env->dir_list, swi_info - DIR_INDEX_OFFSET); + + dir = node ? node->data : NULL; /* same handle encoding as OPENDIR */ + if (dir != NULL) { + ret = closedir(dir); + err = ret ? errno : 0; + node->data = NULL; /* keep the slot so other handles stay valid */ + } else { + err = EBADF; + } + common_semi_cb(cs, ret, err); + break; + } + case HEX_SYS_COREDUMP: coredump(env); break; From f80033bc8f32d695fcd8d830857d432106f4dd82 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 29 Jan 2025 07:39:10 -0800 Subject: [PATCH 104/126] Hexagon: add semihosting check-tcg test This also adds a minimal crt0/libc for hexagon, allowing us to build and run standalone system emulation tests in the future. Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- tests/tcg/hexagon/Makefile.softmmu-target | 71 + tests/tcg/hexagon/system/crt0/crt0.S | 103 ++ tests/tcg/hexagon/system/crt0/crt0.inc | 25 + .../tcg/hexagon/system/crt0/crt0_standalone.S | 1206 +++++++++++++++++ .../hexagon/system/crt0/hexagon_standalone.h | 103 ++ tests/tcg/hexagon/system/crt0/min_libc.c | 359 +++++ tests/tcg/hexagon/system/crt0/pte.S | 80 ++ tests/tcg/hexagon/system/crt0/tlb.c | 198 +++ tests/tcg/hexagon/system/semihost.c | 297 ++++ tests/tcg/hexagon/system/strutils.h | 25 + 10 files changed, 2467 insertions(+) create mode 100644 tests/tcg/hexagon/Makefile.softmmu-target create mode 100644 tests/tcg/hexagon/system/crt0/crt0.S create mode 100755 tests/tcg/hexagon/system/crt0/crt0.inc create mode 100644 tests/tcg/hexagon/system/crt0/crt0_standalone.S create mode 100644 tests/tcg/hexagon/system/crt0/hexagon_standalone.h create mode 100644 tests/tcg/hexagon/system/crt0/min_libc.c create mode 100644 tests/tcg/hexagon/system/crt0/pte.S create mode 100644 tests/tcg/hexagon/system/crt0/tlb.c create mode 100644 tests/tcg/hexagon/system/semihost.c create mode 100644 tests/tcg/hexagon/system/strutils.h diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target new file mode 100644 index 0000000000000..f965f4f4fac6d ---
/dev/null +++ b/tests/tcg/hexagon/Makefile.softmmu-target @@ -0,0 +1,71 @@ +## +## Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. +## +## SPDX-License-Identifier: GPL-2.0-or-later +## + +# -*- Mode: makefile -*- +# +# Hexagon SoftMMU tests - included from tests/tcg/Makefile +# + +HEXAGON_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/hexagon/system + +# Set search path for all sources +VPATH += $(HEXAGON_SYSTEM_SRC) + +########### Compiling options +# We force -O0 to avoid optimizations that would break the +# libc simplifications we made at min_libc.c +# +CFLAGS=-mv73 -U__linux__ -G0 -nodefaultlibs -nostdlib -static -fno-PIC -O0 -g -Werror +LDFLAGS=-lclang_rt.builtins-hexagon + +########### QEMU options +QEMU_BASE_MACHINE=-M V66G_1024 -semihosting-config usefs=$(SRC_PATH)/tests/tcg/hexagon/system +QEMU_OPTS+=-display none + +QEMU_OPTS+=$(QEMU_BASE_MACHINE) -kernel + +crt0.o: crt0/crt0.S crt0/crt0.inc +crt0_standalone.o: crt0/crt0_standalone.S crt0/crt0.inc +pte.o: crt0/pte.S +min_libc.o: crt0/min_libc.c +tlb.o: crt0/tlb.c + +CRT0_OBJS=crt0.o crt0_standalone.o pte.o min_libc.o tlb.o + +TESTS += \ + semihost \ + $() + +$(TESTS): $(CRT0_OBJS) + +# Build and link the tests +echo-and-run = echo $(1) && $(1) +define build_fn + @if test "$(3)" = LINK; then extra="$(LDFLAGS)"; else extra=-c; fi && \ + $(call echo-and-run, $(CC) $(CFLAGS) $(1) -o $(2) $$extra) +endef + +$(CRT0_OBJS): + $(call build_fn,$<,$@) +$(TESTS): + $(call build_fn,$^,$@,LINK) + +%.o: %.S + $(call build_fn,$<,$@) +%.o: %.c + $(call build_fn,$<,$@) + +semihost.o: semihost.c strutils.h +semihost: semihost.o + +############# Custom test rules + +run-semihost: semihost + mkdir -p _semihost_dir + touch _semihost_dir/fileA _semihost_dir/fileB + $(call run-test, $<, $(QEMU) --append "arg1 arg2" $(QEMU_OPTS) $< \ + > $<.stdout) + $(call quiet-command, grep -q "PASS" $<.stdout, "GREP", "PASS") diff --git a/tests/tcg/hexagon/system/crt0/crt0.S b/tests/tcg/hexagon/system/crt0/crt0.S new file mode 
100644 index 0000000000000..8a40e39536ebb --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/crt0.S @@ -0,0 +1,103 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "crt0.inc" + .equ DEFAULT_HEAP_SIZE, 0x4000000 /* 64MB */ + .equ DEFAULT_STACK_SIZE, 0x100000 /* 1MB */ + + .section .start, "ax", @progbits + .subsection 0 + .org 0 + + .global _start + .type _start, @function + .p2align 5 +_start: + jump hexagon_start_init + jump hexagon_start_main + .size _start, . - _start + +/*----------------------------------------------------------------------------*/ + + .global hexagon_pre_main + .type hexagon_pre_main, @function + +hexagon_pre_main: + /* Mark first stack frame. */ + fp = #0 + + ReadFrom heapBase, r4 + + AddrOf DEFAULT_HEAP_SIZE + r5 = r0 + + r5 = add (r4, r5) /* Calculate aligned heap top. */ + r5 = add (r5, #15) + r5 = and (r5, #-16) + WriteTo heapLimit, r5 + + /* Set up stack. */ + AddrOf DEFAULT_STACK_SIZE + r7 = r0 + + r6 = add (r5, r7) /* Assume stack after heap. */ + r6 = and (r6, #-16) + + WriteTo stackBase, r6 + + ReadFrom stackBase, r6 + + r7 = sub (r6, r7) /* Desired stack size. */ + r7 = add (r7, #15) + r7 = and (r7, #-16) + WriteTo stackLimit, r7 + + /* Set stack up. */ + ReadFrom stackBase, r0 + sp = and (r0, #-16) /* Align top of stack. */ + + /* Zero up BSS. */ + AddrOf __bss_start, r0 + AddrOf _end, r2 + AddrOf memset, r28 /* bzero () is deprecated. */ + { r1 = #0 + r2 = sub (r2, r0) + callr r28 } + .size hexagon_pre_main, . - hexagon_pre_main + +/*----------------------------------------------------------------------------*/ + + .global hexagon_start_main + .type hexagon_start_main, @function +hexagon_start_main: + AddrOf _start_main, r28 + callr r28 + /*Stop all threads to terminate execution */ + r0 = #0x3f + stop (r0) + .size hexagon_start_main, . 
- hexagon_start_main + +/*----------------------------------------------------------------------------*/ + + .data + .global heapBase + .global heapLimit + .global stackBase + .global stackLimit + .global setHeapAngelCallParams + +.HeapParams: +heapBase: + .word end /* Provided by the linker script. */ +heapLimit: + .word end + (DEFAULT_HEAP_SIZE & -16) +stackBase: + .word 0 +stackLimit: + .word end + ((DEFAULT_HEAP_SIZE + 15) & -16) + +setHeapAngelCallParams: + .word .HeapParams diff --git a/tests/tcg/hexagon/system/crt0/crt0.inc b/tests/tcg/hexagon/system/crt0/crt0.inc new file mode 100755 index 0000000000000..a28d68c51cd5a --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/crt0.inc @@ -0,0 +1,25 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + .macro AddrOf Var, To = r0 + \To\() = ## (\Var) + .endm + + .macro ReadFrom Var, To = r0 + AddrOf \Var, \To + \To = memw (\To) + .endm + + .macro WriteTo Var, From = r0, Ptr = r1 + .ifnc "\From", "\Ptr" + AddrOf \Var, \Ptr + memw (\Ptr) = \From + \From = memw (\Ptr) + .else + .print "Macro arguments \"From\" and \"Ptr\" cannot be the same." + .err + .endif + .endm diff --git a/tests/tcg/hexagon/system/crt0/crt0_standalone.S b/tests/tcg/hexagon/system/crt0/crt0_standalone.S new file mode 100644 index 0000000000000..a3ca6ea95da2a --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/crt0_standalone.S @@ -0,0 +1,1206 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "crt0.inc" + .equ TLB_FIXED_ENTRIES, 6 + + .org 0x20 /* This must be at address 0x20 */ +EventVectorBase: + .word .EventVectors + +/* This can vary based on the revid of the part: + 64, 128, 192. 
Most are 128 */ +_NumTLBEntries: + .word 127 + +TLBMapTable: + .word UPTE_START + +CoreDump: + .word RegDump + + .subsection 0 + + /* Make sure that data and code don't end up in the same L2 cache-line. */ + .p2align 6, 0 + + .global hexagon_start_init + .type hexagon_start_init, @function +hexagon_start_init: +.Init: + /* Clean up house (make sure that R0 is initialized before DCKILL). */ + dckill + isync + ickill + isync + +.InitSSR: + /* SFD = 0, IE = 0, UM = 0, EX = 0, ASID = 0 */ + r0 = #0 + ssr = r0 + isync + + /* Setup events */ +.InitVector: + ReadFrom EventVectorBase + evb = r0 + +.InitStack: + ReadFrom exc_stack_tops + sgp0 = r0 + +.InitFramekey: + r0 = #0 + framekey = r0 + + /* Configure cycle counter. */ +.InitPcycle: + r1 = #1 + r0 = syscfg + r0 = insert (r1, #1, #6) + syscfg = r0 + + /* Configure IMT/DMT. */ +.InitDMT: + r1 = #1 + r0 = syscfg + r0 = insert (r1, #1, #15) + syscfg = r0 +.InitQoS: + r1 = #1 + r0 = syscfg + r0 = insert (r1, #1, #13) + syscfg = r0 +1: +.InitXE: + r1 = #1 + r0 = ssr + r0 = insert (r1, #1, #31) + ssr = r0 + + //{ 0x4066, 0x4, 0x7F, 0, 4 }, // v66a_512 + { + r0 = #0x2c // JTLB size + r2 = cfgbase + } + r1 = asl(r2, #5) + r0 = memw_phys(r0, r1) + { + r0 = add(r0, #-1); + memw(##_tlbmax) = r0.new + } + + { + r0 = #0x40 // L2 Tag size + r2 = cfgbase + } + r0 = memw_phys(r0, r1) + r1 = #0; + p0 = cmp.eq(r0, #0x400) + { + if (p0) r1 = #5 + if (p0) jump 1f + } + p0 = cmp.eq(r0, #0x200) + { + if (p0) r1 = #4 + if (p0) jump 1f + } + p0 = cmp.eq(r0, #0x100) + { + if (p0) r1 = #3 + if (p0) jump 1f + } + p0 = cmp.eq(r0, #0x080) + { + if (p0) r1 = #2 + if (p0) jump 1f + } +1: + memw(##_l2cfg) = r1 + +/* L2 config sequence: + * 1 - Disable prefetching by clearing HFd/i bits in ssr/ccr + */ + r0 = ccr + r3 = #0 + r0 = insert (r3, #4, #16) /* Clear HFi, HFd, HFiL2 HFdL2 bits */ + ccr = r0 + + /* Configure L2 cache. */ + r0 = syscfg + r0 = insert (r3, #3, #16) /* Set L2 size to 0 via L2CFG. 
*/ + + +/* L2 config sequence: + * 2 - execute an isync which is aligned to a 32byte boundary. + */ + .p2alignl 5, 0x7f00c000 + isync + +/* L2 config sequence: + * 3 - execute an syncht insn to insure there are no outstanding + * memory transactions. + */ + syncht + +/* L2 config sequence: + * 4 - Set the desired L2 size for < V4 (set to 0 for >= V4). + */ + syscfg = r0 + isync + +/* L2 config sequence: + * 5 - Execute the L2KILL insn to initiate the cache. + */ + l2kill + syncht + +/* L2 config sequence: + * 6 - Set the desired L2 size. + */ + r2 = memw(##_l2cfg) + r3 = #0x5 + r3 = min (r2, r3) /* min between desired and hwmax */ + r0 = insert (r3, #4, #16) /* Set L2 size via L2CFG. */ + syscfg = r0 + isync + + /* Configure L1 caches. */ +.InitCache: + r1 = #0 + r1 = #1 + r2 = syscfg + r2 = insert (r1, #1, #1) + r2 = insert (r0, #1, #2) + + r1 = #1 + r2 = insert (r1, #1, #23) + + syscfg = r2 + isync + + /* BEGIN code to turn on translation */ +.InitTLB: + // V65 an later use a table for this stuff, should get a table for all of it! + r0 = memw(##_tlbmax) + + /* Clear TLB and store the number of TLBs */ + { + r3:2 = combine(#0,#0) + memw(##_NumTLBEntries) = r0 + } + + loop0(.InitTLBLoop, r0) +.falign +.InitTLBLoop: + tlbw(r3:2,r0) + r0 = add (r0, #-1) + {}:endloop0 + isync + +.InitTLBGlobal: /* Fixed entry for everything. */ + AddrOf _start, r2 + r2 = lsr (r2, #12) + + AddrOf 0xc3f00000, r1 /* Global, 1-1 mapping. */ + AddrOf 0xf7000000, r0 /* Full perms, fully cacheable WB */ + r1 = or (r1, r2) /* 1M translation */ + r0 |= asl (r2,#1) + r0 = setbit(r0,#4) + r0 = and(r0,#-16) + r2 = #0 + tlbw(r1:0,r3) + + /* TODO Should there be a TLB entry for TCM too? */ + + r0 = syscfg + r0 = setbit (r0, #0) /* Turn the MMU on. */ + syscfg = r0 + isync + +.InitInt: + /* Set up rising edge triggered interrupts */ + r0 = #0 + imask = r0 + r1 = #-1 + cswi (r1) + + /* Enable interrupts globally. 
*/ + r0 = ssr + r0 = setbit (r0, #18) + ssr= r0 + + r0 = syscfg + r0 = setbit (r0, #4) + syscfg = r0 + isync + + /* Set up input params to Angel call */ + r0 = #22 + AddrOf setHeapAngelCallParams, r1 + trap0 (#0) + +.PreMain: + AddrOf hexagon_pre_main, r28 + jumpr r28 + .size hexagon_start_init, . - hexagon_start_init + +.global qdsp6_start_init +.set qdsp6_start_init, \ + hexagon_start_init + +/* (At this point the machine is mostly ready for normal execution */ + + /* This code is jumped to when we start a new thread. */ + /* It reads some values out of memory and uses them */ + /* to begin execution. */ + /* The code supports going to a function of the type: */ + /* void foo (void *arg); */ + /* or */ + /* void foo (int arg); */ + /* All we have to do is get the location of "foo", the */ + /* value for "arg", and set up the stack. */ + /* This stuff has been set up for us by thread_create, below.*/ + /* Under the OS, we have no need for this, it is merely for */ + /* trying multithreaded applications on the raw hardware. */ + + .p2align 4 + .weak thread_stop + .type thread_stop, @function +thread_stop: +{ + r0 = htid + r1 = #1 +} + r1 = lsl (r1, r0) + stop (r1) + + .p2align 4 + + .type event_handle_reset, @function + +event_handle_reset: + r1 = htid /* do not alter until final register initialization */ + + { + r28 = ##(start_pc) + r29 = ##(start_sp) + } + + r2 = #0 /* UM = 0 EX = 0 IE = 0 ASID = 0 */ + ssr = r2 + isync + imask = r2 + + r2 = ##(exc_stack_tops) + r2 = memw (r2+r1<<#2) + sgp0 = r2 + + /* Initialize GP to the start of the global data area. */ + //r2 = ##(_SDA_BASE_) + //gp = r2 + + r2.h = #4 + r2.l = #0 + ssr = r2 /* Turn on interrupts */ + + r3 = #1 + r2 = ssr + r2 = insert (r3, #1, #31) + ssr = r2 + + r2.h = #0x1 /* Enable cache fetching */ + usr = r2 + + r0 = #1 + r2 = #1 + r0 |= asl (r2, #1) + r2 = ccr + r2 = insert (r0, #2, #16) + /* Enable dcfetch and l2fetch. 
*/ + r2 = setbit (r2, #20) + ccr = r2 + + isync + + { + r2 = ##framekey_tbl + r3 = ##stack_size + } + { + r2 = memw(r2+r1<<#2) /* load framekey from memory array */ + r3 = memw(r3+r1<<#2) /* load stack_size from memory array */ + } + { + framekey = r2 /* store into framekey register */ + r2 = memw (sp+r1<<#2) + } + r3 = sub(r2, r3) /* framelimit = sp - stack_size */ + framelimit = r3 /* store into framelimit register */ + + { + r28 = memw (r28+r1<<#2) + sp = memw (sp+r1<<#2) + fp = #0 + } + + { + r0 = ##(start_param) + lr = ##(thread_stop) + } + fp = #0 + r1 = htid + r0 = memw (r0+r1<<#2) + + jump thread_start + + .size event_handle_reset, . - event_handle_reset + + .global __coredump + .type coredump, @function + .set __coredump, coredump +coredump: + r0 = ssr + r0 = clrbit (r0, #16) /* UM = 0 */ + r0 = clrbit (r0, #17) /* EX = 0 */ + ssr = r0 + isync + r0 = #0xCD + trap0 (#0) + r2 = #-1 + r0 = #-1 + stop (r0) + .size coredump, . - coredump + + .type event_handle_nmi, @function +event_handle_nmi: + r0 = #1 + stid = r0 + jump coredump + .size event_handle_nmi, . - event_handle_nmi + + .type event_handle_error, @function +event_handle_error: + r0 = #2 + stid = r0 + jump coredump + .size event_handle_error, . - event_handle_error + + .type event_handle_rsvd, @function +event_handle_rsvd: + r0.h = #0xdead + r0.l = #0xbeef + stid = r0 + jump coredump + .size event_handle_rsvd, . - event_handle_rsvd + + .global thread_start + .type thread_start, @function +thread_start: + jumpr r28 + .size thread_start, . - thread_start + + /* TLB HANDLING */ + /* There are a few strategies we have tried for TLB handling. */ + /* The first is just to map every page 1:1 for virtual:physical */ + /* This means we have nothing to look up but no flexibility */ + /* The strategy implemented here is to divide memory into */ + /* a bunch of 1MB pages.
Each page is by default set to the */ + /* corresponding physical 1M page, but the translation (and the */ + /* cacheability) can be changed with the add_translation function*/ + /* below. */ + /* We have to keep the table in memory, and it's down in the data*/ + /* section. */ + /* The page at address 0 is always kept in the TLB. */ + /* You will run into problems if the data gets pushed out into */ + /* another page, because you don't have a translation for the */ + /* data you need to do the translation! */ + /* The solution is to put the translation table (and probably */ + /* the TLB fill code) in special section (s) that go near address 0 */ + /* You can set that up in the linker script. */ + /* TLB miss because of eXecution */ + /* See HEXAGON Architecture System-Level Spec for more information */ + + + + .subsection 0 + + .p2align 6 + .global event_handle_tlbmissx + .type event_handle_tlbmissx, @function + +event_handle_tlbmissx: + crswap (sp, sgp0) + sp = add (sp, #-64) + /* Save off state */ + { + memd (sp + #0) = r1:0 + memd (sp + #8) = r3:2 + } + { + memd (sp + #16) = r5:4 + memd (sp + #24) = r7:6 + } + { + memd (sp + #32) = r9:8 + r9 = p3:0 + } + r8 = ssr + r7 = elr + p1 = tstbit (r8, #0) + { + /* Calculate 4K page index */ + r7 = lsr (r7, #12) + /* Check for next page hit */ + if (!p1) jump 1f + r0 = ##(__tlb_idx) + } + r7 = add (r7, #1) +1: + { + r1 = memw(##_tlb_fixed_entries) /* First non-fixed entry. */ + r3 = memw(##_NumTLBEntries) + } + /* Atomically increment index */ + /* NEVER overwrite fixed entries */ +1: + r6 = memw_locked (r0) + { + r6 = add (r6, #1) + /* This was hard coded to p0 = cmp.ge(r6, #NUM_TLB_ENTRIES) + Now we are using 2 registers so switch to the equivalent + p0 = !cmp.gt(r3, r6) */ + p0 = !cmp.gt (r3, r6) + } + /* Will never store a number greater than + _NumTLBEntries in &__tlb_idx */ + r6 = mux (p0, r1, r6) + memw_locked (r0, p0) = r6 + if (!p0) jump 1b /* Retry, lost reservation. 
*/ + + { + r7 = lsr (r7, #8) /* 1M page index */ + r3 = memw (##TLBMapTable) + } + r3 = addasl (r3, r7, #1) + { + r3 = memh (r3) + r7 = asl (r7, #8) /* VPN */ + } + r5 = extractu (r3, #12, #4) + { + r4 = extractu (r3, #4, #0) + r0 = #0x0010 /* 1M */ + r1 = #0 + } + { + r4 = asl (r4, #24) + r1.h = #0xc000 + r0.h = #0xf000 + } +1: + { + r1 = or (r1, r7) /* c000_0000 + VPN */ + r0 |= asl(r5,#9) /* f000_0000 + PPD */ + } + r0 = or (r0, r4) + /* Get Lock */ + tlblock + r5 = tlbp(r1) + p0 = tstbit (r5, #31) + if (!p0) jump 1f + + tlbw(r1:0,r6) + isync + +1: + tlbunlock + + p3:0 = r9 + { + r9:8 = memd (sp + #32) + r7:6 = memd (sp + #24) + } + { + r5:4 = memd (sp + #16) + r3:2 = memd (sp + #8) + } + { + r1:0 = memd (sp + #0) + sp = add (sp, #64) + } + crswap (sp, sgp0) + rte + + .size event_handle_tlbmissx, . - event_handle_tlbmissx + + /* TLB Miss RW */ + /* Basically the same as TLB MissX, but we get */ + /* The address from BADVA instead of ELR... see the */ + /* HEXAGON Architecture System-level Spec for more details. */ + + .p2align 6 + + .global event_handle_tlbmissrw + .type event_handle_tlbmissrw, @function + +event_handle_tlbmissrw: + crswap (sp, sgp0) + sp = add (sp, #-64) + { + memd (sp + #0) = r1:0 + memd (sp + #8) = r3:2 + } + { + memd (sp + #16) = r5:4 + memd (sp + #24) = r7:6 + } + { + memd (sp + #32) = r9:8 + r8 = ssr + } + r7 = badva + r9 = p3:0 + { + r0 = ##__tlb_idx + r1 = memw(##_tlb_fixed_entries) + } + { + r7 = lsr (r7, #20) + r3 = memw(##_NumTLBEntries) /* 31, 63, 127, or 191 */ + } + /* Atomically increment index */ + /* NEVER overwrite entry 0 */ +1: + r6 = memw_locked (r0) + { + r6 = add (r6, #1) + /* This was hard coded to p0 = cmp.ge(r6, #NUM_TLB_ENTRIES) + Now we are using 2 registers so switch to the equivalent + p0 = !cmp.gt(r3, r6) */ + p0 = !cmp.gt (r3, r6) + } + /* Will never store a number greater than + _NumTLBEntries in &__tlb_idx */ + r6 = mux (p0, r1, r6) + memw_locked (r0, p0) = r6 + if (!p0) jump 1b /* Retry, lost reservation.
*/ + + r3 = memw (##TLBMapTable) + r3 = addasl (r3, r7, #1) + { + r3 = memh (r3) + r7 = asl (r7, #8) /* VPN */ + } + + r4 = extractu (r3, #4, #0) +.L_OK: + { + r5 = extractu (r3, #12, #4) + r0 = #0x0010 /* 1M */ + r1 = #0 + } + { + r4 = asl (r4, #24) + r1.h = #0xc000 + r0.h = #0xf000 + } +1: + { + r1 = or (r1, r7) /* r1: VPN | C000_0000 */ + r0 |= asl(r5,#9) /* r0: PPD | F000_0000 */ + } + r0 = or (r0, r4) + + tlblock + r5 = tlbp(r1) + p0 = tstbit (r5, #31) + if (!p0) jump 1f + + tlbw(r1:0,r6) + isync + jump 2f +1: + // If we take a miss around a user defined page they need to + // manually create another page or not touch the regions above + // and below their page within a 1M boundary. + r4 = memw(##_tlb_fixed_entries) + p0 = cmp.gt(r4, r5) // r4>r5 == r5<r4, (entryfound < num_fixed) + if (p0) jump . // DEAD +2: + tlbunlock + + p3:0 = r9 + { + r9:8 = memd (sp + #32) + r7:6 = memd (sp + #24) + } + { + r5:4 = memd (sp + #16) + r3:2 = memd (sp + #8) + } + { + r1:0 = memd (sp + #0) + sp = add (sp, #64) + } + crswap (sp, sgp0) + rte + + .size event_handle_tlbmissrw, . - event_handle_tlbmissrw + +/* This code handles the OS-like requests coming */ +/* from the application. */ + + .p2align 4 + + .type event_handle_trap0, @function + +event_handle_trap0: + crswap (sp, sgp0) + { + sp = add (sp, #-40) + memd (sp + #-40) = r5:4 + r5 = p3:0 + p0 = cmp.eq (r0, #0x40) /* read (thread) cycles */ + } + { + memd (sp + #8) = r3:2 + p1 = cmp.eq (r0, #0x44) /* read tcycles */ + p2 = cmp.eq (r0, #0x52) /* read pcycles */ + r4.h = #HI (0x55555555) /* 1/3 in 0.32 fixed point */ + } + +7: /* falls through from above: restore and return, p0-p2 are not acted on */ + { + p3:0 = r5 + r3:2 = memd (sp + #8) + r5:4 = memd (sp) + sp = add (sp, #40) + } + crswap (sp, sgp0) + rte + +8: /* NOTE(review): no visible branch targets 8: or 9: - leftover code? */ + { + if (!p2) jump 9f + r6.l = #38 + } + { + p2 = cmp.eq (r1, r6) + jump 1b /* NOTE(review): binds to the last 1: in event_handle_tlbmissrw */ + } + +9: + r1 = memw (##CoreDump) + + jump 1b /* NOTE(review): same stale 1b target as above - confirm */ + + .size event_handle_trap0, .
- event_handle_trap0 + + .p2align 4 + + .type event_handle_trap1, @function + +event_handle_trap1: + r0 = #9 + stid = r0 + jump coredump + + .size event_handle_trap1, . - event_handle_trap1 + + /* This is the code jumped to by the interrupt vectors */ + /* (above). We save context, jump to the function, */ + /* restore context, and return to where we left off. */ + + .type event_handle_int, @function + +event_handle_int: + crswap (sp, sgp0) + allocframe (#160) + { + memd (sp + #0) = r1:0 + memd (sp + #8) = r3:2 + r0 = SA0 + } + { + memd (sp + #16) = r5:4 + memd (sp + #24) = r7:6 + r1 = LC0 + } + { + memd (sp + #32) = r9:8 + memd (sp + #40) = r11:10 + r2 = SA1 + } + { + memd (sp + #48) = r13:12 + memd (sp + #56) = r15:14 + r3 = LC1 + } + { + memd (sp + #64) = r17:16 + memd (sp + #72) = r19:18 + r6 = p3:0 + } + { + memd (sp + #80) = r21:20 + memd (sp + #88) = r23:22 + r5:4 = C7:6 /* M1 and M0 */ + } + { + memd (sp + #96) = r25:24 + memd (sp + #104) = r27:26 + r7 = USR + } + { + memd (sp + #112) = r1:0 + memd (sp + #136) = r7:6 + r8 = UGP + } + r0 = ssr + { + memd (sp + #120) = r3:2 + r2 = r0 + r7 = insert (r0, #8, #16) + } + { + r9 = ELR + memd (sp + #128) = r5:4 + r0 = and (r0, #0x1f) + r1 = ##(__IntHandlers) + } + { + r1 = addasl (r1, r0, #2) + } + { + memd (sp + #144) = r9:8 + r1 = memw (r1) + r3 = #0 + lr = r28 + } + { + memd (sp + #152) = lr:fp + r2 = insert (r3, #3, #16) + p0 = cmp.eq (r1, #0) + } + if (p0) jump 1f // if null, skip a bunch of stuff + ssr = r2 + crswap (sp, sgp0) + /* Call interrupt handler */ + callr r1 + /* Ok, we're back... */ + crswap (sp, sgp0) + /* R7.H is also intnum.. use for ciad */ + /* ciad ... 
do early to jump over */ + r0 = ssr + { + r26.h = #0x0000 + r7:6 = memd (sp + #136) + r1 = #6 /* EX, IE, !UM */ + } + { + r7 = asrh (r7) + r26.l = #0x0001 + r0 = insert(r1, #3, #16) + } + r7 = and (r7, #0x1f) + r26 = lsl (r26, r7) + + ssr = r0 + ciad (r26) +1: + { + lr:fp = memd (sp + #152) + r9:8 = memd (sp + #144) + } + elr = r9 + { + r7:6 = memd (sp + #136) + r5:4 = memd (sp + #128) + UGP = r8 + } + { + r3:2 = memd (sp + #120) + r1:0 = memd (sp + #112) + usr = r7 + r28 = lr + } + { + r27:26 = memd (sp + #104) + r25:24 = memd (sp + #96) + m0 = r4 + } + { + r23:22 = memd (sp + #88) + r21:20 = memd (sp + #80) + m1 = r5 + } + { + r19:18 = memd (sp + #72) + r17:16 = memd (sp + #64) + p3:0 = r6 + } + { + r15:14 = memd (sp + #56) + r13:12 = memd (sp + #48) + lc1 = r3 + } + { + r11:10 = memd (sp + #40) + r9:8 = memd (sp + #32) + sa1 = r2 + } + { + r7:6 = memd (sp + #24) + r5:4 = memd (sp + #16) + lc0 = r1 + } + { + r3:2 = memd (sp + #8) + r1:0 = memd (sp + #0) + sa0 = r0 + } + deallocframe + crswap (sp, sgp0) + rte + + .size event_handle_int, . - event_handle_int + + /* Dummy function for when we don't have code registered for an interrupt.*/ + + .p2align 4 + + .type .NoHandler, @function + +.NoHandler: + jumpr lr + + .size .NoHandler, . 
- .NoHandler + + .text + +/* Next we have the event vectors */ +/* See the HEXAGON Architecture System-Level Specification */ +/* for more information.*/ + + .p2align 12, 0 + + .type .EventVectors, @function + +.EventVectors: + jump event_handle_reset + jump event_handle_nmi + jump event_handle_error + jump event_handle_rsvd + jump event_handle_tlbmissx + jump event_handle_rsvd + jump event_handle_tlbmissrw + jump event_handle_rsvd + jump event_handle_trap0 + jump event_handle_trap1 + jump event_handle_rsvd /* 10 */ + jump event_handle_rsvd /* 11 */ + jump event_handle_rsvd /* 12 */ + jump event_handle_rsvd /* 13 */ + jump event_handle_rsvd /* 14 */ + jump event_handle_rsvd /* 15 */ + jump event_handle_int /* Event number 16, Interrupt 0 */ + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int + jump event_handle_int /* Event number 47, Interrupt 31 */ + + .size .EventVectors, . 
- .EventVectors + +/**************** DATA SECTION ****************/ + + /* Here are definitions for some of the data we use above */ + + .section .start, "awx" + .subsection 1 + + .p2align 4, 0 + + .global __IntHandlers + .set __IntHandlers, .IntHandlers +.IntHandlers: + .word .NoHandler /* 0 */ + .word .NoHandler /* 1 */ + .word .NoHandler /* 2 */ + .word .NoHandler /* 3 */ + .word .NoHandler /* 4 */ + .word .NoHandler /* 5 */ + .word .NoHandler /* 6 */ + .word .NoHandler /* 7 */ + .word .NoHandler /* 8 */ + .word .NoHandler /* 9 */ + .word .NoHandler /* 10 */ + .word .NoHandler /* 11 */ + .word .NoHandler /* 12 */ + .word .NoHandler /* 13 */ + .word .NoHandler /* 14 */ + .word .NoHandler /* 15 */ + .word .NoHandler /* 16 */ + .word .NoHandler /* 17 */ + .word .NoHandler /* 18 */ + .word .NoHandler /* 19 */ + .word .NoHandler /* 20 */ + .word .NoHandler /* 21 */ + .word .NoHandler /* 22 */ + .word .NoHandler /* 23 */ + .word .NoHandler /* 24 */ + .word .NoHandler /* 25 */ + .word .NoHandler /* 26 */ + .word .NoHandler /* 27 */ + .word .NoHandler /* 28 */ + .word .NoHandler /* 29 */ + .word .NoHandler /* 30 */ + .word .NoHandler /* 31 */ + + .p2align 5, 0 +RegDump: + .space 4 * (32 + 10 + 29) + + /* This space is used by the supervisor code for saving */ + /* context for kernel stuff. It's also used to hold the */ + /* normal user code registers while we call the user-defined */ + /* interrupt service routine */ +/* Stack tops... enough for a couple context saves... 
*/ + .p2align 3, 0 +exc_stack_lim0: .space 384 +exc_stack_top0: .word 0 + .p2align 3, 0 +exc_stack_lim1: .space 384 +exc_stack_top1: .word 0 + .p2align 3, 0 +exc_stack_lim2: .space 384 +exc_stack_top2: .word 0 + .p2align 3, 0 +exc_stack_lim3: .space 384 +exc_stack_top3: .word 0 + .p2align 3, 0 +exc_stack_lim4: .space 384 +exc_stack_top4: .word 0 + .p2align 3, 0 +exc_stack_lim5: .space 384 +exc_stack_top5: .word 0 + .p2align 3, 0 +exc_stack_lim6: .space 384 +exc_stack_top6: .word 0 + .p2align 3, 0 +exc_stack_lim7: .space 384 +exc_stack_top7: .word 0 + .p2align 3, 0 +exc_stack_lim8: .space 384 +exc_stack_top8: .word 0 + .p2align 3, 0 +exc_stack_lim9: .space 384 +exc_stack_top9: .word 0 + .p2align 3, 0 +exc_stack_lim10: .space 384 +exc_stack_top10: .word 0 + .p2align 3, 0 +exc_stack_lim11: .space 384 +exc_stack_top11: .word 0 + .p2align 3, 0 +exc_stack_lim12: .space 384 +exc_stack_top12: .word 0 + .p2align 3, 0 +exc_stack_lim13: .space 384 +exc_stack_top13: .word 0 + .p2align 3, 0 +exc_stack_lim14: .space 384 +exc_stack_top14: .word 0 + .p2align 3, 0 +exc_stack_lim15: .space 384 +exc_stack_top15: .word 0 +exc_stack_tops: + .word exc_stack_top0 + .word exc_stack_top1 + .word exc_stack_top2 + .word exc_stack_top3 + .word exc_stack_top4 + .word exc_stack_top5 + .word exc_stack_top6 + .word exc_stack_top7 + .word exc_stack_top8 + .word exc_stack_top9 + .word exc_stack_top10 + .word exc_stack_top11 + .word exc_stack_top12 + .word exc_stack_top13 + .word exc_stack_top14 + .word exc_stack_top15 + + .global __start_pc + .set __start_pc, start_pc +start_pc: + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . + jump . 
+ + + .global __start_sp + .set __start_sp, start_sp +start_sp: + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + + .global __start_param + .set __start_param, start_param +start_param: + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + + .global __stack_size + .set __stack_size, stack_size +stack_size: + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + + .global __framekey + .set __framekey, framekey_tbl +framekey_tbl: + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + .word 0 + +_l2cfg: + .word 0 +_tlbmax: + .word 0 + +syscfg_l2_table: + .byte 0x0 /* rev: 0x0xxx: No L2 -> 0k L2 cache */ + .byte 0x2 /* rev: 0x1xxx: 128K L2 -> 128k L2 cache */ + .byte 0x3 /* rev: 0x2xxx: 256K L2 -> 256k L2 cache */ + .byte 0x3 /* rev: 0x3xxx: Not valid at this time */ + .byte 0x4 /* rev: 0x4xxx: 512K L2 -> 512k L2 cache */ + .byte 0x4 /* rev: 0x5xxx: Not valid at this time */ + .byte 0x4 /* rev: 0x6xxx: 768K L2 -> 512k L2 cache */ + .byte 0x4 /* rev: 0x7xxx: Not valid at this time */ + .byte 0x5 /* rev: 0x8xxx: 1024K L2 -> 1024 L2 cache */ + .byte 0x4 /* rev: 0x9xxx: Not valid at this time */ + .byte 0x5 /* rev: 0xAxxx: 1536K L2 -> 1024 L2 cache */ + .byte 0x4 /* rev: 0xBxxx: Not valid at this time */ + .byte 0x4 /* rev: 0xCxxx: Not valid at this time */ + .byte 0x4 /* rev: 0xDxxx: Not valid at this time */ + .byte 0x4 /* rev: 0xExxx: Not valid at this time */ + .byte 0x4 /* rev: 0xFxxx: Not valid at this time */ + + + /* Data used for TLB refill */ + + .p2align 6, 0 + + .global __tlb_lock + .set __tlb_lock, tlb_lock +tlb_lock: + .word 0 + .global 
__tlb_idx + .set __tlb_idx, tlb_idx +tlb_idx: + .word TLB_FIXED_ENTRIES - 1 + + .global _tlb_fixed_entries +_tlb_fixed_entries: + .word TLB_FIXED_ENTRIES diff --git a/tests/tcg/hexagon/system/crt0/hexagon_standalone.h b/tests/tcg/hexagon/system/crt0/hexagon_standalone.h new file mode 100644 index 0000000000000..01ca41349f0f1 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/hexagon_standalone.h @@ -0,0 +1,103 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdint.h> +#include <stdio.h> + +#ifndef _TLB_H +#define _TLB_H + +typedef enum { + SHIFT_4K = 0, + SHIFT_16K, + SHIFT_64K, + SHIFT_256K, + SHIFT_1M, + SHIFT_4M, + SHIFT_16M, + SHIFT_64M, + SHIFT_256M, + SHIFT_1G, +} PageShift; + +typedef enum { + PAGE_4K = 1 << SHIFT_4K, + PAGE_16K = 1 << SHIFT_16K, + PAGE_64K = 1 << SHIFT_64K, + PAGE_256K = 1 << SHIFT_256K, + PAGE_1M = 1 << SHIFT_1M, + PAGE_4M = 1 << SHIFT_4M, + PAGE_16M = 1 << SHIFT_16M, + PAGE_64M = 1 << SHIFT_64M, + PAGE_256M = 1 << SHIFT_256M, + PAGE_1G = 1 << SHIFT_1G, +} PageSize; + + +/* + * TLB entry format: + * + * TLBHI: + * 63 | 62 | 61 | 60:59 | 58 -- 52 | 51 -------- 32 | + * V | G | EP PPNex | ASID | Virtual Page # | + * ------------------------------------------- + * + * V - Valid bit. + * G - Global bit. If set ASID is ignored and the page + * is globally accessible. + * EP - Extra Physical Bit + * PPNex - Extended Physical Page. (V73 and beyond) + * ASID - Address Space Identifier. + * Virtual Page - Virtual Page number. It has a minimum 4K alignment. + * This means the input value is right shifted 12 bits + * and that is what is placed into this field. 
+ * + * TLBLO: + * 31 | 30 | 29 | 28 | 27 -- 24 | 23 --------- 1 | 0 | + * X | W | R | U | C | Physical Page # | S | + * ---------------------------------------------------- + * + * X - Execute Enabled + * W - Write Enabled + * R - Read Enabled + * U - User mode accessible + * C - Cacheability attributes: L1/L2 Cacheable Writeback/thru + * Physical Page - Physical Page # + * S - Page size bit (used together with the Physical Page LSBs) + */ + +typedef union { + struct { + uint64_t S:1; + uint64_t PPN:23; + uint64_t CacheAttr:4; + uint64_t XWRU:4; + uint64_t VirtualPage:20; + uint64_t ASID:7; +#if __HEXAGON_ARCH__ < 73 + uint64_t A0:1; + uint64_t A1:1; +#else + uint64_t PPN_EX:2; +#endif + uint64_t EP:1; + uint64_t VG:2; + }; + uint64_t raw; +} TLBEntry; + + +#define TLB_NOT_FOUND 0x80000000 + +int add_translation_extended(int index, void *va, uint64_t pa, + unsigned int page_size, unsigned int xwru, + unsigned int cccc, unsigned int asid, + unsigned int aa, unsigned int vg); +void add_translation_fixed(int index, void *va, void *pa, int cccc, + int permissions); +void add_translation(void *va, void *pa, int cccc); + +#endif /* _TLB_H */ diff --git a/tests/tcg/hexagon/system/crt0/min_libc.c b/tests/tcg/hexagon/system/crt0/min_libc.c new file mode 100644 index 0000000000000..f44ee49f8f444 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/min_libc.c @@ -0,0 +1,359 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * Small cheat: take size_t, NULL, and other type/symbol definitions from the + * hexagon toolchain. We cannot link with the libc, though, as the actual + * implementation for functions like printf and open are defined for Linux, and + * we are running on "bare metal".
+ */ +#include <stdio.h> +#include <stdint.h> +#include <assert.h> +#include <string.h> + +FILE *const stdout = (FILE *)1; + +void exit(int code) +{ + asm volatile( + "r2 = %0\n" + "stop(r0)\n" + : + : "r"(code) + : "r2"); + __builtin_unreachable(); +} + +/* The assert() macro will use this. */ +void __assert_fail(const char *assertion, const char *file, int line, + const char *function) +{ + printf("ASSERT fail '%s' at file '%s' line %d function %s\n", + assertion, file, line, function); + exit(1); +} + +void *memset(void *b, int c, size_t len) +{ + for (size_t i = 0; i < len; i++) { + ((unsigned char *)b)[i] = (unsigned char)c; + } + return b; +} + +int memcmp(const void *p1, const void *p2, size_t n) +{ + const char *s1 = p1; + const char *s2 = p2; + for ( ; n && (*s1 == *s2); s1++, s2++, n--) { + /* empty */ + } + return n ? *(unsigned char *)s1 - *(unsigned char *)s2 : 0; +} + +int bcmp(const void *s1, const void *s2, size_t n) +{ + return __builtin_bcmp(s1, s2, n); +} + + +#define HEX_SYS_WRITEC 0x03 +#define HEX_SYS_WRITE0 0x04 +#define HEX_SYS_GET_CMDLINE 0x15 + +/* + * Macro flavors: + * - DIRECT_SWI takes up to two args an put them at r1 and r2. + * - SWI takes up to four args and puts them in an array, placing the + * array address at r1. 
+ */ + +static int swi_ret, swi_err, swi_args[4]; +#define DO_SWI(CODE, ARG0, ARG1) \ + do { \ + asm volatile( \ + "r0 = %2\n" \ + "r1 = %3\n" \ + "r2 = %4\n" \ + "trap0(#0)\n" \ + "%0 = r0\n" \ + "%1 = r1\n" \ + : "=r"(swi_ret), "=r"(swi_err) \ + : "r"(CODE), "r"(ARG0), "r"(ARG1) \ + : "r0", "r1", "r2", "memory" \ + ); \ + } while (0) + +#define SWI0(CODE) DO_SWI(CODE, swi_args, 0) +#define SWI1(CODE, ARG0) \ + do { swi_args[0] = (uint32_t)(ARG0); SWI0(CODE); } while (0) +#define SWI2(CODE, ARG0, ARG1) \ + do { swi_args[1] = (uint32_t)(ARG1); SWI1(CODE, ARG0); } while (0) +#define SWI3(CODE, ARG0, ARG1, ARG2) \ + do { swi_args[2] = (uint32_t)(ARG2); SWI2(CODE, ARG0, ARG1); } while (0) +#define SWI4(CODE, ARG0, ARG1, ARG2, ARG3) \ + do { swi_args[3] = (uint32_t)(ARG3); SWI3(CODE, ARG0, ARG1, ARG2); } while (0) + +#define GET_MACRO_5(_1, _2, _3, _4, _5, NAME, ...) NAME +#define SWI(...) \ + ({ GET_MACRO_5(__VA_ARGS__, SWI4, SWI3, SWI2, SWI1, SWI0)(__VA_ARGS__); \ + swi_ret; }) + +#define DIRECT_SWI0(CODE) DO_SWI(CODE, 0, 0) +#define DIRECT_SWI1(CODE, ARG1) DO_SWI(CODE, ARG1, 0) +#define DIRECT_SWI2(CODE, ARG1, ARG2) DO_SWI(CODE, ARG1, ARG2) + +#define GET_MACRO_3(_1, _2, _3, NAME, ...) NAME +#define DIRECT_SWI(...) \ + ({ GET_MACRO_3(__VA_ARGS__, DIRECT_SWI2, DIRECT_SWI1, DIRECT_SWI0)(__VA_ARGS__); \ + swi_ret; }) + +int puts(const char *str) +{ + DIRECT_SWI(HEX_SYS_WRITE0, str); + DIRECT_SWI(HEX_SYS_WRITE0, "\n"); + return 0; +} + +int fputs(const char *str, FILE *f) +{ + assert(f == stdout); /* Only stdout is supported. */ + DIRECT_SWI(HEX_SYS_WRITE0, str); + return 0; +} + +size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *f) +{ + assert(f == stdout); /* Only stdout is supported. 
*/ + for (size_t i = 0; i < size * nitems; i++) { + DIRECT_SWI(HEX_SYS_WRITEC, &ptr[i]); + } + return size * nitems; +} + +int putchar(int c) +{ + DIRECT_SWI(HEX_SYS_WRITEC, &c); + return c; +} + +static char *num_to_s(uint64_t signed_num, uint64_t base) +{ + static char buffer[1024]; + char *bptr = buffer; + uint64_t num; + + if (base == 16) { + num = signed_num; + } else if (base == 10) { + if (signed_num < 0) { + *bptr++ = '-'; + signed_num *= -1; + } + num = signed_num; + } else { + puts("fatal: num_to_s expects base 16 or 10"); + exit(1); + } + + if (!num) { + return "0"; + } + + uint64_t divider = 1; + for (uint64_t n = num; n >= base; n /= base) { + divider *= base; + } + + while (num) { + unsigned int digit = num / divider; + if (digit) { + num %= divider; + divider /= base; + if (digit >= 10) { + *bptr++ = 'a' + (digit - 10); + } else { + *bptr++ = '0' + digit; + } + while (num < divider) { + *bptr++ = '0'; + divider /= base; + } + } else { + divider /= base; + } + } + + *bptr = '\0'; + return buffer; +} + +static int advance_prefix(const char **str_ptr, char *prefix) +{ + const char *str = *str_ptr; + while (*str && *str == *prefix) { + str++; + prefix++; + } + str--; + if (!*prefix) { + *str_ptr = str; + return 1; + } + return 0; +} + +static char *pad0(char *str, int n) +{ + static char buffer[1024]; + int len = strlen(str); + assert(n < 1024); + + int i; + for (i = 0; i < n - len; i++) { + buffer[i] = '0'; + } + strcpy(&buffer[i], str); + return buffer; +} + +/* + * Very simple implementation. No error checking. + * Supported formats are: + * %d, %s, %c, %x, %016llx + */ +int printf(const char *format, ...) +{ + va_list ap; + __builtin_va_start(ap, format); + for (const char *ptr = format; *ptr; ptr++) { + if (*ptr == '%') { + ptr++; + switch (*ptr) { + case 'd': + case 'x': + case 'p': + { + int num = __builtin_va_arg(ap, int); + fputs(num_to_s(num, *ptr == 'd' ? 
10 : 16), stdout); + break; + } + case 's': + fputs(__builtin_va_arg(ap, char *), stdout); + break; + case 'c': + putchar(__builtin_va_arg(ap, int)); + break; + case '%': + putchar('%'); + break; + case '0': + if (advance_prefix(&ptr, "016llx")) { + uint64_t num = __builtin_va_arg(ap, uint64_t); + fputs(pad0(num_to_s(num, 16), 16), stdout); + break; + } + /* else: fallthrough */ + default: + fputs("fatal: unknown printf modifier '", stdout); + putchar(*ptr); + puts("'"); + exit(1); + } + } else { + putchar(*ptr); + } + } + __builtin_va_end(ap); + return 1; +} + +size_t strlen(const char *s) +{ + size_t len = 0; + for ( ; *s; s++) { + len++; + } + return len; +} + +char *strcpy(char *dst, const char *src) +{ + int i; + for (i = 0; src[i]; i++) { + dst[i] = src[i]; + } + dst[i] = '\0'; + return dst; +} + +int strcmp(const char *s1, const char *s2) +{ + for ( ; *s1 && (*s1 == *s2); s1++, s2++) { + /* empty */ + } + return *(unsigned char *)s1 - *(unsigned char *)s2; +} + +char *strrchr(const char *s, int c) +{ + for (int i = strlen(s); i >= 0; i--) { /* include the terminating NUL */ + if (s[i] == (char)c) { + return (char *)&s[i]; + } + } + return NULL; +} + +#define MAX_ARGS 15 +/* + * Very simplistic implementation, using static buffers, and assuming no + * args will contain spaces.
+ */ +static inline char **getcmdline(int *argc) +{ + static char *args[MAX_ARGS] = { NULL }; + static char buf[4096]; /* args[] points into this after return */ + char *c; + int id = 0; + int err = SWI(HEX_SYS_GET_CMDLINE, buf, sizeof(buf)); + + assert(!err); /* keep the semihosting call out of assert() */ + *argc = 1; + for (c = buf; *c; c++) { + if (*c == ' ' && *(c + 1)) { + (*argc)++; + } + } + assert(*argc <= MAX_ARGS); + + if (*argc == 0) { + return args; + } + + args[id++] = buf; + for (c = buf; *c; c++) { + if (*c == ' ') { + *c = '\0'; + if (id < *argc) { + args[id++] = c + 1; + } + } + } + return args; +} + +int main(int argc, char **argv, char **envp); +void _start_main(void) +{ + int argc; + char **argv = getcmdline(&argc); + /* For now, we ignore envp */ + char *envp[] = { NULL }; + exit(main(argc, argv, envp)); + exit(1); +} diff --git a/tests/tcg/hexagon/system/crt0/pte.S b/tests/tcg/hexagon/system/crt0/pte.S new file mode 100644 index 0000000000000..406e453891185 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/pte.S @@ -0,0 +1,80 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + .section .start, "awx", @progbits + .p2align 3 + .subsection 1 +/* This is the translation table */ +/* We make a table of 2^12 entries */ +/* Each entry is a .hword (16 bits) */ +/* The 4 LSBs of each entry are initialized to 0x7 (cached WB, see spec) */ +/* Each entry is initialized to 1:1 Virtual:Physical in the upper 12 bits. */ +/* We use the preprocessor to avoid copy-paste errors and to avoid */ +/* an 8192-line addition to the file.
*/ + + .set __UPTE_START, UPTE_START + .weak __UPTE_START, UPTE_START +UPTE_START: +#define TLBENTRY(X) .hword ((((X) >> 16) & (0xfff0)) | 0x7); + +#define TLB_1M(X) TLBENTRY ((X) << 20) +#define TLB_16M(X) \ + TLB_1M (((X) << 4) + 0) \ + TLB_1M (((X) << 4) + 1) \ + TLB_1M (((X) << 4) + 2) \ + TLB_1M (((X) << 4) + 3) \ + TLB_1M (((X) << 4) + 4) \ + TLB_1M (((X) << 4) + 5) \ + TLB_1M (((X) << 4) + 6) \ + TLB_1M (((X) << 4) + 7) \ + TLB_1M (((X) << 4) + 8) \ + TLB_1M (((X) << 4) + 9) \ + TLB_1M (((X) << 4) + 10) \ + TLB_1M (((X) << 4) + 11) \ + TLB_1M (((X) << 4) + 12) \ + TLB_1M (((X) << 4) + 13) \ + TLB_1M (((X) << 4) + 14) \ + TLB_1M (((X) << 4) + 15) + +#define TLB_256M(X) \ + TLB_16M (((X) << 4) + 0) \ + TLB_16M (((X) << 4) + 1) \ + TLB_16M (((X) << 4) + 2) \ + TLB_16M (((X) << 4) + 3) \ + TLB_16M (((X) << 4) + 4) \ + TLB_16M (((X) << 4) + 5) \ + TLB_16M (((X) << 4) + 6) \ + TLB_16M (((X) << 4) + 7) \ + TLB_16M (((X) << 4) + 8) \ + TLB_16M (((X) << 4) + 9) \ + TLB_16M (((X) << 4) + 10) \ + TLB_16M (((X) << 4) + 11) \ + TLB_16M (((X) << 4) + 12) \ + TLB_16M (((X) << 4) + 13) \ + TLB_16M (((X) << 4) + 14) \ + TLB_16M (((X) << 4) + 15) + +#define TLB_4G \ + TLB_256M (0) \ + TLB_256M (1) \ + TLB_256M (2) \ + TLB_256M (3) \ + TLB_256M (4) \ + TLB_256M (5) \ + TLB_256M (6) \ + TLB_256M (7) \ + TLB_256M (8) \ + TLB_256M (9) \ + TLB_256M (10) \ + TLB_256M (11) \ + TLB_256M (12) \ + TLB_256M (13) \ + TLB_256M (14) \ + TLB_256M (15) + +TLB_4G + + .size UPTE_START, . - UPTE_START diff --git a/tests/tcg/hexagon/system/crt0/tlb.c b/tests/tcg/hexagon/system/crt0/tlb.c new file mode 100644 index 0000000000000..00e07761dbe90 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/tlb.c @@ -0,0 +1,198 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdint.h> +#include "hexagon_standalone.h" + +/* + * The following 2 functions use global addressing mode + * to avoid GP relative overflows. + */ +static inline uint32_t get_tlb_fixed_entries(void) +{ + uint32_t *addr; + asm volatile ("%0=##_tlb_fixed_entries\n\t" + : "=r"(addr)); + return *addr; +} +static inline uint32_t *get_UPTE_START(void) +{ + uint32_t addr; + asm volatile ("%0=##__UPTE_START\n\t" + : "=r"(addr)); + return (uint32_t *)addr; +} + +static inline uint32_t get_ssr(void) +{ + uint32_t reg; + asm volatile ("%0=ssr\n\t" + : "=r"(reg)); + return reg; +} + + +static inline int64_t read_tlb_entry(int index) +{ + uint64_t reg; + asm volatile ("%[reg]=tlbr(%[index])" + : [reg] "=r" (reg) + : [index] "r" (index)); + asm volatile ("isync"); + return reg; +} + + +static inline void write_tlb_entry(TLBEntry tlb, int index) +{ + uint64_t entry = tlb.raw; + asm volatile ("tlblock\n" + "tlbw(%[entry], %[index])\n" + "isync\n" + "tlbunlock\n" + : + : [entry] "r" (entry), [index] "r" (index)); +} + +static inline int32_t tlb_probe(uint32_t va) +{ + uint32_t VirtualPageNumber = va >> 12; + uint32_t ASID = (get_ssr() >> 8) & 0x7f; + uint32_t probe = ((ASID << 20) | VirtualPageNumber) & 0x7ffffff; + uint32_t result = 0; + asm volatile ("%[result]=tlbp(%[probe])" + : [result] "=r" (result) + : [probe] "r" (probe)); + + return result; +} + + +static inline void tlb_invalidate(uint32_t va) +{ + int entry = tlb_probe(va); + if (entry == TLB_NOT_FOUND) { + return; + } + + TLBEntry tlb; + tlb.raw = read_tlb_entry(entry); + tlb.raw = tlb.raw & ~(1ull << 63); /* Clear the V bit. 
*/ + write_tlb_entry(tlb, entry); +} + + +static inline TLBEntry basic_entry(uint32_t va, uint64_t pa, PageSize pagesize) +{ + TLBEntry T; + uint64_t PPN; + T.raw = 0ull; + T.VirtualPage = va >> 12; /* 63-51 */ +#if __HEXAGON_ARCH__ > 72 + T.PPN_EX = (pa & (3ull << 36)) >> 36; +#endif + T.EP = (pa & (1ull << 35)) >> 35; + PPN = pa >> 12ull; + PPN = (PPN << 1ull) | pagesize; + if (pagesize == 1) { + T.S = 1; + } + T.raw |= PPN; + return T; +} +/* + * function: mkentry + * description: + * - Given just a Physical Address (pa) and a Virtual Address (va) + * create a default entry. + * - A user wanting to change the cache attributes or permissions + * can do so prior to writing the entry. + */ +static TLBEntry mkentry(uint32_t va, uint64_t pa, PageSize pagesize) +{ + + /* Make an entry and set some reasonable defaults */ + TLBEntry T = basic_entry(va, pa, pagesize); + + T.CacheAttr = 0x7; + T.XWRU = 0x6; + T.VG = 0x3; + return T; +} + +int add_translation_extended(int index, void *va, uint64_t pa, + unsigned int page_size, unsigned int xwru, + unsigned int cccc, unsigned int asid, + unsigned int aa, unsigned int vg) +{ + uint32_t num_entries = get_tlb_fixed_entries(); + + if ((index < 1) || ((uint32_t)index >= num_entries)) { + return -1; + } + + tlb_invalidate((uint32_t)va); + TLBEntry T; + T = basic_entry((uint32_t)va, pa, page_size); + T.ASID = ((uint64_t)asid & 0x7f); + T.CacheAttr = ((uint64_t)cccc & 0xf); + T.XWRU = ((uint64_t)xwru & 0xf); + T.VG = ((uint64_t)vg & 0x3); +#if __HEXAGON_ARCH__ < 73 + T.raw |= ((uint64_t)aa & 0x3) << 59ull; +#endif + write_tlb_entry(T, index); + + return 0; +} + + +void add_translation_fixed(int index, void *va, void *pa, int cccc, + int permissions) +{ + tlb_invalidate((uint32_t)va); + add_translation_extended(index, va, (uint64_t)(uintptr_t)pa, PAGE_1M, + permissions, cccc, 0, 0, 3); +} + +/* + * The following deals with the PTE software structure. The actual entry will + * not be placed into the TLB until an address fault occurs. 
+ */ + +typedef union { + struct { + uint16_t cache:4; + uint16_t pa:12; + }; + uint16_t PTE_raw; +} SMALL_PTE; + +static SMALL_PTE *findPTEAddr(uint32_t va) +{ + uint32_t *PTE = get_UPTE_START(); + int index = va >> 20; + return (SMALL_PTE *)PTE + index; +} +static SMALL_PTE findPTEValue(uint32_t va) +{ + SMALL_PTE *A = findPTEAddr(va); + return *A; +} + +/* This function adds a translation into the mapping table, see above */ +/* Because we use 1MB pages, we only need to translate 12 bits. */ +/* We keep those 12 bits plus 4 bits (where we keep the C field, */ +/* see the System-level architecture spec on TLB entries) in */ +/* a 16-bit entry in the table. */ +/* We index into the table using the upper 12 bits. */ +/* As a note, 2 bytes x 2^12 entries == 8KB table */ +void add_translation(void *va, void *pa, int cccc) +{ + SMALL_PTE *S = findPTEAddr((uint32_t)va); + S->pa = (uint32_t)pa >> 20; + S->cache = cccc; +} diff --git a/tests/tcg/hexagon/system/semihost.c b/tests/tcg/hexagon/system/semihost.c new file mode 100644 index 0000000000000..7a0fa0cb73ff2 --- /dev/null +++ b/tests/tcg/hexagon/system/semihost.c @@ -0,0 +1,297 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <errno.h> +#include <unistd.h> +#include <dirent.h> +#include "strutils.h" + +/* Defines in order of testing */ + +/* env/CLI-related */ +#define HEX_SYS_GET_CMDLINE 0x15 +#define HEX_SYS_GETCWD 0x104 + +/* File manipulation */ +#define HEX_SYS_TMPNAM 0x0d +#define HEX_SYS_OPEN 0x01 +#define HEX_SYS_ACCESS 0x105 +#define HEX_SYS_ISTTY 0x09 +#define HEX_SYS_WRITE 0x05 +#define HEX_SYS_SEEK 0x0a +#define HEX_SYS_READ 0x06 +#define HEX_SYS_FTELL 0x100 +#define HEX_SYS_FSTAT 0x101 +#define HEX_SYS_FTRUNC 0x186 +#define HEX_SYS_FLEN 0x0c +#define HEX_SYS_CLOSE 0x02 +#define HEX_SYS_ERRNO 0x13 +#define HEX_SYS_RENAME 0x0f +#define HEX_SYS_STAT 0x103 +#define HEX_SYS_REMOVE 0x0e + +/* Time */ +#define HEX_SYS_CLOCK 0x10 +#define HEX_SYS_TIME 0x11 + +/* dirent */ +#define HEX_SYS_OPENDIR 0x180 +#define HEX_SYS_CLOSEDIR 0x181 +#define HEX_SYS_READDIR 0x182 + +/* STDOUT */ +#define HEX_SYS_WRITEC 0x03 +#define HEX_SYS_WRITE0 0x04 +#define HEX_SYS_WRITECREG 0x43 + +static uint32_t ret, err, args[4]; + +/* + * Macro flavors: + * - DIRECT_SWI takes up to two args and puts them in r1 and r2. + * - SWI takes up to four args and puts them in an array, placing the + * array address at r1. 
+ */ + +#define DO_SWI(CODE, ARG0, ARG1) \ + do { \ + asm volatile( \ + "r0 = %2\n" \ + "r1 = %3\n" \ + "r2 = %4\n" \ + "trap0(#0)\n" \ + "%0 = r0\n" \ + "%1 = r1\n" \ + : "=r"(ret), "=r"(err) \ + : "r"(CODE), "r"(ARG0), "r"(ARG1) \ + : "r0", "r1", "r2", "memory" \ + ); \ + } while (0) + +#define SWI0(CODE) DO_SWI(CODE, args, 0) +#define SWI1(CODE, ARG0) \ + do { args[0] = (uint32_t)(ARG0); SWI0(CODE); } while (0) +#define SWI2(CODE, ARG0, ARG1) \ + do { args[1] = (uint32_t)(ARG1); SWI1(CODE, ARG0); } while (0) +#define SWI3(CODE, ARG0, ARG1, ARG2) \ + do { args[2] = (uint32_t)(ARG2); SWI2(CODE, ARG0, ARG1); } while (0) +#define SWI4(CODE, ARG0, ARG1, ARG2, ARG3) \ + do { args[3] = (uint32_t)(ARG3); SWI3(CODE, ARG0, ARG1, ARG2); } while (0) + +#define GET_MACRO_5(_1, _2, _3, _4, _5, NAME, ...) NAME +#define SWI(...) \ + GET_MACRO_5(__VA_ARGS__, SWI4, SWI3, SWI2, SWI1, SWI0)(__VA_ARGS__) + +#define DIRECT_SWI0(CODE) DO_SWI(CODE, 0, 0) +#define DIRECT_SWI1(CODE, ARG1) DO_SWI(CODE, ARG1, 0) +#define DIRECT_SWI2(CODE, ARG1, ARG2) DO_SWI(CODE, ARG1, ARG2) + +#define GET_MACRO_3(_1, _2, _3, NAME, ...) NAME +#define DIRECT_SWI(...) \ + GET_MACRO_3(__VA_ARGS__, DIRECT_SWI2, DIRECT_SWI1, DIRECT_SWI0)(__VA_ARGS__) + +#define is_path_sep(C) ((C) == '/' || (C) == '\\') + +static int path_ends_with(const char *str, const char *suffix) +{ + size_t i = strlen(str); + size_t j = strlen(suffix); + while (i > 0 && j > 0) { + /* is_path_sep handles the semihosting-on-Windows case */ + if (str[i - 1] != suffix[j - 1] && + !(is_path_sep(str[i - 1]) && is_path_sep(suffix[j - 1]))) { + return 0; + } + i--; + j--; + } + return j == 0; /* match only if the whole suffix was consumed */ +} + +/* + * This must match the caller's definition, it would be in the + * caller's angel.h or equivalent header. 
+ */ +struct __SYS_STAT { + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint64_t rdev; + uint32_t size; + uint32_t __pad1; + uint32_t atime; + uint32_t mtime; + uint32_t ctime; + uint32_t __pad2; +}; + +int main(int argc, char **argv) +{ + /* GET_CMDLINE */ + char argv_concat[1024]; + char *cursor = argv_concat; + for (int i = 0; i < argc; i++) { + strcpy(cursor, argv[i]); + cursor += strlen(argv[i]); + *cursor = ' '; + cursor++; + } + *(cursor - 1) = '\0'; + char buf[4096]; + SWI(HEX_SYS_GET_CMDLINE, buf, sizeof(buf)); + assert(!ret && !strcmp(buf, argv_concat)); + + /* GETCWD */ + const char *expected_cwd = "tests/tcg/hexagon-softmmu"; + SWI(HEX_SYS_GETCWD, buf, sizeof(buf)); + assert(ret && path_ends_with(buf, expected_cwd)); + + /* TMPNAM */ + char fname[4096]; + SWI(HEX_SYS_TMPNAM, fname, 0, sizeof(fname)); + assert(!ret); + + /* OPEN */ + /* 13 is O_RDWR | O_CREAT | O_EXCL */ + SWI(HEX_SYS_OPEN, fname, 13, strlen(fname)); + int fd = (int)ret; + assert(fd >= 0); + + /* ACCESS */ + SWI(HEX_SYS_ACCESS, fname, R_OK); + assert(!ret); + /* ACCESS with error */ + SWI(HEX_SYS_ACCESS, "non-existent-semihost-file", R_OK); + assert(ret); + assert(err == ENOENT); + + /* ISTTY */ + SWI(HEX_SYS_ISTTY, fd); + assert(!ret); + + /* WRITE */ + char *str = "hello"; + SWI(HEX_SYS_WRITE, fd, str, strlen(str)); + assert(!ret); + + /* SEEK */ + SWI(HEX_SYS_SEEK, fd, 0); + assert(!ret); + + /* READ */ + int n = strlen(str); + SWI(HEX_SYS_READ, fd, buf, n); + buf[n] = '\0'; + assert(!ret && !strcmp(str, buf)); + + /* FTELL */ + SWI(HEX_SYS_FTELL, fd); + assert(ret == strlen(str)); + + /* FSTAT */ + struct __SYS_STAT st; + SWI(HEX_SYS_FSTAT, fd, &st); + assert(!ret); + assert(st.atime && st.ctime && st.mtime); + assert(st.size == strlen(str)); + assert((st.mode & S_IFMT) == S_IFREG); + + /* FTRUNC */ + SWI(HEX_SYS_FTRUNC, fd, 1, 0); + assert(!ret); + + /* FLEN */ + SWI(HEX_SYS_FLEN, fd); + assert(ret == 1); + + /* CLOSE */ + SWI(HEX_SYS_CLOSE, fd); + 
assert(!ret); + + /* CLOSE w/ error && ERRNO */ + SWI(HEX_SYS_CLOSE, fd); + assert(ret); + assert(err == EBADF); + SWI(HEX_SYS_ERRNO); + assert(ret == EBADF); + + /* RENAME */ + char ogfname[4096]; + int len = strlen(fname); + strcpy(ogfname, fname); + fname[len - 1] = (fname[len - 1] == 'a' ? 'b' : 'a'); + SWI(HEX_SYS_RENAME, ogfname, len, fname, len); + assert(!ret); + + /* STAT */ + SWI(HEX_SYS_STAT, fname, &st); + assert(!ret); + assert(st.atime && st.ctime && st.mtime); + assert(st.size == 1); + assert((st.mode & S_IFMT) == S_IFREG); + + /* REMOVE */ + SWI(HEX_SYS_REMOVE, fname, strlen(fname)); + assert(!ret); + + /* STAT w/ error */ + SWI(HEX_SYS_STAT, fname, &st); + assert(ret); + assert(err == ENOENT); + + /* TIME && CLOCK */ + SWI(HEX_SYS_TIME); + assert(ret); + SWI(HEX_SYS_CLOCK); + assert(ret); + + /* OPENDIR */ + char *dname = "./_semihost_dir"; + DIRECT_SWI(HEX_SYS_OPENDIR, dname); + assert(ret); + int dir_index = ret; + + /* READDIR */ + char *expected_files[4] = { ".", "..", "fileA", "fileB" }; + char found_files_buffer[4][256]; + char *found_files[4]; + for (int i = 0; 1; i++) { + struct __attribute__((__packed__)) { int32_t _; char d_name[256]; } dirent; + DIRECT_SWI(HEX_SYS_READDIR, dir_index, &dirent); + if (!ret) { + break; + } + assert(i < 4); + found_files[i] = found_files_buffer[i]; + strcpy(found_files[i], dirent.d_name); + } + + sort_str_arr(found_files, 4); + for (int i = 0; i < 4; i++) { + assert(!strcmp(found_files[i], expected_files[i])); + } + + /* CLOSEDIR */ + DIRECT_SWI(HEX_SYS_CLOSEDIR, dir_index); + assert(!ret); + + /* WRITEC, WRITECREG, WRITE0 */ + /* We use DO_SWI directly here to bypass the args array */ + char *pass = "PASS\n"; + DIRECT_SWI(HEX_SYS_WRITEC, &pass[0]); + DIRECT_SWI(HEX_SYS_WRITECREG, pass[1]); + DIRECT_SWI(HEX_SYS_WRITE0, &pass[2]); + + return 0; +} diff --git a/tests/tcg/hexagon/system/strutils.h b/tests/tcg/hexagon/system/strutils.h new file mode 100644 index 0000000000000..14f4a290b8173 --- /dev/null +++ 
b/tests/tcg/hexagon/system/strutils.h @@ -0,0 +1,25 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef STRUTILS_H +#define STRUTILS_H + +#include <string.h> + +static inline void sort_str_arr(char **arr, size_t n) +{ + for (size_t i = 0; i + 1 < n; i++) { /* i + 1 < n: no wrap when n == 0 */ + for (size_t j = 0; j + i + 1 < n; j++) { + if (strcmp(arr[j], arr[j + 1]) > 0) { + char *tmp = arr[j]; + arr[j] = arr[j + 1]; + arr[j + 1] = tmp; + } + } + } +} + +#endif From 6200a46f4c8f4eeb7fcd06194df56d30cb8406cf Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 08:34:33 -0800 Subject: [PATCH 105/126] target/hexagon: fill in the 'rev' system register This register should store the revision identifier for the running Hexagon arch cpu. Let's save the cpu revision and fill the register with it. Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu.c | 22 ++++++++++++++++------ target/hexagon/cpu.h | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 438e160504275..8fac0c28fff9d 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -29,6 +29,7 @@ #include "cpu_helper.h" #include "max.h" #include "hex_mmu.h" +#include "hw/hexagon/hexagon.h" #ifndef CONFIG_USER_ONLY #include "macros.h" @@ -38,12 +39,19 @@ #include "hexswi.h" #endif -static void hexagon_v66_cpu_init(Object *obj) { } -static void hexagon_v67_cpu_init(Object *obj) { } -static void hexagon_v68_cpu_init(Object *obj) { } -static void hexagon_v69_cpu_init(Object *obj) { } -static void hexagon_v71_cpu_init(Object *obj) { } -static void hexagon_v73_cpu_init(Object *obj) { } +#define DEFINE_STD_CPU_INIT_FUNC(REV) \ + static void hexagon_##REV##_cpu_init(Object *obj) \ + { \ + HexagonCPU *cpu = HEXAGON_CPU(obj); \ + cpu->rev_reg = REV##_rev; \ + } + +DEFINE_STD_CPU_INIT_FUNC(v66) 
+DEFINE_STD_CPU_INIT_FUNC(v67) +DEFINE_STD_CPU_INIT_FUNC(v68) +DEFINE_STD_CPU_INIT_FUNC(v69) +DEFINE_STD_CPU_INIT_FUNC(v71) +DEFINE_STD_CPU_INIT_FUNC(v73) static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) { @@ -72,6 +80,7 @@ static const Property hexagon_cpu_properties[] = { DEFINE_PROP_UINT64("config-table-addr", HexagonCPU, config_table_addr, 0xffffffffULL), #endif + DEFINE_PROP_UINT32("dsp-rev", HexagonCPU, rev_reg, 0), DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong), @@ -391,6 +400,7 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); if (cs->cpu_index == 0) { + arch_set_system_reg(env, HEX_SREG_REV, cpu->rev_reg); arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); *(env->g_pcycle_base) = 0; } diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index ea618802a9290..8b334068e295b 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -188,6 +188,7 @@ struct ArchCPU { CPUHexagonState env; + uint32_t rev_reg; bool lldb_compat; target_ulong lldb_stack_adjust; bool short_circuit; From cef8e4c9021b30d8f7db2ed5494cb4f12299d53a Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 08:49:36 -0800 Subject: [PATCH 106/126] target/hexagon: print full name of control regs Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 8fac0c28fff9d..1fa560717364c 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -93,9 +93,10 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", "sa0", "lc0", "sa1", "lc1", 
"p3_0", "c5", "m0", "m1", - "usr", "pc", "ugp", "gp", "cs0", "cs1", "c14", "c15", - "c16", "c17", "c18", "c19", "pkt_cnt", "insn_cnt", "hvx_cnt", "c23", - "c24", "c25", "c26", "c27", "c28", "c29", "c30", "c31", + "usr", "pc", "ugp", "gp", "cs0", "cs1", "upcyclelo", "upcyclehi", + "framelimit", "framekey", "pktcountlo", "pktcounthi", "upmucnt0", + "upmucnt1", "upmucnt2", "upmucnt3", "upmucnt4", "upmucnt5", "upmucnt6", + "upmucnt7", "c28", "c29", "utimerlo", "utimerhi", }; #ifndef CONFIG_USER_ONLY From 0e57d995af6536702ce989b6d8808d844b252af3 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 08:50:51 -0800 Subject: [PATCH 107/126] target/hexagon: fix system register names with -d in_asm Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/printinsn.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/target/hexagon/printinsn.c b/target/hexagon/printinsn.c index 4865cdd133b5b..f780092586cf6 100644 --- a/target/hexagon/printinsn.c +++ b/target/hexagon/printinsn.c @@ -24,16 +24,17 @@ static const char *sreg2str(unsigned int reg) { - if (reg < TOTAL_PER_THREAD_REGS) { - return hexagon_regnames[reg]; - } else { - return "???"; +#ifndef CONFIG_USER_ONLY + if (reg < NUM_SREGS) { + return hexagon_sregnames[reg]; } +#endif + return "???"; } static const char *creg2str(unsigned int reg) { - return sreg2str(reg + HEX_REG_SA0); + return hexagon_regnames[reg + HEX_REG_SA0]; } static void snprintinsn(GString *buf, Insn *insn) From 0f1e32bcf9595aa3a48468293dbdf60f526edbd9 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 08:54:31 -0800 Subject: [PATCH 108/126] target/hexagon: reset registers on cpu_reset Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- target/hexagon/cpu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index 
1fa560717364c..f90e8f726a887 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -405,6 +405,15 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); *(env->g_pcycle_base) = 0; } + + memset(env->gpr, 0, sizeof(target_ulong) * TOTAL_PER_THREAD_REGS); + memset(env->pred, 0, sizeof(target_ulong) * NUM_PREGS); + memset(env->VRegs, 0, sizeof(MMVector) * NUM_VREGS); + memset(env->QRegs, 0, sizeof(MMQReg) * NUM_QREGS); + memset(env->vstore_pending, 0, sizeof(target_ulong) * VSTORES_MAX); + env->t_cycle_count = 0; + env->vtcm_pending = false; + mmu_reset(env); arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); hexagon_cpu_soft_reset(env); From 17a3555eac6cef586655e7aef1754a118c71b294 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 09:14:00 -0800 Subject: [PATCH 109/126] tests/tcg/hexagon: add MMU tests Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- tests/tcg/hexagon/Makefile.softmmu-target | 25 +- tests/tcg/hexagon/system/mmu.h | 718 ++++++++++++++++++++ tests/tcg/hexagon/system/mmu_asids.c | 80 +++ tests/tcg/hexagon/system/mmu_overlap.c | 65 ++ tests/tcg/hexagon/system/reg_fields_def.h | 87 +++ tests/tcg/hexagon/system/tlb-miss-tlblock.S | 156 +++++ 6 files changed, 1128 insertions(+), 3 deletions(-) create mode 100644 tests/tcg/hexagon/system/mmu.h create mode 100644 tests/tcg/hexagon/system/mmu_asids.c create mode 100644 tests/tcg/hexagon/system/mmu_overlap.c create mode 100644 tests/tcg/hexagon/system/reg_fields_def.h create mode 100644 tests/tcg/hexagon/system/tlb-miss-tlblock.S diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target index f965f4f4fac6d..7e561efd25825 100644 --- a/tests/tcg/hexagon/Makefile.softmmu-target +++ b/tests/tcg/hexagon/Makefile.softmmu-target @@ -35,11 +35,18 @@ tlb.o: crt0/tlb.c CRT0_OBJS=crt0.o crt0_standalone.o pte.o min_libc.o tlb.o 
-TESTS += \ +TESTS_BUILT_WITH_DEFAULT_RULES = \ semihost \ + mmu_overlap \ + mmu_asids \ + $() + +TESTS += \ + $(TESTS_BUILT_WITH_DEFAULT_RULES) \ + tlb-miss-tlblock \ $() -$(TESTS): $(CRT0_OBJS) +$(TESTS_BUILT_WITH_DEFAULT_RULES): $(CRT0_OBJS) # Build and link the tests echo-and-run = echo $(1) && $(1) @@ -50,7 +57,7 @@ endef $(CRT0_OBJS): $(call build_fn,$<,$@) -$(TESTS): +$(TESTS_BUILT_WITH_DEFAULT_RULES): $(call build_fn,$^,$@,LINK) %.o: %.S @@ -58,8 +65,20 @@ $(TESTS): %.o: %.c $(call build_fn,$<,$@) +mmu.h: ../hex_test.h + semihost.o: semihost.c strutils.h semihost: semihost.o +mmu_overlap.o: mmu_overlap.c mmu.h +mmu_overlap: mmu_overlap.o +mmu_asids.o: mmu_asids.c mmu.h +mmu_asids: mmu_asids.o + +############# Custom build options + +# We don't want to link this one with crt0 files +tlb-miss-tlblock: tlb-miss-tlblock.o + $(CC) $(CFLAGS) $< -o $@ -nostartfiles -Wl,-Ttext,0x9b800000 -Wl,-entry,0x9b800000 ############# Custom test rules diff --git a/tests/tcg/hexagon/system/mmu.h b/tests/tcg/hexagon/system/mmu.h new file mode 100644 index 0000000000000..0856c94ab5ddb --- /dev/null +++ b/tests/tcg/hexagon/system/mmu.h @@ -0,0 +1,718 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef MMU_H +#define MMU_H +#include <assert.h> +#include <string.h> +#include <stdint.h> +#include "crt0/hexagon_standalone.h" + +/* + * Helpers for MMU tests + */ + +#define TARGET_PAGE_BITS 12 +#ifndef TLB_NOT_FOUND +#define TLB_NOT_FOUND (1 << 31) +#endif + +static inline uint32_t page_start(uint32_t addr, uint32_t page_size_bits) +{ + uint32_t page_size = 1 << page_size_bits; + uint32_t page_align = ~(page_size - 1); + return addr & page_align; +} + +/* + * The Hexagon standalone runtime leaves TLB entries 1-5 reserved for + * user-defined entries. 
We'll set them up to map virtual addresses at + * 1MB offsets above the actual physical address + * PA == VA - (entry_num * 1MB) + * + * We'll define some macros/functions to help with the manipulation + */ + +#define ONE_MB (1 << 20) +#define TWO_MB (2 * ONE_MB) +#define THREE_MB (3 * ONE_MB) +#define FOUR_MB (4 * ONE_MB) +#define FIVE_MB (5 * ONE_MB) + +#define ONE_MB_ENTRY 1 +#define TWO_MB_ENTRY 2 +#define THREE_MB_ENTRY 3 +#define FOUR_MB_ENTRY 4 +#define FIVE_MB_ENTRY 5 + +static inline uint32_t tlb_entry_num(uint32_t va) +{ + return va >> 20; +} + +#define fZXTN(N, M, VAL) ((VAL) & ((1LL << (N)) - 1)) +#define fEXTRACTU_BITS(INREG, WIDTH, OFFSET) \ + (fZXTN(WIDTH, 32, (INREG >> OFFSET))) + +#define fINSERT_BITS(REG, WIDTH, OFFSET, INVAL) \ + do { \ + REG = ((REG) & ~(((1LL << (WIDTH)) - 1) << (OFFSET))) | \ + (((INVAL) & ((1LL << (WIDTH)) - 1)) << (OFFSET)); \ + } while (0) + +#define GET_FIELD(ENTRY, FIELD) \ + fEXTRACTU_BITS(ENTRY, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset) +#define SET_FIELD(ENTRY, FIELD, VAL) \ + fINSERT_BITS(ENTRY, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)) + +typedef struct { + int offset; + int width; +} reg_field_t; + +enum reg_fields_enum { +#define DEF_REG_FIELD(TAG, NAME, START, WIDTH, DESCRIPTION) \ + TAG, +#include "reg_fields_def.h" + NUM_REG_FIELDS +#undef DEF_REG_FIELD +}; + +static const reg_field_t reg_field_info[] = { +#define DEF_REG_FIELD(TAG, NAME, START, WIDTH, DESCRIPTION) \ + { START, WIDTH }, + +#include "reg_fields_def.h" + + { 0, 0 } +#undef DEF_REG_FIELD +}; + +/* + * PPD (physical page descriptor) is formed by putting the PTE_PA35 field + * in the MSB of the PPD + */ +#define GET_PPD(ENTRY) \ + ((GET_FIELD((ENTRY), PTE_PPD) | \ + (GET_FIELD((ENTRY), PTE_PA35) << reg_field_info[PTE_PPD].width))) + +#define NUM_PGSIZE_TYPES (SHIFT_1G + 1) + +static const char *pgsize_str(PageSize pgsize) +{ + static const char *size_str[NUM_PGSIZE_TYPES] = { + "4K", + "16K", + "64K", 
+ "256K", + "1M", + "4M", + "16M", + "64M", + "256M", + "1G" + }; + assert(pgsize); + return size_str[__builtin_ctz(pgsize)]; +} + +static const uint64_t encmask_2_mask[] = { + 0x0fffLL, /* 4k, 0000 */ + 0x3fffLL, /* 16k, 0001 */ + 0xffffLL, /* 64k, 0010 */ + 0x3ffffLL, /* 256k, 0011 */ + 0xfffffLL, /* 1m, 0100 */ + 0x3fffffLL, /* 4m, 0101 */ + 0xffffffLL, /* 16M, 0110 */ + 0xffffffffLL, /* RSVD, 0111 */ +}; + +static inline int hex_tlb_pgsize(uint64_t entry) +{ + assert(entry != 0); + int size = __builtin_ctzll(entry); + assert(size < NUM_PGSIZE_TYPES); + return size; +} + +static inline uint32_t hex_tlb_page_size(uint64_t entry) +{ + return 1 << (TARGET_PAGE_BITS + 2 * hex_tlb_pgsize(entry)); +} + +static inline uint64_t hex_tlb_phys_page_num(uint64_t entry) +{ + uint32_t ppd = GET_PPD(entry); + return ppd >> 1; +} + +static inline uint64_t hex_tlb_phys_addr(uint64_t entry) +{ + uint64_t pagemask = encmask_2_mask[hex_tlb_pgsize(entry)]; + uint64_t pagenum = hex_tlb_phys_page_num(entry); + uint64_t PA = (pagenum << TARGET_PAGE_BITS) & (~pagemask); + return PA; +} + +static inline uint64_t hex_tlb_virt_addr(uint64_t entry) +{ + return GET_FIELD(entry, PTE_VPN) << TARGET_PAGE_BITS; +} + +static inline uint64_t create_mmu_entry(uint8_t G, uint8_t A0, uint8_t A1, + uint8_t ASID, uint32_t VA, + uint8_t X, uint8_t W, uint8_t R, + uint8_t U, uint8_t C, uint64_t PA, + PageSize SZ) +{ + uint64_t entry = 0; + SET_FIELD(entry, PTE_V, 1); + SET_FIELD(entry, PTE_G, G); + SET_FIELD(entry, PTE_ATR0, A0); + SET_FIELD(entry, PTE_ATR1, A1); + SET_FIELD(entry, PTE_ASID, ASID); + SET_FIELD(entry, PTE_VPN, VA >> TARGET_PAGE_BITS); + SET_FIELD(entry, PTE_X, X); + SET_FIELD(entry, PTE_W, W); + SET_FIELD(entry, PTE_R, R); + SET_FIELD(entry, PTE_U, U); + SET_FIELD(entry, PTE_C, C); + SET_FIELD(entry, PTE_PA35, (PA >> 35) & 1); /* extra-physical bit = PA[35] */ + SET_FIELD(entry, PTE_PPD, ((PA >> (TARGET_PAGE_BITS - 1)))); + entry |= SZ; + return entry; +} + +static inline uint64_t tlbr(uint32_t i) +{ + 
uint64_t ret; + asm volatile ("%0 = tlbr(%1)\n\t" : "=r"(ret) : "r"(i)); + return ret; +} + +static inline uint32_t ctlbw(uint64_t entry, uint32_t idx) +{ + uint32_t ret; + asm volatile ("%0 = ctlbw(%1, %2)\n\t" : "=r"(ret) : "r"(entry), "r"(idx)); + return ret; +} + +static inline uint32_t tlbp(uint32_t asid, uint32_t VA) +{ + uint32_t x = ((asid & 0x7f) << 20) | ((VA >> 12) & 0xfffff); + uint32_t ret; + asm volatile ("%0 = tlbp(%1)\n\t" : "=r"(ret) : "r"(x)); + return ret; +} + +static inline void tlbw(uint64_t entry, uint32_t idx) +{ + asm volatile ("tlbw(%0, %1)\n\t" :: "r"(entry), "r"(idx)); +} + +static inline uint32_t tlboc(uint64_t entry) +{ + uint32_t ret; + asm volatile ("%0 = tlboc(%1)\n\t" : "=r"(ret) : "r"(entry)); + return ret; +} + +void tlbinvasid(uint32_t entry_hi) +{ + asm volatile ("tlbinvasid(%0)\n\t" :: "r"(entry_hi)); +} + +static inline void enter_user_mode(void) +{ + asm volatile ("r0 = ssr\n\t" + "r0 = clrbit(r0, #17) // EX\n\t" + "r0 = setbit(r0, #16) // UM\n\t" + "r0 = clrbit(r0, #19) // GM\n\t" + "ssr = r0\n\t" : : : "r0"); +} + +static inline void enter_kernel_mode(void) +{ + asm volatile ("r0 = ssr\n\t" + "r0 = clrbit(r0, #17) // EX\n\t" + "r0 = clrbit(r0, #16) // UM\n\t" + "r0 = clrbit(r0, #19) // GM\n\t" + "ssr = r0\n\t" : : : "r0"); +} + +static inline uint32_t *getevb() +{ + uint32_t reg; + asm volatile ("%0 = evb\n\t" : "=r"(reg)); + return (uint32_t *)reg; +} + +static inline void setevb(void *new_evb) +{ + asm volatile("evb = %0\n\t" : : "r"(new_evb)); +} + +static inline uint32_t getbadva() +{ + uint32_t badva; + asm volatile ("%0 = badva\n\t" : "=r"(badva)); + return badva; +} + +static void inc_elr(uint32_t inc) +{ + + asm volatile ("r1 = %0\n\t" + "r2 = elr\n\t" + "r1 = add(r2, r1)\n\t" + "elr = r1\n\t" + : : "r"(inc) : "r1", "r2"); +} + +static inline void do_coredump(void) +{ + asm volatile("r0 = #2\n\t" + "stid = r0\n\t" + "jump __coredump\n\t" : : : "r0"); +} + +static inline uint32_t getssr(void) +{ + uint32_t ret; + 
asm volatile ("%0 = ssr\n\t" : "=r"(ret)); + return ret; +} + +static inline void setssr(uint32_t new_ssr) +{ + asm volatile ("ssr = %0\n\t" :: "r"(new_ssr)); +} + +static inline void set_asid(uint32_t asid) +{ + uint32_t ssr = getssr(); + SET_FIELD(ssr, SSR_ASID, asid); + setssr(ssr); +} + +int err; +#include "../hex_test.h" + +static void *old_evb; + +typedef uint64_t exception_vector[2]; +static exception_vector my_exceptions; + +static inline void clear_exception_vector(exception_vector excp) +{ + excp[0] = 0; + excp[1] = 0; +} + +static inline void set_exception_vector_bit(exception_vector excp, uint32_t bit) +{ + if (bit < 64) { + excp[0] |= 1LL << bit; + } else if (bit < 128) { + excp[1] |= 1LL << (bit - 64); + } +} + +#define check_exception_vector(excp, expect) \ + do { \ + check64(excp[0], expect[0]); \ + check64(excp[1], expect[1]); \ + } while (0) + +static inline void print_exception_vector(exception_vector excp) +{ + printf("exceptions (0x%016llx 0x%016llx):", excp[1], excp[0]); + for (int i = 0; i < 64; i++) { + if (excp[0] & (1LL << i)) { + printf(" 0x%x", i); + } + } + for (int i = 0; i < 64; i++) { + if (excp[1] & (1LL << i)) { + printf(" 0x%x", i + 64); + } + } + printf("\n"); +} + +/* volatile because it is written through different MMU mappings */ +typedef volatile int mmu_variable; +mmu_variable data = 0xdeadbeef; + +typedef int (*func_t)(void); +/* volatile because it will be invoked via different MMU mappings */ +typedef volatile func_t mmu_func_t; + +/* + * Create a function that returns its (virtual) address + * Write it fully in assembly so we don't have to worry about + * which optimization level we are compiled with + */ +extern int func_return_pc(void); +asm( +".global func_return_pc\n" +".balign 4\n" +".type func_return_pc, @function\n" +"func_return_pc:\n" +" r0 = pc\n" +" jumpr r31\n" +".size func_return_pc, . 
- func_return_pc\n" +); + +enum { + TLB_U = (1 << 0), + TLB_R = (1 << 1), + TLB_W = (1 << 2), + TLB_X = (1 << 3), +}; + +#define HEX_CAUSE_FETCH_NO_XPAGE 0x011 +#define HEX_CAUSE_FETCH_NO_UPAGE 0x012 +#define HEX_CAUSE_PRIV_NO_READ 0x022 +#define HEX_CAUSE_PRIV_NO_WRITE 0x023 +#define HEX_CAUSE_PRIV_NO_UREAD 0x024 +#define HEX_CAUSE_PRIV_NO_UWRITE 0x025 +#define HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH 0x044 +#define HEX_CAUSE_TLBMISSX_NORMAL 0x060 +#define HEX_CAUSE_TLBMISSX_NEXTPAGE 0x061 +#define HEX_CAUSE_TLBMISSRW_READ 0x070 +#define HEX_CAUSE_TLBMISSRW_WRITE 0x071 + +/* + * The following lets us override the default exception handlers + * This can be handy for adding code to check that they are called as well + * as special handling needed for the test to succeed. + * + * MY_EVENT_HANDLE Use this to define your own event handler + * DEFAULT_EVENT_HANDLE Use this to point to the default handler + * my_event_vectors New event vector table + * install_my_event_vectors Change from the default event handlers + */ + +extern void *my_event_vectors; + +#define MY_EVENT_HANDLE(name, helper) \ +void name(void) \ +{ \ + asm volatile("crswap(sp, sgp0)\n\t" \ + "memd(sp++#8) = r1:0\n\t" \ + "memd(sp++#8) = r3:2\n\t" \ + "memd(sp++#8) = r5:4\n\t" \ + "memd(sp++#8) = r7:6\n\t" \ + "memd(sp++#8) = r9:8\n\t" \ + "memd(sp++#8) = r11:10\n\t" \ + "memd(sp++#8) = r13:12\n\t" \ + "memd(sp++#8) = r15:14\n\t" \ + "memd(sp++#8) = r17:16\n\t" \ + "memd(sp++#8) = r19:18\n\t" \ + "memd(sp++#8) = r21:20\n\t" \ + "memd(sp++#8) = r23:22\n\t" \ + "memd(sp++#8) = r25:24\n\t" \ + "memd(sp++#8) = r27:26\n\t" \ + "memd(sp++#8) = r31:30\n\t" \ + "r0 = ssr\n\t" \ + "call " #helper "\n\t" \ + "sp = add(sp, #-8)\n\t" \ + "r31:30 = memd(sp++#-8)\n\t" \ + "r27:26 = memd(sp++#-8)\n\t" \ + "r25:24 = memd(sp++#-8)\n\t" \ + "r23:22 = memd(sp++#-8)\n\t" \ + "r21:20 = memd(sp++#-8)\n\t" \ + "r19:18 = memd(sp++#-8)\n\t" \ + "r17:16 = memd(sp++#-8)\n\t" \ + "r15:14 = memd(sp++#-8)\n\t" \ + "r13:12 = 
memd(sp++#-8)\n\t" \ + "r11:10 = memd(sp++#-8)\n\t" \ + "r9:8 = memd(sp++#-8)\n\t" \ + "r7:6 = memd(sp++#-8)\n\t" \ + "r5:4 = memd(sp++#-8)\n\t" \ + "r3:2 = memd(sp++#-8)\n\t" \ + "r1:0 = memd(sp)\n\t" \ + "crswap(sp, sgp0);\n\t" \ + "rte\n\t"); \ +} + +#ifndef NO_DEFAULT_EVENT_HANDLES + +#define DEFAULT_EVENT_HANDLE(name, offset) \ +void name(void) \ +{ \ + asm volatile("r0 = %0\n\t" \ + "r0 = add(r0, #" #offset ")\n\t" \ + "jumpr r0\n\t" \ + : : "r"(old_evb) : "r0"); \ +} + + +/* Use these values as the offset for DEFAULT_EVENT_HANDLE */ +asm ( +".set HANDLE_RESET_OFFSET, 0x00\n\t" +".set HANDLE_NMI_OFFSET, 0x04\n\t" +".set HANDLE_ERROR_OFFSET, 0x08\n\t" +".set HANDLE_RSVD_OFFSET, 0x0c\n\t" +".set HANDLE_TLBMISSX_OFFSET, 0x10\n\t" +".set HANDLE_TLBMISSRW_OFFSET, 0x18\n\t" +".set HANDLE_TRAP0_OFFSET, 0x20\n\t" +".set HANDLE_TRAP1_OFFSET, 0x24\n\t" +".set HANDLE_FPERROR_OFFSET, 0x28\n\t" +".set HANDLE_INT_OFFSET, 0x40\n\t" +); + +asm( +".align 0x1000\n\t" +"my_event_vectors:\n\t" + "jump my_event_handle_reset\n\t" + "jump my_event_handle_nmi\n\t" + "jump my_event_handle_error\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_tlbmissx\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_tlbmissrw\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_trap0\n\t" + "jump my_event_handle_trap1\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_fperror\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump 
my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" +); + +#define DEFAULT_EVENT_HANDLES \ +DEFAULT_EVENT_HANDLE(my_event_handle_error, HANDLE_ERROR_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_nmi, HANDLE_NMI_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_tlbmissrw, HANDLE_TLBMISSRW_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_tlbmissx, HANDLE_TLBMISSX_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_reset, HANDLE_RESET_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_rsvd, HANDLE_RSVD_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_trap0, HANDLE_TRAP0_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_trap1, HANDLE_TRAP1_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_int, HANDLE_INT_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_fperror, HANDLE_FPERROR_OFFSET) + +#endif /* NO_DEFAULT_EVENT_HANDLES */ + +/* When a permission error happens, add the permission to the TLB entry */ +void my_event_handle_error_helper(uint32_t ssr) +{ + uint32_t cause = GET_FIELD(ssr, SSR_CAUSE); + uint32_t badva = getbadva(); + uint32_t entry_num = tlb_entry_num(badva); + uint64_t entry; + + set_exception_vector_bit(my_exceptions, cause); + + switch (cause) { + case HEX_CAUSE_FETCH_NO_XPAGE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_X, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_FETCH_NO_UPAGE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_U, 1); + tlbw(entry, entry_num); + break; + case 
HEX_CAUSE_PRIV_NO_READ: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_R, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_PRIV_NO_WRITE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_W, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_PRIV_NO_UREAD: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_U, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_PRIV_NO_UWRITE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_U, 1); + tlbw(entry, entry_num); + break; + default: + do_coredump(); + break; + } +} + +void my_event_handle_nmi_helper(uint32_t ssr) +{ + uint32_t cause = GET_FIELD(ssr, SSR_CAUSE); + + set_exception_vector_bit(my_exceptions, cause); + + switch (cause) { + case HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH: + break; + default: + do_coredump(); + break; + } +} + +/* + * When a TLB miss happens, create a mapping + * We'll set different read/write/execute permissions + * for different entry numbers. + */ +void my_event_handle_tlbmissrw_helper(uint32_t ssr) +{ + uint32_t cause = GET_FIELD(ssr, SSR_CAUSE); + uint32_t badva = getbadva(); + uint32_t entry_num = tlb_entry_num(badva); + uint32_t VA = page_start(badva, TARGET_PAGE_BITS); + uint32_t PA = VA - (entry_num * ONE_MB); + + uint64_t entry = + create_mmu_entry(1, 0, 0, 0, VA, 0, 0, 0, 1, 0x3, PA, PAGE_4K); + if (entry_num == TWO_MB_ENTRY) { + SET_FIELD(entry, PTE_R, 1); + } + if (entry_num == THREE_MB_ENTRY) { + SET_FIELD(entry, PTE_W, 1); + } + + set_exception_vector_bit(my_exceptions, cause); + + switch (cause) { + case HEX_CAUSE_TLBMISSRW_READ: + tlbw(entry, entry_num); + break; + case HEX_CAUSE_TLBMISSRW_WRITE: + tlbw(entry, entry_num); + break; + default: + do_coredump(); + break; + } +} + +void my_event_handle_tlbmissx_helper(uint32_t ssr) +{ + uint32_t cause = GET_FIELD(ssr, SSR_CAUSE); + uint32_t badva = getbadva(); + uint32_t entry_num = tlb_entry_num(badva); + uint32_t VA = page_start(badva, TARGET_PAGE_BITS); + uint32_t PA = VA - (entry_num * ONE_MB); + + uint64_t entry = + 
create_mmu_entry(1, 0, 0, 0, VA, 0, 0, 0, 1, 0x3, PA, PAGE_4K); + + set_exception_vector_bit(my_exceptions, cause); + + switch (cause) { + case HEX_CAUSE_TLBMISSX_NORMAL: + tlbw(entry, entry_num); + break; + default: + do_coredump(); + break; + } +} + +static inline void install_my_event_vectors(void) +{ + old_evb = getevb(); + setevb(&my_event_vectors); +} + +#define MAKE_GOTO(name) \ +void goto_##name(void) \ +{ \ + asm volatile("r0 = ##" #name "\n\t" \ + "jumpr r0\n\t" \ + : : : "r0"); \ +} + +#define MAKE_ERR_HANDLER(name, helper_fn) \ + MY_EVENT_HANDLE(name, helper_fn) \ + MAKE_GOTO(name) + +#define INSTALL_ERR_HANDLER(name) do { \ + /* + * Install our own privilege exception handler. + * The normal behavior is to coredump + * Read and decode the jump displacements from evb + * ASSUME negative displacement which is the standard. + */ \ + uint32_t *evb_err = getevb() + 2; \ + uint32_t err_distance = -(0xfe000000 | *evb_err) << 1; \ + uint32_t err_handler = (uint32_t)evb_err - err_distance; \ + memcpy((void *)err_handler, goto_##name, 12); \ +} while (0) + +static inline void remove_trans(int index) +{ + uint64_t entry = tlbr(index); + SET_FIELD(entry, PTE_V, 0); + tlbw(entry, index); +} + +static inline void clear_overlapping_entry(unsigned int asid, uint32_t va) +{ + int32_t index = tlbp(asid, va); + if (index != TLB_NOT_FOUND) { + remove_trans(index); + } +} + +static void add_trans(int index, uint32_t va, uint64_t pa, + PageSize page_size, uint8_t xwru, + unsigned int asid, uint8_t V, uint8_t G) +{ + if (V) { + clear_overlapping_entry(asid, va); + } + assert(!add_translation_extended(index, (void *)va, pa, page_size, + xwru, 0, asid, 0, + ((V & 1) << 1) | (G & 1))); +} + +#endif diff --git a/tests/tcg/hexagon/system/mmu_asids.c b/tests/tcg/hexagon/system/mmu_asids.c new file mode 100644 index 0000000000000..34f25c25a3d7f --- /dev/null +++ b/tests/tcg/hexagon/system/mmu_asids.c @@ -0,0 +1,80 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc.
All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <string.h> + + +#define DEBUG 0 + +#include "mmu.h" + +DEFAULT_EVENT_HANDLES + +void test_asids(void) +{ + uint32_t addr = (uint32_t)&data; + uint32_t page = page_start(addr, TARGET_PAGE_BITS); + uint32_t offset = FIVE_MB; + uint32_t new_addr = addr + offset; + uint32_t new_page = page + offset; + uint64_t entry = + create_mmu_entry(0, 0, 0, 1, new_page, 1, 1, 1, 0, 7, page, PAGE_4K); + /* + * Create a TLB entry for ASID=1 + * Write it at index 1 + * Check that it is present + * Invalidate the ASID + * Check that it is not found + */ + tlbw(entry, 1); + check32(tlboc(entry), 1); + tlbinvasid(entry >> 32); + check32(tlboc(entry), TLB_NOT_FOUND); + + /* + * Re-install the entry + * Put ourselves in ASID=1 + * Do a load and a store + */ + data = 0xdeadbeef; + tlbw(entry, 1); + set_asid(1); + check32(*(mmu_variable *)new_addr, 0xdeadbeef); + *(mmu_variable *)new_addr = 0xcafebabe; + check32(data, 0xcafebabe); + + /* + * Make sure a load from ASID 2 gets a different value. + * The standalone runtime will create a VA==PA entry on + * a TLB miss, so the load will be reading from uninitialized + * memory. + */ + set_asid(2); + data = 0xdeadbeef; + check32_ne(*(mmu_variable *)new_addr, 0xdeadbeef); + + /* + * Invalidate the ASID and make sure a load from ASID 1 + * gets a different value. + */ + tlbinvasid(entry >> 32); + set_asid(1); + data = 0xcafebabe; + check32_ne(*(mmu_variable *)new_addr, 0xcafebabe); +} + +int main() +{ + puts("Hexagon MMU ASID test"); + + test_asids(); + + printf("%s\n", ((err) ? "FAIL" : "PASS")); + return err; +} diff --git a/tests/tcg/hexagon/system/mmu_overlap.c b/tests/tcg/hexagon/system/mmu_overlap.c new file mode 100644 index 0000000000000..73d0565abed43 --- /dev/null +++ b/tests/tcg/hexagon/system/mmu_overlap.c @@ -0,0 +1,65 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc.
All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <string.h> + +#define DEBUG 0 + +#include "mmu.h" + +DEFAULT_EVENT_HANDLES + +void test_overlap(void) +{ + uint32_t addr = (uint32_t)&data; + uint32_t page = page_start(addr, 20); + uint32_t offset = FIVE_MB; + uint32_t new_page = page + offset; + uint32_t new_addr = addr + offset; + uint8_t data_perm = TLB_X | TLB_W | TLB_R | TLB_U; + uint64_t entry; + + add_trans(1, new_page, page, PAGE_1M, data_perm, 0, 1, 1); + check32(tlbp(0, new_addr), 1); + + /* Check an entry that overlaps with the one we just created */ + entry = + create_mmu_entry(1, 0, 0, 0, new_page, 1, 1, 1, 0, 7, page, PAGE_4K); + check32(tlboc(entry), 1); + /* Check that conditional TLB write (ctlbw) does NOT write the new entry */ + check32(ctlbw(entry, 2), 0x1); + + /* Create an entry that does not overlap with the one we just created */ + entry = create_mmu_entry(1, 0, 0, 0, new_page + ONE_MB, 1, 1, 1, 0, 7, page, + PAGE_4K); + check32(tlboc(entry), TLB_NOT_FOUND); + /* Check that conditional TLB write (ctlbw) does write the new entry */ + check32(ctlbw(entry, 2), TLB_NOT_FOUND); + + /* Create an entry that overlaps both of these entries */ + entry = + create_mmu_entry(1, 0, 0, 0, new_page, 1, 1, 1, 0, 7, page, PAGE_4M); + check32(tlboc(entry), 0xffffffff); + + /* Clear the TLB entries */ + remove_trans(1); + check32(tlbp(0, new_addr), TLB_NOT_FOUND); + remove_trans(2); + check32(tlbp(0, (new_addr + ONE_MB)), TLB_NOT_FOUND); +} + +int main() +{ + puts("Hexagon MMU overlap test"); + + test_overlap(); + + printf("%s\n", ((err) ?
"FAIL" : "PASS")); + return err; +} diff --git a/tests/tcg/hexagon/system/reg_fields_def.h b/tests/tcg/hexagon/system/reg_fields_def.h new file mode 100644 index 0000000000000..ff2769a1399d6 --- /dev/null +++ b/tests/tcg/hexagon/system/reg_fields_def.h @@ -0,0 +1,87 @@ +/* PTE (aka TLB entry) fields */ +DEF_REG_FIELD(PTE_PPD, + "PPD", 0, 24, + "Physical page number that the corresponding virtual page maps to.") +DEF_REG_FIELD(PTE_C, + "C", 24, 4, + "Cacheability attributes for the page.") +DEF_REG_FIELD(PTE_U, + "U", 28, 1, + "User mode permitted.") +DEF_REG_FIELD(PTE_R, + "R", 29, 1, + "Read-enable.") +DEF_REG_FIELD(PTE_W, + "W", 30, 1, + "Write-enable.") +DEF_REG_FIELD(PTE_X, + "X", 31, 1, + "Execute-enable.") +DEF_REG_FIELD(PTE_VPN, + "VPN", 32, 20, + "Virtual page number that is matched against the load or store address.") +DEF_REG_FIELD(PTE_ASID, + "ASID", 52, 7, + "7-bit address space identifier (tag extender)") +DEF_REG_FIELD(PTE_ATR0, + "ATR0", 59, 1, + "General purpose attribute bit kept as an attribute of each cache line.") +DEF_REG_FIELD(PTE_ATR1, + "ATR1", 60, 1, + "General purpose attribute bit kept as an attribute of each cache line.") +DEF_REG_FIELD(PTE_PA35, + "PA35", 61, 1, + "The Extra Physical bit is the most-significant physical address bit.") +DEF_REG_FIELD(PTE_G, + "G", 62, 1, + "Global bit. If set, then the ASID is ignored in the match.") +DEF_REG_FIELD(PTE_V, + "V", 63, 1, + "Valid bit. 
indicates whether this entry should be used for matching.") + +/* SSR fields */ +DEF_REG_FIELD(SSR_CAUSE, + "cause", 0, 8, + "8-bit field that contains the reason for various exception.") +DEF_REG_FIELD(SSR_ASID, + "asid", 8, 7, + "7-bit field that contains the Address Space Identifier.") +DEF_REG_FIELD(SSR_UM, + "um", 16, 1, + "read-write bit.") +DEF_REG_FIELD(SSR_EX, + "ex", 17, 1, + "set when an interrupt or exception is accepted.") +DEF_REG_FIELD(SSR_IE, + "ie", 18, 1, + "indicates whether the global interrupt is enabled.") +DEF_REG_FIELD(SSR_GM, + "gm", 19, 1, + "Guest mode bit.") +DEF_REG_FIELD(SSR_V0, + "v0", 20, 1, + "if BADVA0 register contents are from a valid slot 0 instruction.") +DEF_REG_FIELD(SSR_V1, + "v1", 21, 1, + "if BADVA1 register contents are from a valid slot 1 instruction.") +DEF_REG_FIELD(SSR_BVS, + "bvs", 22, 1, + "BADVA Selector.") +DEF_REG_FIELD(SSR_CE, + "ce", 23, 1, + "grants user or guest read permissions to the PCYCLE register aliases.") +DEF_REG_FIELD(SSR_PE, + "pe", 24, 1, + "grants guest read permissions to the PMU register aliases.") +DEF_REG_FIELD(SSR_BP, + "bp", 25, 1, + "Internal Bus Priority bit.") +DEF_REG_FIELD(SSR_XA, + "xa", 27, 3, + "Extension Active, which control operation of an attached coprocessor.") +DEF_REG_FIELD(SSR_SS, + "ss", 30, 1, + "Single Step, which enables single-step exceptions.") +DEF_REG_FIELD(SSR_XE, + "xe", 31, 1, + "Coprocessor Enable, which enables use of an attached coprocessor.") diff --git a/tests/tcg/hexagon/system/tlb-miss-tlblock.S b/tests/tcg/hexagon/system/tlb-miss-tlblock.S new file mode 100644 index 0000000000000..fe07aca47b37a --- /dev/null +++ b/tests/tcg/hexagon/system/tlb-miss-tlblock.S @@ -0,0 +1,156 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * Test Purpose: + * Verify that tlbmissx and tlbmissrw do not set the syscfg.tl bit + * The HW spec says: + * "TLBLOCK is acquired automatically whenever a hardware thread raises a + * TLB miss-RW or TLBmiss-X exception." + * The casual reader would assume that a miss handler would implicitly have + * the lock, that apparently + * isn't the case. + */ + +.global start +start: + r0 = ##evb + evb = r0 + r0 = ##0 + ssr = r0 + jump #setup + +#define tlb_index r11 +#define stack r29 +#define data r18 +tlb_index = ##0x00000007 + +.org 0x100 + +evb: + jump #reset + jump #nmi + jump #error + jump #0 + jump #tlbmissx + jump #0 + jump #tlbmissrw + + +setup: + { + r1 = ##0xc009b800 + r0 = ##0xf7137010 + } + tlb_index = add(tlb_index, #1) + tlbw(r1:0,tlb_index) + +/* Enable MMU */ + r2 = ##0x0085a07f + syscfg = r2 + +/* Test setup */ + r12 = #0x12 + r0 = #0x6 + r7 = ##0x77777777 + r6 = ##0x66666666 + data = ##0xf2000000 + stack = ##0x9ba01000 + jump ##.L_server_loop + +/* event vector handlers */ +reset: + r2 = #1 + stop(r0) +nmi: + r2 = #1 + stop(r0) +error: + r2 = #1 + stop(r0) + + +/* + * Can only handle a single ex fault. 
+ */ +tlbmissx: + r0 = syscfg + r1 = #0x800 +/* + * Fail if we automatically start setting SYSCFG:TL again + */ + r0 = and(r0, r1) + { + p0 = cmp.eq(r0, r1); if (p0.new) jump:t .Lfailmissx + } + { + r1 = ##0xc009b900 + r0 = ##0xf7137210 + } + tlb_index = add(tlb_index, #1) + tlbw(r1:0,tlb_index) + tlbunlock + rte + stop(r0); +.Lfailmissx: + r2 = #1 + stop(r2); + +/* + * Can only handle a stack fault and a data fault + */ +tlbmissrw: + r0 = syscfg + r1 = #0x800 +/* + * Fail if we automatically start setting SYSCFG:TL again + */ + r0 = and(r0, r1) + { + p0 = cmp.eq(r0, r1); if (p0.new) jump:t .Lfailmissrw + } + r0 = badva + p0 = cmp.eq (stack, r0) // missed the stack + if (!p0) jump .Ldata + { + r1 = ##0xc009ba00 + r0 = ##0xf7137210 + } + jump #.Ldone +.Ldata: + { + r1 = ##0xc00f2000 + r0 = ##0xf71e4010 + } +.Ldone: + tlb_index = add(tlb_index, #1) + tlbw(r1:0,tlb_index) + tlbunlock + rte +.Lfailmissrw: + r2 = #1 + stop(r2); + + + +.org 0x100000 + nop +.Lpass: + r2 = #0 + stop(r0); + trap0(#0x18) +.L_server_loop: +{ + p0 = cmp.eq(r0,#-0x1) + if (!p0.new) jump:t .Lpass + memd(stack) = r7:6; // S1 store to stack will also fault + memw(data) = r12; // S0 store will fault +} +/* + * We should not get here: + */ + r2 = #1 + stop(r0); From c349f4ca46b1fdd9a3b66614b3688eb8825f7796 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 09:21:23 -0800 Subject: [PATCH 110/126] tests/tcg/hexagon: add interrupt and priority tests Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- tests/tcg/hexagon/Makefile.softmmu-target | 4 ++ tests/tcg/hexagon/system/ciad-siad.c | 50 +++++++++++++++++++++++ tests/tcg/hexagon/system/monitor_insts.S | 18 ++++++++ tests/tcg/hexagon/system/standalone_hw.c | 43 +++++++++++++++++++ 4 files changed, 115 insertions(+) create mode 100644 tests/tcg/hexagon/system/ciad-siad.c create mode 100644 tests/tcg/hexagon/system/monitor_insts.S create mode 100644 
tests/tcg/hexagon/system/standalone_hw.c diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target index 7e561efd25825..7fe39ef690aee 100644 --- a/tests/tcg/hexagon/Makefile.softmmu-target +++ b/tests/tcg/hexagon/Makefile.softmmu-target @@ -39,6 +39,8 @@ TESTS_BUILT_WITH_DEFAULT_RULES = \ semihost \ mmu_overlap \ mmu_asids \ + standalone_hw \ + ciad-siad \ $() TESTS += \ @@ -73,6 +75,8 @@ mmu_overlap.o: mmu_overlap.c mmu.h mmu_overlap: mmu_overlap.o mmu_asids.o: mmu_asids.c mmu.h mmu_asids: mmu_asids.o +ciad-siad: ciad-siad.o +standalone_hw: standalone_hw.o monitor_insts.o ############# Custom build options diff --git a/tests/tcg/hexagon/system/ciad-siad.c b/tests/tcg/hexagon/system/ciad-siad.c new file mode 100644 index 0000000000000..e3fbb7a506dc9 --- /dev/null +++ b/tests/tcg/hexagon/system/ciad-siad.c @@ -0,0 +1,50 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdint.h> +#include <stdio.h> + + +static inline void siad(uint32_t val) +{ + asm volatile ("siad(%0);" + : : "r"(val)); + return; +} +static inline void ciad(uint32_t val) +{ + asm volatile ("ciad(%0);" + : : "r"(val)); + return; +} + +static inline uint32_t getipendad() +{ + uint32_t reg; + asm volatile ("%0=s20;" + : "=r"(reg)); + return reg; +} +int +main(int argc, char *argv[]) +{ + siad(4); + int ipend = getipendad(); + if (ipend != (0x4 << 16)) { + goto fail; + } + ciad(4); + ipend = getipendad(); + if (ipend) { + goto fail; + } + + printf("PASS\n"); + return 0; +fail: + printf("FAIL\n"); + return 1; +} diff --git a/tests/tcg/hexagon/system/monitor_insts.S b/tests/tcg/hexagon/system/monitor_insts.S new file mode 100644 index 0000000000000..8027068511f1f --- /dev/null +++ b/tests/tcg/hexagon/system/monitor_insts.S @@ -0,0 +1,18 @@ +/* + * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + .text + .type test_set_prio, @function + .global test_set_prio + +test_set_prio: + r0 = #3 + r1 = #1 + p0 = cmp.eq(r0,r1) + setprio(p0, r0) + jumpr lr + + .size test_set_prio, . - test_set_prio diff --git a/tests/tcg/hexagon/system/standalone_hw.c b/tests/tcg/hexagon/system/standalone_hw.c new file mode 100644 index 0000000000000..c67343204a805 --- /dev/null +++ b/tests/tcg/hexagon/system/standalone_hw.c @@ -0,0 +1,43 @@ +#include <stdio.h> +#include <assert.h> + +void test_set_prio(); + +void inst_test() +{ + asm volatile("dczeroa(r0)\n\t" + "dccleanidx(r0)\n\t" + "dcinvidx(r0)\n\t" + "r1 = dctagr(r0)\n\t" + "dctagw(r0, r1)\n\t" + "dcfetch(r0)\n\t" + "dccleaninvidx(r0)\n\t" + "l2gclean\n\t" + "l2gclean(r1:0)\n\t" + "l2gcleaninv\n\t" + "l2gcleaninv(r1:0)\n\t" + "l2gunlock\n\t" + "l2kill\n\t" + "trace(r0)\n\t" + "pause(#1)\n\t" + ); + + asm volatile("r0 = #0\n\t" + "r1 = iassignr(r0)\n\t" + /* Set interrupt 0 to disabled on all threads */ + "r0 = #0\n\t" + "iassignw(r0)\n\t"); + + test_set_prio(); + printf("Executed monitor mode instructions\n"); +} + +int main(int argc, const char *argv[]) +{ + inst_test(); + printf("Hello, World: (argc: %d)\n", argc); + assert(argc >= 1); + for (int i = 0; i < argc; i++) { + printf("\t> '%s'\n", argv[i]); + } +} From a94f0f21cd4f60d1627b9dde3d60f0b8ce0cbef4 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 09:22:54 -0800 Subject: [PATCH 111/126] tests/tcg/hexagon: add tests for system registers Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- tests/tcg/hexagon/Makefile.softmmu-target | 5 + tests/tcg/hexagon/system/badva.c | 335 ++++++++++++++++++++++ tests/tcg/hexagon/system/vid_reg.c | 36 +++ 3 files changed, 376 insertions(+) create mode 100644 tests/tcg/hexagon/system/badva.c create mode 100644 tests/tcg/hexagon/system/vid_reg.c diff --git a/tests/tcg/hexagon/Makefile.softmmu-target 
b/tests/tcg/hexagon/Makefile.softmmu-target index 7fe39ef690aee..3187194b0a61d 100644 --- a/tests/tcg/hexagon/Makefile.softmmu-target +++ b/tests/tcg/hexagon/Makefile.softmmu-target @@ -41,6 +41,8 @@ TESTS_BUILT_WITH_DEFAULT_RULES = \ mmu_asids \ standalone_hw \ ciad-siad \ + badva \ + vid_reg \ $() TESTS += \ @@ -77,6 +79,9 @@ mmu_asids.o: mmu_asids.c mmu.h mmu_asids: mmu_asids.o ciad-siad: ciad-siad.o standalone_hw: standalone_hw.o monitor_insts.o +vid_reg: vid_reg.o +badva.o: badva.c ../hex_test.h crt0/hexagon_standalone.h +badva: badva.o ############# Custom build options diff --git a/tests/tcg/hexagon/system/badva.c b/tests/tcg/hexagon/system/badva.c new file mode 100644 index 0000000000000..1351269d10776 --- /dev/null +++ b/tests/tcg/hexagon/system/badva.c @@ -0,0 +1,335 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "crt0/hexagon_standalone.h" + +#define DEBUG 0 + +int err; +#include "../hex_test.h" + +/* volatile because it is written through different MMU mappings */ +typedef volatile int mmu_variable; +mmu_variable data0 = 0xdeadbeef; +mmu_variable data1 = 0xabcdef01; + +#define ONE_MB (1 << 20) +#define INVALID_BADVA 0xbadabada + +static uint32_t read_badva(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = badva\n\t" : "=r"(ret)); + return ret; +} + +static uint32_t read_badva0(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = badva0\n\t" : "=r"(ret)); + return ret; +} + +static uint32_t read_badva1(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = badva1\n\t" : "=r"(ret)); + return ret; +} + +static uint32_t read_ssr(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = ssr\n\t" : "=r"(ret)); + return ret; +} + +static void write_badva0(uint32_t val) +{ + __asm__ __volatile__("badva0=%0;" : : "r"(val)); + return; +} + +static void write_badva1(uint32_t val) +{ + __asm__ __volatile__("badva1=%0;" : : "r"(val)); + return; +} + +#define 
SSR_V0_BIT 20 +#define SSR_V1_BIT 21 +#define SSR_BVS_BIT 22 /* BADVA selector, SSR[22] */ + +static uint32_t read_ssr_v0(void) +{ + return (read_ssr() >> SSR_V0_BIT) & 0x1; +} + +static uint32_t read_ssr_v1(void) +{ + return (read_ssr() >> SSR_V1_BIT) & 0x1; +} + +static uint32_t read_ssr_bvs(void) +{ + return (read_ssr() >> SSR_BVS_BIT) & 0x1; +} + +static void dual_store(mmu_variable *p, mmu_variable *q, uint32_t pval, + uint32_t qval) +{ +#if DEBUG + printf("dual_store:\t0x%p, 0x%p, 0x%lx, 0x%lx\n", p, q, pval, qval); +#endif + + __asm__ __volatile__("r6 = #0\n\t" + "badva0 = r6\n\t" + "badva1 = r6\n\t" + "r6 = ssr\n\t" + "r6 = clrbit(r6, #%4) // V0\n\t" + "r6 = clrbit(r6, #%5) // V1\n\t" + "r6 = clrbit(r6, #%6) // BVS\n\t" + "ssr = r6\n\t" + "{\n\t" + " memw(%0) = %2 // slot 1\n\t" + " memw(%1) = %3 // slot 0\n\t" + "}\n\t" + : "=m"(*p), "=m"(*q) + : "r"(pval), "r"(qval), "i"(SSR_V0_BIT), + "i"(SSR_V1_BIT), "i"(SSR_BVS_BIT) + : "r6"); +} + +static void dual_load(mmu_variable *p, mmu_variable *q, uint32_t *pval, + uint32_t *qval) +{ + uint32_t val0, val1; + +#if DEBUG + printf("dual_load:\t0x%p, 0x%p\n", p, q); +#endif + + __asm__ __volatile__("r6 = #0\n\t" + "badva0 = r6\n\t" + "badva1 = r6\n\t" + "r6 = ssr\n\t" + "r6 = clrbit(r6, #%4) // V0\n\t" + "r6 = clrbit(r6, #%5) // V1\n\t" + "r6 = clrbit(r6, #%6) // BVS\n\t" + "ssr = r6\n\t" + "{\n\t" + " %1 = memw(%3) // slot 1\n\t" + " %0 = memw(%2) // slot 0\n\t" + "}\n\t" + : "=r"(val0), "=r"(val1) + : "m"(*p), "m"(*q), "i"(SSR_V0_BIT), "i"(SSR_V1_BIT), + "i"(SSR_BVS_BIT) + : "r6"); + +#if DEBUG + printf("\t\t0x%lx, 0x%lx\n", val0, val1); +#endif + + *pval = val0; + *qval = val1; +} + +static void load_store(mmu_variable *p, mmu_variable *q, uint32_t *pval, + uint32_t qval) +{ + uint32_t val; + +#if DEBUG + printf("load_store:\t0x%p, 0x%p, 0x%lx\n", p, q, qval); +#endif + + __asm__ __volatile__("r6 = #0\n\t" + "badva0 = r6\n\t" + "badva1 = r6\n\t" + "r6 = ssr\n\t" + "r6 = clrbit(r6, #%4) // V0\n\t" + "r6 = clrbit(r6, #%5) // V1\n\t" + "r6 =
clrbit(r6, #%6) // BVS\n\t" + "ssr = r6\n\t" + "{\n\t" + " %0 = memw(%2) // slot 1\n\t" + " memw(%1) = %3 // slot 0\n\t" + "}\n\t" + : "=r"(val), "=m"(*q) + : "m"(*p), "r"(qval), "i"(SSR_V0_BIT), "i"(SSR_V1_BIT), + "i"(SSR_BVS_BIT) + : "r6"); + +#if DEBUG + printf("\t\t0x%lx\n", val); +#endif + + *pval = val; +} + +enum { + TLB_U = (1 << 0), + TLB_R = (1 << 1), + TLB_W = (1 << 2), + TLB_X = (1 << 3), +}; + +uint32_t add_trans_pgsize(uint32_t page_size_bits) +{ + switch (page_size_bits) { + case 12: /* 4KB */ + return 1; + case 14: /* 16KB */ + return 2; + case 16: /* 64KB */ + return 4; + case 18: /* 256KB */ + return 8; + case 20: /* 1MB */ + return 16; + case 22: /* 4MB */ + return 32; + case 24: /* 16MB */ + return 64; + default: + return 1; + } +} + +int mb_counter = 1; + +static mmu_variable *map_data_address(mmu_variable *p, uint32_t data_offset) +{ + uint32_t page_size_bits = 12; + uint32_t page_size = 1 << page_size_bits; + uint32_t page_align = ~(page_size - 1); + + uint32_t data_addr = (uint32_t)p; + uint32_t data_page = data_addr & page_align; + + uint32_t new_data_page = data_page + data_offset; + uint32_t read_data_addr = data_addr + data_offset; + unsigned int data_perm = TLB_X | TLB_W | TLB_U; + add_translation((void *)new_data_page, (void *)data_page, 0); + + return (mmu_variable *)read_data_addr; +} + +static void test_dual_store(void) +{ + data0 = 0x12345678; + data1 = 0x87654321; + + mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB); + mb_counter++; + mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB); + mb_counter++; + + dual_store(new_data0, new_data1, 0x1, 0x2); + if (read_badva() == (uint32_t)new_data0) { + check32(read_badva0(), (uint32_t)new_data0); + check32(read_badva1(), INVALID_BADVA); + check32(read_ssr_v0(), 1); + check32(read_ssr_v1(), 0); + check32(read_ssr_bvs(), 0); + } else if (read_badva() == (uint32_t)new_data1) { + check32(read_badva0(), INVALID_BADVA); + check32(read_badva1(), 
(uint32_t)new_data1); + check32(read_ssr_v0(), 0); + check32(read_ssr_v1(), 1); + check32(read_ssr_bvs(), 1); + } else { + /* Something went wrong! */ + check32(0, 1); + } + check32(data0, 0x1); + check32(data1, 0x2); +} + +static void test_dual_load(void) +{ + uint32_t val0, val1; + + data0 = 0xaabbccdd; + data1 = 0xeeff0011; + + mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB); + mb_counter++; + mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB); + mb_counter++; + + dual_load(new_data0, new_data1, &val0, &val1); + if (read_badva() == (uint32_t)new_data0) { + check32(read_badva0(), (uint32_t)new_data0); + check32(read_badva1(), INVALID_BADVA); + check32(read_ssr_v0(), 1); + check32(read_ssr_v1(), 0); + check32(read_ssr_bvs(), 0); + } else if (read_badva() == (uint32_t)new_data1) { + check32(read_badva0(), INVALID_BADVA); + check32(read_badva1(), (uint32_t)new_data1); + check32(read_ssr_v0(), 0); + check32(read_ssr_v1(), 1); + check32(read_ssr_bvs(), 1); + } else { + /* Something went wrong! */ + check32(0, 1); + } + check32(val0, 0xaabbccdd); + check32(val1, 0xeeff0011); +} + +static void test_load_store(void) +{ + uint32_t val; + + data0 = 0x11223344; + data1 = 0x55667788; + + mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB); + mb_counter++; + mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB); + mb_counter++; + + load_store(new_data0, new_data1, &val, 0x123); + if (read_badva() == (uint32_t)new_data1) { + check32(read_badva0(), (uint32_t)new_data1); + check32(read_badva1(), INVALID_BADVA); + check32(read_ssr_v0(), 1); + check32(read_ssr_v1(), 0); + check32(read_ssr_bvs(), 0); + } else if (read_badva() == (uint32_t)new_data0) { + check32(read_badva0(), INVALID_BADVA); + check32(read_badva1(), (uint32_t)new_data0); + check32(read_ssr_v0(), 0); + check32(read_ssr_v1(), 1); + check32(read_ssr_bvs(), 1); + } else { + /* Something went wrong! 
*/ + check32(0, 1); + } + check32(val, 0x11223344); + check32(data1, 0x123); +} +static void test_badva_write(void) +{ + uint32_t va = 0x11223344; + write_badva0(va); + check32(read_badva(), va); +} + +int main() +{ + puts("Hexagon badva test"); + + test_dual_store(); + test_dual_load(); + test_load_store(); + test_badva_write(); + + printf("%s\n", ((err) ? "FAIL" : "PASS")); + return err; +} diff --git a/tests/tcg/hexagon/system/vid_reg.c b/tests/tcg/hexagon/system/vid_reg.c new file mode 100644 index 0000000000000..25f266f98b2d7 --- /dev/null +++ b/tests/tcg/hexagon/system/vid_reg.c @@ -0,0 +1,36 @@ +/* + * Verify vid reads/writes really update the register. + */ + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> + +static inline uint32_t getvid() +{ + uint32_t reg; + asm volatile("%0=vid;" : "=r"(reg)); + return reg; +} +static inline void setvid(uint32_t val) +{ + asm volatile("vid=%0;" : : "r"(val)); + return; +} +int main() +{ + uint32_t testval = 0x3ff03ff; + setvid(testval); + if (testval != getvid()) { + printf("ERROR: vid read returned: 0x%x\n", getvid()); + } + assert(testval == getvid()); + + /* L2VIC_NO_PENDING (0xffffffff) should not update the vid */ + setvid(0xffffffff); + if (testval != getvid()) { + printf("ERROR: vid read returned: 0x%x\n", getvid()); + } + + assert(testval == getvid()); +} From 6befeee9b9af1934ad728de4868b135afcd98d07 Mon Sep 17 00:00:00 2001 From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> Date: Wed, 5 Feb 2025 09:33:47 -0800 Subject: [PATCH 112/126] tests/tcg/hexagon: add HVX tests Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com> --- tests/tcg/hexagon/Makefile.softmmu-target | 9 + tests/tcg/hexagon/system/cfgtable.h | 39 + tests/tcg/hexagon/system/hvx-multi.c | 119 ++ tests/tcg/hexagon/system/standalone_vec.c | 1419 +++++++++++++++++++++ 4 files changed, 1586 insertions(+) create mode 100644 tests/tcg/hexagon/system/cfgtable.h create mode 100644 tests/tcg/hexagon/system/hvx-multi.c 
create mode 100644 tests/tcg/hexagon/system/standalone_vec.c diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target index 3187194b0a61d..3f070bfea91b2 100644 --- a/tests/tcg/hexagon/Makefile.softmmu-target +++ b/tests/tcg/hexagon/Makefile.softmmu-target @@ -43,6 +43,8 @@ TESTS_BUILT_WITH_DEFAULT_RULES = \ ciad-siad \ badva \ vid_reg \ + hvx-multi \ + standalone_vec \ $() TESTS += \ @@ -80,11 +82,18 @@ mmu_asids: mmu_asids.o ciad-siad: ciad-siad.o standalone_hw: standalone_hw.o monitor_insts.o vid_reg: vid_reg.o +hvx-multi.o: hvx-multi.c ../hvx_misc.h +hvx-multi: hvx-multi.o +standalone_vec.o: standalone_vec.c cfgtable.h +standalone_vec: standalone_vec.o badva.o: badva.c ../hex_test.h crt0/hexagon_standalone.h badva: badva.o ############# Custom build options +standalone_vec.o: CFLAGS+= -mv69 -O2 -mhvx -fvectorize +hvx-multi.o: CFLAGS+= -O2 -mhvx + # We don't want to link this one with crt0 files tlb-miss-tlblock: tlb-miss-tlblock.o $(CC) $(CFLAGS) $< -o $@ -nostartfiles -Wl,-Ttext,0x9b800000 -Wl,-entry,0x9b800000 diff --git a/tests/tcg/hexagon/system/cfgtable.h b/tests/tcg/hexagon/system/cfgtable.h new file mode 100644 index 0000000000000..fff84ef56950b --- /dev/null +++ b/tests/tcg/hexagon/system/cfgtable.h @@ -0,0 +1,39 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef CFGTABLE_H +#define CFGTABLE_H + +#include <stdint.h> + +static uint32_t read_cfgtable_field(uint32_t offset) +{ + uint32_t val; + asm volatile("r0 = cfgbase\n\t" + "r0 = asl(r0, #5)\n\t" + "%0 = memw_phys(%1, r0)\n\t" + : "=r"(val) + : "r"(offset) + : "r0"); + return val; +} + +#define GET_SUBSYSTEM_BASE() (read_cfgtable_field(0x8) << 16) +#define GET_FASTL2VIC_BASE() (read_cfgtable_field(0x28) << 16) + +static uintptr_t get_vtcm_base(void) +{ +#if __HEXAGON_ARCH__ == 65 + return 0xD8200000L; +#elif __HEXAGON_ARCH__ >= 66 + int vtcm_offset = 0x038; + return read_cfgtable_field(vtcm_offset) << 16; +#else +#error "unsupported hexagon revision" +#endif +} + +#endif /* CFGTABLE_H */ diff --git a/tests/tcg/hexagon/system/hvx-multi.c b/tests/tcg/hexagon/system/hvx-multi.c new file mode 100644 index 0000000000000..0d2e90c2c79b2 --- /dev/null +++ b/tests/tcg/hexagon/system/hvx-multi.c @@ -0,0 +1,119 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +int err; + +#include "../hvx_misc.h" + +void set_hvx_context(int n) +{ + uint32_t ssr_context_bits = n << 27; /* SSR.XA field, bits 29:27 */ + asm volatile( + "r1 = ssr\n" + "r1 = and(r1, ##0xc7ffffff)\n" + "r1 = or(r1, %0)\n" + "ssr = r1\n" + "isync\n" + : + : "r"(ssr_context_bits) + : "r1" + ); +} + +void setv0(int n) +{ + asm volatile( + "v0 = vsplat(%0)\n" + : : "r"(n) : "v0" + ); +} + +void store_v0(MMVector *v) +{ + asm volatile( + "vmemu(%0) = v0\n" + : + : "r"(v) + : "memory" + ); +} + +uint32_t get_num_contexts(void) +{ + const int EXT_CONTEXT_OFFSET = 13; + unsigned int cfgbase; + asm volatile("%0 = cfgbase\n" : "=r"(cfgbase)); + uint32_t *cfgtable = (uint32_t *)(cfgbase << 16); + return *(cfgtable + EXT_CONTEXT_OFFSET); +} + +uint32_t get_rev(void) +{ + uint32_t rev; + asm volatile("%0 = rev\n" : "=r"(rev)); + return rev; +} + +/* + * This test verifies that each new context is properly selected and is + * independent of the thread. + */ +int main() +{ + int num_contexts = get_num_contexts(); + printf("rev=v%x, HVX-contexts=%d\n", (int)(get_rev() & 0xff), num_contexts); + memset(&output[0], 0, 8 * sizeof(MMVector)); + + /* First set v0 on all the contexts. */ + for (int i = 0; i < num_contexts; i++) { + set_hvx_context(i); + setv0(i + 1); + } + + /* + * Now each context should have its own v0 value. Save it to memory. We + * check all possible SSR.XA values to make sure the "aliases" are + * implemented correctly. 
+ */ + for (int i = 0; i < 8; i++) { + set_hvx_context(i); + store_v0(&output[i]); + } + + + /* + * Set expected values: + * + * num contexts + * SSR.XA 2 4 6 8 + * 000 HVX Context 0 HVX Context 0 HVX Context 0 HVX Context 0 + * 001 HVX Context 1 HVX Context 1 HVX Context 1 HVX Context 1 + * 010 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 2 + * 011 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 3 + * 100 HVX Context 0 HVX Context 0 HVX Context 4 HVX Context 4 + * 101 HVX Context 1 HVX Context 1 HVX Context 5 HVX Context 5 + * 110 HVX Context 0 HVX Context 2 HVX Context 2 HVX Context 6 + * 111 HVX Context 1 HVX Context 3 HVX Context 3 HVX Context 7 + */ + for (int i = 0; i < 8; i++) { + int expected = (i % num_contexts) + 1; + /* Exception for num_contexts=6 */ + if (num_contexts == 6 && i >= 6) { + expected = (i - 6 + 2) + 1; + } + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].w[j] = expected; + } + } + + check_output_w(__LINE__, 8); + puts(err ? "FAIL" : "PASS"); + return !!err; +} diff --git a/tests/tcg/hexagon/system/standalone_vec.c b/tests/tcg/hexagon/system/standalone_vec.c new file mode 100644 index 0000000000000..eb1b2ef4830ce --- /dev/null +++ b/tests/tcg/hexagon/system/standalone_vec.c @@ -0,0 +1,1419 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <hexagon_types.h> +#include <hexagon_protos.h> + +#include "cfgtable.h" + +int err; + +#ifdef __linux__ +#define VTCM_SIZE_KB (2048) +#define VTCM_BYTES_PER_KB (1024) + +static char vtcm_buffer[VTCM_SIZE_KB * VTCM_BYTES_PER_KB] + __attribute__((aligned(0x10000))); +#endif + +/* define the number of rows/cols in a square matrix */ +#define MATRIX_SIZE 64 + +/* define the size of the scatter buffer */ +#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE) + +#define SCATTER16_BUF_SIZE (2 * SCATTER_BUFFER_SIZE) +#define SCATTER32_BUF_SIZE (4 * SCATTER_BUFFER_SIZE) + +#define GATHER16_BUF_SIZE (2 * MATRIX_SIZE) +#define GATHER32_BUF_SIZE (4 * MATRIX_SIZE) + +uintptr_t VTCM_BASE_ADDRESS; +uintptr_t VTCM_SCATTER16_ADDRESS; +uintptr_t VTCM_GATHER16_ADDRESS; +uintptr_t VTCM_SCATTER32_ADDRESS; +uintptr_t VTCM_GATHER32_ADDRESS; +uintptr_t VTCM_SCATTER16_32_ADDRESS; +uintptr_t VTCM_GATHER16_32_ADDRESS; + +/* the vtcm base address */ +unsigned char *vtcm_base; + +/* scatter gather 16 bit elements using 16 bit offsets */ +unsigned short *vscatter16; +unsigned short *vgather16; +unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_ref[MATRIX_SIZE]; + +/* scatter gather 32 bit elements using 32 bit offsets */ +unsigned int *vscatter32; +unsigned int *vgather32; +unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE]; +unsigned int vgather32_ref[MATRIX_SIZE]; + +/* scatter gather 16 bit elements using 32 bit offsets */ +unsigned short *vscatter16_32; +unsigned short *vgather16_32; +unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_32_ref[MATRIX_SIZE]; + + +/* declare the arrays of offsets */ +unsigned short half_offsets[MATRIX_SIZE]; +unsigned int word_offsets[MATRIX_SIZE]; + +/* declare the arrays of values */ +unsigned short half_values[MATRIX_SIZE]; +unsigned short half_acc_values[MATRIX_SIZE]; 
+unsigned short half_q_values[MATRIX_SIZE]; +unsigned int word_values[MATRIX_SIZE]; +unsigned int word_acc_values[MATRIX_SIZE]; +unsigned int word_q_values[MATRIX_SIZE]; + +/* declare the array of predicates */ +unsigned short half_predicates[MATRIX_SIZE]; +unsigned int word_predicates[MATRIX_SIZE]; + +/* make this big enough for all the intrinsics */ +unsigned int region_len = 4 * SCATTER_BUFFER_SIZE - 1; + +/* optionally add sync instructions */ +#define SYNC_VECTOR 1 + +/* optionally print cycle counts */ +#define PRINT_CYCLE_COUNTS 0 + +#if PRINT_CYCLE_COUNTS +unsigned long long start_cycles; +#define START_CYCLES start_cycles = hexagon_sim_read_pcycles(); +#define PRINT_CYCLES(x) printf(x, hexagon_sim_read_pcycles() - start_cycles); +#else +#define START_CYCLES +#define PRINT_CYCLES(x) +#endif + +/* define a scratch area for debug and prefill */ +#define SCRATCH_SIZE 0x8800 + +#define FILL_CHAR '.' + +/* fill the first SCRATCH_SIZE bytes of vtcm with FILL_CHAR */ +void prefill_vtcm_scratch(void) +{ + memset((void *)VTCM_BASE_ADDRESS, FILL_CHAR, SCRATCH_SIZE * sizeof(char)); +} + +/* print SCRATCH_SIZE half words from the vtcm base as characters */ +void print_vtcm_scratch_16(void) +{ + unsigned short *vtmp = (unsigned short *)VTCM_BASE_ADDRESS; + + printf("\n\nPrinting the vtcm scratch in half words"); + + for (int i = 0; i < SCRATCH_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtmp[i] >> j * 8) & 0xff)); + } + + printf(" "); + } +} + +/* print SCRATCH_SIZE words from the vtcm base as characters */ +void print_vtcm_scratch_32(void) +{ + unsigned int *vtmp = (unsigned int *)VTCM_BASE_ADDRESS; + + printf("\n\nPrinting the vtcm scratch in words"); + + for (int i = 0; i < SCRATCH_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtmp[i] >> j * 8) & 0xff)); + } + + printf(" "); + } +} + + +/* create byte offsets to be a diagonal of the matrix with 16 bit elements */ +void create_offsets_and_values_16(void) +{ + unsigned 
short half_element = 0; + unsigned short half_q_element = 0; + char letter = 'A'; + char q_letter = '@'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + half_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_q_element = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_q_element |= q_letter << j * 8; + } + + half_values[i] = half_element; + half_acc_values[i] = ((i % 10) << 8) + (i % 10); + half_q_values[i] = half_q_element; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + } +} + +/* create a predicate mask for the half word scatter */ +void create_preds_16() +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0; + } +} + + +/* create byte offsets to be a diagonal of the matrix with 32 bit elements */ +void create_offsets_and_values_32(void) +{ + unsigned int word_element = 0; + unsigned int word_q_element = 0; + char letter = 'A'; + char q_letter = '&'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (4 * MATRIX_SIZE + 4); + + word_element = 0; + word_q_element = 0; + for (int j = 0; j < 4; j++) { + word_element |= letter << j * 8; + word_q_element |= q_letter << j * 8; + } + + word_values[i] = word_element; + word_acc_values[i] = ((i % 10) << 8) + (i % 10); + word_q_values[i] = word_q_element; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + } +} + +/* create a predicate mask for the word scatter */ +void create_preds_32() +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ? 
~0 : 0; + } +} + + +void dump_buf(char *str, void *addr, int element_size, int byte_len) + +{ + unsigned short *sptr = addr; + unsigned int *ptr = addr; + + printf("\n\nBuffer: %s\n", str); + for (int i = 0; i < byte_len / element_size; ++ptr, ++sptr, ++i) { + if (i != 0 && (i % 16) == 0) { + printf("\n"); + } + if (element_size == 2) { + printf("%c ", *sptr); + } else if (element_size == 4) { + printf("%4.4x ", *ptr); + } + } +} + +/* + * create byte offsets to be a diagonal of the matrix with 16 bit elements and + * 32 bit offsets + */ +void create_offsets_and_values_16_32(void) +{ + unsigned int half_element = 0; + unsigned short half_q_element = 0; + char letter = 'D'; + char q_letter = '$'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_q_element = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_q_element |= q_letter << j * 8; + } + + half_values[i] = half_element; + half_acc_values[i] = ((i % 10) << 8) + (i % 10); + half_q_values[i] = half_q_element; + + letter++; + /* wrap back to 'D' after 'O' */ + if (letter == 'P') { + letter = 'D'; + } + } + + /* + * dump_buf("word_offsets", word_offsets, sizeof(*word_offsets), + * sizeof(word_offsets)); dump_buf("half_offsets", half_offsets, + * sizeof(*half_offsets), sizeof(half_offsets)); + */ +} + +void create_preds_16_32() +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? 
~0 : 0; + } +} + +#define SCATTER_RELEASE(ADDR) \ + asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(ADDR)); + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values; + + /* do the scatter */ + Q6_vscatter_RMVhV(VTCM_SCATTER16_ADDRESS, region_len, offsets, values); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16); + /* + * This dummy load from vscatter16 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16; +#endif + + PRINT_CYCLES("\nVector Scatter 16 cycles = %llu\n"); +} + +/* scatter-accumulate the 16 bit elements using intrinsics */ +void vector_scatter_acc_16(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_acc_values; + + /* do the scatter */ + Q6_vscatteracc_RMVhV(VTCM_SCATTER16_ADDRESS, region_len, offsets, values); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16); + /* + * This dummy load from vscatter16 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16; +#endif + + PRINT_CYCLES("\nVector Scatter Acc 16 cycles = %llu\n"); +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_q_16(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_q_values; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + /* do the scatter */ + Q6_vscatter_QRMVhV(preds, VTCM_SCATTER16_ADDRESS, region_len, offsets, + values); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16); + /* + * This dummy load from vscatter16 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16; +#endif + + PRINT_CYCLES("\nVector Scatter Q 16 cycles = %llu\n"); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2]; + + /* do the scatter */ + Q6_vscatter_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetslo, valueslo); + Q6_vscatter_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetshi, valueshi); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter32); + /* + * This dummy load from vscatter32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32; +#endif + + PRINT_CYCLES("\nVector Scatter 32 cycles = %llu\n"); +} + +/* scatter-acc the 32 bit elements using intrinsics */ +void vector_scatter_acc_32(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_acc_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_acc_values[MATRIX_SIZE / 2]; + + /* do the scatter */ + Q6_vscatteracc_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetslo, + valueslo); + Q6_vscatteracc_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetshi, + valueshi); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter32); + /* + * This dummy load from vscatter32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32; +#endif + + PRINT_CYCLES("\nVector Scatter Acc 32 cycles = %llu\n"); +} + +/* predicated scatter of the 32 bit elements using intrinsics */ +void vector_scatter_q_32(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_q_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_q_values[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predslo = Q6_Q_vand_VR(pred_reglo, ~0); + HVX_VectorPred predshi = Q6_Q_vand_VR(pred_reghi, ~0); + + /* do the scatter */ + Q6_vscatter_QRMVwV(predslo, VTCM_SCATTER32_ADDRESS, region_len, offsetslo, + valueslo); + Q6_vscatter_QRMVwV(predshi, VTCM_SCATTER32_ADDRESS, region_len, offsetshi, + valueshi); + +#if SYNC_VECTOR + /* 
do the sync operation */ + SCATTER_RELEASE(vscatter32); + /* + * This dummy load from vscatter32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32; +#endif + + PRINT_CYCLES("\nVector Scatter Q 32 cycles = %llu\n"); +} + +void print_vector(char *str, HVX_Vector *v) + +{ + unsigned char *ptr = (unsigned char *)v; + + printf("\n\nVector: %s\n", str); + for (int i = 0; i < sizeof(HVX_Vector) * 4; ++ptr, ++i) { + if (i != 0 && (i % 16) == 0) { + printf("\n"); + } + printf("%c ", *ptr); + } + printf("\n"); +} + +void print_vectorpair(char *str, HVX_VectorPair *v) + +{ + unsigned char *ptr = (unsigned char *)v; + + printf("\n\nVectorPair: %s\n", str); + for (int i = 0; i < sizeof(HVX_VectorPair); ++ptr, ++i) { + if (i != 0 && (i % 16) == 0) { + printf("\n"); + } + printf("%c ", *ptr); + } + printf("\n"); +} + +/* scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32(void) +{ + START_CYCLES; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */ + + /* these values need to be shuffled for the RMWwV scatter */ + HVX_Vector values = *(HVX_Vector *)half_values; + values = Q6_Vh_vshuff_Vh(values); + /* print_vector("values", (HVX_Vector *)&values); */ + + /* do the scatter */ + Q6_vscatter_RMWwV(VTCM_SCATTER16_32_ADDRESS, region_len, offsets, values); + /* print_vector("scatter16_32_address", (HVX_Vector */ + /* *)VTCM_SCATTER16_32_ADDRESS); */ + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16_32); + /* + * This dummy load from vscatter16_32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32; +#endif + + PRINT_CYCLES("\nVector Scatter 16_32 cycles = %llu\n"); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_acc_16_32(void) +{ + START_CYCLES; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */ + + /* these values need to be shuffled for the RMWwV scatter */ + HVX_Vector values = *(HVX_Vector *)half_acc_values; + values = Q6_Vh_vshuff_Vh(values); + /* print_vector("values", (HVX_Vector *)&values); */ + + /* do the scatter */ + Q6_vscatteracc_RMWwV(VTCM_SCATTER16_32_ADDRESS, region_len, offsets, + values); + /* print_vector("scatter16_32_address", (HVX_Vector */ + /* *)VTCM_SCATTER16_32_ADDRESS); */ + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16_32); + /* + * This dummy load from vscatter16_32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32; +#endif + + PRINT_CYCLES("\nVector Scatter Acc 16_32 cycles = %llu\n"); +} + +/* predicated scatter of the 16 bit elements with 32 bit offsets */ +void vector_scatter_q_16_32(void) +{ + START_CYCLES; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */ + + /* these values need to be shuffled for the RMWwV scatter */ + HVX_Vector values = *(HVX_Vector *)half_q_values; + values = Q6_Vh_vshuff_Vh(values); + /* print_vector("values", (HVX_Vector *)&values); */ + + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = Q6_Vh_vshuff_Vh(pred_reg); + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + /* do the predicated scatter */ + Q6_vscatter_QRMWwV(preds, VTCM_SCATTER16_32_ADDRESS, region_len, offsets, + values); + /* print_vector("scatter16_32_address", (HVX_Vector */ + /* *)VTCM_SCATTER16_32_ADDRESS); */ + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16_32); + /* + * This dummy load from vscatter16_32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32; +#endif + + PRINT_CYCLES("\nVector Scatter Q 16_32 cycles = %llu\n"); +} + + +/* gather the elements from the scatter16 buffer */ +void vector_gather_16(void) +{ + START_CYCLES; + + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_ADDRESS; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + + /* do the gather to the gather16 buffer */ + Q6_vgather_ARMVh(vgather, VTCM_SCATTER16_ADDRESS, region_len, offsets); + + +#if SYNC_VECTOR + /* This dummy read of vgather will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgather; +#endif + + PRINT_CYCLES("\nVector Gather 16 cycles = %llu\n"); +} + +static unsigned short gather_q_16_init(void) +{ + char letter = '?'; + return letter | (letter << 8); +} + +void vector_gather_q_16(void) +{ + START_CYCLES; + + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_ADDRESS; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + *vgather = Q6_Vh_vsplat_R(gather_q_16_init()); + /* do the gather to the gather16 buffer */ + Q6_vgather_AQRMVh(vgather, preds, VTCM_SCATTER16_ADDRESS, region_len, + offsets); + + +#if SYNC_VECTOR + /* This dummy read of vgather will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgather; +#endif + + PRINT_CYCLES("\nVector Gather Q 16 cycles = %llu\n"); +} + + +/* gather the elements from the scatter32 buffer */ +void vector_gather_32(void) +{ + START_CYCLES; + + HVX_Vector *vgatherlo = (HVX_Vector *)VTCM_GATHER32_ADDRESS; + HVX_Vector *vgatherhi = + (HVX_Vector *)(VTCM_GATHER32_ADDRESS + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + + /* do the gather to vgather */ + Q6_vgather_ARMVw(vgatherlo, VTCM_SCATTER32_ADDRESS, region_len, offsetslo); + Q6_vgather_ARMVw(vgatherhi, 
VTCM_SCATTER32_ADDRESS, region_len, offsetshi); + +#if SYNC_VECTOR + /* This dummy read of vgatherhi will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgatherhi; +#endif + + PRINT_CYCLES("\nVector Gather 32 cycles = %llu\n"); +} + +static unsigned int gather_q_32_init(void) +{ + char letter = '?'; + return letter | (letter << 8) | (letter << 16) | (letter << 24); +} + +void vector_gather_q_32(void) +{ + START_CYCLES; + + HVX_Vector *vgatherlo = (HVX_Vector *)VTCM_GATHER32_ADDRESS; + HVX_Vector *vgatherhi = + (HVX_Vector *)(VTCM_GATHER32_ADDRESS + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_VectorPred predslo = Q6_Q_vand_VR(pred_reglo, ~0); + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predshi = Q6_Q_vand_VR(pred_reghi, ~0); + + *vgatherlo = Q6_Vh_vsplat_R(gather_q_32_init()); + *vgatherhi = Q6_Vh_vsplat_R(gather_q_32_init()); + /* do the gather to vgather */ + Q6_vgather_AQRMVw(vgatherlo, predslo, VTCM_SCATTER32_ADDRESS, region_len, + offsetslo); + Q6_vgather_AQRMVw(vgatherhi, predshi, VTCM_SCATTER32_ADDRESS, region_len, + offsetshi); + +#if SYNC_VECTOR + /* This dummy read of vgatherhi will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgatherhi; +#endif + + PRINT_CYCLES("\nVector Gather Q 32 cycles = %llu\n"); +} + +/* gather the elements from the scatter16_32 buffer */ +void vector_gather_16_32(void) +{ + START_CYCLES; + + /* get the vtcm address to gather from */ + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_32_ADDRESS; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + + /* do the gather to vgather */ + Q6_vgather_ARMWw(vgather, VTCM_SCATTER16_32_ADDRESS, region_len, offsets); + + /* the read of gather will stall until completion */ + 
volatile HVX_Vector values = *(HVX_Vector *)vgather; + + /* deal the elements to get the order back */ + values = Q6_Vh_vdeal_Vh(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; + + + PRINT_CYCLES("\nVector Gather 16_32 cycles = %llu\n"); +} + +void vector_gather_q_16_32(void) +{ + START_CYCLES; + + /* get the vtcm address to gather from */ + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_32_ADDRESS; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = Q6_Vh_vshuff_Vh(pred_reg); + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + *vgather = Q6_Vh_vsplat_R(gather_q_16_init()); + /* do the gather to vgather */ + Q6_vgather_AQRMWw(vgather, preds, VTCM_SCATTER16_32_ADDRESS, region_len, + offsets); + + /* the read of gather will stall until completion */ + volatile HVX_Vector values = *(HVX_Vector *)vgather; + + /* deal the elements to get the order back */ + values = Q6_Vh_vdeal_Vh(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; + + + PRINT_CYCLES("\nVector Gather Q 16_32 cycles = %llu\n"); +} + + +static void check_buffer(const char *name, void *c, void *r, size_t size) +{ + char *check = (char *)c; + char *ref = (char *)r; + /* printf("check buffer %s 0x%x, 0x%x, %d\n", name, check, ref, size); */ + for (int i = 0; i < size; i++) { + if (check[i] != ref[i]) { + printf("Error %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i, check[i], + check[i], ref[i], ref[i]); + err++; + } + } +} + + +/* + * These scalar functions are the C equivalents of the vector functions that + * use HVX + */ + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16(unsigned short *vscatter16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] = half_values[i]; + } + + PRINT_CYCLES("\nScalar Scatter 16 cycles = %llu\n"); +} + +void 
check_scatter_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + check_buffer("check_scatter_16", vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_acc_16(unsigned short *vscatter16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] += half_acc_values[i]; + } + + PRINT_CYCLES("\nScalar Scatter Acc 16 cycles = %llu\n"); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_q_16(unsigned short *vscatter16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16[half_offsets[i] / 2] = half_q_values[i]; + } + } + + PRINT_CYCLES("\nScalar Scatter Q 16 cycles = %llu\n"); +} + + +void check_scatter_acc_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_acc_16(vscatter16_ref); + check_buffer("check_scatter_acc_16", vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void check_scatter_q_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_acc_16(vscatter16_ref); + scalar_scatter_q_16(vscatter16_ref); + check_buffer("check_scatter_q_16", vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32(unsigned int *vscatter32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] = word_values[i]; + } + + PRINT_CYCLES("\n\nScalar Scatter 32 cycles = %llu\n"); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_acc_32(unsigned int *vscatter32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 
4] += word_acc_values[i]; + } + + PRINT_CYCLES("\nScalar Scatter Acc 32 cycles = %llu\n"); +} + +/* predicated scatter of the 32 bit elements using C */ +void scalar_scatter_q_32(unsigned int *vscatter32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; i++) { + if (word_predicates[i]) { + vscatter32[word_offsets[i] / 4] = word_q_values[i]; + } + } + + PRINT_CYCLES("\nScalar Scatter Q 32 cycles = %llu\n"); +} + +void check_scatter_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + check_buffer("check_scatter_32", vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +void check_scatter_acc_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_acc_32(vscatter32_ref); + check_buffer("check_scatter_acc_32", vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +void check_scatter_q_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_acc_32(vscatter32_ref); + scalar_scatter_q_32(vscatter32_ref); + check_buffer("check_scatter_q_32", vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 16 bit elements with 32 bit offsets using C */ +void scalar_scatter_16_32(unsigned short *vscatter16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] = half_values[i]; + } + + PRINT_CYCLES("\n\nScalar Scatter 16_32 cycles = %llu\n"); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using C */ +void scalar_scatteracc_16_32(unsigned short *vscatter16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] += half_acc_values[i]; + } + + PRINT_CYCLES("\n\nScalar Scatter Acc 16_32 cycles = %llu\n"); +} + +void scalar_scatter_q_16_32(unsigned short *vscatter16_32) +{ + 
START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16_32[word_offsets[i] / 2] = half_q_values[i]; + } + } + + PRINT_CYCLES("\nScalar Scatter Q 16_32 cycles = %llu\n"); +} + +void check_scatter_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + check_buffer("check_scatter_16_32", vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void check_scatter_acc_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatteracc_16_32(vscatter16_32_ref); + check_buffer("check_scatter_acc_16_32", vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void check_scatter_q_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatteracc_16_32(vscatter16_32_ref); + scalar_scatter_q_16_32(vscatter16_32_ref); + check_buffer("check_scatter_q_16_32", vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16(unsigned short *vgather16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16[i] = vscatter16[half_offsets[i] / 2]; + } + + PRINT_CYCLES("\n\nScalar Gather 16 cycles = %llu\n"); +} + +void scalar_gather_q_16(unsigned short *vgather16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16[i] = vscatter16[half_offsets[i] / 2]; + } + } + + PRINT_CYCLES("\n\nScalar Gather Q 16 cycles = %llu\n"); +} + +void check_gather_16() +{ + memset(vgather16_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16(vgather16_ref); + check_buffer("check_gather_16", vgather16, vgather16_ref, + MATRIX_SIZE * 
sizeof(unsigned short)); +} + +void check_gather_q_16() +{ + memset(vgather16_ref, gather_q_16_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_q_16(vgather16_ref); + check_buffer("check_gather_q_16", vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_32(unsigned int *vgather32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather32[i] = vscatter32[word_offsets[i] / 4]; + } + + PRINT_CYCLES("\n\nScalar Gather 32 cycles = %llu\n"); +} + +void scalar_gather_q_32(unsigned int *vgather32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (word_predicates[i]) { + vgather32[i] = vscatter32[word_offsets[i] / 4]; + } + } + + PRINT_CYCLES("\n\nScalar Gather Q 32 cycles = %llu\n"); +} + + +void check_gather_32(void) +{ + memset(vgather32_ref, 0, MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32(vgather32_ref); + check_buffer("check_gather_32", vgather32, vgather32_ref, + MATRIX_SIZE * sizeof(unsigned int)); +} + +void check_gather_q_32(void) +{ + memset(vgather32_ref, gather_q_32_init(), + MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_q_32(vgather32_ref); + check_buffer("check_gather_q_32", vgather32, vgather32_ref, + MATRIX_SIZE * sizeof(unsigned int)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16_32(unsigned short *vgather16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16_32[i] = vscatter16_32[word_offsets[i] / 2]; + } + + PRINT_CYCLES("\n\nScalar Gather 16_32 cycles = %llu\n"); +} + +void scalar_gather_q_16_32(unsigned short *vgather16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16_32[i] = vscatter16_32[word_offsets[i] / 2]; + } + } + + PRINT_CYCLES("\n\nScalar Gather Q 16_32 cycles = %llu\n"); +} + +void check_gather_16_32(void) +{ + memset(vgather16_32_ref, 0, MATRIX_SIZE * 
sizeof(unsigned short)); + scalar_gather_16_32(vgather16_32_ref); + check_buffer("check_gather_16_32", vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void check_gather_q_16_32(void) +{ + memset(vgather16_32_ref, gather_q_16_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_q_16_32(vgather16_32_ref); + check_buffer("check_gather_q_16_32", vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* These functions print the buffers to the display */ + +/* print scatter16 buffer */ +void print_scatter16_buffer(void) +{ +#if PRINT_DATA + /* + * printf("\n\nPrinting the 16 bit scatter buffer at 0x%08x", + * VTCM_SCATTER16_ADDRESS); + */ + printf("\n\nPrinting the 16 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vscatter16[i] >> j * 8) & 0xff)); + } + + printf(" "); + } + printf("\n"); +#endif +} + +/* print the gather 16 buffer */ +void print_gather_result_16(void) +{ +#if PRINT_DATA + /* + * printf("\n\nPrinting the 16 bit gather result at 0x%08x\n", + * VTCM_GATHER16_ADDRESS); + */ + printf("\n\nPrinting the 16 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vgather16[i] >> j * 8) & 0xff)); + } + + printf(" "); + } + printf("\n"); +#endif +} + +/* print the scatter32 buffer */ +void print_scatter32_buffer(void) +{ +#if PRINT_DATA + /* + * printf("\n\nPrinting the 32 bit scatter buffer at 0x%08x", + * VTCM_SCATTER32_ADDRESS); + */ + printf("\n\nPrinting the 32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vscatter32[i] >> j * 8) & 0xff)); + } + + printf(" "); + } + printf("\n"); +#endif +} + + +/* print the gather 32 buffer */ +void 
print_gather_result_32(void) +{ +#if PRINT_DATA + /* + * printf("\n\nPrinting the 32 bit gather result at 0x%08x\n", + * VTCM_GATHER32_ADDRESS); + */ + printf("\n\nPrinting the 32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vgather32[i] >> j * 8) & 0xff)); + } + + printf(" "); + } + printf("\n"); +#endif +} + +/* print the scatter16_32 buffer */ +void print_scatter16_32_buffer(void) +{ +#if PRINT_DATA + /* + * printf("\n\nPrinting the 16_32 bit scatter buffer at 0x%08x", + * VTCM_SCATTER16_32_ADDRESS); + */ + printf("\n\nPrinting the 16_32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + + for (int j = 0; j < 2; j++) { + printf("%c", (unsigned char)((vscatter16_32[i] >> j * 8) & 0xff)); + } + + printf(" "); + } + printf("\n"); +#endif +} + +/* print the gather 16_32 buffer */ +void print_gather_result_16_32(void) +{ +#if PRINT_DATA + /* + * printf("\n\nPrinting the 16_32 bit gather result at 0x%08x\n", + * VTCM_GATHER16_32_ADDRESS); + */ + printf("\n\nPrinting the 16_32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", (unsigned char)((vgather16_32[i] >> j * 8) & 0xff)); + } + + printf(" "); + } + printf("\n"); +#endif +} + +/* + * set up the tcm address translation + * Note: This method is only for the standalone environment + * SDK users should use the "VTCM Manager" to use VTCM + */ +void setup_tcm(void) +{ + VTCM_BASE_ADDRESS = get_vtcm_base(); + + uint64_t pa = VTCM_BASE_ADDRESS; + void *va = (void *)VTCM_BASE_ADDRESS; + + VTCM_SCATTER16_ADDRESS = VTCM_BASE_ADDRESS; + VTCM_GATHER16_ADDRESS = VTCM_BASE_ADDRESS + SCATTER16_BUF_SIZE; + VTCM_SCATTER32_ADDRESS = VTCM_GATHER16_ADDRESS + GATHER16_BUF_SIZE; + VTCM_GATHER32_ADDRESS = VTCM_SCATTER32_ADDRESS + SCATTER32_BUF_SIZE; + VTCM_SCATTER16_32_ADDRESS = VTCM_GATHER32_ADDRESS + GATHER32_BUF_SIZE; + 
VTCM_GATHER16_32_ADDRESS = VTCM_SCATTER16_32_ADDRESS + SCATTER16_BUF_SIZE;
+
+    /* the vtcm base address */
+    vtcm_base = (unsigned char *)VTCM_BASE_ADDRESS;
+
+    /* scatter gather 16 bit elements using 16 bit offsets */
+    vscatter16 = (unsigned short *)VTCM_SCATTER16_ADDRESS;
+    vgather16 = (unsigned short *)VTCM_GATHER16_ADDRESS;
+
+    /* scatter gather 32 bit elements using 32 bit offsets */
+    vscatter32 = (unsigned int *)VTCM_SCATTER32_ADDRESS;
+    vgather32 = (unsigned int *)VTCM_GATHER32_ADDRESS;
+
+    /* scatter gather 16 bit elements using 32 bit offsets */
+    vscatter16_32 = (unsigned short *)VTCM_SCATTER16_32_ADDRESS;
+    vgather16_32 = (unsigned short *)VTCM_GATHER16_32_ADDRESS;
+}
+
+void inst_test(void)
+{
+    /* Should NOT throw an error when paranoid-commit-state turned on */
+    uint32_t R = (uint32_t)(uintptr_t)vtcm_base; /* was uninitialized */
+    asm volatile("release(%0):at\n\t" : : "r"(R));
+}
+
+/* Run each scatter/gather variant and check it against its scalar model. */
+int main(void)
+{
+    setup_tcm();
+    prefill_vtcm_scratch();
+
+    /* 16 bit elements with 16 bit offsets */
+    create_offsets_and_values_16();
+    create_preds_16();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_16(vscatter16);
+#endif
+    vector_scatter_16();
+    print_scatter16_buffer();
+    check_scatter_16();
+
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_16(vgather16);
+#endif
+    vector_gather_16();
+    print_gather_result_16();
+    check_gather_16();
+
+    vector_gather_q_16();
+    print_gather_result_16();
+    check_gather_q_16();
+
+    vector_scatter_acc_16();
+    print_scatter16_buffer();
+    check_scatter_acc_16();
+
+    vector_scatter_q_16();
+    print_scatter16_buffer();
+    check_scatter_q_16();
+
+    /* 32 bit elements with 32 bit offsets */
+    create_offsets_and_values_32();
+    create_preds_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_32(vscatter32);
+#endif
+
+    vector_scatter_32();
+
+    print_scatter32_buffer();
+    check_scatter_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_32(vgather32);
+#endif
+
+    vector_gather_32();
+
+    print_gather_result_32();
+    check_gather_32();
+
+    vector_gather_q_32();
+    print_gather_result_32();
+    check_gather_q_32();
+
+    vector_scatter_acc_32();
+    print_scatter32_buffer();
+    check_scatter_acc_32();
+
+    vector_scatter_q_32();
+    print_scatter32_buffer();
+    check_scatter_q_32();
+
+    /* 16 bit elements with 32 bit offsets */
+    create_offsets_and_values_16_32();
+    create_preds_16_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_16_32(vscatter16_32);
+#endif
+    vector_scatter_16_32();
+
+    print_scatter16_32_buffer();
+    check_scatter_16_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_16_32(vgather16_32);
+#endif
+
+    vector_gather_16_32();
+
+    print_gather_result_16_32();
+    check_gather_16_32();
+
+    vector_gather_q_16_32();
+    print_gather_result_16_32();
+    check_gather_q_16_32();
+
+    vector_scatter_acc_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_acc_16_32();
+
+    vector_scatter_q_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_q_16_32();
+
+    inst_test();
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}

From e68daefb1ba44e1d6f986dec36250a4430af04fb Mon Sep 17 00:00:00 2001
From: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
Date: Wed, 5 Feb 2025 09:37:24 -0800
Subject: [PATCH 113/126] tests/tcg/hexagon: add l2vic tests

Signed-off-by: Matheus Tavares Bernardino <quic_mathbern@quicinc.com>
---
 tests/tcg/hexagon/Makefile.softmmu-target |  6 ++
 tests/tcg/hexagon/system/fastl2vic.c      | 73 ++++++++++++++++++
 tests/tcg/hexagon/system/int_range.c      | 94 +++++++++++++++++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 tests/tcg/hexagon/system/fastl2vic.c
 create mode 100644 tests/tcg/hexagon/system/int_range.c

diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target
index 3f070bfea91b2..0b12f7485b62f 100644
--- a/tests/tcg/hexagon/Makefile.softmmu-target
+++ b/tests/tcg/hexagon/Makefile.softmmu-target
@@ -45,6 +45,8 @@ TESTS_BUILT_WITH_DEFAULT_RULES = \
     vid_reg \
     hvx-multi \
     standalone_vec \
+    fastl2vic \
+    int_range \
     $()
 
 TESTS += \
@@ -88,6 +90,10 @@ standalone_vec.o: standalone_vec.c cfgtable.h
 standalone_vec: 
standalone_vec.o
 badva.o: badva.c ../hex_test.h crt0/hexagon_standalone.h
 badva: badva.o
+fastl2vic.o: fastl2vic.c cfgtable.h
+fastl2vic: fastl2vic.o
+int_range.o: int_range.c cfgtable.h
+int_range: int_range.o
 
 ############# Custom build options
 
diff --git a/tests/tcg/hexagon/system/fastl2vic.c b/tests/tcg/hexagon/system/fastl2vic.c
new file mode 100644
index 0000000000000..a115ae73f7990
--- /dev/null
+++ b/tests/tcg/hexagon/system/fastl2vic.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test the fastl2vic interface.
+ *
+ * hexagon-sim a.out --subsystem_base=0xfab0 --cosim_file q6ss.cfg
+ */
+
+#include "crt0/hexagon_standalone.h"
+
+#include "cfgtable.h"
+
+#define CSR_BASE 0xfab00000
+#define L2VIC_BASE ((CSR_BASE) + 0x10000)
+
+/*
+ * Word holding interrupt n's bit in the enable (+0x100) and enable-set
+ * (+0x200) banks.  The pointers are volatile because this is device
+ * memory: each access has to reach the L2VIC, so the compiler must not
+ * cache or elide it.
+ */
+#define L2VIC_INT_ENABLE(b, n) \
+        ((volatile unsigned int *)((b) + 0x100 + 4 * ((n) / 32)))
+#define L2VIC_INT_ENABLE_SET(b, n) \
+        ((volatile unsigned int *)((b) + 0x200 + 4 * ((n) / 32)))
+
+/*
+ * Enable interrupt 66 through the L2VIC enable-set register and 67/68
+ * through the fast alias, then disable all three through the alias,
+ * checking the enable word after each step.  Returns 0 on success, else
+ * the source line of the last failed check.
+ */
+int main(void)
+{
+    int ret = 0;
+    unsigned int irq_bit;
+
+    /* setup the fastl2vic interface and setup an indirect mapping */
+    volatile uint32_t *A = (volatile uint32_t *)0x888e0000;
+    add_translation_extended(3, (void *)A, GET_FASTL2VIC_BASE(),
+                             16, 7, 4, 0, 0, 3);
+
+    uint32_t l2vic_base = GET_SUBSYSTEM_BASE() + 0x10000;
+
+    /* set and verify an interrupt using the L2VIC_BASE */
+    irq_bit = (1 << (66 % 32));
+    *L2VIC_INT_ENABLE_SET(l2vic_base, 66) = irq_bit;
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x4) {
+        ret = __LINE__;
+    }
+
+    /* set and verify an interrupt using the FASTL2VIC interface */
+    *A = 68;
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x14) {
+        ret = __LINE__;
+    }
+    *A = 67;
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x1C) {
+        ret = __LINE__;
+    }
+
+
+    /* Now clear the lines: the checks expect bit 16 to request a clear */
+    *A = ((1 << 16) | 68);
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0xC) {
+        ret = __LINE__;
+    }
+    *A = ((1 << 16) | 66);
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x8) {
+        ret = __LINE__;
+    }
+    *A = ((1 << 16) | 67);
+    if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x0) {
+        ret = __LINE__;
+    }
+
+    if (ret) {
+        printf("%s: FAIL, last failure near line %d\n", __FILE__, ret);
+    } else {
+        printf("PASS\n");
+    }
+    return ret;
+}
diff --git a/tests/tcg/hexagon/system/int_range.c b/tests/tcg/hexagon/system/int_range.c
new file mode 100644
index 0000000000000..688355886362e
--- /dev/null
+++ b/tests/tcg/hexagon/system/int_range.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test the range of the l2vic interface.
+ */
+
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "cfgtable.h"
+
+#define L2VIC_INT_ENABLE(b, n) \
+        ((volatile unsigned int *)((b) + 0x100 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_ENABLE_SET(b, n) \
+        ((volatile unsigned int *)((b) + 0x200 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_ENABLE_CLEAR(b, n) \
+        ((volatile unsigned int *)((b) + 0x180 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_SOFT_INT_SET(b, n) \
+        ((volatile unsigned int *)((b) + 0x480 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_TYPE(b, n) \
+        ((volatile unsigned int *)((b) + 0x280 + 4 * (n / 32))) /* device mem */
+
+volatile int pass; /* must use volatile */
+int g_irq;
+volatile uint32_t g_l2vic_base; /* must use volatile */
+
+
+/*
+ * When complete the irqlog will contain the value of the vid when the
+ * handler was active.
+ */
+#define INTMAX 1024
+#define LEFT_SET 666
+/* Enable IRQs 1..INTMAX-1, clear all but LEFT_SET, check only it remains. */
+int main(void)
+{
+    unsigned int irq_bit;
+    unsigned int left_set = 0;
+    int ret = 0;
+
+    /* locate the L2VIC register block inside the subsystem region */
+    g_l2vic_base = GET_SUBSYSTEM_BASE() + 0x10000;
+
+    /* Setup interrupts */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        irq_bit = (1u << (irq % 32)); /* 1u: shifting int 1 by 31 is UB */
+        *L2VIC_INT_ENABLE(g_l2vic_base, irq) |= irq_bit;
+    }
+
+    /* Read them all back and check */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        irq_bit = (1u << (irq % 32));
+        if ((*L2VIC_INT_ENABLE(g_l2vic_base, irq) & irq_bit) != irq_bit) {
+            printf("%d: ERROR: irq: %d: 0x%x\n", __LINE__, irq,
+                   *L2VIC_INT_ENABLE(g_l2vic_base, irq));
+            ret = 1;
+        }
+    }
+    /* Clear them all except LEFT_SET (int 1 is cleared as well) */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        if (irq == LEFT_SET) {
+            continue;
+        }
+        irq_bit = (1u << (irq % 32));
+        *L2VIC_INT_ENABLE_CLEAR(g_l2vic_base, irq) |= irq_bit;
+    }
+
+    /* make sure just LEFT_SET is set */
+    for (int irq = 0; irq < INTMAX; irq++) {
+        irq_bit = (1u << (irq % 32));
+        if (*L2VIC_INT_ENABLE(g_l2vic_base, irq) & irq_bit) {
+            if (irq != LEFT_SET) {
+                printf("%d: ERROR: irq: %d: 0x%x\n", __LINE__, irq,
+                       *L2VIC_INT_ENABLE(g_l2vic_base, irq));
+                ret = 1;
+            } else {
+                left_set = irq;
+            }
+        }
+    }
+    /* losing LEFT_SET is a failure too; PASS only when nothing failed */
+    ret |= (left_set != LEFT_SET);
+    printf("%s\n", ret ? "FAIL" : "PASS");
+    return ret;
+}

From 4314811d8ffe6b5d1c1e96b0b190b5190dc04f35 Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Mon, 23 Sep 2024 21:37:36 -0700
Subject: [PATCH 114/126] target/hexagon: add utimer reg impl

Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 target/hexagon/cpu_helper.c       |  4 ++
 target/hexagon/genptr.c           |  9 +++++
 target/hexagon/helper.h           |  2 +
 target/hexagon/op_helper.c        | 65 +++++++++++++++++++++++++++++++
 tests/tcg/hexagon/Makefile.target |  2 +
 tests/tcg/hexagon/reg_mut.c       |  6 +--
 tests/tcg/hexagon/utimer.c        | 50 ++++++++++++++++++++++++
 7 files changed, 135 insertions(+), 3 deletions(-)
 create mode 100644 
tests/tcg/hexagon/utimer.c diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c index d9e19a0491433..5a651de0514f2 100644 --- a/target/hexagon/cpu_helper.c +++ b/target/hexagon/cpu_helper.c @@ -175,6 +175,8 @@ uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg) return reg < HEX_SREG_GLB_START ? env->t_sreg[reg] : env->g_sreg[reg]; } +#endif + uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env) { uint64_t cycles = 0; @@ -224,6 +226,8 @@ void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles) } } +#ifndef CONFIG_USER_ONLY + static void set_wait_mode(CPUHexagonState *env) { g_assert(bql_locked()); diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index f38968271b172..1dde04529bbe6 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -346,6 +346,11 @@ static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num, } else if (reg_num == HEX_REG_QEMU_HVX_CNT) { tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); + } else if ((reg_num == HEX_REG_PKTCNTLO) + || (reg_num == HEX_REG_PKTCNTHI) + || (reg_num == HEX_REG_UTIMERLO) + || (reg_num == HEX_REG_UTIMERHI)) { + gen_helper_creg_read(dest, tcg_env, tcg_constant_tl(reg_num)); } else { tcg_gen_mov_tl(dest, hex_gpr[reg_num]); } @@ -374,6 +379,10 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num, tcg_gen_addi_tl(hvx_cnt, hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); tcg_gen_concat_i32_i64(dest, hvx_cnt, hex_gpr[reg_num + 1]); + } else if ((reg_num == HEX_REG_PKTCNTLO) + || (reg_num == HEX_REG_UTIMERLO) + || (reg_num == HEX_REG_UPCYCLELO)) { + gen_helper_creg_read_pair(dest, tcg_env, tcg_constant_i32(reg_num)); } else { tcg_gen_concat_i32_i64(dest, hex_gpr[reg_num], diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index 5bcb2f48097cf..b381e0e116b36 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -108,6 +108,8 @@ 
DEF_HELPER_2(probe_pkt_scalar_store_s0, void, env, int)
 DEF_HELPER_2(probe_hvx_stores, void, env, int)
 DEF_HELPER_2(probe_pkt_scalar_hvx_stores, void, env, int)
 
+DEF_HELPER_2(creg_read, i32, env, i32)
+DEF_HELPER_2(creg_read_pair, i64, env, i32)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(swi, void, env, i32)
 DEF_HELPER_2(cswi, void, env, i32)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 8eacb3b041156..7a83e8975031b 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -18,6 +18,7 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
+#include "qemu/timer.h"
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/helper-proto.h"
@@ -1775,6 +1776,63 @@ static void hexagon_read_timer(CPUHexagonState *env, uint32_t *low,
     cpu_physical_memory_read(high_addr, high, sizeof(*high));
 }
 
+/* True when SSR[CE] is set; UPCYCLE reads return 0 while it is clear. */
+static inline bool ssr_ce_enabled(CPUHexagonState *env)
+{
+    target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    return GET_SSR_FIELD(SSR_CE, ssr);
+}
+
+/*
+ * Read one 32-bit control register whose value is computed on demand:
+ * the UPCYCLE halves (gated by SSR[CE]) and the UTIMER halves.  Any other
+ * register number falls back to the value stored in env->gpr[].
+ */
+static uint32_t creg_read(CPUHexagonState *env, uint32_t reg)
+{
+    uint32_t low, high;
+    switch (reg) {
+    case HEX_REG_UPCYCLELO:
+        return ssr_ce_enabled(env) ? hexagon_get_sys_pcycle_count_low(env) : 0;
+    case HEX_REG_UPCYCLEHI:
+        return ssr_ce_enabled(env) ? hexagon_get_sys_pcycle_count_high(env) : 0;
+    case HEX_REG_UTIMERLO:
+        hexagon_read_timer(env, &low, &high);
+        return low;
+    case HEX_REG_UTIMERHI:
+        hexagon_read_timer(env, &low, &high);
+        return high;
+    default:
+        return env->gpr[reg];
+    }
+}
+
+uint32_t HELPER(creg_read)(CPUHexagonState *env, uint32_t reg)
+{
+    return creg_read(env, reg);
+}
+
+/*
+ * Read the register pair (reg, reg + 1) as one 64-bit value, with reg in
+ * bits 31:0 and reg + 1 in bits 63:32.
+ */
+uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg)
+{
+    if (reg == HEX_REG_UTIMERLO) {
+        /*
+         * Take a single timer sample.  Going through creg_read() twice
+         * would read the timer twice, and a carry out of the low word
+         * between the two reads would return mismatched halves.
+         */
+        uint32_t low, high;
+        hexagon_read_timer(env, &low, &high);
+        return ((uint64_t)high << 32) | low;
+    }
+    return (uint64_t)creg_read(env, reg) |
+           (((uint64_t)creg_read(env, reg + 1)) << 32);
+}
+
+
 static inline QEMU_ALWAYS_INLINE void sreg_write(CPUHexagonState *env,
                                                  uint32_t reg,
                                                  uint32_t val)
@@ -1939,6 +1997,33 @@ void HELPER(pending_interrupt)(CPUHexagonState *env)
 }
 #endif
 
+#ifdef CONFIG_USER_ONLY
+uint32_t HELPER(creg_read)(CPUHexagonState *env, uint32_t reg)
+{
+    /* These are handled directly by gen_read_ctrl_reg(). */
+    g_assert(reg != HEX_REG_UPCYCLELO && reg != HEX_REG_UPCYCLEHI);
+
+    if (reg == HEX_REG_UTIMERHI) {
+        return cpu_get_host_ticks() >> 32;
+    } else if (reg == HEX_REG_UTIMERLO) {
+        return extract32(cpu_get_host_ticks(), 0, 32);
+    }
+    return 0;
+}
+
+uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg)
+{
+    if (reg == HEX_REG_UPCYCLELO) {
+        /* Pretend SSR[CE] is always set. */
+        return hexagon_get_sys_pcycle_count(env);
+    }
+    if (reg == HEX_REG_UTIMERLO) {
+        return cpu_get_host_ticks();
+    }
+    return 0;
+}
+#endif
+
 /* These macros can be referenced in the generated helper functions */
 #define warn(...) /* Nothing */
 
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index e5182c01d8a0c..44dd927b59372 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -52,6 +52,7 @@ HEX_TESTS += hvx_misc
 HEX_TESTS += hvx_histogram
 HEX_TESTS += invalid-slots
 HEX_TESTS += unaligned_pc
+HEX_TESTS += utimer
 
 run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \
 	test $$? 
-eq 1 && grep -q "exception $(strip $1)" $2.stderr) @@ -109,6 +110,7 @@ preg_alias: preg_alias.c hex_test.h read_write_overlap: read_write_overlap.c hex_test.h reg_mut: reg_mut.c hex_test.h unaligned_pc: unaligned_pc.c +utimer: utimer.c hex_test.h # This test has to be compiled for the -mv67t target usr: usr.c hex_test.h diff --git a/tests/tcg/hexagon/reg_mut.c b/tests/tcg/hexagon/reg_mut.c index c5a39e55100da..45db9ae5cd157 100644 --- a/tests/tcg/hexagon/reg_mut.c +++ b/tests/tcg/hexagon/reg_mut.c @@ -77,10 +77,10 @@ static inline void write_control_registers(void) check32(result, 0x00000000); WRITE_REG_NOCLOBBER(result, "utimerlo", 0xffffffff); - check32(result, 0x00000000); + check32_ne(result, 0xffffffff); WRITE_REG_NOCLOBBER(result, "utimerhi", 0xffffffff); - check32(result, 0x00000000); + check32_ne(result, 0xffffffff); /* * PC is special. Setting it to these values @@ -107,7 +107,7 @@ static inline void write_control_register_pairs(void) check64(result, 0x0000000000000000); WRITE_REG_NOCLOBBER(result, "c31:30", 0xffffffffffffffff); - check64(result, 0x0000000000000000); + check64_ne(result, 0xffffffffffffffff); WRITE_REG_PAIR_ENCODED(result, "c9:8", (uint64_t) 0x0000000000000000, C9_8_EQ_R1_0); diff --git a/tests/tcg/hexagon/utimer.c b/tests/tcg/hexagon/utimer.c new file mode 100644 index 0000000000000..ae3bca3201920 --- /dev/null +++ b/tests/tcg/hexagon/utimer.c @@ -0,0 +1,50 @@ +/* + * Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <stdint.h> +#include <stdio.h> + +static int err; + +#include "hex_test.h" + +static uint64_t get_time() +{ + uint64_t time; + asm volatile("%0 = utimer\n\t" + : "=r"(time) + : + : + ); + return time; +} + +static uint64_t get_time_from_regs() +{ + uint32_t time_low; + uint32_t time_high; + asm volatile("%0 = utimerhi\n\t" + "%1 = utimerlo\n\t" + : "=r"(time_high), "=r"(time_low) + : + : + ); + return ((uint64_t)time_high << 32) | (uint64_t)time_low; +} + + +int main() +{ + err = 0; + + uint64_t t0 = get_time(); + check64_ne(t0, 0); + + uint64_t t1 = get_time_from_regs(); + check64_ne(t1, 0); + + puts(err ? "FAIL" : "PASS"); + return err; +} From 69b658735319834cafd5264d4c828760ee4f5612 Mon Sep 17 00:00:00 2001 From: Taylor Simpson <ltaylorsimpson@gmail.com> Date: Tue, 29 Oct 2024 11:49:47 -0600 Subject: [PATCH 115/126] Hexagon (target/hexagon) Make "info tlb" work in qemu monitor Add the #if defined (TARGET_HEXAGON) to hmp-commands-info.hx Prefix each TLB entry with the index Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com> --- hmp-commands-info.hx | 3 ++- target/hexagon/hex_mmu.c | 5 ++--- target/hexagon/meson.build | 1 + target/hexagon/monitor.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) create mode 100644 target/hexagon/monitor.c diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index c59cd6637b97e..ecdbdf623d11c 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -196,7 +196,8 @@ SRST ERST #if defined(TARGET_I386) || defined(TARGET_SH4) || defined(TARGET_SPARC) || \ - defined(TARGET_PPC) || defined(TARGET_XTENSA) || defined(TARGET_M68K) + defined(TARGET_PPC) || defined(TARGET_XTENSA) || defined(TARGET_M68K) || \ + defined(TARGET_HEXAGON) { .name = "tlb", .args_type = "", diff --git a/target/hexagon/hex_mmu.c b/target/hexagon/hex_mmu.c index 07ad8e9616170..8037528a2ccd3 100644 --- a/target/hexagon/hex_mmu.c +++ 
b/target/hexagon/hex_mmu.c @@ -143,12 +143,11 @@ static bool hex_dump_mmu_entry(FILE *f, uint64_t entry) void dump_mmu(CPUHexagonState *env) { - int i; - HexagonCPU *cpu = env_archcpu(env); - for (i = 0; i < cpu->num_tlbs; i++) { + for (uint32_t i = 0; i < cpu->num_tlbs; i++) { uint64_t entry = env->hex_tlb->entries[i]; if (GET_TLB_FIELD(entry, PTE_V)) { + qemu_printf("[%03" PRIu32 "] ", i); qemu_printf("0x%016" PRIx64 ": ", entry); uint64_t PA = hex_tlb_phys_addr(entry); uint64_t VA = hex_tlb_virt_addr(entry); diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index d2b56b9e65e54..642c052d6e1cc 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -270,6 +270,7 @@ hexagon_softmmu_ss.add(files( 'hex_interrupts.c', 'hexswi.c', 'machine.c', + 'monitor.c', )) # diff --git a/target/hexagon/monitor.c b/target/hexagon/monitor.c new file mode 100644 index 0000000000000..534ca2abe63a8 --- /dev/null +++ b/target/hexagon/monitor.c @@ -0,0 +1,36 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu_bits.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "monitor/hmp.h" +#include "hex_mmu.h" + +const MonitorDef monitor_defs[] = { + { NULL }, +}; + +const MonitorDef *target_monitor_defs(void) +{ + return monitor_defs; +} + +void hmp_info_tlb(Monitor *mon, const QDict *qdict) +{ +#if !defined(CONFIG_USER_ONLY) + CPUArchState *env = mon_get_cpu_env(mon); + if (!env) { + monitor_printf(mon, "No CPU available\n"); + return; + } + + dump_mmu(env); +#endif +} From bd558c8d150a88bb0acad5299ae01c8e5327eebb Mon Sep 17 00:00:00 2001 From: Sid Manning <sidneym@quicinc.com> Date: Wed, 5 Mar 2025 15:16:45 -0800 Subject: [PATCH 116/126] Fix parameter order of call to load_elf_ram_sym fixup c13443215acf21a2e9be05e3bc20e784639b6985 Signed-off-by: Sid Manning <sidneym@quicinc.com> --- hw/hexagon/virt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/hexagon/virt.c b/hw/hexagon/virt.c index 13ecca466821e..b991bc94a838f 100644 --- a/hw/hexagon/virt.c +++ b/hw/hexagon/virt.c @@ -290,7 +290,7 @@ static uint64_t load_kernel(const HexagonVirtMachineState *vms) { MachineState *ms = MACHINE(vms); uint64_t entry = 0; - if (load_elf_ram_sym(ms->kernel_filename, NULL, NULL, NULL, NULL, &entry, + if (load_elf_ram_sym(ms->kernel_filename, NULL, NULL, NULL, &entry, NULL, NULL, NULL, 0, EM_HEXAGON, 0, 0, &address_space_memory, false, NULL) > 0) { return entry; From ebf5dee9facc275fab91d60d8acc5e153425d518 Mon Sep 17 00:00:00 2001 From: Marco Liebel <mliebel@quicinc.com> Date: Thu, 27 Feb 2025 10:42:02 -0800 Subject: [PATCH 117/126] target/hexagon: Add instruction definitions Signed-off-by: Marco Liebel <mliebel@quicinc.com> --- target/hexagon/imported/mmvec/ext.idef | 370 +++++++++++++++++++++++++ 1 file changed, 370 insertions(+) diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef index 
03d31f6181d75..1b7c5afb42f76 100644 --- a/target/hexagon/imported/mmvec/ext.idef +++ b/target/hexagon/imported/mmvec/ext.idef @@ -1400,6 +1400,376 @@ ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus_acc, "Vxx32+=vmpyhus(Vu32,Vv32)"," VxxV.v[1].w[i] += fMPY16SU(fGETHALF(1, VuV.w[i]), fGETUHALF(1, VvV.uw[i]))) +/* VMPY_QF32 */ +/* multiply qf32 input, produce qf32 output*/ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32,"Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32)","Vector multiply: qf32 output from qf32 input", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]); + VdV.qf32[i] = fRNDSATQF32(u.exp+v.exp, u.sig*v.sig, 0)) + +/* VMPY_QF32_SF */ +/* multiply ieee sf input, produce qf32 output*/ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_sf,"Vd32.qf32=vmpy(Vu32.sf,Vv32.sf)","Vector multiply: qf32 output from IEEE sf input", + fHIDE(unfloat )u = fPARSESF(VuV.sf[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + VdV.qf32[i] = fRNDSATQF32(u.exp+v.exp, u.sig*v.sig, 0); + if(u.sign^v.sign) VdV.qf32[i] = fNEGQF32(VdV.qf32[i])) + + +/* VMPY_QF16 */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)","Vector multiply: qf16 output from qf16 inupt", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]); + VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0)) + +/* VMPY_QF16_HF */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16_hf,"Vd32.qf16=vmpy(Vu32.hf,Vv32.hf)","Vector multiply: qf16 output from ieee hf input", + fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0); + if(u.sign^v.sign) VdV.qf16[i] = fNEGQF16(VdV.qf16[i])) + +/* VMPY_QF16_with_QF16_HF */ +/* get the magnitude of qf16 before multiply */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16_mix_hf,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf)","Vector multiply: qf16 output from mixed input of qf16 and ieee hf", + fHIDE(unfloat )u = 
fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0); + if(v.sign) VdV.qf16[i] = fNEGQF16(VdV.qf16[i])) + +/* VMPY_QF32_QF16 */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_qf16,"Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16)","Vector multiply: double qf32 output from qf16 input", + fHIDE(unfloat )u0 = fPARSEQF16(VuV.w[i] & 0xFFFF); + fHIDE(unfloat )u1 = fPARSEQF16((VuV.w[i]>>16) & 0xFFFF); + fHIDE(unfloat )v0 = fPARSEQF16(VvV.w[i] & 0xFFFF); + fHIDE(unfloat )v1 = fPARSEQF16((VvV.w[i]>>16) & 0xFFFF); + VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0); + VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0)) + +/* VMPY_QF32_HF */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_hf,"Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf)","Vector multiply: double qf32 output from ieee hf input", + fHIDE(unfloat )u0 = fPARSEHF(VuV.w[i] & 0xFFFF); + fHIDE(unfloat )u1 = fPARSEHF((VuV.w[i]>>16) & 0xFFFF); + fHIDE(unfloat )v0 = fPARSEHF(VvV.w[i] & 0xFFFF); + fHIDE(unfloat )v1 = fPARSEHF((VvV.w[i]>>16) & 0xFFFF); + VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0); + VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0); + if(u0.sign^v0.sign) VddV.v[0].qf32[i] = fNEGQF32(VddV.v[0].qf32[i]); + if(u1.sign^v1.sign) VddV.v[1].qf32[i] = fNEGQF32(VddV.v[1].qf32[i])) + +/* VMPY_QF32_with_QF16_HF */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_mix_hf,"Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf)","Vector multiply: double qf32 output from mixed input of qf16 and ieee hf", + fHIDE(unfloat )u0 = fPARSEQF16(VuV.w[i] & 0xFFFF); + fHIDE(unfloat )u1 = fPARSEQF16((VuV.w[i]>>16) & 0xFFFF); + fHIDE(unfloat )v0 = fPARSEHF(VvV.w[i] & 0xFFFF); + fHIDE(unfloat )v1 = fPARSEHF((VvV.w[i]>>16) & 0xFFFF); + VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0); + VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0); + if(v0.sign) VddV.v[0].qf32[i] = fNEGQF32(VddV.v[0].qf32[i]); + if(v1.sign) 
VddV.v[1].qf32[i] = fNEGQF32(VddV.v[1].qf32[i])) + +/* VADD_QF32 */ +ITERATOR_INSN_SHIFT_SLOT(32,vadd_qf32,"Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32)","Vector addition of qf32 input", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double ) sig = sig_u + sig_v; + fHIDE(double ) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VADD_SF */ +ITERATOR_INSN_SHIFT_SLOT(32,vadd_sf,"Vd32.qf32=vadd(Vu32.sf,Vv32.sf)","Vector addition of sf input", + fHIDE(unfloat )u = fPARSESF(VuV.sf[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + //printf("ARCHSIM: u.sign:%d, v.sign:%d, u.sig:%10.30f, v.sig:%10.30f\\n", u.sign, v.sign, u.sig, v.sig); + + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + + //printf("ARCHSIM: u.exp:%d, v.exp:%d, exp:%d, sig_u:%10.30f, sig_v:%10.30f\\n", u.exp, v.exp, exp, sig_u, sig_v); + + fHIDE(double sig;) + fHIDE(double sig_low;) + + if((u.sign^v.sign)==0){ + sig = sig_u + sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else if((u.sign==0) && (v.sign==1)) + { + sig = sig_u - sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : sig_u-(sig_v+sig); + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? 
(sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low); + //printf("ARCHSIM: output:%x\\n", VdV.qf32[i]); + if(u.sign && v.sign) VdV.qf32[i] = fNEGQF32(VdV.qf32[i])) + +/* VADD_QF32_MIX */ +ITERATOR_INSN_SHIFT_SLOT(32,vadd_qf32_mix,"Vd32.qf32=vadd(Vu32.qf32,Vv32.sf)","Vector addition of mixed qf32 and sf", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double) sig = sig_u + sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VSUB_QF32 */ +ITERATOR_INSN_SHIFT_SLOT(32,vsub_qf32,"Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32)","Vector subtraction of qf32 input", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double) sig = sig_u - sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VSUB_SF */ +ITERATOR_INSN_SHIFT_SLOT(32,vsub_sf,"Vd32.qf32=vsub(Vu32.sf,Vv32.sf)","Vector subtraction of ieee sf input", + fHIDE(unfloat )u = fPARSESF(VuV.sf[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? 
(-(FRAC_SF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double sig;) + fHIDE(double sig_low;) + if((u.sign==0) && (v.sign==0)) { + sig = sig_u - sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + } + else if(u.sign ^ v.sign){ + sig = sig_u + sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low); + if((u.sign==1) && (v.sign==0)) VdV.qf32[i] = fNEGQF32(VdV.qf32[i])) + +/* VSUB_QF32_MIX: u is parsed as qf32 and v as sf. v.sig is negated up front when v.sign is set, then the aligned significands are subtracted (u - v). */ +ITERATOR_INSN_SHIFT_SLOT(32,vsub_qf32_mix,"Vd32.qf32=vsub(Vu32.qf32,Vv32.sf)","Vector subtraction of mixed qf32 input and sf", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_SF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double ) sig = sig_u - sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VADD_QF16: 16-bit counterpart of the qf32 addition. Inputs are parsed with fPARSEQF16, a zero significand uses -(FRAC_HF+1) in place of ilogb(), and the result is rounded by fRNDSATQF16. */ +ITERATOR_INSN_SHIFT_SLOT(16,vadd_qf16,"Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16)","Vector addition of qf16 input", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? 
(-(FRAC_HF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double) sig = sig_u + sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +/* VADD_HF: hf inputs parsed with fPARSEHF. The sign fields choose between adding and subtracting the aligned significands; the qf16 result is negated when both inputs have their sign set. */ +ITERATOR_INSN_SHIFT_SLOT(16,vadd_hf,"Vd32.qf16=vadd(Vu32.hf,Vv32.hf)","Vector addition of hf input", + fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + + fHIDE(double sig;) + fHIDE(double sig_low;) + + if((u.sign^v.sign)==0){ + sig = sig_u + sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else if((u.sign==0) && (v.sign==1)) + { + sig = sig_u - sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : sig_u-(sig_v+sig); + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low); + if(u.sign && v.sign) + VdV.qf16[i] = fNEGQF16(VdV.qf16[i])) + +/* VADD_QF16_MIX: u is parsed as qf16 and v as hf. v.sig is negated up front when v.sign is set, then the aligned significands are added. */ +ITERATOR_INSN_SHIFT_SLOT(16,vadd_qf16_mix,"Vd32.qf16=vadd(Vu32.qf16,Vv32.hf)","Vector addition of mixed qf16 and hf", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? 
(-(FRAC_HF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double) sig = sig_u + sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +/* VSUB_QF16: both inputs are parsed as qf16, aligned to a shared exponent and subtracted (u - v); sig_low is the matching low-order correction term passed to fRNDSATQF16. */ +ITERATOR_INSN_SHIFT_SLOT(16,vsub_qf16,"Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16)","Vector subtraction of qf16 input", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double) sig = sig_u - sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +/* VSUB_HF: hf inputs parsed with fPARSEHF. The sign fields select u-v, u+v or v-u on the aligned significands, and the qf16 result is negated when u.sign is 1 and v.sign is 0. */ +ITERATOR_INSN_SHIFT_SLOT(16,vsub_hf,"Vd32.qf16=vsub(Vu32.hf,Vv32.hf)","Vector subtraction of hf input", + fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double sig;) + fHIDE(double sig_low;) + if((u.sign==0) && (v.sign==0)) { + sig = sig_u - sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + } + else if(u.sign ^ v.sign){ + sig = sig_u + sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? 
(sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low); + if((u.sign==1) && (v.sign==0)) + VdV.qf16[i] = fNEGQF16(VdV.qf16[i])) + + +/* VSUB_QF16_MIXED (instruction tag vsub_qf16_mix): u is parsed as qf16 and v as hf. v.sig is negated up front when v.sign is set, then the aligned significands are subtracted (u - v). */ +ITERATOR_INSN_SHIFT_SLOT(16,vsub_qf16_mix,"Vd32.qf16=vsub(Vu32.qf16,Vv32.hf)","Vector subtraction of mixed qf16 and hf", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (exp<v.exp) exp = v.exp; + } else { + exp = v.exp+((v.sig==0.0)? (-(FRAC_HF+1)):ilogb(v.sig)); + if (exp<u.exp) exp = u.exp; + } + fHIDE(double ) sig_u = ldexp(u.sig, u.exp-exp); + fHIDE(double ) sig_v = ldexp(v.sig, v.exp-exp); + fHIDE(double) sig = sig_u - sig_v; + fHIDE(double) sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +// FP Convert QF32/W/UW to ieee SF +ITERATOR_INSN_SHIFT_SLOT(32,vconv_sf_qf32,"Vd32.sf=Vu32.qf32","Vector conversion of qf32 format to ieee SF", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + VdV.sf[i] = fRNDSATSF(u.exp, u.sig)) + +// FP Convert QF16/H/UH to ieee HF +ITERATOR_INSN_SHIFT_SLOT(16,vconv_hf_qf16,"Vd32.hf=Vu32.qf16","Vector conversion of qf16 format to ieee HF", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + VdV.hf[i] = fRNDSATHF(u.exp, u.sig)) + +// FP Convert double QF32 to two packed ieee HF in one vector +ITERATOR_INSN_SHIFT_SLOT(32,vconv_hf_qf32,"Vd32.hf=Vuu32.qf32","Vector conversion of double qf32 to ieee HF", + fHIDE(unfloat )u0 = fPARSEQF32(VuuV.v[0].qf32[i]); + fHIDE(unfloat )u1 = fPARSEQF32(VuuV.v[1].qf32[i]); + VdV.hf[2*i] = fRNDSATHF(u0.exp, u0.sig); + VdV.hf[2*i+1] = fRNDSATHF(u1.exp, u1.sig)) ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(16,vmpyih,"Vd32=vmpyih(Vu32,Vv32)","Vd32.h=vmpyi(Vu32.h,Vv32.h)", From 12a283f956ddbcc6b0fa52d46b83a472e1d53459 Mon Sep 17 00:00:00 2001 From: Marco Liebel <mliebel@quicinc.com> Date: 
Thu, 27 Feb 2025 10:50:30 -0800 Subject: [PATCH 118/126] FIXME: target/hexagon: Add qfloat files Signed-off-by: Marco Liebel <mliebel@quicinc.com> --- target/hexagon/meson.build | 2 + target/hexagon/mmvec/kvx_ieee.c | 1460 ++++++++++++++ target/hexagon/mmvec/kvx_ieee.h | 141 ++ target/hexagon/mmvec/kvx_mac_reduce.c | 1156 +++++++++++ target/hexagon/mmvec/macros_auto.h | 221 +++ target/hexagon/mmvec/mmvec.h | 5 + target/hexagon/mmvec/mmvec_qfloat.c | 2563 +++++++++++++++++++++++++ target/hexagon/mmvec/mmvec_qfloat.h | 199 ++ target/hexagon/op_helper.c | 2 + 9 files changed, 5749 insertions(+) create mode 100644 target/hexagon/mmvec/kvx_ieee.c create mode 100644 target/hexagon/mmvec/kvx_ieee.h create mode 100644 target/hexagon/mmvec/kvx_mac_reduce.c create mode 100644 target/hexagon/mmvec/macros_auto.h create mode 100644 target/hexagon/mmvec/mmvec_qfloat.c create mode 100644 target/hexagon/mmvec/mmvec_qfloat.h diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index 642c052d6e1cc..280b5dc58ac5b 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -263,6 +263,8 @@ hexagon_ss.add(files( 'fma_emu.c', 'mmvec/decode_ext_mmvec.c', 'mmvec/system_ext_mmvec.c', + 'mmvec/mmvec_qfloat.c', + 'mmvec/kvx_ieee.c', )) hexagon_softmmu_ss.add(files( diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c new file mode 100644 index 0000000000000..3e67230f62e47 --- /dev/null +++ b/target/hexagon/mmvec/kvx_ieee.c @@ -0,0 +1,1460 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "kvx_ieee.h" +#include "kvx_mac_reduce.c" +#include "qemu/host-utils.h" + +/* Shift a right by dist and OR a sticky bit into bit 0: for dist < 31 the sticky bit is set when a<<(-dist & 31) is non-zero, for dist >= 31 the whole result is (a != 0). NOTE(review): for dist == 0 the sticky test looks at a itself, so bit 0 is set for any non-zero a; confirm callers never pass 0. */ uint32_t shiftRightJam32( uint32_t a, uint_fast16_t dist ) +{ + return + (dist < 31) ? a>>dist | ((uint32_t) (a<<(-dist & 31)) != 0) : (a != 0); +} + +/* Thin wrapper that forwards to clz16(). */ uint_fast8_t countLeadingZeros16( uint16_t a ) +{ + return clz16(a); +} + +/* Normalize a subnormal FP16 significand: shiftDist is (leading zeros of sig) - 5; returns sig shifted left by shiftDist together with exponent 1 - shiftDist. */ struct exp8_sig16 normSubnormalF16Sig( uint_fast16_t sig ) +{ + int_fast8_t shiftDist; + struct exp8_sig16 z; + + shiftDist = countLeadingZeros16( sig ) - 5; + z.exp = 1 - shiftDist; + z.sig = sig<<shiftDist; + return z; + +} + +/* Round sig (whose low 4 bits are rounding bits; increment 0x8, ties cleared to even) and pack sign/exp/sig with packToF16UI. Negative exp is handled by shifting sig right with shiftRightJam32; the overflow branch returns packToF16UI(sign, 0x1F, 0) - !roundIncrement. */ uint16_t roundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig ) +{ + bool roundNearEven; + uint_fast8_t roundIncrement, roundBits; + + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + roundNearEven = 1; + roundIncrement = 0x8; + roundBits = sig & 0xF; + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + if ( 0x1D <= (unsigned int) exp ) { + if ( exp < 0 ) { + /*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + sig = shiftRightJam32( sig, -exp ); + exp = 0; + roundBits = sig & 0xF; + //if ( isTiny && roundBits ) { + // softfloat_raiseFlags( softfloat_flag_underflow ); + //} + } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) { + 
/*---------------------------------------------------------------- + *----------------------------------------------------------------*/ + return packToF16UI( sign, 0x1F, 0 ) - ! roundIncrement; + } + } + /*------------------------------------------------------------------------ + *------------------------------------------------------------------------*/ + sig = (sig + roundIncrement)>>4; + sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven); + if ( ! sig ) exp = 0; + + return packToF16UI( sign, exp, sig ); + +} + + +/* Multiply two values passed as 32-bit float bit patterns using host float arithmetic. Returns FP32_DEF_NAN when either input, or the product, satisfies isNaNF32UI. */ uint32_t fp_mult_sf_sf (uint32_t op1, uint32_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("fp_mult_sf_sf"); + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF32UI(op1) || isNaNF32UI(op2)) + return FP32_DEF_NAN; + + u_op1.ui = op1; + u_op2.ui = op2; + a = u_op1.f; + b = u_op2.f; + rslt = a*b; + u_rslt.f = rslt; + result = u_rslt.ui; + + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Add two values passed as 32-bit float bit patterns using host float arithmetic. Returns FP32_DEF_NAN when either input, or the sum, satisfies isNaNF32UI. */ uint32_t fp_add_sf_sf (uint32_t op1, uint32_t op2) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("fp_add_sf_sf"); + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF32UI(op1) || isNaNF32UI(op2)) + return FP32_DEF_NAN; + + u_op1.ui = op1; + u_op2.ui = op2; + a = u_op1.f; + b = u_op2.f; + rslt = a+b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Subtract (op1 - op2) two values passed as 32-bit float bit patterns using host float arithmetic. Returns FP32_DEF_NAN when either input, or the difference, satisfies isNaNF32UI. */ uint32_t fp_sub_sf_sf (uint32_t op1, uint32_t op2) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF32UI(op1) || isNaNF32UI(op2)) + return FP32_DEF_NAN; + + u_op1.ui = op1; + u_op2.ui = op2; + a = u_op1.f; + b = u_op2.f; + rslt = a-b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +//-------------------------------------------------------------- +//Function to convert FP32 to FP16 +//-------------------------------------------------------------- + +/* Takes and returns raw bit patterns. NaN inputs give FP16_DEF_NAN, infinities and zeros keep their sign, everything else is rounded by roundPackToF16. */ uint16_t f32_to_f16 ( uint32_t a) +{ + bool sign; + int_fast16_t exp; + uint_fast32_t frac; + uint_fast16_t frac16; + + sign = signF32UI( a ); + exp = expF32UI ( a ); + frac = fracF32UI( a ); + + // Inf and NaN case + if ( exp == 0xFF ) { + if ( frac ) { + return FP16_DEF_NAN; + } else { + return packToF16UI( sign, 0x1F, 0 ); + } + } + + /*------------------------------------------------------------------------ + frac>>9 : keeping 14 bits of precision out of the 23 fraction bits in FP32 + (frac & 0x1FF) != 0) : setting the sticky bit required for rounding + *------------------------------------------------------------------------*/ + frac16 = frac>>9 | ((frac & 0x1FF) != 0); + + //If input was a Zero + if ( ! 
(exp | frac16) ) { + return packToF16UI( sign, 0, 0 ); + } + + return roundPackToF16( sign, exp - 0x71, frac16 | 0x4000 ); + +} + +//-------------------------------------------------------------- +//Function to convert FP16 to FP32 +//-------------------------------------------------------------- + +/* Takes and returns raw bit patterns. NaN inputs give FP32_DEF_NAN, infinities and zeros keep their sign, and subnormal inputs are normalized through normSubnormalF16Sig before packing. */ uint32_t f16_to_f32( uint16_t a ) +{ + bool sign; + int_fast8_t exp; + uint_fast16_t frac; + struct exp8_sig16 normExpSig; + + sign = signF16UI( a ); + exp = expF16UI ( a ); + frac = fracF16UI( a ); + + + if ( exp == 0x1F ) { + if ( frac ) { + return FP32_DEF_NAN; + } else { + return packToF32UI( sign, 0xFF, 0 ); + } + } + + + if ( ! exp ) { + if ( ! frac ) { + return packToF32UI( sign, 0, 0 ); + } + normExpSig = normSubnormalF16Sig( frac ); + exp = normExpSig.exp - 1; + frac = normExpSig.sig; + } + + + return packToF32UI( sign, exp + 0x70, (uint_fast32_t) frac<<13 ); + +} + +/* Multiply two FP16 bit patterns: widen both with f16_to_f32, multiply as host floats, narrow the product with f32_to_f16. Returns FP16_DEF_NAN when either input is NaN. */ uint16_t fp_mult_hf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a*b; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Add two FP16 bit patterns: widen both with f16_to_f32, add as host floats, narrow the sum with f32_to_f16. Returns FP16_DEF_NAN when either input is NaN. */ uint16_t fp_add_hf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 
=0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a+b; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Subtract (op1 - op2) two FP16 bit patterns: widen both with f16_to_f32, subtract as host floats, narrow the difference with f32_to_f16. Returns FP16_DEF_NAN when either input is NaN. */ uint16_t fp_sub_hf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a-b; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Multiply two FP16 bit patterns and return the product as a 32-bit float bit pattern (no narrowing back to FP16). Returns FP32_DEF_NAN when either input, or the product, is NaN. */ uint32_t fp_mult_sf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a*b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Add two FP16 bit patterns and return the sum as a 32-bit float bit pattern (no narrowing back to FP16). Returns FP32_DEF_NAN when either input, or the sum, is NaN. */ uint32_t fp_add_sf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a+b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Subtract (op1 - op2) two FP16 bit patterns and return the difference as a 32-bit float bit pattern. Returns FP32_DEF_NAN when either input, or the difference, is NaN. */ uint32_t fp_sub_sf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a-b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +/* Multiply-accumulate: op1 and op2 are shifted left by 16 to form 32-bit float bit patterns (presumably bfloat16 inputs; confirm), then (a * b) + facc is evaluated in double and stored back through a float. Returns FP32_DEF_NAN when any widened input, acc, or the result is NaN. */ uint32_t fp_mult_sf_bf_acc (uint16_t op1, uint16_t op2, uint32_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + double a,b,facc,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%08x\n",acc); + #endif + + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + + if(isNaNF32UI(op1_f32) || isNaNF32UI(op2_f32) || isNaNF32UI(acc)) + return FP32_DEF_NAN; + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + //rslt = fma(a,b,facc); + rslt = (a * b) + facc; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : facc = %f\n",facc); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +/* Shift both 16-bit inputs left by 16 to form 32-bit float bit patterns and forward them to fp_mult_sf_sf. */ uint32_t fp_mult_sf_bf (uint16_t op1, uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + return fp_mult_sf_sf(op1_f32, op2_f32); +} + +/* Shift both 16-bit inputs left by 16 to form 32-bit float bit patterns and forward them to fp_add_sf_sf. */ uint32_t fp_add_sf_bf (uint16_t op1, uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + return fp_add_sf_sf(op1_f32, op2_f32); +} + +/* Shift both 16-bit inputs left by 16 to form 32-bit float bit patterns and forward them to fp_sub_sf_sf. */ uint32_t fp_sub_sf_bf (uint16_t op1, uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + return fp_sub_sf_sf(op1_f32, op2_f32); +} + +/* Convert an FP16 bit pattern to uint16_t. NaN gives UHW_MAX, any input with the sign bit set gives 0, +Inf gives UHW_MAX; other values are rounded to nearest with ties to even. */ uint16_t f16_to_uh( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + uint16_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = UHW_MAX; + goto end; + } + //converting a negative floating-point value to + //unsigned integer U(h|b) ----> (Vx4Rslt is 0) + if(signF16UI(op1)) + { + result = 0x0; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = UHW_MAX; + goto end; + } + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + + //The default float-to-integer conversion in C does not + //round to the nearest integer, but instead truncates toward zero. 
+ op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + frac = a - (float)((uint16_t) a); + //round to the nearest + result = (uint16_t) (a + 0.5); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) result--; + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%x\n",result); + #endif + return result; +} + +/* Convert an FP16 bit pattern to int16_t. NaN gives HW_MAX; infinities and values beyond (float)HW_MIN / (float)HW_MAX saturate to HW_MIN / HW_MAX; other values are rounded to nearest with ties to even. */ int16_t f16_to_h( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + int16_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = HW_MAX; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = signF16UI(op1) ? HW_MIN : HW_MAX; + goto end; + } + + //The default float-to-integer conversion in C does not round + //to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + if(a > (float)(HW_MAX)) + { + result = HW_MAX; + goto end; + } + if(a < (float)(HW_MIN)) + { + result = HW_MIN; + goto end; + } + + frac = fabs(a - (float)((int16_t) a)); + //round to the nearest + result = (a > 0) ? 
((int16_t) (a + 0.5)) : ((int16_t) (a - 0.5)); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) + { + if(a > 0) result--; + if(a < 0) result++; + } + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%04x\n",result); + #endif + return result; +} + +/* Convert an FP16 bit pattern to uint8_t. NaN, +Inf and values above UBYTE_MAX give UBYTE_MAX; any input with the sign bit set gives 0; other values are rounded to nearest with ties to even. */ uint8_t f16_to_ub( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + uint8_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = UBYTE_MAX; + goto end; + } + //converting a negative floating-point value to + //unsigned integer U(h|b) ----> (Vx4Rslt is 0) + if(signF16UI(op1)) + { + result = 0x0; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = UBYTE_MAX; + goto end; + } + + //The default float-to-integer conversion in C does + //not round to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + if( a > (float)(UBYTE_MAX)) + { + result = UBYTE_MAX; + goto end; + } + + frac = a - (float)((uint16_t) a); + //round to the nearest + result = (uint8_t) (a + 0.5); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) + { + if(a > 0) result--; + if(a < 0) result++; + } + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%x\n",result); + #endif + return result; +} + +/* Convert an FP16 bit pattern to int8_t. NaN gives BYTE_MAX; infinities and values beyond (float)BYTE_MIN / (float)BYTE_MAX saturate; other values are rounded to nearest with ties to even. The working result is held in an int16_t and narrowed on return. */ int8_t f16_to_b( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + int16_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = BYTE_MAX; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = signF16UI(op1) ? 
BYTE_MIN : BYTE_MAX; + goto end; + } + + //The default float-to-integer conversion in C does not + //round to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + if(a > (float)(BYTE_MAX)) + { + result = BYTE_MAX; + goto end; + } + if(a < (float)(BYTE_MIN)) + { + result = BYTE_MIN; + goto end; + } + + frac = fabs(a - (float)((int16_t) a)); + //round to the nearest + result = (a > 0) ? ((int16_t) (a + 0.5)) : ((int16_t) (a - 0.5)); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) + { + if(a > 0) result--; + if(a < 0) result++; + } + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%04x\n",result); + #endif + return result; +} + +/* Convert a uint16_t to an FP16 bit pattern by casting to host float and narrowing with f32_to_f16. */ uint16_t uh_to_f16(uint16_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +/* Convert an int16_t to an FP16 bit pattern by casting to host float and narrowing with f32_to_f16. */ uint16_t h_to_f16 (int16_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +/* Convert a uint8_t to an FP16 bit pattern by casting to host float and narrowing with f32_to_f16. */ uint16_t ub_to_f16(uint8_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + 
#ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint16_t b_to_f16 (int8_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint16_t sf_to_bf (int32_t op1) +{ + uint32_t rslt = op1; + if((rslt & 0x1FFFF) == 0x08000){ + //break; // do not round up if exactly .5 and even already + } + else if ((rslt & 0x8000) == 0x8000){ + rslt += 0x8000; //rounding to nearest number + } + rslt = isNaNF32UI(op1) ? FP32_DEF_NAN : rslt; + uint16_t result = (rslt >> 16); + return result; +} + +uint32_t fp_vdmpy (uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l) +{ + union ui32_f32 u_op; + union ui32_f32 u_rslt; + + uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32; + float f_op1_u, f_op1_l, f_op2_u, f_op2_l; + double f_prod_l, f_prod_u, rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1_u =0x%04x\n",op1_u); + printf("Debug : op1_l =0x%04x\n",op1_l); + printf("Debug : op2_u =0x%04x\n",op2_u); + printf("Debug : op2_l =0x%04x\n",op2_l); + #endif + + if(isNaNF16UI(op1_u) || isNaNF16UI(op1_l) || isNaNF16UI(op2_u) || + isNaNF16UI(op2_l)) + { result = FP32_DEF_NAN; + goto end; + } + + op1_u_f32 = f16_to_f32(op1_u); + op1_l_f32 = f16_to_f32(op1_l); + op2_u_f32 = f16_to_f32(op2_u); + op2_l_f32 = f16_to_f32(op2_l); + + u_op.ui = op1_u_f32; + f_op1_u = u_op.f; + + u_op.ui = op1_l_f32; + f_op1_l = u_op.f; + + u_op.ui = op2_l_f32; + f_op2_l = u_op.f; + + u_op.ui = op2_u_f32; + f_op2_u = u_op.f; + + f_prod_l = f_op1_l * f_op2_l; + f_prod_u = f_op1_u * f_op2_u; + rslt = f_prod_u + f_prod_l; + + u_rslt.f = rslt; + 
result = u_rslt.ui; + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : f_op1_u = %f\n",f_op1_u); + printf("Debug : f_op1_l = %f\n",f_op1_l); + printf("Debug : f_op2_u = %f\n",f_op2_u); + printf("Debug : f_op2_l = %f\n",f_op2_l); + printf("Debug : f_prod_l = %f\n",f_prod_l); + printf("Debug : f_prod_u = %f\n",f_prod_u); + printf("Debug : rslt = %f\n",rslt); + #endif + +end: + #ifdef DEBUG + printf("Debug : result =0x%08x\n",result); + #endif + return result; +} + +uint32_t fp_vdmpy_acc_dumb (uint32_t acc,uint16_t op1_u,uint16_t op1_l, + uint16_t op2_u,uint16_t op2_l) +{ + union ui32_f32 u_op; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32; + float f_op1_u, f_op1_l, f_op2_u, f_op2_l, f_acc; + long double f_prod_l, f_prod_u, rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1_u =0x%04x\n",op1_u); + printf("Debug : op1_l =0x%04x\n",op1_l); + printf("Debug : op2_u =0x%04x\n",op2_u); + printf("Debug : op2_l =0x%04x\n",op2_l); + printf("Debug : acc =0x%08x\n",acc); + #endif + + op1_u_f32 = f16_to_f32(op1_u); + op1_l_f32 = f16_to_f32(op1_l); + op2_u_f32 = f16_to_f32(op2_u); + op2_l_f32 = f16_to_f32(op2_l); + + u_op.ui = op1_u_f32; + f_op1_u = u_op.f; + + u_op.ui = op1_l_f32; + f_op1_l = u_op.f; + + u_op.ui = op2_l_f32; + f_op2_l = u_op.f; + + u_op.ui = op2_u_f32; + f_op2_u = u_op.f; + + u_acc.ui = acc; + f_acc = u_acc.f; + + f_prod_l = (long double)(f_op1_l * f_op2_l); + f_prod_u = (long double)(f_op1_u * f_op2_u); + rslt = (long double)((long double)f_acc + f_prod_u + f_prod_l); + + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : f_op1_u = %f\n",f_op1_u); + printf("Debug : f_op1_l = %f\n",f_op1_l); + printf("Debug : f_op2_u = %f\n",f_op2_u); + printf("Debug : f_op2_l = %f\n",f_op2_l); + printf("Debug : f_acc = %f\n",f_acc); + printf("Debug : f_prod_l = %Lf\n",f_prod_l); + printf("Debug : f_prod_u = %Lf\n",f_prod_u); + printf("Debug : rslt = %Lf\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint16_t fp_min_hf(uint16_t op1,uint16_t op2) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + + rslt = (a>b) ? b : a; + // +0 is evaluated equal to -0 in C. Handeling that case separatly + if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) && (signF16UI(op1) != + signF16UI(op2)) ) + { + rslt = signF16UI(op1) ? a : b; + } + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; + +} + +uint32_t fp_min_sf(uint32_t op1,uint32_t op2) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF32UI(op1) || isNaNF32UI(op2)) + return FP32_DEF_NAN; + + u_op1.ui = op1; + u_op2.ui = op2; + a = u_op1.f; + b = u_op2.f; + rslt = (a>b) ? b : a; + // +0 is evaluated equal to -0 in C. 
Handeling that case separatly + if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) && + (signF32UI(op1) != signF32UI(op2)) ) + { + rslt = signF32UI(op1) ? a : b; + } + u_rslt.f = rslt; + result = u_rslt.ui; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint16_t fp_min_bf(uint16_t op1,uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + + uint32_t result_f32; + uint16_t result; + + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + + result_f32 = fp_min_sf(op1_f32, op2_f32); + result_f32 = result_f32 >> 16; + result = result_f32 & 0xFFFF; + return result; +} + + +uint16_t fp_max_hf(uint16_t op1,uint16_t op2) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + + rslt = (a>b) ? a : b; + // +0 is evaluated equal to -0 in C. Handeling that case separatly + if( (fabs(a) == 0.0f) && + (fabs(b) == 0.0f) && (signF16UI(op1) != signF16UI(op2)) ) + { + rslt = signF16UI(op1) ? 
b : a; + } + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; + +} + +uint32_t fp_max_sf(uint32_t op1,uint32_t op2) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF32UI(op1) || isNaNF32UI(op2)) + return FP32_DEF_NAN; + + u_op1.ui = op1; + u_op2.ui = op2; + a = u_op1.f; + b = u_op2.f; + rslt = (a>b) ? a : b; + // +0 is evaluated equal to -0 in C. Handeling that case separatly + if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) && + (signF32UI(op1) != signF32UI(op2)) ) + { + rslt = signF32UI(op1) ? b : a; + } + u_rslt.f = rslt; + result = u_rslt.ui; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint16_t fp_max_bf(uint16_t op1,uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + + uint32_t result_f32; + uint16_t result; + + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + + result_f32 = fp_max_sf(op1_f32, op2_f32); + result_f32 = result_f32 >> 16; + result = result_f32 & 0xFFFF; + return result; +} + +uint16_t fp_abs_bf(uint16_t op1) +{ + union ui32_f32 u_op1; + + float result_f; + uint32_t result_f32; + uint16_t result; + + u_op1.ui = ((uint32_t)op1) << 16; + + result_f = fabs(u_op1.f); + u_op1.f = result_f; + result_f32 = u_op1.ui >> 16; + result = result_f32 & 0xFFFF; + return result; +} + +uint16_t fp_neg_bf(uint16_t op1) +{ + union ui32_f32 u_op1; + + float result_f; + uint32_t result_f32; + uint16_t result; + + u_op1.ui = ((uint32_t)op1) << 16; + + result_f = -(u_op1.f); + u_op1.f = result_f; + 
result_f32 = u_op1.ui >> 16; + result = result_f32 & 0xFFFF; + return result; +} + +//float fmaf( float x, float y, float z ); +uint16_t fp_mult_hf_hf_acc_dumb (uint16_t op1, uint16_t op2, uint16_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + uint32_t acc_f32; + + float a,b,facc,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%04x\n",acc); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF16UI(acc)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + acc_f32 = f16_to_f32(acc); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc_f32; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + //rslt = fma(a,b,facc); + rslt = (a * b) + facc; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : facc = %f\n",facc); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint32_t fp_mult_sf_hf_acc (uint16_t op1, uint16_t op2, uint32_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,facc,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%08x\n",acc); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF32UI(acc)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + //rslt = fma(a,b,facc); + rslt = (a * b) + facc; + u_rslt.f = rslt; + result = u_rslt.ui; + result = 
isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : facc = %f\n",facc); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h new file mode 100644 index 0000000000000..ad80b70239254 --- /dev/null +++ b/target/hexagon/mmvec/kvx_ieee.h @@ -0,0 +1,141 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef KVX_COMPACT_H +#define KVX_COMPACT_H 1 + +#include <math.h> +#include "hex_arch_types.h" + +//Double precision +#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63)) +#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF) +#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )) +#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig))) +#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))) + +//SF defines +#define FP32_DEF_NAN 0x7FFFFFFF +#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF)) +#define isInfF32UI( a ) (((~(a) & 0x7F800000) == 0) && (((a) & 0x007FFFFF) == 0)) +#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31)) +#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF) +#define fracF32UI( a ) ((a) & 0x007FFFFF) +#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig)) + +//HF defines +#define FP16_DEF_NAN 0x7FFF +#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) +#define isInfF16UI( a ) (((~(a) & 0x7C00) == 0) && (((a) & 0x03FF) == 0)) +#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15)) +#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F) +#define fracF16UI( a ) ((a) & 0x03FF) +#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig)) + +#define UHW_MIN 0 +#define UHW_MAX 65535 +#define HW_MIN -32768 +#define HW_MAX 32767 + +#define UBYTE_MIN 0 +#define UBYTE_MAX 255 +#define BYTE_MIN -128 +#define BYTE_MAX 127 + +//union ui16_f16 { uint16_t ui; float16_t f; }; +union ui32_f32 { uint32_t ui; float f; }; +union ui64_f64 { uint64_t ui; double f; }; +struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; }; + +uint32_t shiftRightJam32( uint32_t a, uint_fast16_t dist ); +uint_fast8_t countLeadingZeros16( uint16_t a ); +struct exp8_sig16 normSubnormalF16Sig( uint_fast16_t sig ); +uint16_t 
roundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig ); + +//-------------------------------------------------------------------------- +// IEEE - FP Convert instructions +//-------------------------------------------------------------------------- +uint16_t f32_to_f16 ( uint32_t a); +uint32_t f16_to_f32( uint16_t a ); + +uint16_t f16_to_uh( uint16_t op1); +int16_t f16_to_h ( uint16_t op1); +uint8_t f16_to_ub( uint16_t op1); +int8_t f16_to_b ( uint16_t op1); + +uint16_t uh_to_f16(uint16_t op1); +uint16_t h_to_f16 (int16_t op1); +uint16_t ub_to_f16(uint8_t op1); +uint16_t b_to_f16 (int8_t op1); + +uint16_t sf_to_bf (int32_t op1); + +//-------------------------------------------------------------------------- +// IEEE - FP ADD/SUB/MPY instructions +//-------------------------------------------------------------------------- + +//size4s_t fp_mult(size4s_t input_1, size4s_t input_2); +uint32_t fp_mult_sf_sf (uint32_t op1, uint32_t op2); +uint32_t fp_add_sf_sf (uint32_t op1, uint32_t op2); +uint32_t fp_sub_sf_sf (uint32_t op1, uint32_t op2); + +uint16_t fp_mult_hf_hf (uint16_t op1, uint16_t op2); +uint16_t fp_add_hf_hf (uint16_t op1, uint16_t op2); +uint16_t fp_sub_hf_hf (uint16_t op1, uint16_t op2); + +uint32_t fp_mult_sf_hf (uint16_t op1, uint16_t op2); +uint32_t fp_add_sf_hf (uint16_t op1, uint16_t op2); +uint32_t fp_sub_sf_hf (uint16_t op1, uint16_t op2); + +uint32_t fp_mult_sf_bf (uint16_t op1, uint16_t op2); +uint32_t fp_add_sf_bf (uint16_t op1, uint16_t op2); +uint32_t fp_sub_sf_bf (uint16_t op1, uint16_t op2); + +//-------------------------------------------------------------------------- +// IEEE - FP Accumulate instructions +//-------------------------------------------------------------------------- + +uint16_t fp_mult_hf_hf_acc (uint16_t op1, uint16_t op2, uint16_t acc); +uint32_t fp_mult_sf_bf_acc (uint16_t op1, uint16_t op2, uint32_t acc); +uint32_t fp_mult_sf_hf_acc (uint16_t op1, uint16_t op2, uint32_t acc); + 
+//-------------------------------------------------------------------------- +// IEEE - FP Reduce instructions +//-------------------------------------------------------------------------- + +uint32_t fp_vdmpy (uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l); +uint32_t fp_vdmpy_acc (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l); + +//-------------------------------------------------------------------------- +// IEEE - FP Select instructions +//-------------------------------------------------------------------------- + +uint16_t fp_min_hf(uint16_t op1,uint16_t op2); +uint16_t fp_max_hf(uint16_t op1,uint16_t op2); +uint32_t fp_min_sf(uint32_t op1,uint32_t op2); +uint32_t fp_max_sf(uint32_t op1,uint32_t op2); +uint16_t fp_min_bf(uint16_t op1,uint16_t op2); +uint16_t fp_max_bf(uint16_t op1,uint16_t op2); +uint16_t fp_abs_bf(uint16_t op1); +uint16_t fp_neg_bf(uint16_t op1); + +//-------------------------------------------------------------------------- +// IEEE - FP Experiment Implementations +//-------------------------------------------------------------------------- +uint16_t fp_mult_hf_hf_acc_dumb (uint16_t op1, uint16_t op2, uint16_t acc); +uint32_t fp_vdmpy_acc_dumb (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l); +#endif diff --git a/target/hexagon/mmvec/kvx_mac_reduce.c b/target/hexagon/mmvec/kvx_mac_reduce.c new file mode 100644 index 0000000000000..e11e41ae5891a --- /dev/null +++ b/target/hexagon/mmvec/kvx_mac_reduce.c @@ -0,0 +1,1156 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "kvx_ieee.h" + +#define DF_MANTBITS() 52 +#define SF_MANTBITS() 23 +#define HF_MANTBITS() 10 + +#define DF_INF_EXP 0x7ff +#define DF_BIAS 1023 + +#define SF_INF_EXP 0xff +#define SF_BIAS 127 + +#define HF_INF_EXP 0x1f +#define HF_BIAS 15 + +#define WAY_BIG_EXP 4096 + +#define isz(X) (fabs(X) == 0.0f) + + +typedef union { + double f; + size8u_t i; +#ifndef SLOWLARIS + struct { + size8u_t mant:52; + size8u_t exp:11; + size8u_t sign:1; + } x; +#else + struct { + size8u_t sign:1; + size8u_t exp:11; + size8u_t mant:52; + } x; +#endif +} df_t; + +typedef union { + float f; + size4u_t i; +#ifndef SLOWLARIS + struct { + size4u_t mant:23; + size4u_t exp:8; + size4u_t sign:1; + } x; +#else + struct { + size4u_t sign:1; + size4u_t exp:8; + size4u_t mant:23; + } x; +#endif +} sf_t; + +typedef struct { + union { + size8u_t low; + struct { +#ifndef SLOWLARIS + size4u_t w0; + size4u_t w1; +#else + size4u_t w1; + size4u_t w0; +#endif + }; + }; + union { + size8u_t high; + struct { +#ifndef SLOWLARIS + size4u_t w2; + size4u_t w3; +#else + size4u_t w3; + size4u_t w2; +#endif + }; + }; +} int128_t; + +typedef struct { + int128_t mant; + size4s_t exp; + size1u_t sign; + size1u_t guard; + size1u_t round; + size1u_t sticky; +} xf_t; + +static inline void xf_init(xf_t * p) +{ + p->mant.low = 0; + p->mant.high = 0; + p->exp = 0; + p->sign = 0; + p->guard = 0; + p->round = 0; + p->sticky = 0; +} + +size8u_t df_getmant_kvx(df_t a); +size8u_t df_getmant_kvx(df_t a) +{ + //int class = fpclassify(a.f); + //switch (class) { + //case FP_NORMAL: + return 
(a.x.mant | 1ULL << 52); + //case FP_ZERO: + // return 0; + //case FP_SUBNORMAL: + // return a.x.mant; + //default: + // return -1; + //}; +} + +size4s_t df_getexp_kvx(df_t a); +size4s_t df_getexp_kvx(df_t a) +{ + //int class = fpclassify(a.f); + //switch (class) { + //case FP_NORMAL: + return a.x.exp; + //case FP_SUBNORMAL: + // return a.x.exp + 1; + //default: + // return -1; + //}; +} + +size8u_t sf_getmant_kvx(sf_t a); +size8u_t sf_getmant_kvx(sf_t a) +{ + //case FP_ZERO: + if((a.x.mant == 0) && (a.x.exp == 0)) + return 0; + //case FP_SUBNORMAL: + else if((a.x.mant != 0) && (a.x.exp == 0)) + return a.x.mant; + //case FP_NORMAL: + else if((a.x.exp != 0xFF) && (a.x.exp != 0)) + return (a.x.mant | 1ULL << 23); + //default: + else + return -1; +} + +size4s_t sf_getexp_kvx(sf_t a); +size4s_t sf_getexp_kvx(sf_t a) +{ + //case FP_SUBNORMAL: + if((a.x.mant != 0) && (a.x.exp == 0)) + return a.x.exp + 1; + //case FP_NORMAL: + else if((a.x.exp != 0xFF) && (a.x.exp != 0)) + return a.x.exp; + //default: + else + return -1; +} + +static inline void xf_debug(const char *msg, xf_t a) +{ +#ifdef DEBUG + printf("%s %c0x%016llx_%016llx /%d/%d/%d p%d\n", msg, + a.sign ? 
'-' : '+', a.mant.high, a.mant.low, a.guard, + a.round, a.sticky, a.exp); +#endif +} + +static inline int128_t int128_shl(int128_t a, size4u_t amt) +{ + int128_t ret; + if (amt == 0) + return a; + if (amt > 128) { + ret.high = 0; + ret.low = 0; + return ret; + } + if (amt >= 64) { + amt -= 64; + a.high = a.low; + a.low = 0; + } + ret.high = a.high << amt; + ret.high |= (a.low >> (64 - amt)); + ret.low = a.low << amt; + return ret; +} + +static inline int128_t int128_shr(int128_t a, size4u_t amt) +{ + int128_t ret; + if (amt == 0) + return a; + if (amt > 128) { + ret.high = 0; + ret.low = 0; + return ret; + } + if (amt >= 64) { + amt -= 64; + a.low = a.high; + a.high = 0; + } + ret.low = a.low >> amt; + ret.low |= (a.high << (64 - amt)); + ret.high = a.high >> amt; + return ret; +} + + +#define int128_gt kvx_int128_gt +static inline int kvx_int128_gt(int128_t a, int128_t b) +{ + if (a.high == b.high) + return (a.low > b.low); + return (a.high > b.high); +} + +static inline xf_t xf_norm_left(xf_t a) +{ + a.exp--; + a.mant = int128_shl(a.mant, 1); + a.mant.low |= a.guard; + a.guard = a.round; + a.round = a.sticky; + return a; +} + +static inline xf_t xf_norm_right(xf_t a, int amt) +{ + if (amt > 130) { + a.sticky |= + a.round | a.guard | (a.mant.low != 0) | (a.mant.high != 0); + a.guard = a.round = a.mant.high = a.mant.low = 0; + a.exp += amt; + return a; + + } + while (amt >= 64) { + a.sticky |= a.round | a.guard | (a.mant.low != 0); + a.guard = (a.mant.low >> 63) & 1; + a.round = (a.mant.low >> 62) & 1; + a.mant.low = a.mant.high; + a.mant.high = 0; + a.exp += 64; + amt -= 64; + } + while (amt > 0) { + a.exp++; + a.sticky |= a.round; + a.round = a.guard; + a.guard = a.mant.low & 1; + a.mant = int128_shr(a.mant, 1); + amt--; + } + return a; +} + +#define int128_add kvx_int128_add +static inline int128_t kvx_int128_add(int128_t a, int128_t b) +{ + int128_t ret; + ret.low = a.low + b.low; + if ((ret.low < a.low) || (ret.low < b.low)) { + /* carry into high part */ + 
a.high += 1; + } + ret.high = a.high + b.high; + return ret; +} + +#define int128_sub kvx_int128_sub +static inline int128_t kvx_int128_sub(int128_t a, int128_t b, int borrow) +{ + int128_t ret; + ret.low = a.low - b.low; + if (ret.low > a.low) { + /* borrow into high part */ + a.high -= 1; + } + ret.high = a.high - b.high; + if (borrow == 0) { + return ret; + } else { + a.high = 0; + a.low = 1; + return int128_sub(ret, a, 0); + } +} + +/* Return an infinity with the same sign as a */ +static inline df_t infinite_df_t(xf_t a) +{ + df_t ret; + ret.x.sign = a.sign; + ret.x.exp = DF_INF_EXP; + ret.x.mant = 0ULL; + return ret; +} + +/* Return a maximum finite value with the same sign as a */ +static inline df_t maxfinite_df_t(xf_t a) +{ + df_t ret; + ret.x.sign = a.sign; + ret.x.exp = DF_INF_EXP - 1; + ret.x.mant = 0x000fffffffffffffULL; + return ret; +} + +static inline df_t f2df_t(double in) +{ + df_t ret; + ret.f = in; + return ret; +} + +/* Return an infinity with the same sign as a */ +static inline sf_t infinite_sf_t(xf_t a) +{ + sf_t ret; + ret.x.sign = a.sign; + ret.x.exp = SF_INF_EXP; + ret.x.mant = 0ULL; + return ret; +} + +/* Return a maximum finite value with the same sign as a */ +static inline sf_t maxfinite_sf_t(xf_t a) +{ + sf_t ret; + ret.x.sign = a.sign; + ret.x.exp = SF_INF_EXP - 1; + ret.x.mant = 0x007fffffUL; + return ret; +} + +static inline sf_t f2sf_t(float in) +{ + sf_t ret; + ret.f = in; + return ret; +} + +#define GEN_XF_ROUND(TYPE,MANTBITS,INF_EXP) \ +TYPE xf_round_kvx_##TYPE(xf_t a); \ +TYPE xf_round_kvx_##TYPE(xf_t a) \ +{ \ + TYPE ret; \ + ret.i = 0; \ + ret.x.sign = a.sign; \ + if ((a.mant.high == 0) && (a.mant.low == 0) \ + && ((a.guard | a.round | a.sticky) == 0)) { \ + /* result zero */ \ + /*switch (fegetround()) { */\ + /*case FE_DOWNWARD: */\ + /* return f2##TYPE(-0.0); */\ + /*default: */\ + if(a.sign) return f2##TYPE(-0.0); \ + else return f2##TYPE(0.0); \ + /*} */\ + } \ + /* Normalize right */ \ + /* We want MANTBITS bits of 
mantissa plus the leading one. */ \ + /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \ + /* So we need to normalize right while the high word is non-zero and \ + * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \ + xf_debug("input: ", a); \ + while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS+1)) != 0)) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("norm_right: ", a); \ + /* OK, now normalize left */ \ + /* We want to normalize left until we have a leading one in bit 24 */ \ + /* Theoretically, we only need to shift a maximum of one to the left if we \ + * shifted out lots of bits from B, or if we had no shift / 1 shift sticky shoudl be 0 \ + */ \ + while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \ + a = xf_norm_left(a); \ + } \ + xf_debug("norm_left: ", a); \ + /* OK, now we might need to denormalize because of potential underflow. We need \ + * to do this before rounding, and rounding might make us normal again */ \ + while (a.exp <= 0) { \ + a = xf_norm_right(a, 1 - a.exp); \ + /* Do we have underflow? That's when we get an inexact answer because we \ + * ran out of bits in a denormal. */ \ + if (a.guard || a.round || a.sticky) { \ + /*feraiseexcept(FE_UNDERFLOW);*/ \ + } \ + } \ + xf_debug("norm_denorm: ", a); \ + /* OK, we're relatively canonical... 
now we need to round */ \ + if (a.guard || a.round || a.sticky) { \ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* Chop and we're done */ \ + /* break; */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) a.mant.low += 1; */\ + /* break; */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) a.mant.low += 1; */\ + /* break; */\ + /*default: */\ + if (a.round || a.sticky) { \ + /* round up if guard is 1, down if guard is zero */ \ + a.mant.low += a.guard; \ + } else if (a.guard) { \ + /* exactly .5, round up if odd */ \ + a.mant.low += (a.mant.low & 1); \ + } \ + /*break; */\ + /*}*/ \ + } \ + xf_debug("post_round: ", a); \ + /* OK, now we might have carried all the way up. So we might need to shr once */ \ + /* at least we know that the lsb should be zero if we rounded and got a carry out... */ \ + if ((a.mant.low >> (MANTBITS+1)) != 0) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("once_norm_right: ", a); \ + /* Overflow? */ \ + if (a.exp >= INF_EXP) { \ + /* Yep, inf result */ \ + xf_debug("inf: ", a); \ + /*feraiseexcept(FE_OVERFLOW);*/ \ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*default: */\ + return infinite_##TYPE(a); \ + /*} */\ + } \ + /* Underflow? */ \ + if (a.mant.low & (1ULL << MANTBITS)) { \ + /* Leading one means: No, we're normal. So, we should be done... 
*/ \ + xf_debug("norm: ", a); \ + ret.x.exp = a.exp; \ + ret.x.mant = a.mant.low; \ + return ret; \ + } \ + xf_debug("denorm: ", a); \ + if (a.exp != 1) \ + /*printf("a.exp == %d\n", a.exp);*/ \ + assert(a.exp == 1); \ + ret.x.exp = 0; \ + ret.x.mant = a.mant.low; \ + return ret; \ +} + +#define GEN_HF_ROUND(TYPE,MANTBITS,INF_EXP) \ +TYPE hf_round_##TYPE(xf_t a); \ +TYPE hf_round_##TYPE(xf_t a) \ +{ \ + TYPE ret; \ + ret.i = 0; \ + ret.x.sign = a.sign; \ + if ((a.mant.high == 0) && (a.mant.low == 0) \ + && ((a.guard | a.round | a.sticky) == 0)) { \ + /* result zero */ \ + /*switch (fegetround()) { */\ + /*case FE_DOWNWARD: */\ + /* return f2##TYPE(-0.0); */\ + /*default: */\ + if(a.sign) return f2##TYPE(-0.0); \ + else return f2##TYPE(0.0); \ + /*} */\ + } \ + /* Normalize right */ \ + /* We want MANTBITS bits of mantissa plus the leading one. */ \ + /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \ + /* So we need to normalize right while the high word is non-zero and \ + * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \ + xf_debug("input: ", a); \ + while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS+1)) != 0)) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("norm_right: ", a); \ + /* OK, now normalize left */ \ + /* We want to normalize left until we have a leading one in bit 24 */ \ + /* Theoretically, we only need to shift a maximum of one to the left if we \ + * shifted out lots of bits from B, or if we had no shift / 1 shift sticky shoudl be 0 \ + */ \ + while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \ + a = xf_norm_left(a); \ + } \ + xf_debug("norm_left: ", a); \ + /* OK, now we might need to denormalize because of potential underflow. We need \ + * to do this before rounding, and rounding might make us normal again */ \ + while (a.exp <= 0) { \ + a = xf_norm_right(a, 1 - a.exp); \ + /* Do we have underflow? That's when we get an inexact answer because we \ + * ran out of bits in a denormal. 
*/ \ + if (a.guard || a.round || a.sticky) { \ + /*feraiseexcept(FE_UNDERFLOW);*/ \ + } \ + } \ + xf_debug("norm_denorm: ", a); \ + /* OK, we're relatively canonical... now we need to round */ \ + /*if (a.guard || a.round || a.sticky) { */\ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* Chop and we're done */ \ + /* break; */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) a.mant.low += 1; */\ + /* break; */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) a.mant.low += 1; */\ + /* break; */\ + /*default: */\ + if (a.round || a.sticky || a.guard) { \ + /* round up if guard is 1, down if guard is zero */ \ + if ((a.mant.low & 0xFFF) == 0) a.mant.low += 1; \ + /* } else if (a.guard) {*/ \ + /* exactly .5, round up if odd */ \ + /* a.mant.low += (a.mant.low & 1); */\ + } \ + /*break; */\ + /*}*/ \ + /*} */\ + xf_debug("post_round: ", a); \ + /* OK, now we might have carried all the way up. So we might need to shr once */ \ + /* at least we know that the lsb should be zero if we rounded and got a carry out... */ \ + if ((a.mant.low >> (MANTBITS+1)) != 0) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("once_norm_right: ", a); \ + /* Overflow? */ \ + if (a.exp >= INF_EXP) { \ + /* Yep, inf result */ \ + xf_debug("inf: ", a); \ + /*feraiseexcept(FE_OVERFLOW);*/ \ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*default: */\ + return infinite_##TYPE(a); \ + /*} */\ + } \ + /* Underflow? */ \ + if (a.mant.low & (1ULL << MANTBITS)) { \ + /* Leading one means: No, we're normal. So, we should be done... 
*/ \ + xf_debug("norm: ", a); \ + ret.x.exp = a.exp; \ + ret.x.mant = a.mant.low; \ + return ret; \ + } \ + xf_debug("denorm: ", a); \ + if (a.exp != 1) \ + /*printf("a.exp == %d\n", a.exp);*/ \ + assert(a.exp == 1); \ + ret.x.exp = 0; \ + ret.x.mant = a.mant.low; \ + return ret; \ +} + + +GEN_XF_ROUND(df_t,DF_MANTBITS(),DF_INF_EXP) +GEN_XF_ROUND(sf_t,SF_MANTBITS(),SF_INF_EXP) +GEN_HF_ROUND(sf_t,SF_MANTBITS(),SF_INF_EXP) + +#define int128_mult_6464 kvx_int128_mult_6464 +static inline int128_t kvx_int128_mult_6464(size8u_t ai, size8u_t bi) +{ + int128_t ret; + int128_t a, b; + size8u_t pp0, pp1a, pp1b, pp1s, pp2; + +#ifdef DEBUG + printf("ai/bi: 0x%016llx/0x%016llx\n", ai, bi); +#endif + a.high = b.high = 0; + a.low = ai; + b.low = bi; + pp0 = (size8u_t) a.w0 * (size8u_t) b.w0; + pp1a = (size8u_t) a.w1 * (size8u_t) b.w0; + pp1b = (size8u_t) b.w1 * (size8u_t) a.w0; + pp2 = (size8u_t) a.w1 * (size8u_t) b.w1; +#ifdef DEBUG + printf("pp2/1b/1a/0: 0x%016llx/0x%016llx/0x%016llx/0x%016llx\n", + pp2, pp1b, pp1a, pp0); +#endif + pp1s = pp1a + pp1b; + if ((pp1s < pp1a) || (pp1s < pp1b)) { + pp2 += (1ULL << 32); + } + ret.low = pp0 + (pp1s << 32); + if ((ret.low < pp0) || (ret.low < (pp1s << 32))) + pp2 += 1; + ret.high = pp2 + (pp1s >> 32); +#ifdef DEBUG + printf("pp1s/rethi/retlo: 0x%016llx/0x%016llx/0x%016llx\n", + pp1s, ret.high, ret.low); +#endif + return ret; +} + +xf_t xf_add_kvx(xf_t a, xf_t b); + +xf_t xf_sub_kvx(xf_t a, xf_t b, int negate); +xf_t xf_sub_kvx(xf_t a, xf_t b, int negate) +{ + xf_t ret; + xf_init(&ret); + int borrow; + xf_debug("-->Sub/a: ", a); + xf_debug("-->Sub/b: ", b); + if (a.sign != b.sign) { + b.sign = !b.sign; + return xf_add_kvx(a, b); + } + if (b.exp > a.exp) { + /* small - big == - (big - small) */ + return xf_sub_kvx(b, a, !negate); + } + if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) { + /* small - big == - (big - small) */ + return xf_sub_kvx(b, a, !negate); + } + xf_debug("OK: Sub/a: ", a); + xf_debug("OK: Sub/b: ", b); + while 
(a.exp > b.exp) { + /* Try to normalize exponents: shrink a exponent and grow mantissa */ + if (a.mant.high & (1ULL << 62)) { + /* Can't grow a any more */ + break; + } else { + a = xf_norm_left(a); + } + } + xf_debug("norm_l: Sub/a: ", a); + xf_debug("norm_l: Sub/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: grow b exponent and shrink mantissa */ + /* Keep around shifted out bits... we might need those later */ + b = xf_norm_right(b, a.exp - b.exp); + } + xf_debug("norm_r: Sub/a: ", a); + xf_debug("norm_r: Sub/b: ", b); + if ((int128_gt(b.mant, a.mant))) { + xf_debug("retry: Sub/a: ", a); + xf_debug("retry: Sub/b: ", b); + return xf_sub_kvx(b, a, !negate); + } + /* OK, now things should be normalized! */ + ret.sign = a.sign; + ret.exp = a.exp; + assert(!int128_gt(b.mant, a.mant)); + borrow = (b.round << 2) | (b.guard << 1) | b.sticky; + ret.mant = int128_sub(a.mant, b.mant, (borrow != 0)); + borrow = 0 - borrow; + ret.guard = (borrow >> 2) & 1; + ret.round = (borrow >> 1) & 1; + ret.sticky = (borrow >> 0) & 1; + if (negate) + ret.sign = !ret.sign; + //According to the IEEE standard, Zero result in a subtraction should always be positive + if ((ret.sign) && ((ret.mant.high == 0) && (ret.mant.low == 0) && ((ret.guard | ret.round | ret.sticky) == 0))) + ret.sign = !ret.sign; + xf_debug("ret: Sub ", ret); + return ret; +} + + +xf_t xf_add_kvx(xf_t a, xf_t b) +{ + xf_t ret; + xf_init(&ret); + xf_debug("-->Add/a: ", a); + xf_debug("-->Add/b: ", b); + if (a.sign != b.sign) { + b.sign = !b.sign; + return xf_sub_kvx(a, b, 0); + } + if (b.exp > a.exp) { + /* small + big == (big + small) */ + return xf_add_kvx(b, a); + } + if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) { + /* small + big == (big + small) */ + return xf_add_kvx(b, a); + } + xf_debug("OK? Add/a: ", a); + xf_debug("OK? 
Add/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: shrink a exponent and grow mantissa */ + if (a.mant.high & (1ULL << 62)) { + /* Can't grow a any more */ + break; + } else { + a = xf_norm_left(a); + } + } + xf_debug("norm_l: Add/a: ", a); + xf_debug("norm_l: Add/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: grow b exponent and shrink mantissa */ + /* Keep around shifted out bits... we might need those later */ + b = xf_norm_right(b, a.exp - b.exp); + } + xf_debug("norm_r: Add/a: ", a); + xf_debug("norm_r: Add/b: ", b); + /* OK, now things should be normalized! */ + if (int128_gt(b.mant, a.mant)) { + xf_debug("retry: Add/a: ", a); + xf_debug("retry: Add/b: ", b); + return xf_add_kvx(b, a); + }; + ret.sign = a.sign; + ret.exp = a.exp; + assert(!int128_gt(b.mant, a.mant)); + ret.mant = int128_add(a.mant, b.mant); + ret.guard = b.guard; + ret.round = b.round; + ret.sticky = b.sticky; + xf_debug("ret: Add ", ret); + return ret; +} + + +float internal_fma_kvx(float a_in, float b_in, float c_in, int scale); +float internal_fma_kvx(float a_in, float b_in, float c_in, int scale) +{ + sf_t a, b, c; + xf_t prod; + xf_t acc; + xf_t result; +#if 0 + df_t t; + fexcept_t flags_tmp; +#endif + xf_init(&prod); + xf_init(&acc); + xf_init(&result); + a.f = a_in; + b.f = b_in; + c.f = c_in; +// printf("internal_fma_kvxx: 0x%016x * 0x%016x + 0x%016x sc: %d\n", +// fUNFLOAT(a_in), fUNFLOAT(b_in), fUNFLOAT(c_in), scale); +// if (isinf(a.f) || isinf(b.f) || isinf(c.f)) +// return special_fmaf(a, b, c); +// if (isnan(a.f) || isnan(b.f) || isnan(c.f)) +// return special_fmaf(a, b, c); + if ((scale == 0) && (isz(a.f) || isz(b.f))) + return (a.f * b.f + c.f); + /* Is a*b exact? If so, we don't have to go the slow way */ + /* EJP: axe this for simplicity? 
*/ +#if 0 + fegetexceptflag(&flags_tmp, FE_ALL_EXCEPT); + feclearexcept(FE_ALL_EXCEPT); + t.f = a.f * b.f; + if (0 && (scale == 0) && isfinite(t.f) + && fetestexcept(FE_ALL_EXCEPT) == 0) { + /* It's exactly correct, we can just do the add and return */ + fesetexceptflag(&flags_tmp, FE_ALL_EXCEPT); + asm volatile (""); + t.f = (t.f + c.f); + return t.f; + } + fesetexceptflag(&flags_tmp, FE_ALL_EXCEPT); +#endif + /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */ + prod.mant = int128_mult_6464(sf_getmant_kvx(a), sf_getmant_kvx(b)); + /* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */ + prod.exp = sf_getexp_kvx(a) + sf_getexp_kvx(b) - SF_BIAS - 23; + prod.sign = a.x.sign ^ b.x.sign; + if (isz(a.f) || isz(b.f)) prod.exp = -2*WAY_BIG_EXP; + xf_debug("prod: ", prod); + if ((scale > 0) /*&& (fpclassify(c.f) == FP_SUBNORMAL)*/) { + acc.mant = int128_mult_6464(0,0); + acc.exp = -WAY_BIG_EXP; + acc.sign = c.x.sign; + acc.sticky = 1; + xf_debug("special denorm acc: ",acc); + result = xf_add_kvx(prod,acc); + } else if (!isz(c.f)) { + acc.mant = int128_mult_6464(sf_getmant_kvx(c), 1); + acc.exp = sf_getexp_kvx(c); + acc.sign = c.x.sign; + xf_debug("acc: ", acc); + result = xf_add_kvx(prod, acc); + } else { + result = prod; + } + xf_debug("sum: ", result); +#ifdef DEBUG + printf("Scaling: %d\n", scale); +#endif + result.exp += scale; + xf_debug("post-scale: ", result); + return hf_round_sf_t(result).f; +} + +// result = (a_in*c_in) + (b_in*d_in) + acc_in, summed in xf_t form; 'scale' is added to the result exponent before rounding to float. When scale > 0 only a sticky placeholder carrying acc_in's sign is added instead of acc_in's value. +float internal_vdmpy_acc(float a_in, float b_in, float c_in, float d_in, float acc_in, int scale); +float internal_vdmpy_acc(float a_in, float b_in, float c_in, float d_in, float acc_in, int scale) +{ + sf_t a, b, c, d, accm; + xf_t prod1; //a*c + xf_t prod2; //b*d + xf_t acc; + xf_t result_temp; + xf_t result; + + xf_init(&prod1); + xf_init(&prod2); + xf_init(&acc); + xf_init(&result_temp); + xf_init(&result); + + a.f = a_in; + b.f = b_in; + c.f = c_in; + d.f = d_in; + accm.f = acc_in; + + /* (a * 2**b) * 
(c * 2**d) == a*c * 2**(b+d) */ + prod1.mant = int128_mult_6464(sf_getmant_kvx(a), sf_getmant_kvx(c)); + /* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */ + prod1.exp = sf_getexp_kvx(a) + sf_getexp_kvx(c) - SF_BIAS - 23; + prod1.sign = a.x.sign ^ c.x.sign; + + /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */ + prod2.mant = int128_mult_6464(sf_getmant_kvx(b), sf_getmant_kvx(d)); + /* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */ + prod2.exp = sf_getexp_kvx(b) + sf_getexp_kvx(d) - SF_BIAS - 23; + prod2.sign = b.x.sign ^ d.x.sign; + + + if (isz(a.f) || isz(c.f)) prod1.exp = -2*WAY_BIG_EXP; + if (isz(b.f) || isz(d.f)) prod2.exp = -2*WAY_BIG_EXP; + + xf_debug("prod1: ", prod1); + xf_debug("prod2: ", prod2); + + if ((scale > 0) /*&& (fpclassify(c.f) == FP_SUBNORMAL)*/) { + acc.mant = int128_mult_6464(0,0); + acc.exp = -WAY_BIG_EXP; + acc.sign = accm.x.sign; /* sign of the accumulator input; c is a multiplicand in this function */ + acc.sticky = 1; + xf_debug("special denorm acc: ",acc); + //result = xf_add_kvx(prod,acc); + } else /*if (!isz(accm.f)) */{ + acc.mant = int128_mult_6464(sf_getmant_kvx(accm), 1); + acc.exp = sf_getexp_kvx(accm); + acc.sign = accm.x.sign; + xf_debug("acc: ", acc); + //result = xf_add_kvx(prod, acc); + } /*else { + result = xf_add_kvx(prod1, prod2); + }*/ + + //Add the 3 numbers: prod1 prod2 acc + //result_temp = xf_add_kvx(prod1,prod2); + //result = xf_add_kvx(result_temp,acc); + result_temp = xf_add_kvx(prod1,prod2); + result = xf_add_kvx(result_temp,acc); + + xf_debug("sum: ", result); +#ifdef DEBUG + printf("Scaling: %d\n", scale); +#endif + result.exp += scale; + xf_debug("post-scale: ", result); + return xf_round_kvx_sf_t(result).f; +} + + +uint32_t fp_vdmpy_acc (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l) +{ + union ui32_f32 u_op; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32; + float f_op1_u, f_op1_l, f_op2_u, f_op2_l, f_acc; + float f_prod_l = 0, 
f_prod_u = 0, rslt; + uint32_t result; + +#ifdef DEBUG + printf("Debug : op1_u =0x%04x\n",op1_u); + printf("Debug : op1_l =0x%04x\n",op1_l); + printf("Debug : op2_u =0x%04x\n",op2_u); + printf("Debug : op2_l =0x%04x\n",op2_l); + printf("Debug : acc =0x%08x\n",acc); +#endif + + if(isNaNF16UI(op1_u) || isNaNF16UI(op1_l) || isNaNF16UI(op2_u) || isNaNF16UI(op2_l) || isNaNF32UI(acc)) + return FP32_DEF_NAN; + + op1_u_f32 = f16_to_f32(op1_u); + op1_l_f32 = f16_to_f32(op1_l); + op2_u_f32 = f16_to_f32(op2_u); + op2_l_f32 = f16_to_f32(op2_l); + +#ifdef DEBUG + printf("Debug : op1_u_f32 =0x%08x\n",op1_u_f32); + printf("Debug : op1_l_f32 =0x%08x\n",op1_l_f32); + printf("Debug : op2_u_f32 =0x%08x\n",op2_u_f32); + printf("Debug : op2_l_f32 =0x%08x\n",op2_l_f32); +#endif + + u_op.ui = op1_u_f32; + f_op1_u = u_op.f; + + u_op.ui = op1_l_f32; + f_op1_l = u_op.f; + + u_op.ui = op2_l_f32; + f_op2_l = u_op.f; + + u_op.ui = op2_u_f32; + f_op2_u = u_op.f; + + u_acc.ui = acc; + f_acc = u_acc.f; + +#ifdef DEBUG + printf("Debug_0 : f_op1_u = %f\n",f_op1_u); + printf("Debug_0 : f_op1_l = %f\n",f_op1_l); + printf("Debug_0 : f_op2_u = %f\n",f_op2_u); + printf("Debug_0 : f_op2_l = %f\n",f_op2_l); + printf("Debug_0 : f_acc = %f\n",f_acc); +#endif + + f_prod_l = (f_op1_l * f_op2_l); + f_prod_u = (f_op1_u * f_op2_u); + + if(isInfF16UI(op1_u) || isInfF16UI(op1_l) || isInfF16UI(op2_u) || isInfF16UI(op2_l) || isInfF32UI(acc)) + { + rslt = (f_prod_u + f_prod_l + f_acc); +#ifdef DEBUG + printf("Debug_inf : rslt = %f\n",rslt); +#endif + u_rslt.f = rslt; + result = u_rslt.ui; +#ifdef DEBUG + printf("Debug_inf : result =0x%08x\n",result); +#endif + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; +#ifdef DEBUG + printf("Debug_inf : result final =0x%08x\n",result); +#endif + return result; + } + + //If any of the below is a zero, we can use easy approach + if(isz(f_prod_l) || isz(f_prod_u) || isz(f_acc)) + { + rslt = (f_prod_u + f_prod_l + f_acc); +#ifdef DEBUG + printf("Debug_inf : rslt = %f\n",rslt); +#endif + u_rslt.f = rslt; + result = u_rslt.ui; +#ifdef DEBUG + printf("Debug_inf : result =0x%08x\n",result); +#endif + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; +#ifdef DEBUG + printf("Debug_inf : result final =0x%08x\n",result); +#endif + return result; + } + + +////---------------------------------------------------------------------------------------------------- +// f_prod_l = (f_op1_l * f_op2_l); +// f_prod_u = (f_op1_u * f_op2_u); +// +// printf("Debug_1 : f_prod_l = %f\n",f_prod_l); +// printf("Debug_1 : f_prod_u = %f\n",f_prod_u); +// +// rslt = (f_prod_u + f_prod_l + f_acc); +// printf("Debug_1 : rslt = %f\n",rslt); +// u_rslt.f = rslt; +// result = u_rslt.ui; +// printf("Debug_1 : result =0x%08x\n",result); +////---------------------------------------------------------------------------------------------------- + + rslt = internal_vdmpy_acc(f_op1_u, f_op1_l,f_op2_u,f_op2_l,f_acc,0); + u_rslt.f = rslt; + result = u_rslt.ui; +#ifdef DEBUG + printf("Debug_2 : rslt = %f\n",rslt); + printf("Debug_2 : result =0x%08x\n",result); +#endif + + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + +#ifdef DEBUG + printf("Debug : f_op1_u = %f\n",f_op1_u); + printf("Debug : f_op1_l = %f\n",f_op1_l); + printf("Debug : f_op2_u = %f\n",f_op2_u); + printf("Debug : f_op2_l = %f\n",f_op2_l); + printf("Debug : f_acc = %f\n",f_acc); + printf("Debug : f_prod_l = %f\n",f_prod_l); + printf("Debug : f_prod_u = %f\n",f_prod_u); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); +#endif + + return result; +} + + +uint16_t fp_mult_hf_hf_acc (uint16_t op1, uint16_t op2, uint16_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + uint32_t acc_f32; + + float a,b,facc,rslt; + uint32_t result_f32; + uint16_t result; + +#ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%04x\n",acc); +#endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF16UI(acc)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + acc_f32 = f16_to_f32(acc); + +#ifdef DEBUG + printf("Debug : op1_f32 = 0x%08x\n",op1_f32); + printf("Debug : op2_f32 = 0x%08x\n",op2_f32); + printf("Debug : acc_f32 = 0x%08x\n",acc_f32); +#endif + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc_f32; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + +#ifdef DEBUG + printf("Debug_1 : a = %f\n",a); + printf("Debug_1 : b = %f\n",b); + printf("Debug_1 : facc = %f\n",facc); +#endif + + if(isInfF16UI(op1) || isInfF16UI(op2) || isInfF16UI(acc)) + { + rslt = (a * b) + facc; +#ifdef DEBUG + printf("Debug_inf : rslt = %f\n",rslt); +#endif + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + result = f32_to_f16(result_f32); +#ifdef DEBUG + printf("Debug_inf : result_f32 =0x%08x\n",result_f32); + printf("Debug_inf : result =0x%04x\n",result); +#endif + result = isNaNF16UI(result) ? 
FP16_DEF_NAN : result; +#ifdef DEBUG + printf("Debug_inf : result final =0x%04x\n",result); +#endif + return result; + } + +// //---------------------------------------------------------------------------------------------------- +// rslt = (a * b) + facc; +// u_rslt.f = rslt; +// result_f32 = u_rslt.ui; +// printf("Debug_3 : result_f32 =0x%08x\n",result_f32); +// result = f32_to_f16(result_f32); +// printf("Debug_3 : result =0x%04x\n",result); +// //---------------------------------------------------------------------------------------------------- + + //rslt = fma(a,b,facc); + rslt = internal_fma_kvx(a, b, facc, 0); + u_rslt.f = rslt; + result_f32 = u_rslt.ui; +#ifdef DEBUG + printf("Debug_2 : rslt = %f\n",rslt); + printf("Debug_2 : result_f32 =0x%08x\n",result_f32); +#endif + + result = f32_to_f16(result_f32); + +#ifdef DEBUG + printf("Debug_2 : result =0x%04x\n",result); +#endif + + result = isNaNF16UI(result) ? FP16_DEF_NAN : result; + +#ifdef DEBUG + printf("Debug_2 : result final =0x%04x\n",result); +#endif + + return result; +} + diff --git a/target/hexagon/mmvec/macros_auto.h b/target/hexagon/mmvec/macros_auto.h new file mode 100644 index 0000000000000..479cb225c70c3 --- /dev/null +++ b/target/hexagon/mmvec/macros_auto.h @@ -0,0 +1,221 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HEXAGON_MMVEC_MACROS_AUTO_H +#define HEXAGON_MMVEC_MACROS_AUTO_H + + +#include "mmvec/macros.h" + +#include "q6v_defines.h" +#pragma GCC diagnostic ignored "-Wtype-limits" +#define fDUMPQ(STR,REG) do { printf(STR ":" #REG ": 0x%016llx\n",REG.ud[0]); } while (0) +#define fRT8NOTE() +#define fEXPERIMENTAL() +#define fBFLOAT() +#define fCVI_VX_NO_TMP_LD() +#define fNOTQ(VAL) ({mmqreg_t _ret ={0}; int _i_; for (_i_ = 0; _i_ < fVECSIZE()/64; _i_++) _ret.ud[_i_] = ~VAL.ud[_i_]; _ret;}) +#define fGETQBITS(REG,WIDTH,MASK,BITNO) ((MASK) & (REG.w[(BITNO)>>5] >> ((BITNO) & 0x1f))) +#define fGETQBIT(REG,BITNO) fGETQBITS(REG,1,1,BITNO) +#define fGENMASKW(QREG,IDX) (((fGETQBIT(QREG,(IDX*4+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*4+1)) ? 0xFF : 0x0) << 8) |((fGETQBIT(QREG,(IDX*4+2)) ? 0xFF : 0x0) << 16) |((fGETQBIT(QREG,(IDX*4+3)) ? 0xFF : 0x0) << 24)) +#define fGET10BIT(COE,VAL,POS) { COE = (((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6); COE >>= 6; } +#define fVMAX(X,Y) (((X) > (Y)) ? (X) : (Y)) /* larger of X and Y; fully parenthesized so it composes inside bigger expressions. The selected argument is evaluated twice, so avoid side effects in X/Y. */ +#define fREAD_VEC(DST,IDX) (DST = READ_VREG(fMODCIRCU((IDX),5))) +#define fREAD_ZVEC(DST,IDX) (DST = READ_ZREG(fMODCIRCU((IDX),5))) +#define fREAD_ZVEC_WORD(DST,IDX) { mmvector_t ZReg = READ_ZREG(0); DST = ZReg.uw[IDX]; } +#define fREAD_ZVEC_ALL(DST,N,NZ) { int __idx = 0; for (__idx = 0; __idx < NZ/N; __idx++) { memcpy(&DST[N*__idx], &THREAD2STRUCT->ZRegs[__idx], N); } } +#define fZREGB(Z,IDX) ((size1s_t)Z[IDX]) +#define fZREGUB(Z,IDX) ((size1u_t)Z[IDX]) +#define fZREGH(Z,IDX) ((size2s_t)Z[IDX]) +#define fZREGUB(Z,IDX) ((size1u_t)Z[IDX]) /* NOTE(review): identical repeat of the fZREGUB definition two lines up (harmless); possibly an unsigned-halfword accessor was intended here - confirm */ +#define fGETNIBBLE(IDX,SRC) ( fSXTN(4,8,(SRC >> (4*IDX)) & 0xF) ) +#define fGETCRUMB(IDX,SRC) ( fSXTN(2,8,(SRC >> (2*IDX)) & 0x3) ) +#define fGETCRUMB_SYMMETRIC(IDX,SRC) ( (fGETCRUMB(IDX,SRC)>=0 ? 
(2-fGETCRUMB(IDX,SRC)) : fGETCRUMB(IDX,SRC) ) ) +#define fWRITE_VEC(IDX,VAR) (WRITE_VREG(fMODCIRCU((IDX),5),VAR)) +#define fGENMASKH(QREG,IDX) (((fGETQBIT(QREG,(IDX*2+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*2+1)) ? 0xFF : 0x0) << 8)) +#define fGETMASKW(VREG,QREG,IDX) (VREG.w[IDX] & fGENMASKW((QREG),IDX)) +#define fGETMASKH(VREG,QREG,IDX) (VREG.h[IDX] & fGENMASKH((QREG),IDX)) +#define fCONDMASK8(QREG,IDX,YESVAL,NOVAL) (fGETQBIT(QREG,IDX) ? (YESVAL) : (NOVAL)) +#define fCONDMASK16(QREG,IDX,YESVAL,NOVAL) ((fGENMASKH(QREG,IDX) & (YESVAL)) | (fGENMASKH(fNOTQ(QREG),IDX) & (NOVAL))) +#define fCONDMASK32(QREG,IDX,YESVAL,NOVAL) ((fGENMASKW(QREG,IDX) & (YESVAL)) | (fGENMASKW(fNOTQ(QREG),IDX) & (NOVAL))) +#define fSETQBITS(REG,WIDTH,MASK,BITNO,VAL) do { size4u_t __TMP = (VAL); REG.w[(BITNO)>>5] &= ~((MASK) << ((BITNO) & 0x1f)); REG.w[(BITNO)>>5] |= (((__TMP) & (MASK)) << ((BITNO) & 0x1f)); } while (0) +#define fSETQBIT(REG,BITNO,VAL) fSETQBITS(REG,1,1,BITNO,VAL) +#define fVBYTES() (fVECSIZE()) +#define fVHALVES() (fVECSIZE()/2) +#define fVWORDS() (fVECSIZE()/4) +#define fVDWORDS() (fVECSIZE()/8) +#define fVALIGN(ADDR, LOG2_ALIGNMENT) ( ADDR = ADDR & ~(LOG2_ALIGNMENT-1)) +#define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) ( ADDR = ADDR | (LOG2_ALIGNMENT-1)) +#define fVELEM(WIDTH) ((fVECSIZE()*8)/WIDTH) +#define fVECLOGSIZE() (MAX_VEC_SIZE_LOGBYTES) +#define fVBUF_IDX(EA) (((EA) >> fVECLOGSIZE()) & 0xFF) +#define fREAD_VBUF(IDX,WIDX) READ_VBUF(IDX,WIDX) +#define fLOG_VBUF(IDX,VAL,WIDX) LOG_VBUF(IDX,VAL,WIDX) +#define fVECSIZE() (1<<fVECLOGSIZE()) +#define fSWAPB(A, B) { size1u_t tmp = A; A = B; B = tmp; } +#define fVZERO() mmvec_zero_vector() +#define fNEWVREG(VNUM) ((THREAD2STRUCT->VRegs_updated & (((VRegMask)1)<<VNUM)) ? THREAD2STRUCT->future_VRegs[VNUM] : mmvec_zero_vector()) +#define fV_AL_CHECK(EA,MASK) if ((EA) & (MASK)) { warn("aligning misaligned vector. 
PC=%08x EA=%08x",thread->Regs[REG_PC],(EA)); } +#define fSCATTER_INIT( REGION_START, LENGTH, ELEMENT_SIZE) { mem_vector_scatter_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; } +#define fGATHER_INIT( REGION_START, LENGTH, ELEMENT_SIZE) { mem_vector_gather_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; } +#ifdef CONFIG_USER_ONLY +#define fSCATTER_FINISH(OP) +#define fGATHER_FINISH() +#else +#define fSCATTER_FINISH(OP) { if (EXCEPTION_DETECTED) return; mem_vector_scatter_finish(thread, insn, OP); } +#define fGATHER_FINISH() { if (EXCEPTION_DETECTED) return; mem_vector_gather_finish(thread, insn); } +#endif +#define CHECK_VTCM_PAGE(FLAG, BASE, LENGTH, OFFSET, ALIGNMENT) { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; pa = pa & ~(ALIGNMENT-1); FLAG = (pa < (thread->mem_access[slot].paddr+LENGTH)); } +#define COUNT_OUT_OF_BOUNDS(FLAG, SIZE) { if (!FLAG) { THREAD2STRUCT->vtcm_log.oob_access += SIZE; warn("Scatter/Gather out of bounds of region"); } } +#define fLOG_SCATTER_OP(SIZE) { thread->vtcm_log.op = 1; thread->vtcm_log.op_size = SIZE; } +#define fVLOG_VTCM_GATHER_WORD(EA,OFFSET,IDX, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, 1); } +#define fVLOG_VTCM_GATHER_HALFWORD(EA,OFFSET,IDX, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, 1); } +#define fVLOG_VTCM_GATHER_HALFWORD_DV(EA,OFFSET,IDX,IDX2,IDX_H, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1); } +#define fVLOG_VTCM_GATHER_WORDQ(EA,OFFSET,IDX, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0)); } +#define fVLOG_VTCM_GATHER_HALFWORDQ(EA,OFFSET,IDX, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0)); } +#define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA,OFFSET,IDX,IDX2,IDX_H, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0)); } +#define DEBUG_LOG_ADDR(OFFSET) { if 
(thread->processor_ptr->arch_proc_options->mmvec_network_addr_log2) { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; } } +//#define SCATTER_OP_WRITE_TO_MEM(TYPE) { for (int i = 0; i < mmvecx->vtcm_log.size; i+=sizeof(TYPE)) { if ( mmvecx->vtcm_log.mask.ub[i] != 0) { TYPE dst = 0; TYPE inc = 0; for(int j = 0; j < sizeof(TYPE); j++) { dst |= (sim_mem_read1(thread->system_ptr, thread->threadId, mmvecx->vtcm_log.pa[i+j]) << (8*j)); inc |= mmvecx->vtcm_log.data.ub[j+i] << (8*j); mmvecx->vtcm_log.mask.ub[j+i] = 0; mmvecx->vtcm_log.data.ub[j+i] = 0; mmvecx->vtcm_log.offsets.ub[j+i] = 0; } dst += inc; for(int j = 0; j < sizeof(TYPE); j++) { sim_mem_write1(thread->system_ptr,thread->threadId, mmvecx->vtcm_log.pa[i+j], (dst >> (8*j))& 0xFF ); } } } } +#define fVLOG_VTCM_HALFWORD(EA,OFFSET,IN,IDX, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, 1, IN); } +#define fVLOG_VTCM_WORD(EA,OFFSET,IN,IDX,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, 1, IN); } +#define fVLOG_VTCM_HALFWORDQ(EA,OFFSET,IN,IDX,Q,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0), IN); } +#define fVLOG_VTCM_WORDQ(EA,OFFSET,IN,IDX,Q,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0), IN); } +#define fVLOG_VTCM_HALFWORD_DV(EA,OFFSET,IN,IDX,IDX2,IDX_H, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1, IN); } +#define fVLOG_VTCM_HALFWORDQ_DV(EA,OFFSET,IN,IDX,Q,IDX2,IDX_H, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0), IN); } +#define fSTORERELEASE(EA,TYPE) { fV_AL_CHECK(EA,fVECSIZE()-1); mem_store_release(thread, insn, fVECSIZE(), EA&~(fVECSIZE()-1), EA, TYPE, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#define fVFETCH_AL(EA) { fV_AL_CHECK(EA,fVECSIZE()-1); mem_fetch_vector(thread, insn, EA&~(fVECSIZE()-1), slot, fVECSIZE()); } +#define fLOADMMV_AL(EA, ALIGNMENT, LEN, DST) { fV_AL_CHECK(EA,ALIGNMENT-1); /*thread->last_pkt->double_access_vec = 0;*/ 
mem_load_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &DST.ub[0], LEN, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true) +#else +#define fLOADMMV(EA, DST) fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST) +#endif +#define fLOADMMZ(EA,DST) { mmvector_t load_vec; fV_AL_CHECK(EA,fVECSIZE()-1); mem_load_vector_oddva(thread, 0, EA&~(fVECSIZE()-1), EA, slot, fVECSIZE(), &load_vec.ub[0], fVECSIZE(), fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); int idx = (EA & 0x80)>0; DST.v[idx] = load_vec; } +#define fLOADZ_LOAD(EA,EAU,WIDTH,DST) {/* thread->last_pkt->ext_slot_cancelled = 0; thread->last_pkt->double_access_vec = 0;*/ int etm_size = ((EA % width) ==0) ? fVECSIZE() : 0; if (thread->processor_ptr->options->testgen_mode) etm_size = ((EA % width) ==0) ? WIDTH : 0; mem_load_vector_oddva(thread, 0, EA, EAU, slot, WIDTH, &DST.ub[0], etm_size, fUSE_LOOKUP_ADDRESS()); } +#define fELSE_CANCELZ() else { /*if (thread->last_pkt) { thread->mem_access[slot].dropped_z = 1; thread->last_pkt->ext_slot_cancelled |= (1<<slot); } */ } +#define fPOST_INC4(R) R+=4; +#define fPOST_INC8(R) R+=8; +#define fPOST_INC16(R) R+=16; +#define fEXTRACTZ(DST,IDX) (DST = READ_ZREG(fMODCIRCU((IDX),5))) +#define fLOADZ_UPDATE(EA,WIDTH,ZN,N,SRC) { mmvector_t Z[2]; Z[0] = READ_ZREG(0); Z[1] = READ_ZREG(1); for(int k = 0; k < WIDTH; k++) { int element_idx = (EA+k)%N; int z_idx = ((EA+k)%ZN)/N; Z[z_idx].ub[element_idx] = SRC.ub[k]; } WRITE_EXT_ZREG(0,Z[0],0); WRITE_EXT_ZREG(1,Z[1],0); } +#define fSTOREZ(EA,WIDTH,ZN,N) { mmvector_t store_vec; mmvector_t maskvec = {0}; mmvector_t Z[2]; Z[0] = READ_ZREG(0); Z[1] = READ_ZREG(1); for(int k = 0; k < WIDTH; k++) { int element_idx = (EA+k)%N; int z_idx = ((EA+k)%ZN)/N; store_vec.ub[k] = Z[z_idx].ub[element_idx]; maskvec.ub[k] = 1; } mem_store_vector_oddva(thread, 0, EA, EA, slot, WIDTH, &store_vec.ub[0], &maskvec.ub[0], 0, 
fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#define fLOADMMVQ(EA,DST,QVAL) do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); fVFOREACH(8,__i) if (!fGETQBIT(QVAL,__i)) DST.b[__i] = 0; } while (0) +#define fLOADMMVNQ(EA,DST,QVAL) do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); fVFOREACH(8,__i) if (fGETQBIT(QVAL,__i)) DST.b[__i] = 0; } while (0) +#define fLOADMMVU_AL(EA, ALIGNMENT, LEN, DST) { size4u_t size2 = (EA)&(ALIGNMENT-1); size4u_t size1 = LEN-size2; /*thread->last_pkt->double_access_vec = 1;*/ mem_load_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &DST.ub[size1], size2, fUSE_LOOKUP_ADDRESS()); mem_load_vector_oddva(thread, 0, EA, EA, 0, size1, &DST.ub[0], size1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false) +#else +#define fLOADMMVU(EA, DST) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->pkt_has_vmemu_access = 0; thread->last_pkt->double_access = 0;*/ fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); } else { /*thread->last_pkt->pkt_has_vmemu_access = 1; thread->last_pkt->double_access = 1;*/ fLOADMMVU_AL(EA,fVECSIZE(),fVECSIZE(),DST); } } +#endif +#define fSTOREMMV_AL(EA, ALIGNMENT, LEN, SRC) { fV_AL_CHECK(EA,ALIGNMENT-1); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fSTOREMMV(EA, SRC) gen_vreg_store(ctx, EA, SRC##_off, insn->slot, true) +#else +#define fSTOREMMV(EA, SRC) fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC) +#endif +#define fSTOREMMVQ_AL(EA, ALIGNMENT, LEN, SRC, MASK) do { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], &maskvec.ub[0], 0, 
fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } while (0) +#ifdef QEMU_GENERATE +#define fSTOREMMVQ(EA, SRC, MASK) \ + gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false) +#else +#define fSTOREMMVQ(EA, SRC, MASK) fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK) +#endif +#define fSTOREMMVNQ_AL(EA, ALIGNMENT, LEN, SRC, MASK) { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); fV_AL_CHECK(EA,ALIGNMENT-1); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fSTOREMMVNQ(EA, SRC, MASK) \ + gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true) +#else +#define fSTOREMMVNQ(EA, SRC, MASK) fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK) +#endif +#define fSTOREMMVU_AL(EA, ALIGNMENT, LEN, SRC) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], 0, 0, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fSTOREMMVU(EA, SRC) \ + gen_vreg_store(ctx, EA, SRC##_off, insn->slot, false) +#else +#define fSTOREMMVU(EA, SRC) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVU_AL(EA,fVECSIZE(),fVECSIZE(),SRC); } } +#endif +#define fSTOREMMVQU_AL(EA, ALIGNMENT, LEN, SRC, MASK) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); if 
(size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 0, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } /* masked unaligned store issued as two calls: size1 bytes at EA (up to the next ALIGNMENT boundary, capped at LEN) and the remaining size2 bytes at EA+size1; both calls pass the same ten arguments as the other *_AL store macros */ +#define fSTOREMMVQU(EA, SRC, MASK) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVQU_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } } +#define fSTOREMMVNQU_AL(EA, ALIGNMENT, LEN, SRC, MASK) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 1, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#define fSTOREMMVNQU(EA, SRC, MASK) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVNQU_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } } +#define fVFOREACH(WIDTH, VAR) for (VAR = 0; VAR < fVELEM(WIDTH); VAR++) +#define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) ARRAY.v[(INDEX) / (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))] +#define fVNEWCANCEL(REGNUM) do { THREAD2STRUCT->VRegs_select &= ~(1<<(REGNUM)); } while (0) +#define fTMPVDATA() 
mmvec_vtmp_data(thread) +#define fVSATDW(U,V) fVSATW( ( ( ((long long)U)<<32 ) | fZXTN(32,64,V) ) ) +#define fVASL_SATHI(U,V) fVSATW(((U)<<1) | ((V)>>31)) +#define fVUADDSAT(WIDTH,U,V) fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)) +#define fVSADDSAT(WIDTH,U,V) ({size8s_t tmp5 = fSXTN(WIDTH, 2*WIDTH, U); size8s_t tmp6 = fSXTN(WIDTH, 2*WIDTH, V); size8s_t tmp7 = tmp5 + tmp6; fVSATN( WIDTH, tmp7); }) +#define fVUSUBSAT(WIDTH,U,V) fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)) +#define fVSSUBSAT(WIDTH,U,V) fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)) +#define fVAVGU(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVAVGURND(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)+1)>>1) +#define fVNAVGU(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVNAVGURNDSAT(WIDTH,U,V) fVSATUN(WIDTH,((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)+1)>>1)) +#define fVAVGS(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVAVGSRND(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)+1)>>1) +#define fVNAVGS(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVNAVGSRND(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1) +#define fVNAVGSRNDSAT(WIDTH,U,V) fVSATN(WIDTH,((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)) +#define fVNOROUND(VAL,SHAMT) VAL +#define fVNOSAT(VAL) VAL +#define fVROUND(VAL,SHAMT) ((VAL) + (((SHAMT)>0)?(1LL<<((SHAMT)-1)):0)) +#define fCARRY_FROM_ADD32(A,B,C) (((fZXTN(32,64,A)+fZXTN(32,64,B)+C) >> 32) & 1) +#define fUARCH_NOTE_PUMP_4X() +#define fUARCH_NOTE_PUMP_2X() +#define UNLIKELY(X) __builtin_expect((X), 0) +#define fVDOCHKPAGECROSS(BASE,SUM) if (UNLIKELY(thread->timing_on)) { thread->mem_access[slot].check_page_crosses = 1; thread->mem_access[slot].page_cross_base = BASE; thread->mem_access[slot].page_cross_sum = SUM; 
} +#define fPARSEQF32(A) parse_qf32(A) +#define fRNDSATQF32(A,B,C) rnd_sat_qf32(A,B,C) +#define fPARSEQF16(A) parse_qf16(A) +#define fRNDSATQF16(A,B,C) rnd_sat_qf16(A,B,C) +#define fPARSESF(A) parse_sf(A) +#define fRNDSATSF(A,B) rnd_sat_sf(A,B) +#define fPARSEHF(A) parse_hf(A) +#define fRNDSATHF(A,B) rnd_sat_hf(A,B) +#define fRNDSATW(A,B) rnd_sat_w(A,B) +#define fRNDSATUW(A,B) rnd_sat_uw(A,B) +#define fRNDSATH(A,B) rnd_sat_h(A,B) +#define fRNDSATUH(A,B) rnd_sat_uh(A,B) +#define fRNDSATB(A,B) rnd_sat_b(A,B) +#define fRNDSATUB(A,B) rnd_sat_ub(A,B) +#define fNEGQF32(A) negate32(A) +#define fNEGQF16(A) negate16(A) +#define fNEGSF(A) negate_sf(A) +#define fNEGHF(A) negate_hf(A) +#define fCMPGT_QF32(A,B) cmpgt_qf32(A,B) +#define fCMPGT_QF16(A,B) cmpgt_qf16(A,B) +#define fCMPGT_SF(A,B) cmpgt_sf(A,B) +#define fCMPGT_HF(A,B) cmpgt_hf(A,B) +#define fCMPGT_BF(A,B) cmpgt_sf(((int)A) << 16,((int)B) << 16) +#define fCMPGT_QF32_SF(A,B) cmpgt_qf32_sf(A,B) +#define fCMPGT_QF16_HF(A,B) cmpgt_qf16_hf(A,B) +#define fMAX_QF32(X,Y) max_qf32(X,Y) +#define fMIN_QF32(X,Y) min_qf32(X,Y) +#define fMAX_QF32_SF(X,Y) max_qf32_sf(X,Y) +#define fMIN_QF32_SF(X,Y) min_qf32_sf(X,Y) +#define fMAX_QF16(X,Y) max_qf16(X,Y) +#define fMIN_QF16(X,Y) min_qf16(X,Y) +#define fMAX_QF16_HF(X,Y) max_qf16_hf(X,Y) +#define fMIN_QF16_HF(X,Y) min_qf16_hf(X,Y) +#define fMAX_SF(X,Y) max_sf(X,Y) +#define fMIN_SF(X,Y) min_sf(X,Y) +#define fMAX_HF(X,Y) max_hf(X,Y) +#define fMIN_HF(X,Y) min_hf(X,Y) + +#define fSTOREDOUBLEMMV(EA, SRC) fSTOREMMV_AL(EA,fVECSIZE(),2*fVECSIZE(),SRC) +#endif diff --git a/target/hexagon/mmvec/mmvec.h b/target/hexagon/mmvec/mmvec.h index 52d470709c02d..906bf16d8258a 100644 --- a/target/hexagon/mmvec/mmvec.h +++ b/target/hexagon/mmvec/mmvec.h @@ -38,6 +38,11 @@ typedef union { int16_t h[MAX_VEC_SIZE_BYTES / 2]; uint8_t ub[MAX_VEC_SIZE_BYTES / 1]; int8_t b[MAX_VEC_SIZE_BYTES / 1]; + int32_t qf32[MAX_VEC_SIZE_BYTES / 4]; + int16_t qf16[MAX_VEC_SIZE_BYTES / 2]; + int32_t sf[MAX_VEC_SIZE_BYTES / 4]; + 
int16_t hf[MAX_VEC_SIZE_BYTES / 2]; + int16_t bf[MAX_VEC_SIZE_BYTES / 2]; } MMVector; typedef union { diff --git a/target/hexagon/mmvec/mmvec_qfloat.c b/target/hexagon/mmvec/mmvec_qfloat.c new file mode 100644 index 0000000000000..060ac4b14d8f1 --- /dev/null +++ b/target/hexagon/mmvec/mmvec_qfloat.c @@ -0,0 +1,2563 @@ +/* + * Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif + +#include "qemu/osdep.h" +#include "mmvec_qfloat.h" +#include <math.h> + +#define UNUSED(var) do { (void)var; } while (0) + +//Take one's complement of the mantissa for QF32 +size4s_t negate32(size4s_t in) +{ + size4s_t out; + out = in>>8; + out = ~out; + out = (out<<8) | (in & 0xFF); + return out; +} +//Take one's complement of the mantissa for QF16 +size2s_t negate16(size2s_t in) +{ + size2s_t out; + out = in>>5; + out = ~out; + out = (out<<5) | (in & 0x1F); + return out; +} +//Change sign for SF +size4s_t negate_sf(size4s_t in) +{ + size4s_t out; + int sign; + sign = (in>>31) & 1; + sign = ~sign; + out = (sign<<31) | (in & 0x7FFFFFFF); + return out; +} +//Change sign for SF +size2s_t negate_hf(size2s_t in) +{ + size2s_t out; + int sign; + sign = (in>>15) & 1; + sign = ~sign; + out = (sign<<15) | (in & 0x7FFF); + return out; +} +unfloat parse_qf16(size2s_t in) +{ + unfloat out; + + out.sign = (in>>15) & 0x1; + + out.exp = (size1s_t)(0x00 | (in & 0x1F)); + out.exp = out.exp - BIAS_QF16; + + /*implied LSB=1*/ + size2s_t signif; + /*take signif and sign extend, add LSB=1*/ + signif= ((size4s_t)in >> 4) | 1; + + out.sig = (double)signif * epsilon_hf; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF16_parse]in=%x, exp=%d, sig=%10.20f\n", in,out.exp,out.sig); + printf("[ARCH_QF16_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig))); +#endif + return out; +} +//Take signed int and generate sign, exp and ***signed sig +unfloat parse_qf32(size4s_t in) +{ + unfloat out; + + out.sign = (in>>31) & 0x1; + + out.exp = (size2s_t)(0x0000 | (in & 0xFF)); + out.exp = out.exp - BIAS_QF32; + + /*implied LSB=1*/ + size4s_t signif; + /*take signif and sign extend, add LSB=1*/ + signif= ((size8s_t)in >> 7) | 1; + + out.sig = (double)signif * epsilon; + +#ifdef DEBUG_MMVEC_QF + 
/*
 * Decompose a single-precision bit pattern into sign, exponent and a
 * non-negative significand.
 *
 *   out.sign  bit 31 of the input.
 *   out.exp   8-bit exponent field (bits 30:23) minus BIAS_SF, clamped from
 *             below at E_MIN_SF.
 *   out.sig   23-bit fraction (bits 22:0), with bit 23 OR-ed in when the
 *             exponent field is non-zero, multiplied by epsilon.
 *
 * The significand is built from an unsigned value, so it is never negative;
 * callers that need a signed value apply out.sign themselves.
 */
//Take the magnitude and generate ******positive sig
unfloat parse_sf(size4s_t in)
{
    unfloat out;

    out.sign = (in>>31) & 0x1;
    /* raw (still biased) exponent field */
    out.exp = (size2s_t)( (0x0000 | (in>>23)) & 0xFF);

    size4u_t sig;
    //fraction field only: the mask keeps bits 22:0, no sign extension
    sig = (size4u_t)(in & 0x7FFFFF);

    /*implied MSB=1, added only when the exponent field is non-zero*/
    if(out.exp>0)
        sig = (1<<23) | sig;

    /* remove the bias */
    out.exp = out.exp - BIAS_SF;

    /* a zero exponent field lands below E_MIN_SF; clamp it */
    if(out.exp<E_MIN_SF)
        out.exp = E_MIN_SF;

    //if(in == 0)
    //    out.exp = E_MIN_QF32;

    /* scale the integer significand by epsilon */
    out.sig = (double)sig * epsilon;

#ifdef DEBUG_MMVEC_QF
    printf("[ARCH_SF_parse] in=%x, sign=%d, exp=%d, sig=%x(%10.30f)\n", in,out.sign,out.exp,sig,out.sig);
    printf("[ARCH_SF_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig)));
#endif
    return out;
}
*exp_in; + int sign = (sig>=0.0)? 0:1; + +#ifndef DEBUG_MMVEC_QF + UNUSED(R_low); +#endif + + int prod_ovf=0; + if(fabs(sig)>=2.0L && sig != -2.0L) + prod_ovf = 1; + + int E_MIN=E_MIN_QF32; + int E_MAX=E_MAX_QF32; + int BIAS=BIAS_QF32; + double _epsilon=epsilon; + double _units=units; + if(ft==QF32) + { + E_MIN = E_MIN_QF32; + E_MAX = E_MAX_QF32; + BIAS = BIAS_QF32; + _epsilon = epsilon; + _units= units; + } + else if(ft==QF16) + { + E_MIN = E_MIN_QF16; + E_MAX = E_MAX_QF16; + BIAS = BIAS_QF16; + _epsilon = epsilon_hf; + _units= units_hf; + } + else if(ft==SF) + { + E_MIN = E_MIN_SF; + E_MAX = E_MAX_SF; + BIAS = BIAS_SF; + _epsilon = epsilon; + _units= units; + } + else if(ft==HF) + { + E_MIN = E_MIN_HF; + E_MAX = E_MAX_HF; + BIAS = BIAS_HF; + _epsilon = epsilon_hf; + _units= units_hf; + } + + //Set scale factor + if((exp == (E_MIN-1)) || (prod_ovf && (exp<E_MAX))) + scale = 2.0; + else + scale =1.0; + + //Scale the significand + sig_s = sig/scale; + + //Get remainder from the scaled significand + R1 = sig_s*_units; + if(sig_low>0.0) + R_low = 0.25; + else if(sig_low<0.0) + R_low = -0.25; + else + R_low = 0; + + //R2 = floor((R1+R_low)/4.0)*4.0; + //R3 = (R1+R_low) - R2; + R2 = floor(R1/4.0)*4.0; + R3 = R1 - R2; + + //Check for exp overflow/underflow + if(exp>=(E_MAX+1) || (prod_ovf && exp==E_MAX)) + { + exp_ovf=1; + } + else if(exp<=(E_MIN-2)) + { + exp_undf=1; + } + else if(exp == E_MAX)//exp=E_MAX + { + //if(R3-2.0)+sig_low<=0.0 + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<2.0) || (R3==2.0 && sig_low<=0.0)) + //if(R3<=2.0) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + else if(exp == (E_MIN-1)) + { + exp_adj = 1; + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<2.0) || (R3==2.0 && sig_low<=0.0)) + //if(R3<=2.0) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } 
+ else if(prod_ovf && (exp < E_MAX)) + { + exp_adj = 1; + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<2.0) || (R3==2.0 && sig_low<=0.0)) + //if(R3<=2.0) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + else if(!prod_ovf) + { + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<1.5) || (R3==1.5 && sig_low<=0.0)) + //if(R3<=1.5) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + //else if(R3<=2.5) + else if((R3<2.5) || (R3==2.5 && sig_low<=0.0)) + { + sig_f = (sig + (2.0-R3)*_epsilon)*0.5; + exp_adj=1; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + //get the binary bits from the double-precision significand + //Either sig is positive or negative, IEEE double sig has magnitude + //Check for sign at the last stage and take 2's complement if negative + uint64_t sig_64_org, sig_64; + sig_64_org = *(uint64_t *)&sig_f; + sig_64 = sig_64_org; + uint32_t sig_32=0; + int32_t sig_32_out=0; + + int exp_df; + + exp_df = (sig_64_org >> 52) & 0x7FF; + exp_df = exp_df - BIAS_DF; + + if(exp_ovf) + { + exp=E_MAX+BIAS; + if(ft==QF32 || ft==SF) + sig_32 = (sign-1) & 0x7FFFFF; + else if(ft==QF16 || ft==HF) + sig_32 = (sign-1) & 0x3FF; + } + else if(exp_undf) + { + exp=E_MIN+BIAS; + if(ft==QF32 || ft==SF) + sig_32 = ((-1)*sign) & 0x7FFFFF; + else if(ft==QF16 || ft==HF) + sig_32 = ((-1)*sign) & 0x3FF; + } + else + { + exp += BIAS+exp_adj; + //Add MSB, generates 53bits (52+1) + sig_64 = (sig_64_org & 0xFFFFFFFFFFFFF) | 0x10000000000000; + //Shift out exponent 11 bits + sig_64 = sig_64<<11; + sig_64 = (exp_df>=0)? 
(sig_64 << exp_df):(sig_64>>abs(exp_df)); + if(ft==QF32) + { + sig_64 = sig_64 >> 41; + sig_32 = sig_64 & 0x7FFFFF; + } + else if(ft==QF16) + { + sig_64 = sig_64 >> 54; + sig_32 = sig_64 & 0x3FF; + } + + if(sign) + sig_32 = ~sig_32; + } + + sig_32_out = (sign<<23) | sig_32; + + if(ft==QF16 ||ft==HF) + sig_32_out = (sign<<10) | sig_32; + + + if( (ft ==QF16) || (ft==QF32)) { + if ((sig == 0.0) && (sig_low == 0.0)) { + exp = 0; + //printf("Squash to zero!\n"); + } + + } + + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF_rnd_sat]sign=%d exp_in=%d sig=%10.30f sig_low=%10.30f\n",sign, *exp_in, sig, sig_low); + printf("[ARCH_QF_rnd_sat]sig_s=%10.30f, sig_f=%10.30f\n",sig_s, sig_f); + printf("[ARCH_QF_rnd_sat]prod_ovf=%d exp_adj=%d exp_ovf=%d exp_undf=%d\n",prod_ovf,exp_adj, exp_ovf, exp_undf); + printf("[ARCH_QF_rnd_sat]sig_64_org=%lx sig_64=%lx sig_32=%x exp_df=%d exp=%d\n",sig_64_org, sig_64, sig_32, exp_df, exp); + printf("[ARCH_QF_rnd_sat]R1=%10.30f R_low=%1.128f R2=%10.30f R3=%10.30f eps=%10.30f\n",R1,R_low,R2,R3,_epsilon); + + double final = ldexp(sig_f, (exp-BIAS)); + printf("[ARCH_QF_norm] sig_f:%10.30f, exp-BIAS:%d, ldexp:%10.128f \n",sig_f, exp-BIAS, final); + printf("[ARCH_QF_norm] sig_32_out:%x, exp:%x \n",sig_32_out, exp); +#endif + + *exp_in = exp; + return sig_32_out; +} + +//size4s_t rnd_sat_qf32(int sign, int exp, double sig, double sig_low) +size4s_t rnd_sat_qf32(int exp, double sig, double sig_low) +{ + + //size4u_t sig_32=rnd_sat_qf_sig(sign, &exp, sig, sig_low, QF32); + //size4u_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF32); + size4s_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF32); + + size4s_t result; + //result = (sign<<31) | (sig_32 <<8) | (exp & 0xFF); + result = (sig_32 <<8) | (exp & 0xFF); + + return result; +} + + +size4u_t get_ieee_sig(int *exp, double sig, f_type ft); +size4u_t get_ieee_sig(int *exp, double sig, f_type ft) +{ + //Extract bits from double precision significand + uint64_t sig_64_org=0, sig_52=0, sig_53=0; + double value = 0.0; 
+ int exp_d=0, exp_org=*exp; + int E_MIN; + E_MIN = (ft==SF)? E_MIN_SF: E_MIN_HF; + double _epsilon; + _epsilon = (ft==SF)? epsilon: epsilon_hf; + uint32_t sig_32=0; + size4s_t signif=0; + //int sign = (sig>=0.0)? 0:1; + + value = ldexp(sig, exp_org); + + sig_64_org = *(uint64_t *)&value; + exp_d = (sig_64_org >> 52) & 0x7FF; + exp_d = exp_d - BIAS_DF; + sig_52 = (sig_64_org & 0xFFFFFFFFFFFFF); + sig_53 = sig_52 | 0x10000000000000; + + //Check if exp is one less than the MIN + //shifting right the excess amount of bits from E_MIN + int shift = E_MIN - exp_d; + + int lsb =0; + int rem =0; + int sticky =0; + int sig_f =0; +#ifndef DEBUG_MMVEC_QF + UNUSED(lsb); + UNUSED(rem); + UNUSED(sticky); + UNUSED(sig_f); + UNUSED(_epsilon); +#endif + + if(exp_d <= (E_MIN-1)) + { + sig_53 = sig_53 >> shift; + } + + if(shift >=53) + sig_53=0; + + double R1, R2, R3; + if(ft==SF) + { + signif = sig_53 >> 29; + sig_32 = signif & 0x7FFFFF; + + lsb = signif & 1; + rem = (sig_53 >>28) & 1; + sticky = (sig_53 & 0xFFFFFFF)? 1:0; + + R1 = sig_53/pow(2,29); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + + if(fabs(value) >= SF_MAX) + { + //sig_32 = (1-sign)*0x7FFFFF; + sig_32 = 0x7FFFFF; + } + else if((R3>0.5 && R3<1.0) || (R3>=1.5)) + { + if(sig_32 == 0x7FFFFF) + { + sig_32 = 0; + exp_d = exp_d +1; + } + else + sig_32 = sig_32 +1; + } + sig_f = 0x800000 | (sig_32 & 0x7FFFFF); + } + else + { + signif = sig_53 >> 42; + sig_32 = signif & 0x3FF; + + lsb = signif & 1; + rem = (sig_53 >> 41) & 1; + sticky = (sig_53 & 0x1FFFFFFFFFF)? 
1:0; + + R1 = sig_53/pow(2,42); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + + //if((rem==1 && sticky==1) || (lsb==1 && rem==1)) + if(fabs(value) >= HF_MAX) + { + //sig_32 = (1-sign)*0x3FF; + sig_32 = 0x3FF; + } + else if((R3>0.5 && R3<1.0) || (R3>=1.5)) + { + if(sig_32 == 0x3FF) + { + sig_32 = 0; + exp_d = exp_d +1; + } + else + sig_32 = sig_32 +1; + } + sig_f = 0x400 | (sig_32 & 0x3FF); + + } + + if(sig ==0.0 && exp_org == (E_MIN-1)) + { + sig_64_org = 0; + exp_d = 0; + sig_32=0; + sig_f =0; + } + *exp = exp_d; + + + +#ifdef DEBUG_MMVEC_QF + int sign = (sig>=0.0)? 0: 1; + double param = (double)sig_f*_epsilon; + if(sign) param = (-1.0)*param; + int exp_f = (exp_d<=E_MIN-1)? E_MIN: exp_d; + double final = ldexp(param, exp_f); + int exp_1 = (value != 0.0)? ilogb(value): 0; + int exp_2 = (exp_1 > E_MIN)? exp_1: E_MIN; + double sig_1 = ldexp(value, exp_1-exp_2); + + printf("[IEEE_sig]exp_1=%d, exp_2=%d, sig_1=%10.20f\n",exp_1,exp_2,sig_1); + printf("[IEEE_sig]exp_org=%d, sig=%10.20f, value=%10.20f, shift=%d\n",exp_org, sig, value, shift); + printf("[IEEE_sig]sign=%d exp_d=%d sig_64_org=%lx sig_52=%lx sig_53=%lx sig_32=%x signif=%x sig_f=%x\n",sign, exp_d, sig_64_org, sig_52, sig_53, sig_32, signif, sig_f); + printf("[IEEE_sig]lsb=%d, rem=%d, sticky=%d\n",lsb, rem, sticky); + printf("[IEEE_sig] param:%10.20f, exp_d:%d, exp_f:%d, ldexp:%10.20f \n",param, exp_d, exp_f, final); + printf("[IEEE_sig]R1=%lf, R2=%lf, R3=%lf\n",R1, R2, R3); +#endif + + return sig_32; +} + +size2s_t rnd_sat_hf_rint(int exp_in, double sig_in); +size2s_t rnd_sat_hf_rint(int exp_in, double sig_in) +{ + // normalize and decompose again limiting to EMIN of target + double val=0.0; + double den=0.0; + double sig=0.0; + double mant=0.0; + int exp=0, exp_d=0, exp_ub=0; + size2s_t result=0; + + val = ldexp(sig_in, exp); // normalize - convert to simple float (double) + exp_d = (val != 0.0)? ilogb(val): 0; + exp_ub = (exp_d> E_MIN_HF)? 
exp_d: E_MIN_HF; // EMIN=-14 for fp16 + den = ldexp(val, -exp_ub); // denormalized if we hit EMIN + int sign = (sig<0)? 1:0; + sig = fabs(den); + // round to final mantissa + mant = rint(ldexp(sig, FRAC_HF)); // FRAC=10 for fp16; RNE + // post-round exponent adjust + exp = exp_ub + BIAS_HF; // BIAS=15 for fp16 + // -1 for -1.0 (denorm) or +1 for >=2.0 (round up to next exponent) + int exp_mant = (mant != 0.0)? ilogb(mant): 0; + int exp_adj = (exp_mant-FRAC_HF > -1)? (exp_mant - FRAC_HF): -1; + exp = exp - exp_adj; + // overflow + if (exp>E_MAX_HF) { // +16 for fp16 w/o inf/nan + exp = E_MAX_HF; + mant = -1; + } + // final result// better to use a struct for fp16 instead +// result = (mant&((1<<FRAC_HF)-1)) | (exp<<FRAC_HF) | (sign<<15)); + result = (sign<<15)| (exp<<FRAC_HF) | ((int)mant & 0x3FF); + + printf("[RND_SAT_HF]sign=%d, exp_in=%d, exp_d=%d, exp_ub=%d, exp=%d\n",sign, exp_in, exp_d, exp_ub,exp); + printf("[RND_SAT_HF]sig_in=%10.20f, val=%10.20f, den=%10.20f, sig=%10.20f\n",sig_in, val, den, sig); + printf("[RND_SAT_HF]mant=%lf, result=%x\n",mant, result); + + return result; +} + + +size2s_t rnd_sat_hf(int exp, double sig) +{ + + int sign = (sig>=0.0)? 0:1; + //size4u_t sig_32=0;//rnd_sat_ieee_sig(&exp, sig, sig_low, SF); + size4u_t sig_32 = get_ieee_sig(&exp, sig, HF); + + //exp is unbiased + size2s_t result; + if(exp==(E_MIN_HF-1) && sig==0.0) + { + result = 0; + } + else if(exp > E_MAX_HF) + { + result = (sign<<15) | (0x1F << 10) | 0x3FF; + } + //else if((exp < E_MIN_HF-11) ||((exp == E_MIN_HF-11) && (sig_32 ==0))) + //{ + // result = (sign<<15); + //} + else + { + exp = exp + BIAS_HF; + if(exp < 0) + exp = 0; + else if(exp > 31) + exp = 31; + result = (sign<<15) | ((exp & 0x1F) << 10) | sig_32; + } + + + return result; +} + + +//Take signed sig, produce normalized ieee sf output +size4s_t rnd_sat_sf(int exp, double sig) +{ + + int sign = (sig>=0.0)? 
0: 1; + size4u_t sig_32 = get_ieee_sig(&exp, sig, SF); + + size4s_t result; + + if(exp==0 && sig==0.0) + { + result = 0; + } + else + { + exp = exp + BIAS_SF; + if(exp < 0) + exp = 0; + else if(exp > 255) + exp = 255; + result = (sign<<31) | ((exp & 0xFF)<< 23) | (sig_32 & 0x7FFFFF); + } + + return result; +} + +//size2s_t rnd_sat_qf16(int sign, int exp_ab, double sig, double sig_low) +size2s_t rnd_sat_qf16(int exp_ab, double sig, double sig_low) +{ + int exp=exp_ab; + + + //size4u_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF16); + //printf("sig low=%f sig=%f\n", sig, sig_low); + size4s_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF16); + + size2s_t result; + result = (sig_32<<5) | (exp & 0x1F); + //result = (sign_ab<<15) | (sig_16<<5) | (exp_ab & 0x1F); + + return result; +} + +size4s_t mpy_qf32(size4s_t in_a, size4s_t in_b ) { + size2s_t exp; + double sig; + + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf32(in_a); + b = parse_qf32(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_pre_rnd] a.sig:%10.20f, b.sig:%10.20f, sig:%10.20f, ilogb(sig):%d, exp:%d\n", a.sig, b.sig, sig, ilogb(sig), exp); +#endif + + size4s_t result; + //result = rnd_sat_qf32(sign, exp_ab, sig_ab, 0.0); + result = rnd_sat_qf32(exp, sig, 0.0); + + return result; +} + +size4s_t mpy_qf32_sf(size4s_t in_a, size4s_t in_b ) { + int sign; + size2s_t exp; + double sig; + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_sf(in_a); + b = parse_sf(in_b); + + //Unbiased: after removing bias + sign = a.sign ^ b.sign; + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + size4s_t result; + result = rnd_sat_qf32(exp, sig, 0.0); + if(sign) result = negate32(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_SF_parse]sign:%d, a.sig:%10.20f, b.sig:%10.20f, sig:%10.20f exp:%d\n",sign, a.sig, b.sig, sig, exp); +#endif + return result; +} + +size4s_t 
mpy_qf32_mix_sf(size4s_t in_a, size4s_t in_b ) { + size2s_t exp; + double sig; + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf32(in_a); + b = parse_sf(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + size4s_t result; + result = rnd_sat_qf32(exp, sig, 0.0); + if(b.sign) result = negate32(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_SF_parse]a.sign:%d, a.sig:%10.20f, b.sign:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.sign, a.sig, b.sign, b.sig, sig, exp); +#endif + return result; +} + +//QF32 output out of two QF16 muls +size8s_t mpy_qf32_qf16(size4s_t in_a, size4s_t in_b ) { + + double sig_0, sig_1; + int exp_0, exp_1; + + unfloat u0,u1,v0,v1; + + u0 = parse_qf16((in_a & 0xFFFF)); + u1 = parse_qf16(((in_a>>16) & 0xFFFF)); + v0 = parse_qf16((in_b & 0xFFFF)); + v1 = parse_qf16(((in_b>>16) & 0xFFFF)); + + //Unbiased: after removing bias + exp_0 = u0.exp + v0.exp; + exp_1 = u1.exp + v1.exp; + sig_0 = u0.sig * v0.sig; + sig_1 = u1.sig * v1.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_QF16_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0); + printf("[ARCH_QF32_QF16_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1); +#endif + + size4s_t result_0, result_1; + size8s_t result; + result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0); + result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0); + + result = ((size8s_t)result_1 <<32) | (result_0 &0xFFFFFFFF); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_QF16_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result); +#endif + + return result; +} + +//QF32 output out of two HF muls +size8s_t mpy_qf32_hf(size4s_t in_a, size4s_t in_b ) { + + double sig_0, sig_1; + int exp_0, exp_1; + + unfloat u0,u1,v0,v1; + + u0 = parse_hf((in_a & 0xFFFF)); + u1 = parse_hf(((in_a>>16) & 0xFFFF)); + v0 
= parse_hf((in_b & 0xFFFF)); + v1 = parse_hf(((in_b>>16) & 0xFFFF)); + + //Unbiased: after removing bias + exp_0 = u0.exp + v0.exp; + exp_1 = u1.exp + v1.exp; + sig_0 = u0.sig * v0.sig; + sig_1 = u1.sig * v1.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_HF_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0); + printf("[ARCH_QF32_HF_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1); +#endif + size4s_t result_0, result_1; + size8s_t result; + result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0); + result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0); + + if(u0.sign ^ v0.sign) + result_0 = negate32(result_0); + + if(u1.sign ^ v1.sign) + result_1 = negate32(result_1); + + result = ((size8s_t)result_1 <<32) | (result_0 & 0xFFFFFFFF); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_HF_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result); +#endif + + return result; +} + +//QF32 output out of mix of QF16 and HF muls +size8s_t mpy_qf32_mix_hf(size4s_t in_a, size4s_t in_b ) { + + double sig_0, sig_1; + int exp_0, exp_1; + + unfloat u0,u1,v0,v1; + + u0 = parse_qf16((in_a & 0xFFFF)); + u1 = parse_qf16(((in_a>>16) & 0xFFFF)); + v0 = parse_hf((in_b & 0xFFFF)); + v1 = parse_hf(((in_b>>16) & 0xFFFF)); + + //Unbiased: after removing bias + exp_0 = u0.exp + v0.exp; + exp_1 = u1.exp + v1.exp; + sig_0 = u0.sig * v0.sig; + sig_1 = u1.sig * v1.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_mix_hf_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0); + printf("[ARCH_QF32_mix_hf_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1); +#endif + + size4s_t result_0, result_1; + size8s_t result; + result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0); + result_1 = 
rnd_sat_qf32(exp_1, sig_1, 0.0); + + if(v0.sign) + result_0 = negate32(result_0); + if(v1.sign) + result_1 = negate32(result_1); + + result = ((size8s_t)result_1 <<32) | (result_0 & 0xFFFFFFFF); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_mix_hf_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result); +#endif + + return result; +} + +/* VMPY_QF16 */ +//ITERATOR_INSN_MPY_SLOT(16,vmpy_qf16,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)", +//"Vector multiply of qf16 format", +size2s_t mpy_qf16(size2s_t in_a, size2s_t in_b ) { + size1s_t exp; + double sig; + + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf16(in_a); + b = parse_qf16(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF16_parse] a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n", a.exp, a.sig, b.exp, b.sig, sig, exp); +#endif + + size2s_t result; + result = rnd_sat_qf16(exp, sig, 0.0); + + return result; +} + +size2s_t mpy_qf16_hf(size2s_t in_a, size2s_t in_b ) { + int sign; + size2s_t exp; + double sig; + + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_hf(in_a); + b = parse_hf(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + sign = a.sign^b.sign; + + size2s_t result; + result = rnd_sat_qf16(exp, sig, 0.0); + if(sign) result = negate16(result); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_HF_parse]a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.exp, a.sig, b.exp, b.sig, sig, exp); +#endif + + return result; +} + +size2s_t mpy_qf16_mix_hf(size2s_t in_a, size2s_t in_b ) { + size2s_t exp; + double sig; + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf16(in_a); + b = parse_hf(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + size2s_t result; + result = rnd_sat_qf16(exp, sig, 0.0); + 
if(b.sign) result = negate16(result); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_HF_parse]a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.exp, a.sig, b.exp, b.sig, sig, exp); +#endif + + return result; +} + +size4s_t add_qf32(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_qf32(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a + sig_b; + double sig_low; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf32] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf32] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} + + +size4s_t add_sf(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_sf(in_a); + b = parse_sf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? 
(-(FRAC_SF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + double sig_ab; + double sig_low; + if((a.sign ^ b.sign) == 0) + { + sig_ab = sig_a + sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else if(a.sign==0 && b.sign==1) + { + sig_ab = sig_a - sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else// if(a.sign==1 && b.sign==0) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + + size4s_t result; + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + if((a.sign==1) && (b.sign== 1)) + result = negate32(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_sf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_sf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low); + printf("[ARCH_add_sf] result:%x \n\n", result); +#endif + + + return result; +} + +size4s_t add_qf32_mix(size4s_t in_a, size4s_t in_b ) { + int exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_sf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig)); + //exp_ab = b.exp+ilogb(b.sig); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a + sig_b; + double sig_low; + sig_low = (a.exp>b.exp) ? 
((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf32_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf32_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} + +size4s_t sub_qf32(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_qf32(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a - sig_b; + double sig_low; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + //sig_low = (b.sign)? 
(-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf32] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf32] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); + printf("[ARCH_sub_qf32] a:%10.30f, a_adj:%10.30f, fabs(sig_b):%f\n", ldexp(a.sig, a.exp), ldexp(sig_a, exp_ab), fabs(sig_b)); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} + +size4s_t sub_sf(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_sf(in_a); + b = parse_sf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + double sig_ab; + double sig_low; + if((a.sign==0) && (b.sign==0)) + { + sig_ab = sig_a - sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else if(a.sign ^ b.sign) + { + sig_ab = sig_a + sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else// if(a.sign && b.sign) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? 
((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_sf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_sf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + if((a.sign==1) && (b.sign==0)) + result = negate32(result); + + return result; +} + +size4s_t sub_qf32_mix(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_sf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_SF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a - sig_b; + double sig_low; + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)-sig_b) : ((sig_ab-sig_b)-sig_a); + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab)); + sig_low = (a.exp>b.exp) ? 
((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf32_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf32_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} +//add_qf16 +size2s_t add_qf16(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_qf16(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a + sig_b; + double sig_low; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf16] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf16] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + +size2s_t add_hf(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_hf(in_a); + b = parse_hf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? 
(-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + double sig_ab; + double sig_low; + if((a.sign ^ b.sign) == 0) + { + sig_ab = sig_a + sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else if(a.sign==0 && b.sign==1) + { + sig_ab = sig_a - sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else// if(a.sign==1 && b.sign==0) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + if((a.sign==1) && (b.sign== 1)) + result = negate16(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_hf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_hf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low); + printf("[ARCH_add_sf] result:%x \n\n", result); +#endif + + + return result; +} + +size2s_t add_qf16_mix(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_hf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? 
(-(FRAC_HF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a + sig_b; + double sig_low; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf16_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf16_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + +size2s_t sub_qf16(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_qf16(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a - sig_b; + double sig_low; + //sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab)); + sig_low = (a.exp>b.exp) ? 
((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf16] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf16] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); + printf("[ARCH_sub_qf32] a:%10.30f, a_adj:%10.30f, fabs(sig_b):%f\n", ldexp(a.sig, a.exp), ldexp(sig_a, exp_ab), fabs(sig_b)); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + + +size2s_t sub_hf(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_hf(in_a); + b = parse_hf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + double sig_ab; + double sig_low; + if((a.sign==0) && (b.sign==0)) + { + sig_ab = sig_a - sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else if(a.sign ^ b.sign) + { + sig_ab = sig_a + sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else// if(a.sign && b.sign) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? 
((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_hf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_hf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.30f, sig_ab:%1.30f, sig_ab-sig_a:%1.30f, sig_low:%1.30f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_ab-sig_a,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + if((a.sign==1) && (b.sign==0)) + result = negate16(result); + + return result; +} + +size2s_t sub_qf16_mix(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_hf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_ab<b.exp) + exp_ab= b.exp; + } + else{ + exp_ab = b.exp+((b.sig==0.0)? (-(FRAC_HF+1)):ilogb(b.sig)); + if(exp_ab<a.exp) + exp_ab= a.exp; + } + + double sig_ab; + + //Scale sig to the bigger exp + double sig_a, sig_b; + sig_a = ldexp(a.sig, a.exp-exp_ab); + sig_b = ldexp(b.sig, b.exp-exp_ab); + + sig_ab = sig_a - sig_b; + double sig_low; + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)-sig_b) : ((sig_ab-sig_b)-sig_a); + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab)); + sig_low = (a.exp>b.exp) ? 
((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf16_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf16_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + +//FP conversion QF32 to IEEE SF +size4s_t conv_sf_qf32(size4s_t a) +{ + + size4s_t result; + unfloat u = parse_qf32(a); + + result = rnd_sat_sf(u.exp, u.sig); + +#ifdef DEBUG_MMVEC_QF + double final = ldexp(u.sig, u.exp); + printf("[SF_parse_conv_sf_qf32] u.sig:%lf, u.exp:%d, ldexp:%10.20f \n",u.sig, u.exp, final); +#endif + + return result; +} + +//FP conversion W to IEEE SF +size4s_t conv_sf_w(size4s_t a) +{ + + size4s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)a/scalbn(1.0, exp); + } + result = rnd_sat_sf(exp, sig); + +#ifdef DEBUG_MMVEC_QF + double final = ldexp(sig, exp); + printf("[SF_parse_conv_sf_w] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final); +#endif + + return result; +} + +//FP conversion UW to IEEE SF +size4s_t conv_sf_uw(size4u_t a) +{ + + size4s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)(unsigned)a/scalbn(1.0, exp); + } + result = rnd_sat_sf(exp, sig); + +//#ifdef DEBUG_MMVEC_QF +// double final = ldexp(sig, exp); +// printf("[SF_parse_conv_sf_uw] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final); +//#endif + + return result; +} + +//FP conversion QF16 to IEEE HF +size2s_t conv_hf_qf16(size2s_t a) +{ + + size2s_t result; + unfloat u = parse_qf16(a); + + result = rnd_sat_hf(u.exp, u.sig); + +//#ifdef DEBUG_MMVEC_QF +// double final = ldexp(u.sig, u.exp); +// printf("[HF_parse_conv_hf_qf16] u.sig:%lf, u.exp:%d, 
ldexp:%10.20f \n",u.sig, u.exp, final); +//#endif + + return result; +} + +//FP conversion H to IEEE HF +size2s_t conv_hf_h(size2s_t a) +{ + size2s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)a/scalbn(1.0, exp); + } + result = rnd_sat_hf(exp, sig); + +#ifdef DEBUG_MMVEC_QF + double final = ldexp(sig, exp); + double f_rint = rint(final); + printf("[HF_parse_conv_hf_h] sig:%lf, exp:%d, ldexp:%10.20f, rint:%lf \n",sig, exp, final, f_rint); +#endif + return result; +} + +//FP conversion UH to IEEE HF +size2s_t conv_hf_uh(size2u_t a) +{ + + size2s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)(unsigned)a/scalbn(1.0, exp); + } + result = rnd_sat_hf(exp, sig); + +//#ifdef DEBUG_MMVEC_QF +// double final = ldexp(sig, exp); +// printf("[SF_parse_conv_hf_uh] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final); +//#endif + + return result; +} + +//FP conversion two QF32 to two QF16 +size4s_t conv_hf_qf32(size8s_t a) +{ + + size2s_t result0, result1; + size4s_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + unfloat u0 = parse_qf32(a0); + unfloat u1 = parse_qf32(a1); + + result0 = rnd_sat_hf(u0.exp, u0.sig); + result1 = rnd_sat_hf(u1.exp, u1.sig); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); + +/* +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[HF_parse_conv_hf_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[HF_parse_conv_hf_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif +*/ + + return result; +} + +//FP conversion two W to two IEEE HF +size4s_t conv_hf_w(size8s_t a) +{ + size2s_t result0, result1; + size4s_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + int exp0=0, exp1=0; + double sig0=0.0, sig1=0.0; + if(a0 !=0) + { + exp0 = ilogb(a0); + sig0 = 
(double)a0/scalbn(1.0, exp0); + } + if(a1 !=0) + { + exp1 = ilogb(a1); + sig1 = (double)a1/scalbn(1.0, exp1); + } + result0 = rnd_sat_hf(exp0, sig0); + result1 = rnd_sat_hf(exp1, sig1); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); + +/* +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(sig0, exp0); + double final1 = ldexp(sig1, exp1); + + printf("[HF_parse_conv_hf_w] sig0:%lf, exp0:%d, ldexp0:%10.20f \n",sig0, exp0, final0); + printf("[HF_parse_conv_hf_w] sig1:%lf, exp1:%d, ldexp1:%10.20f \n",sig1, exp1, final1); +#endif +*/ + return result; +} + +//FP conversion two UW to two IEEE HF +size4s_t conv_hf_uw(size8u_t a) +{ + size2s_t result0, result1; + size4s_t result; + size4u_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + int exp0=0, exp1=0; + double sig0=0.0, sig1=0.0; + if(a0 !=0) + { + exp0 = ilogb(a0); + sig0 = (double)(unsigned)a0/scalbn(1.0, exp0); + } + if(a1 !=0) + { + exp1 = ilogb(a1); + sig1 = (double)(unsigned)a1/scalbn(1.0, exp1); + } + result0 = rnd_sat_hf(exp0, sig0); + result1 = rnd_sat_hf(exp1, sig1); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); +/* +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(sig0, exp0); + double final1 = ldexp(sig1, exp1); + + printf("[HF_parse_conv_hf_uw] sig0:%lf, exp0:%d, ldexp0:%10.20f \n",sig0, exp0, final0); + printf("[HF_parse_conv_hf_uw] sig1:%lf, exp1:%d, ldexp1:%10.20f \n",sig1, exp1, final1); +#endif +*/ + return result; +} + +size4s_t rnd_sat_w(int exp, double sig) +{ + size4s_t result=0; + size4s_t W_MAX = 0x7fffffff; + size4s_t W_MIN = 0x80000000; + + int sign = (sig>=0.0)? 0: 1; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(exp > 30) + { + result = (sign)? 
W_MIN:W_MAX; + result = (sign <<31) | result; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(sign==0) + { + if(R3<=0.5) + result = (size4s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size4s_t) round(R1); + else if(R3>=1.5) + result = (size4s_t) R1+1; + } + else + result = (size4s_t)round(R1); + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_w_qf32] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size4u_t rnd_sat_uw(int exp, double sig) +{ + size4u_t result=0; + size4u_t W_MAX = 0xffffffff; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(sig<0.0) + result = 0; + else if(exp > 31) + { + result = W_MAX; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(R3<=0.5) + result = (size4s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size4s_t) round(R1); + else if(R3>=1.5) + result = (size4s_t) R1+1; + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_uw_qf32] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size2s_t rnd_sat_h(int exp, double sig) +{ + size2s_t result=0; + size2s_t W_MAX = 0x7fff; + size2s_t W_MIN = 0x8000; + + int sign = (sig>=0.0)? 0: 1; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(exp > 14) + { + result = (sign)? 
W_MIN:W_MAX; + result = (sign <<15) | result; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(sign==0) + { + if(R3<=0.5) + result = (size2s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size2s_t) round(R1); + else if(R3>=1.5) + result = (size2s_t) R1+1; + } + else + { + if(R3<=0.5 && R3 !=0.0) + result = (size2s_t)R1 -1; + else if(R3>0.5 && R3<1.5) + result = (size2s_t)round(R1); + else// if(R3>=1.5) + result = (size2s_t)R1; + } + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_h_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size2u_t rnd_sat_uh(int exp, double sig) +{ + size2u_t result=0; + size2u_t W_MAX = 0xffff; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(sig<0.0) + result = 0; + else if(exp > 15) + { + result = W_MAX; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(R3<=0.5) + result = (size2s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size2s_t) round(R1); + else if(R3>=1.5) + result = (size2s_t) R1+1; + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_uh_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size1s_t rnd_sat_b(int exp, double sig) +{ + size1s_t result=0; + size1s_t W_MAX = 0x7f; + size1s_t W_MIN = 0x80; + + int sign = (sig>=0.0)? 0: 1; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(exp > 6) + { + result = (sign)? 
W_MIN:W_MAX; + result = (sign <<7) | result; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(sign==0) + { + if(R3<=0.5) + result = (size1s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size1s_t) round(R1); + else if(R3>=1.5) + result = (size1s_t) R1+1; + } + else + { + if(R3<=0.5 && R3 !=0.0) + result = (size1s_t)R1 -1; + else if(R3>0.5 && R3<1.5) + result = (size1s_t)round(R1); + else// if(R3>=1.5) + result = (size1s_t)R1; + } + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_b_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size1u_t rnd_sat_ub(int exp, double sig) +{ + size1u_t result=0; + size1u_t W_MAX = 0xff; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(sig<0.0) + result = 0; + else if(exp > 7) + { + result = W_MAX; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + + if(R3<=0.5) + result = (size1s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size1s_t) round(R1); + else if(R3>=1.5) + result = (size1s_t) R1+1; + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_ub_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +//FP conversion QF32 to 32bit W +size4s_t conv_w_qf32(size4s_t a) +{ + + size4s_t result; + unfloat u = parse_qf32(a); + + result = rnd_sat_w(u.exp, u.sig); + + return result; +} + +size4s_t conv_w_sf(size4s_t op1) +{ + sf_union input; + size4s_t W_MAX = 0x7fffffff; + size4s_t W_MIN = 0x80000000; + input.i = op1; + size4s_t result; + + if(isNaNF32(op1) || isInfF32(op1) || (input.f >= (float)W_MAX) || (input.f <= (float)W_MIN)) + { + if(input.x.sign == 1){ + result = W_MIN; + } + else{ + result = W_MAX; + } + } + else{ + //convert and round to the zero + result = (int)input.f; + } + +#ifdef DEBUG_MMVEC_QF + printf("Debug : result =0x%08x\n",result); +#endif + return result; +} + 
+size2s_t conv_h_hf(size2s_t op1) +{ /* Convert an IEEE half-precision bit pattern to a signed 16-bit integer, truncating toward zero. NaN, infinity and values at or beyond (float)HW_MAX / (float)HW_MIN return HW_MIN when the widened sign bit is set and HW_MAX otherwise. */ + sf_union input; + size4s_t op1_ext = op1; + size2s_t HW_MAX = 0x7fff; + size2s_t HW_MIN = 0x8000; + input.i = ((op1_ext & 0x8000) << 16) + (((op1_ext & 0x7c00) + 0x1c000) << 13) + ((op1_ext & 0x03ff) << 13); //grabbing sign, exp, and significand and converting to sf32 format (0x1c000 is 112 << 10, the difference between the two exponent biases) + size2s_t result; + + if(isNaNF16(op1) || isInfF16(op1) || (input.f >= (float)HW_MAX) || (input.f <= (float)HW_MIN)) + { + if(input.x.sign == 1){ + result = HW_MIN; + } + else{ + result = HW_MAX; + } + } + else{ + //convert and round toward zero + result = (short)input.f; + } + +#ifdef DEBUG_MMVEC_QF + printf("Debug : result =0x%08x\n",result); +#endif + return result; +} + +//FP conversion QF32 to 32bit UW +size4u_t conv_uw_qf32(size4s_t a) +{ + + size4u_t result; + unfloat u = parse_qf32(a); + + result = rnd_sat_uw(u.exp, u.sig); + + return result; +} + +//FP conversion QF16 to 16bit H +size2s_t conv_h_qf16(size2s_t a) +{ + + size2s_t result; + unfloat u = parse_qf16(a); + + result = rnd_sat_h(u.exp, u.sig); + + return result; +} + +//FP conversion QF16 to 16bit UH +size2u_t conv_uh_qf16(size2s_t a) +{ + + size2u_t result; + unfloat u = parse_qf16(a); + + result = rnd_sat_uh(u.exp, u.sig); + + return result; +} + +//FP conversion double QF32 to double H +size4s_t conv_h_qf32(size8s_t a) +{ + size2s_t result0, result1; + size4s_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + unfloat u0 = parse_qf32(a0); + unfloat u1 = parse_qf32(a1); + + result0 = rnd_sat_h(u0.exp, u0.sig); + result1 = rnd_sat_h(u1.exp, u1.sig); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[H_parse_conv_h_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[H_parse_conv_h_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//FP 
conversion QF32 to 32bit UW +size4u_t conv_uh_qf32(size8s_t a) +{ + size2u_t result0, result1; + size4u_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + unfloat u0 = parse_qf32(a0); + unfloat u1 = parse_qf32(a1); + + result0 = rnd_sat_uh(u0.exp, u0.sig); + result1 = rnd_sat_uh(u1.exp, u1.sig); + + result = ((size4u_t)result1 << 16) | (result0 & 0xFFFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[UH_parse_conv_uh_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[UH_parse_conv_uh_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//FP conversion double QF16 to double B +size2s_t conv_b_qf16(size4s_t a) +{ + size1s_t result0, result1; + size2s_t result; + size2s_t a0, a1; + a0 = a & 0xFFFF; + a1 = (a>>16) & 0xFFFF; + + unfloat u0 = parse_qf16(a0); + unfloat u1 = parse_qf16(a1); + + result0 = rnd_sat_b(u0.exp, u0.sig); + result1 = rnd_sat_b(u1.exp, u1.sig); + + result = ((size2s_t)result1 << 8) | (result0 & 0xFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[B_parse_conv_b_qf16] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[B_parse_conv_b_qf16] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//FP conversion QF32 to 32bit UW +size2u_t conv_ub_qf16(size4s_t a) +{ + size1u_t result0, result1; + size2u_t result; + size2s_t a0, a1; + a0 = a & 0xFFFF; + a1 = (a>>16) & 0xFFFF; + + unfloat u0 = parse_qf16(a0); + unfloat u1 = parse_qf16(a1); + + result0 = rnd_sat_ub(u0.exp, u0.sig); + result1 = rnd_sat_ub(u1.exp, u1.sig); + + result = ((size2u_t)result1 << 8) | (result0 & 0xFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[UB_parse_conv_ub_qf16] 
u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[UB_parse_conv_ub_qf16] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//Neg/Abs +size4s_t neg_qf32(size4s_t a) +{ + size4s_t result; + result = negate32(a); + return result; +} +size4s_t abs_qf32(size4s_t a) +{ + size4s_t result; + if((a>>31) & 1) + result = negate32(a); + else + result = a; + return result; +} +size2s_t neg_qf16(size2s_t a) +{ + size2s_t result; + result = negate16(a); + return result; +} +size2s_t abs_qf16(size2s_t a) +{ + size2s_t result; + if((a>>15) & 1) + result = negate16(a); + else + result = a; + return result; +} +size4s_t neg_sf(size4s_t a) +{ + size4s_t result; + result = negate_sf(a); + return result; +} +size4s_t abs_sf(size4s_t a) +{ + size4s_t result; + if((a>>31) & 1) + result = negate_sf(a); + else + result = a; + return result; +} +size2s_t neg_hf(size2s_t a) +{ + size2s_t result; + result = negate_hf(a); + return result; +} +size2s_t abs_hf(size2s_t a) +{ + size2s_t result; + if((a>>15) & 1) + result = negate_hf(a); + else + result = a; + return result; +} + +//FP Compare +int cmpgt_fp(unfloat a, unfloat b) +{ + int result=0; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + + //Filter out +0/-0 by checking the sign + if(a_d > b_d) + result=1; + +#ifdef DEBUG_MMVEC_QF + printf("[CMPGT]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + + return result; +} + +int cmpgt_qf32(size4s_t in_a, size4s_t in_b) +{ + unfloat a, b; + a= parse_qf32(in_a); + b= parse_qf32(in_b); + + int result=0; + + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_qf16(size2s_t in_a, size2s_t in_b) +{ + + unfloat a, b; + a= parse_qf16(in_a); + b= parse_qf16(in_b); + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_sf(size4s_t in_a, size4s_t in_b) +{ + + unfloat a, b; + a= parse_sf(in_a); + b= parse_sf(in_b); + + if(a.sign) + a.sig = (-1.0)*a.sig; + if(b.sign) + b.sig = 
(-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_hf(size2s_t in_a, size2s_t in_b) +{ + + unfloat a, b; + a= parse_hf(in_a); + b= parse_hf(in_b); + + if(a.sign) + a.sig = (-1.0)*a.sig; + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_qf32_sf(size4s_t in_a, size4s_t in_b) +{ + unfloat a = parse_qf32(in_a); + unfloat b = parse_sf(in_b); + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_qf16_hf(size2s_t in_a, size2s_t in_b) +{ + unfloat a = parse_qf16(in_a); + unfloat b = parse_hf(in_b); + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + return result; +} +//max/min + //if a==b, a is returned +size4s_t max_qf32( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32( in_b, in_a) ? in_b : in_a; } +size2s_t max_qf16( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16( in_b, in_a) ? in_b : in_a; } + + + +size4s_t is_check_zero_sf(size4s_t in_a); +size4s_t is_check_zero_sf(size4s_t in_a) { + return (in_a == 0) || ((in_a & 0xFFFFFFFF) == 0x80000000); +} +size2s_t is_check_zero_hf(size2s_t in_a); +size2s_t is_check_zero_hf(size2s_t in_a) { + return (in_a == 0) || ((in_a & 0xFFFF) == 0x8000); +} + +size4s_t max_sf( size4s_t in_a, size4s_t in_b) { + if (is_check_zero_sf(in_a) && is_check_zero_sf(in_b) ) { + return (in_a == 0) ? in_a : in_b; // Return in_a if it's positive 0, otherwise return the other one + } + return cmpgt_sf( in_b, in_a) ? in_b : in_a; + +} +size2s_t max_hf( size2s_t in_a, size2s_t in_b) +{ + if (is_check_zero_hf(in_a) && is_check_zero_hf(in_b) ) { + return (in_a == 0) ? in_a : in_b; + } + return cmpgt_hf( in_b, in_a) ? in_b : in_a; +} + + +//size2s_t max_qf16_hf( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16_hf( in_b, in_a) ? in_b : in_a; } +//size4s_t max_qf32_sf( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32_sf( in_b, in_a) ? 
in_b : in_a; } + +size4s_t min_qf32( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32( in_a, in_b) ? in_b : in_a; } +size2s_t min_qf16( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16( in_a, in_b) ? in_b : in_a; } + +size4s_t min_sf( size4s_t in_a, size4s_t in_b) { + if (is_check_zero_sf(in_a) && is_check_zero_sf(in_b) ) { + return (in_a == 0) ? in_b : in_a; + } + return cmpgt_sf( in_a, in_b) ? in_b : in_a; +} +size2s_t min_hf( size2s_t in_a, size2s_t in_b) { + if (is_check_zero_hf(in_a) && is_check_zero_hf(in_b) ) { + return (in_a == 0) ? in_b : in_a; + } + return cmpgt_hf( in_a, in_b) ? in_b : in_a; +} +//size2s_t min_qf16_hf( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16_hf( in_a, in_b) ? in_b : in_a; } +//size4s_t min_qf32_sf( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32_sf( in_a, in_b) ? in_b : in_a; } + + +size4s_t max_qf32_sf(size4s_t in_a, size4s_t in_b) +{ + size4s_t result=0; + unfloat a,b; + a= parse_qf32(in_a); + b= parse_sf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + + if(a_d >= b_d) + result = in_a; + else + result = in_b; + +#ifdef DEBUG_MMVEC_QF + printf("[max_qf32_sf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + + return result; +} +size4s_t min_qf32_sf(size4s_t in_a, size4s_t in_b) +{ + size4s_t result=0; + unfloat a,b; + a= parse_qf32(in_a); + b= parse_sf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + if(a_d <= b_d) + result = in_a; + else + result = in_b; +#ifdef DEBUG_MMVEC_QF + printf("[min_qf32_sf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + return result; +} + +size2s_t max_qf16_hf(size2s_t in_a, size2s_t in_b) +{ + size2s_t result=0; + unfloat a,b; + a= parse_qf16(in_a); + b= parse_hf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + if(a_d >= b_d) + result = in_a; + else + result = in_b; +#ifdef 
DEBUG_MMVEC_QF + printf("[max_qf16_hf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + return result; +} +size2s_t min_qf16_hf(size2s_t in_a, size2s_t in_b) +{ + size2s_t result=0; + unfloat a,b; + a= parse_qf16(in_a); + b= parse_hf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + if(a_d <= b_d) + result = in_a; + else + result = in_b; +#ifdef DEBUG_MMVEC_QF + printf("[min_qf16_hf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + return result; +} diff --git a/target/hexagon/mmvec/mmvec_qfloat.h b/target/hexagon/mmvec/mmvec_qfloat.h new file mode 100644 index 0000000000000..dc15cd17408b0 --- /dev/null +++ b/target/hexagon/mmvec/mmvec_qfloat.h @@ -0,0 +1,199 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MMVEC_QFLOAT_H +#define MMVEC_QFLOAT_H 1 + +#define HF_MAX 131008 //pow(2,17)-pow(2,6) =(2-1.0/pow(2,10))*pow(2,16) +#define HF_MIN 1.0/pow(2,24) +#define SF_MAX pow(2,129)-pow(2,105) //(2-1.0/pow(2,23))*pow(2,128) +#define SF_MIN 1.0/pow(2,149) + +#define E_MAX_QF32 128 +#define E_MIN_QF32 -127 +#define E_MAX_QF16 16 +#define E_MIN_QF16 -15 +#define E_MAX_SF 128 +#define E_MIN_SF -126 +#define E_MAX_HF 16 +#define E_MIN_HF -14 +#define BIAS_QF32 127 +#define BIAS_QF16 15 +#define BIAS_DF 1023 +#define BIAS_SF 127 +#define BIAS_HF 15 +#define FRAC_HF 10 +#define FRAC_SF 23 +#define isNaNF32( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF)) +#define isInfF32( a ) (((~(a) & 0x7F800000) == 0) && (((a) & 0x007FFFFF) == 0)) +#define isNaNF16( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) +#define isInfF16( a ) (((~(a) & 0x7C00) == 0) && (((a) & 0x03FF) == 0)) + +//#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) +//#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) + +#include "cpu.h" +#include "hex_arch_types.h" + +#define epsilon 1.0/pow(2,23) +#define units 1.0*pow(2,23) +#define epsilon_hf 1.0/pow(2,10) +#define units_hf 1.0*pow(2,10) + +typedef struct{ + int sign; + int exp; + double sig; +} unfloat; //Un-Normalized Float + +typedef struct{ + int sign; + int sig; + int exp; +} qf_t; + +typedef struct{ + int32_t sig : 24; + uint32_t exp : 8; +} qf32_t; + +typedef struct{ + int32_t sig : 11; + uint32_t exp : 5; +} qf16_t; + +typedef enum float_type{ + QF32, + QF16, + SF, + HF +} f_type; + +typedef union { + float f; + size4u_t i; + struct { + size4u_t mant:23; + size4u_t exp:8; + size4u_t sign:1; + } x; +} sf_union; + +//MPY +size4s_t mpy_qf32(size4s_t a, size4s_t b); +size4s_t mpy_qf32_sf(size4s_t a, size4s_t b); +size4s_t mpy_qf32_mix_sf(size4s_t a, size4s_t b); +size2s_t mpy_qf16(size2s_t a, size2s_t b); +size2s_t mpy_qf16_hf(size2s_t a, size2s_t b); +size2s_t mpy_qf16_mix_hf(size2s_t a, size2s_t b); +size8s_t mpy_qf32_qf16(size4s_t a, size4s_t b); 
+size8s_t mpy_qf32_hf(size4s_t a, size4s_t b); +size8s_t mpy_qf32_mix_hf(size4s_t a, size4s_t b); + +unfloat parse_qf32(size4s_t a); +unfloat parse_qf16(size2s_t a); +unfloat parse_sf(size4s_t a); +unfloat parse_hf(size2s_t a); +size4s_t rnd_sat_qf32(int exp, double sig, double sig_low); +size2s_t rnd_sat_qf16(int exp, double sig, double sig_low); +size4s_t rnd_sat_sf(int exp, double sig); +size2s_t rnd_sat_hf(int exp, double sig); +size4s_t rnd_sat_w(int exp, double sig); +size4u_t rnd_sat_uw(int exp, double sig); +size2s_t rnd_sat_h(int exp, double sig); +size2u_t rnd_sat_uh(int exp, double sig); +size1s_t rnd_sat_b(int exp, double sig); +size1u_t rnd_sat_ub(int exp, double sig); +size4s_t negate32(size4s_t); +size2s_t negate16(size2s_t); +size4s_t negate_sf(size4s_t); +size2s_t negate_hf(size2s_t); + +//ADD +size4s_t add_qf32(size4s_t a, size4s_t b); +size4s_t add_sf(size4s_t a, size4s_t b); +size4s_t add_qf32_mix(size4s_t a, size4s_t b); +size2s_t add_qf16(size2s_t a, size2s_t b); +size2s_t add_hf(size2s_t a, size2s_t b); +size2s_t add_qf16_mix(size2s_t a, size2s_t b); + +//SUB +size4s_t sub_qf32(size4s_t a, size4s_t b); +size4s_t sub_sf(size4s_t a, size4s_t b); +size4s_t sub_qf32_mix(size4s_t a, size4s_t b); +size2s_t sub_qf16(size2s_t a, size2s_t b); +size2s_t sub_hf(size2s_t a, size2s_t b); +size2s_t sub_qf16_mix(size2s_t a, size2s_t b); + +//Convert +size4s_t conv_sf_qf32(size4s_t a); +size4s_t conv_sf_w(size4s_t a); +size4s_t conv_sf_uw(size4u_t a); +size2s_t conv_hf_qf16(size2s_t a); +size2s_t conv_hf_h(size2s_t a); +size2s_t conv_hf_uh(size2u_t a); +size4s_t conv_hf_qf32(size8s_t a); +size4s_t conv_hf_w(size8s_t a); +size4s_t conv_hf_uw(size8u_t a); + +size4s_t conv_w_qf32(size4s_t a); +size4u_t conv_uw_qf32(size4s_t a); +size2s_t conv_h_qf16(size2s_t a); +size2u_t conv_uh_qf16(size2s_t a); +size4s_t conv_h_qf32(size8s_t a); +size4u_t conv_uh_qf32(size8s_t a); +size2s_t conv_b_qf16(size4s_t a); +size2u_t conv_ub_qf16(size4s_t a); + +size4s_t 
conv_w_sf(size4s_t a); +// size4u_t conv_uw_sf(size4s_t a); +size2s_t conv_h_hf(size2s_t a); +// size2u_t conv_uh_sf(size2s_t a); + +//Neg/Abs +size4s_t neg_qf32(size4s_t a); +size4s_t abs_qf32(size4s_t a); +size2s_t neg_qf16(size2s_t a); +size2s_t abs_qf16(size2s_t a); +size4s_t neg_sf(size4s_t a); +size4s_t abs_sf(size4s_t a); +size2s_t neg_hf(size2s_t a); +size2s_t abs_hf(size2s_t a); + +//Compare +int cmpgt_fp(unfloat a, unfloat b); +int cmpgt_qf32(size4s_t a, size4s_t b); +int cmpgt_qf16(size2s_t a, size2s_t b); +int cmpgt_sf(size4s_t a, size4s_t b); +int cmpgt_hf(size2s_t a, size2s_t b); +int cmpgt_qf32_sf(size4s_t a, size4s_t b); +int cmpgt_qf16_hf(size2s_t a, size2s_t b); + +//max/min +size4s_t max_qf32(size4s_t a, size4s_t b); +size4s_t min_qf32(size4s_t a, size4s_t b); +size4s_t max_qf32_sf(size4s_t a, size4s_t b); +size4s_t min_qf32_sf(size4s_t a, size4s_t b); +size4s_t max_sf(size4s_t a, size4s_t b); +size4s_t min_sf(size4s_t a, size4s_t b); +size2s_t max_qf16(size2s_t a, size2s_t b); +size2s_t min_qf16(size2s_t a, size2s_t b); +size2s_t max_qf16_hf(size2s_t a, size2s_t b); +size2s_t min_qf16_hf(size2s_t a, size2s_t b); +size2s_t max_hf(size2s_t a, size2s_t b); +size2s_t min_hf(size2s_t a, size2s_t b); +#endif diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 7a83e8975031b..e6f11fd5f9905 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -32,6 +32,7 @@ #include "fma_emu.h" #include "mmvec/mmvec.h" #include "mmvec/macros.h" +#include "mmvec/mmvec_qfloat.h" #include "op_helper.h" #include "cpu_helper.h" #include "translate.h" @@ -2012,4 +2013,5 @@ uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg) #define BOGUS_HELPER(tag) \ printf("ERROR: bogus helper: " #tag "\n") +#include "mmvec/kvx_ieee.h" #include "helper_funcs_generated.c.inc" From 60e059dd5948d78dbaea75c4d63ad8c66880ed33 Mon Sep 17 00:00:00 2001 From: Marco Liebel <mliebel@quicinc.com> Date: Thu, 27 Feb 2025 12:00:02 -0800 Subject: 
[PATCH 119/126] target/hexagon: Add macro imports The number of parameters for `DEF_MACRO` changed and needed to be updated too. Signed-off-by: Marco Liebel <mliebel@quicinc.com> --- target/hexagon/gen_semantics.c | 2 +- target/hexagon/imported/mmvec/macros.def | 955 ++++++++++++++++++++--- target/hexagon/mmvec/macros.h | 13 + 3 files changed, 844 insertions(+), 126 deletions(-) diff --git a/target/hexagon/gen_semantics.c b/target/hexagon/gen_semantics.c index 4a2bdd70e9cc7..ed66ae4ec2417 100644 --- a/target/hexagon/gen_semantics.c +++ b/target/hexagon/gen_semantics.c @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) /* * Process the macros for HVX */ -#define DEF_MACRO(MNAME, BEH, ATTRS) \ +#define DEF_MACRO(MNAME, PARAMS, SDESC, LDESC, BEH, ATTRS) \ fprintf(outfile, "MACROATTRIB( \\\n" \ " \"%s\", \\\n" \ " \"\"\"%s\"\"\", \\\n" \ diff --git a/target/hexagon/imported/mmvec/macros.def b/target/hexagon/imported/mmvec/macros.def index 7e5438a998021..e9524aa56d1e1 100755 --- a/target/hexagon/imported/mmvec/macros.def +++ b/target/hexagon/imported/mmvec/macros.def @@ -15,46 +15,76 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -DEF_MACRO(fDUMPQ, +DEF_MACRO(fDUMPQ,(STR,REG), + "dump REG", + "dump REG", do { printf(STR ":" #REG ": 0x%016llx\n",REG.ud[0]); } while (0), () ) -DEF_MACRO(fUSE_LOOKUP_ADDRESS_BY_REV, - PROC->arch_proc_options->mmvec_use_full_va_for_lookup, +DEF_MACRO(fUSE_LOOKUP_ADDRESS_BY_REV,(PROC), + "", + "Use full VA address for lookup and exception based on REV ", + PROC->arch_proc_options->HVX_USE_FULL_VA, () ) -DEF_MACRO(fUSE_LOOKUP_ADDRESS, +DEF_MACRO(fUSE_LOOKUP_ADDRESS,(), + "", + "Use full VA address for lookup and exception", 1, () ) -DEF_MACRO(fNOTQ, +DEF_MACRO(fRT8NOTE, (), + "", + "", + , + (A_NOTE_RT8) +) + +DEF_MACRO(fCVI_VX_NO_TMP_LD, (), + "", + "", + , + (A_CVI_VX_NO_TMP_LD) +) +DEF_MACRO(fNOTQ,(VAL), + "~VAL", + "~VAL", + /* Will break Visual Studio? 
*/ ({mmqreg_t _ret = {0}; int _i_; for (_i_ = 0; _i_ < fVECSIZE()/64; _i_++) _ret.ud[_i_] = ~VAL.ud[_i_]; _ret;}), () ) -DEF_MACRO(fGETQBITS, +DEF_MACRO(fGETQBITS,(REG,WIDTH,MASK,BITNO), + "REG[BITNO+WIDTH-1:BITNO]", + "Get MASK bits at BITNO from REG", ((MASK) & (REG.w[(BITNO)>>5] >> ((BITNO) & 0x1f))), () ) -DEF_MACRO(fGETQBIT, +DEF_MACRO(fGETQBIT,(REG,BITNO), + "REG[BITNO]", + "Get bit BITNO from REG", fGETQBITS(REG,1,1,BITNO), () ) -DEF_MACRO(fGENMASKW, +DEF_MACRO(fGENMASKW,(QREG,IDX), + "maskw(QREG,IDX)", + "Generate mask from QREG for word IDX", (((fGETQBIT(QREG,(IDX*4+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*4+1)) ? 0xFF : 0x0) << 8) |((fGETQBIT(QREG,(IDX*4+2)) ? 0xFF : 0x0) << 16) |((fGETQBIT(QREG,(IDX*4+3)) ? 0xFF : 0x0) << 24)), () ) -DEF_MACRO(fGET10BIT, +DEF_MACRO(fGET10BIT,(COE,VAL,POS), + "COE=(((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6) >> 6;", + "Get 10-bit coefficient from current word value and byte position", { COE = (((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6); COE >>= 6; @@ -62,62 +92,160 @@ DEF_MACRO(fGET10BIT, () ) -DEF_MACRO(fVMAX, +DEF_MACRO(fVMAX,(X,Y), + "max(X,Y)", + "", (X>Y) ? 
X : Y, () ) -DEF_MACRO(fGETNIBBLE, +DEF_MACRO(fREAD_VEC, + (DST,IDX), + "DST=VREG[IDX]", /* short desc */ + "Read Vector IDX", /* long desc */ + (DST = READ_VREG(fMODCIRCU((IDX),5))), + () +) +DEF_MACRO(fREAD_ZVEC, + (DST,IDX), + "DST=ZREG[IDX]", /* short desc */ + "Read Vector IDX", /* long desc */ + (DST = READ_ZREG(fMODCIRCU((IDX),5))), + () +) + +DEF_MACRO(fREAD_ZVEC_WORD, + (DST,IDX), + "DST=ZReg.uw[IDX]", /* short desc */ + "Read Z Vector IDX", /* long desc */ + { + mmvector_t ZReg = READ_ZREG(0); + DST = ZReg.uw[IDX]; + + }, + () +) +DEF_MACRO(fREAD_ZVEC_ALL, + (DST,N,NZ), + "", /* short desc */ + "Read Z Vector IDX", /* long desc */ + { + int __idx = 0; + for (__idx = 0; __idx < NZ/N; __idx++) { + memcpy(&DST[N*__idx], &THREAD2STRUCT->ZRegs[__idx], N); + } + }, + () +) +DEF_MACRO(fZREGB, + (Z,IDX), + "ZREG.b[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size1s_t)Z[IDX]), + () +) +DEF_MACRO(fZREGUB, + (Z,IDX), + "ZREG.ub[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size1u_t)Z[IDX]), + () +) +DEF_MACRO(fZREGH, + (Z,IDX), + "ZREG.h[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size2s_t)Z[IDX]), + () +) +DEF_MACRO(fZREGUB, + (Z,IDX), + "ZREG.ub[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size1u_t)Z[IDX]), + () +) + +DEF_MACRO(fGETNIBBLE,(IDX,SRC), + "SRC.s4[IDX]", + "Get nibble", ( fSXTN(4,8,(SRC >> (4*IDX)) & 0xF) ), () ) -DEF_MACRO(fGETCRUMB, +DEF_MACRO(fGETCRUMB,(IDX,SRC), + "SRC.s2[IDX]", + "Get 2bits", ( fSXTN(2,8,(SRC >> (2*IDX)) & 0x3) ), () ) -DEF_MACRO(fGETCRUMB_SYMMETRIC, +DEF_MACRO(fGETCRUMB_SYMMETRIC,(IDX,SRC), + "SRC.s2[IDX] >= 0 ? (2-SRC.s2[IDX]) : SRC.s2[IDX]", + "Get 2bits", ( (fGETCRUMB(IDX,SRC)>=0 ? 
(2-fGETCRUMB(IDX,SRC)) : fGETCRUMB(IDX,SRC) ) ), () ) +//#define ZERO_OFFSET_2B +(fGETCRUMB(z,VuV.uw[i])>=0) #define ZERO_OFFSET_2B + -DEF_MACRO(fGENMASKH, +DEF_MACRO(fWRITE_VEC, + (IDX,VAR), + "VREG[IDX]=VAR", /* short desc */ + "Write Vector IDX", /* long desc */ + (WRITE_VREG(fMODCIRCU((IDX),5),VAR)), + () +) + +DEF_MACRO(fGENMASKH,(QREG,IDX), + "maskh(QREG,IDX)", + "generate mask from QREG for halfword IDX", (((fGETQBIT(QREG,(IDX*2+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*2+1)) ? 0xFF : 0x0) << 8)), () ) -DEF_MACRO(fGETMASKW, +DEF_MACRO(fGETMASKW,(VREG,QREG,IDX), + "VREG.w[IDX] & fGENMASKW(QREG,IDX)", + "Mask word IDX from VREG using QREG", (VREG.w[IDX] & fGENMASKW((QREG),IDX)), () ) -DEF_MACRO(fGETMASKH, +DEF_MACRO(fGETMASKH,(VREG,QREG,IDX), + "VREG.h[IDX] & fGENMASKH(QREG,IDX)", + "Mask word IDX from VREG using QREG", (VREG.h[IDX] & fGENMASKH((QREG),IDX)), () ) -DEF_MACRO(fCONDMASK8, +DEF_MACRO(fCONDMASK8,(QREG,IDX,YESVAL,NOVAL), + "QREG.IDX ? YESVAL : NOVAL", + "QREG.IDX ? YESVAL : NOVAL", (fGETQBIT(QREG,IDX) ? 
(YESVAL) : (NOVAL)), () ) -DEF_MACRO(fCONDMASK16, +DEF_MACRO(fCONDMASK16,(QREG,IDX,YESVAL,NOVAL), + "select_bytes(QREG,IDX,YESVAL,NOVAL)", + "select_bytes(QREG,IDX,YESVAL,NOVAL)", ((fGENMASKH(QREG,IDX) & (YESVAL)) | (fGENMASKH(fNOTQ(QREG),IDX) & (NOVAL))), () ) -DEF_MACRO(fCONDMASK32, +DEF_MACRO(fCONDMASK32,(QREG,IDX,YESVAL,NOVAL), + "select_bytes(QREG,IDX,YESVAL,NOVAL)", + "select_bytes(QREG,IDX,YESVAL,NOVAL)", ((fGENMASKW(QREG,IDX) & (YESVAL)) | (fGENMASKW(fNOTQ(QREG),IDX) & (NOVAL))), () ) -DEF_MACRO(fSETQBITS, +DEF_MACRO(fSETQBITS,(REG,WIDTH,MASK,BITNO,VAL), + "REG[BITNO+WIDTH-1:BITNO] = VAL", + "Put bits into REG", do { size4u_t __TMP = (VAL); REG.w[(BITNO)>>5] &= ~((MASK) << ((BITNO) & 0x1f)); @@ -126,58 +254,101 @@ DEF_MACRO(fSETQBITS, () ) -DEF_MACRO(fSETQBIT, +DEF_MACRO(fSETQBIT,(REG,BITNO,VAL), + "REG[BITNO]=VAL", + "Put bit into REG", fSETQBITS(REG,1,1,BITNO,VAL), () ) -DEF_MACRO(fVBYTES, +DEF_MACRO(fVBYTES,(), + "VWIDTH", + "Number of bytes in a vector", (fVECSIZE()), () ) -DEF_MACRO(fVHALVES, +DEF_MACRO(fVHALVES,(), + "VWIDTH/2", + "Number of halves in a vector", (fVECSIZE()/2), () ) -DEF_MACRO(fVWORDS, +DEF_MACRO(fVWORDS,(), + "VWIDTH/2", + "Number of words in a vector", (fVECSIZE()/4), () ) -DEF_MACRO(fVDWORDS, +DEF_MACRO(fVDWORDS,(), + "VWIDTH/8", + "Number of double words in a vector", (fVECSIZE()/8), () ) -DEF_MACRO(fVALIGN, +DEF_MACRO(fVALIGN, (ADDR, LOG2_ALIGNMENT), + "ADDR = ADDR & ~(LOG2_ALIGNMENT-1)", + "Align to Element Size", ( ADDR = ADDR & ~(LOG2_ALIGNMENT-1)), () ) -DEF_MACRO(fVLASTBYTE, +DEF_MACRO(fVLASTBYTE, (ADDR, LOG2_ALIGNMENT), + "ADDR = ADDR | (LOG2_ALIGNMENT-1)", + "Set LSB of length to last byte", ( ADDR = ADDR | (LOG2_ALIGNMENT-1)), () ) -DEF_MACRO(fVELEM, +DEF_MACRO(fVELEM, (WIDTH), + "VBITS/WIDTH", + "Number of WIDTH-bit elements in a vector", ((fVECSIZE()*8)/WIDTH), () ) -DEF_MACRO(fVECLOGSIZE, +DEF_MACRO(fVECLOGSIZE,(), + "log2(VECTOR_SIZE)", + "Log base 2 of the number of bytes in a vector", 
(mmvec_current_veclogsize(thread)), () ) -DEF_MACRO(fVECSIZE, +DEF_MACRO(fVBUF_IDX,(EA), + "(EA >> log2(VECTOR_SIZE)) & 0xFF", + "(EA >> log2(VECTOR_SIZE)) & 0xFF", + (((EA) >> fVECLOGSIZE()) & 0xFF), + (A_FAKEINSN) +) + +DEF_MACRO(fREAD_VBUF,(IDX,WIDX), + "vbuf[IDX].w[WIDX]", + "vbuf[IDX].w[WIDX]", + READ_VBUF(IDX,WIDX), + (A_FAKEINSN) +) + +DEF_MACRO(fLOG_VBUF,(IDX,VAL,WIDX), + "vbuf[IDX].w[WIDX] = VAL", + "vbuf[IDX].w[WIDX] = VAL", + LOG_VBUF(IDX,VAL,WIDX), + (A_FAKEINSN) +) + +DEF_MACRO(fVECSIZE,(), + "VBYTES", + "Number of bytes in a vector currently", (1<<fVECLOGSIZE()), () ) -DEF_MACRO(fSWAPB, +DEF_MACRO(fSWAPB,(A, B), + "SWAP(A,B)", + "Swap bytes", { size1u_t tmp = A; A = B; @@ -187,41 +358,54 @@ DEF_MACRO(fSWAPB, ) DEF_MACRO( - fVZERO, + fVZERO,(), + "0", + "0", mmvec_zero_vector(), () ) DEF_MACRO( - fNEWVREG, + fNEWVREG,(VNUM), + "VNUM.new", + "Register value produced in this packet", ((THREAD2STRUCT->VRegs_updated & (((VRegMask)1)<<VNUM)) ? THREAD2STRUCT->future_VRegs[VNUM] : mmvec_zero_vector()), (A_DOTNEWVALUE,A_RESTRICT_SLOT0ONLY) ) DEF_MACRO( fV_AL_CHECK, + (EA,MASK), + "", + "", if ((EA) & (MASK)) { warn("aligning misaligned vector. 
PC=%08x EA=%08x",thread->Regs[REG_PC],(EA)); }, () ) -DEF_MACRO(fSCATTER_INIT, +DEF_MACRO(fSCATTER_INIT, ( REGION_START, LENGTH, ELEMENT_SIZE), + "", + "", { mem_vector_scatter_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; }, - (A_STORE,A_MEMLIKE,A_RESTRICT_SLOT0ONLY) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_SLOT0ONLY) ) -DEF_MACRO(fGATHER_INIT, +DEF_MACRO(fGATHER_INIT, ( REGION_START, LENGTH, ELEMENT_SIZE), + "", + "", { mem_vector_gather_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; }, - (A_LOAD,A_MEMLIKE,A_RESTRICT_SLOT1ONLY) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSCATTER_FINISH, +DEF_MACRO(fSCATTER_FINISH, (OP), + "", + "", { if (EXCEPTION_DETECTED) return; mem_vector_scatter_finish(thread, insn, OP); @@ -229,7 +413,9 @@ DEF_MACRO(fSCATTER_FINISH, () ) -DEF_MACRO(fGATHER_FINISH, +DEF_MACRO(fGATHER_FINISH, (), + "", + "", { if (EXCEPTION_DETECTED) return; mem_vector_gather_finish(thread, insn); @@ -238,7 +424,9 @@ DEF_MACRO(fGATHER_FINISH, ) -DEF_MACRO(CHECK_VTCM_PAGE, +DEF_MACRO(CHECK_VTCM_PAGE, (FLAG, BASE, LENGTH, OFFSET, ALIGNMENT), + "FLAG=((BASE+OFFSET) < (BASE+LENGTH))", + "FLAG=((BASE+OFFSET) < (BASE+LENGTH))", { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; @@ -247,7 +435,9 @@ DEF_MACRO(CHECK_VTCM_PAGE, }, () ) -DEF_MACRO(COUNT_OUT_OF_BOUNDS, +DEF_MACRO(COUNT_OUT_OF_BOUNDS, (FLAG, SIZE), + " ", + "", { if (!FLAG) { @@ -258,7 +448,9 @@ DEF_MACRO(COUNT_OUT_OF_BOUNDS, () ) -DEF_MACRO(fLOG_SCATTER_OP, +DEF_MACRO(fLOG_SCATTER_OP, (SIZE), + " ", + " ", { // Log the size and indicate that the extension ext.c file needs to increment right before memory write THREAD2STRUCT->vtcm_log.op = 1; @@ -269,7 +461,9 @@ DEF_MACRO(fLOG_SCATTER_OP, -DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, +DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, (EA,OFFSET,INC,IDX,ALIGNMENT,LEN), + "if (RtV <= EA <= RtV + LEN) *EA += INC.uw[IDX] ", + "if (RtV 
<= EA <= RtV + LEN) *EA += INC.uw[IDX] ", { int slot = insn->slot; int log_bank = 0; @@ -287,7 +481,9 @@ DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, +DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, (EA,OFFSET,INC,IDX,ALIGNMENT,LEN), + "if (RtV <= EA <= RtV + LEN) *EA += INC.uh[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA += INC.uh[IDX] ", { int slot = insn->slot; int log_bank = 0; @@ -304,7 +500,9 @@ DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, +DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, (EA,OFFSET,INC,IDX,IDX2,IDX_H,ALIGNMENT,LEN), + "if (RtV <= EA <= RtV + LEN) *EA += INC.w[IDX2].uh[IDX_H] ", + "if (RtV <= EA <= RtV + LEN) *EA += INC.w[IDX2].uh[IDX_H] ", { int slot = insn->slot; int log_bank = 0; @@ -323,7 +521,9 @@ DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, -DEF_MACRO(GATHER_FUNCTION, +DEF_MACRO(GATHER_FUNCTION, (EA,OFFSET,IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL), +"", +"", { int slot = insn->slot; int i0; @@ -336,6 +536,9 @@ DEF_MACRO(GATHER_FUNCTION, log_byte = ((OFFSET>=0)&&((pa+i0)<=pa_high)) && QVAL; log_bank |= (log_byte<<i0); size1u_t B = sim_mem_read1(thread->system_ptr, thread->threadId, thread->mem_access[slot].paddr+OFFSET+i0); +#ifdef VERIFICATION + warn("Gather[%d] sim_mem_read1 pa:%llx val: %x", ELEMENT_SIZE*IDX+i0, thread->mem_access[slot].paddr+OFFSET+i0, B); +#endif THREAD2STRUCT->tmp_VRegs[0].ub[ELEMENT_SIZE*IDX+i0] = B; LOG_VTCM_BYTE(pa+i0,log_byte,B,ELEMENT_SIZE*IDX+i0); } @@ -346,38 +549,50 @@ DEF_MACRO(GATHER_FUNCTION, -DEF_MACRO(fVLOG_VTCM_GATHER_WORD, +DEF_MACRO(fVLOG_VTCM_GATHER_WORD, (EA,OFFSET,IDX, LEN), + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX] = *EA ", + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, 1); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD, (EA,OFFSET,IDX, LEN), + " if (RtV <= EA <= RtV + LEN) TEMP.uh[IDX] = *EA ", + " if (RtV <= EA <= RtV + LEN) 
TEMP.uh[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, 1); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD_DV, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD_DV, (EA,OFFSET,IDX,IDX2,IDX_H, LEN), + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX2].uh[IDX_H] = *EA ", + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX2].uh[IDX_H] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_WORDQ, +DEF_MACRO(fVLOG_VTCM_GATHER_WORDQ, (EA,OFFSET,IDX, Q, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX] = *EA ", + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0)); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ, (EA,OFFSET,IDX, Q, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uh[IDX] = *EA ", + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uh[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0)); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, (EA,OFFSET,IDX,IDX2,IDX_H, Q, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX2].uh[IDX_H] = *EA ", + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX2].uh[IDX_H] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0)); }, @@ -385,7 +600,9 @@ DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, ) -DEF_MACRO(DEBUG_LOG_ADDR, +DEF_MACRO(DEBUG_LOG_ADDR, (OFFSET), + " ", + " ", { if (thread->processor_ptr->arch_proc_options->mmvec_network_addr_log2) @@ -393,6 +610,7 @@ DEF_MACRO(DEBUG_LOG_ADDR, int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; + // pa = pa & ~(ALIGNMENT-1); } }, () @@ -404,7 +622,9 @@ DEF_MACRO(DEBUG_LOG_ADDR, -DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, +DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, (TYPE), + " Read, accumulate, and write to VTCM", + " ", { for (int i = 0; i < mmvecx->vtcm_log.size; i+=sizeof(TYPE)) { @@ -430,7 +650,9 @@ 
DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, () ) -DEF_MACRO(SCATTER_FUNCTION, +DEF_MACRO(SCATTER_FUNCTION, (EA,OFFSET,IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL, IN), +"", +"", { int slot = insn->slot; int i0; @@ -449,26 +671,34 @@ DEF_MACRO(SCATTER_FUNCTION, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORD, +DEF_MACRO(fVLOG_VTCM_HALFWORD, (EA,OFFSET,IN,IDX, LEN), + "if (RtV <= EA <= RtV + LEN) *EA = IN.uh[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA = IN.uh[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, 1, IN); }, () ) -DEF_MACRO(fVLOG_VTCM_WORD, +DEF_MACRO(fVLOG_VTCM_WORD, (EA,OFFSET,IN,IDX,LEN), + "if (RtV <= EA <= RtV + LEN) *EA = IN.uw[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA = IN.uw[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, 1, IN); }, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORDQ, +DEF_MACRO(fVLOG_VTCM_HALFWORDQ, (EA,OFFSET,IN,IDX,Q,LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uh[IDX] ", + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uh[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0), IN); }, () ) -DEF_MACRO(fVLOG_VTCM_WORDQ, +DEF_MACRO(fVLOG_VTCM_WORDQ, (EA,OFFSET,IN,IDX,Q,LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uw[IDX] ", + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uw[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0), IN); }, @@ -479,14 +709,18 @@ DEF_MACRO(fVLOG_VTCM_WORDQ, -DEF_MACRO(fVLOG_VTCM_HALFWORD_DV, +DEF_MACRO(fVLOG_VTCM_HALFWORD_DV, (EA,OFFSET,IN,IDX,IDX2,IDX_H, LEN), + "if (RtV <= EA <= RtV + LEN) *EA = IN.w[IDX2].uh[IDX_H] ", + "if (RtV <= EA <= RtV + LEN) *EA = IN.w[IDX2].uh[IDX_H] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1, IN); }, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, +DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, (EA,OFFSET,IN,IDX,Q,IDX2,IDX_H, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.w[IDX2].uh[IDX_H] ", + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.w[IDX2].uh[IDX_H] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 
fGETQBIT(QsV,2*IDX+i0), IN); }, @@ -498,39 +732,161 @@ DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, -DEF_MACRO(fSTORERELEASE, +DEF_MACRO(fSTORERELEASE, (EA,TYPE), + "char* addr = EA&~(ALIGNMENT-1); Zero Byte Store Release (Non-blocking Sync)", + "Zero Byte Store Release (Sync)", { fV_AL_CHECK(EA,fVECSIZE()-1); mem_store_release(thread, insn, fVECSIZE(), EA&~(fVECSIZE()-1), EA, TYPE, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fVFETCH_AL, +DEF_MACRO(fVFETCH_AL, (EA), + "Prefetch vector into L2 cache at EA", + "Prefetch vector into L2 cache at EA", { fV_AL_CHECK(EA,fVECSIZE()-1); mem_fetch_vector(thread, insn, EA&~(fVECSIZE()-1), insn->slot, fVECSIZE()); }, - (A_LOAD,A_MEMLIKE) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) ) -DEF_MACRO(fLOADMMV_AL, +DEF_MACRO(fLOADMMV_AL, (EA, ALIGNMENT, LEN, DST), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) DST[i] = addr[i]", + "Load LEN bytes from memory at EA (forced alignment) to DST.", { fV_AL_CHECK(EA,ALIGNMENT-1); thread->last_pkt->double_access_vec = 0; mem_load_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &DST.ub[0], LEN, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_LOAD,A_MEMLIKE) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) ) -DEF_MACRO(fLOADMMV, +DEF_MACRO(fLOADMMV, (EA, DST), + "DST = *(EA&~(ALIGNMENT-1))", + "Load vector from memory at EA (forced alignment) to DST.", fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST), () ) -DEF_MACRO(fLOADMMVQ, +DEF_MACRO(fLOADMMZ, (EA,DST), + "DST[EA[7]] = *(EA)", + "Load splatter register from memory at EA (forced alignment) to DST.", + { + mmvector_t load_vec; + fV_AL_CHECK(EA,fVECSIZE()-1); + mem_load_vector_oddva(thread, insn, EA&~(fVECSIZE()-1), EA, insn->slot, fVECSIZE(), &load_vec.ub[0], fVECSIZE(), fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + int idx = (EA & 0x80)>0; + DST.v[idx] = 
load_vec; + + }, + () +) +DEF_MACRO(fLOADZ_LOAD, (EA,EAU,WIDTH,DST), + "", + "", + { + thread->last_pkt->ext_slot_cancelled = 0; + thread->last_pkt->double_access_vec = 0; + int etm_size = ((EA % width) ==0) ? fVECSIZE() : 0; + if (thread->processor_ptr->options->testgen_mode) + etm_size = ((EA % width) ==0) ? WIDTH : 0; + + mem_load_vector_oddva(thread, insn, EA, EAU, insn->slot, WIDTH, &DST.ub[0], etm_size, fUSE_LOOKUP_ADDRESS()); + }, + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) +) + +DEF_MACRO(fELSE_CANCELZ, (), + "", + "", + else { + if (thread->last_pkt) { + thread->mem_access[insn->slot].dropped_z = 1; + thread->last_pkt->ext_slot_cancelled |= (1<<insn->slot); + } + }, + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) +) + + + + +DEF_MACRO(fPOST_INC4, (R), + "R+=4", + "", + R+=4; + , + (A_CVI_Z_INC_4) +) +DEF_MACRO(fPOST_INC8, (R), + "R+=8", + "", + R+=8; + , + (A_CVI_Z_INC_8) +) +DEF_MACRO(fPOST_INC16, (R), + "R+=16", + "", + R+=16; + , + (A_CVI_Z_INC_16) +) + +DEF_MACRO(fEXTRACTZ, + (DST,IDX), + "DST=ZREG[IDX]", /* short desc */ + "Read Vector IDX", /* long desc */ + (DST = READ_ZREG(fMODCIRCU((IDX),5))), + () +) + +DEF_MACRO(fLOADZ_UPDATE, (EA,WIDTH,ZN,N,SRC), + "for(i = 0; i < width; i++) ZREG.b[(EA+i)%ZN] = *(EA+i)", + "Load splatter register from memory at EA (forced alignment) to DST.", + { + mmvector_t Z[2]; + Z[0] = READ_ZREG(0); + Z[1] = READ_ZREG(1); + for(int k = 0; k < WIDTH; k++) { + int element_idx = (EA+k)%N; + int z_idx = ((EA+k)%ZN)/N; + Z[z_idx].ub[element_idx] = SRC.ub[k]; + } + + WRITE_EXT_ZREG(0,Z[0],0); + WRITE_EXT_ZREG(1,Z[1],0); + }, + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) +) +DEF_MACRO(fSTOREZ, (EA,WIDTH,ZN,N), + "for(i = 0; i < width; i++) *(EA+i) = ZREG.b[(EA+i)%ZN]", + "Store splatter register from memory at EA (forced alignment) to DST.", + { + mmvector_t store_vec; + mmvector_t maskvec = {0}; + mmvector_t Z[2]; + Z[0] = READ_ZREG(0); + Z[1] = 
READ_ZREG(1); + + for(int k = 0; k < WIDTH; k++) { + int element_idx = (EA+k)%N; + int z_idx = ((EA+k)%ZN)/N; + store_vec.ub[k] = Z[z_idx].ub[element_idx]; + maskvec.ub[k] = 1; + } + mem_store_vector_oddva(thread, insn, EA, EA, insn->slot, WIDTH, &store_vec.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + }, + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) +) + +DEF_MACRO(fLOADMMVQ, (EA,DST,QVAL), + "DST = vmux(QVAL,*(EA&~(ALIGNMENT-1)),0)", + "Load vector from memory at EA (forced alignment) to DST.", do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); @@ -539,7 +895,9 @@ DEF_MACRO(fLOADMMVQ, () ) -DEF_MACRO(fLOADMMVNQ, +DEF_MACRO(fLOADMMVNQ, (EA,DST,QVAL), + "DST = vmux(QVAL,0,*(EA&~(ALIGNMENT-1)))", + "Load vector from memory at EA (forced alignment) to DST.", do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); @@ -548,7 +906,9 @@ DEF_MACRO(fLOADMMVNQ, () ) -DEF_MACRO(fLOADMMVU_AL, +DEF_MACRO(fLOADMMVU_AL, (EA, ALIGNMENT, LEN, DST), + "char* addr = EA; for (i=0; i<LEN; ++i) DST[i] = addr[i]", + "Load LEN bytes from memory at EA (unaligned) to DST.", { size4u_t size2 = (EA)&(ALIGNMENT-1); size4u_t size1 = LEN-size2; @@ -556,10 +916,12 @@ DEF_MACRO(fLOADMMVU_AL, mem_load_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &DST.ub[size1], size2, fUSE_LOOKUP_ADDRESS()); mem_load_vector_oddva(thread, insn, EA, EA,/* slot */ 0, size1, &DST.ub[0], size1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_LOAD,A_MEMLIKE) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) ) -DEF_MACRO(fLOADMMVU, +DEF_MACRO(fLOADMMVU, (EA, DST), + "DST = *EA", + "Load vector from memory at EA (unaligned) to DST.", { /* if address happens to be aligned, only do aligned load */ thread->last_pkt->pkt_has_vtcm_access = 0; @@ -579,63 +941,79 @@ DEF_MACRO(fLOADMMVU, () ) -DEF_MACRO(fSTOREMMV_AL, +DEF_MACRO(fSTOREMMV_AL, (EA, ALIGNMENT, LEN, SRC), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; 
++i) addr[i] = SRC[i]", + "Store LEN bytes from SRC into memory at EA (forced alignment).", { fV_AL_CHECK(EA,ALIGNMENT-1); - mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMV, +DEF_MACRO(fSTOREMMV, (EA, SRC), + "*(EA&~(ALIGNMENT-1)) = SRC", + "Store vector SRC to memory at EA (unaligned).", fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC), () ) -DEF_MACRO(fSTOREMMVQ_AL, +DEF_MACRO(fSTOREMMVQ_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) if (MASK[i]) addr[i] = SRC[i]", + "Store LEN bytes from SRC into memory at EA (forced alignment).", do { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); - mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } while (0), - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVQ, +DEF_MACRO(fSTOREMMVQ, (EA, SRC, MASK), + "*(EA&~(ALIGNMENT-1)) = SRC", + "Masked store vector SRC to memory at EA (forced alignment).", fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK), () ) -DEF_MACRO(fSTOREMMVNQ_AL, +DEF_MACRO(fSTOREMMVNQ_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; i<LEN; ++i) if (!MASK[i]) addr[i] = SRC[i]", + "Store LEN bytes from SRC into memory at EA (forced alignment).", { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); 
fV_AL_CHECK(EA,ALIGNMENT-1); - mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVNQ, +DEF_MACRO(fSTOREMMVNQ, (EA, SRC, MASK), + "*(EA&~(ALIGNMENT-1)) = SRC", + "Masked negated store vector SRC to memory at EA (forced alignment).", fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK), () ) -DEF_MACRO(fSTOREMMVU_AL, +DEF_MACRO(fSTOREMMVU_AL, (EA, ALIGNMENT, LEN, SRC), + "char* addr = EA; for (i=0; i<LEN; ++i) addr[i] = SRC[i]", + "Store LEN bytes from SRC into memory at EA (unaligned).", { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &SRC.ub[size1], 0, 0, fUSE_LOOKUP_ADDRESS()); - mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, SRC.ub, 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVU, +DEF_MACRO(fSTOREMMVU, (EA, SRC), + "*EA = SRC", + "Store vector SRC to memory at EA (unaligned).", { thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0; @@ -651,7 +1029,9 @@ DEF_MACRO(fSTOREMMVU, () ) -DEF_MACRO(fSTOREMMVQU_AL, +DEF_MACRO(fSTOREMMVQU_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA; for (i=0; i<LEN; ++i) if (MASK[i]) addr[i] = SRC[i]", + "Store LEN bytes from SRC into memory at EA (unaligned).", { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; @@ 
-661,12 +1041,14 @@ DEF_MACRO(fSTOREMMVQU_AL, if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(),/* slot */ 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 0, fUSE_LOOKUP_ADDRESS()); - mem_store_vector_oddva(thread, insn, EA, /* slot */ 0, size1, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA, /* slot */ 0, size1, SRC.ub, &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVQU, +DEF_MACRO(fSTOREMMVQU, (EA, SRC, MASK), + "*EA = SRC", + "Store vector SRC to memory at EA (unaligned).", { thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0; @@ -682,7 +1064,9 @@ DEF_MACRO(fSTOREMMVQU, () ) -DEF_MACRO(fSTOREMMVNQU_AL, +DEF_MACRO(fSTOREMMVNQU_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA; for (i=0; i<LEN; ++i) if (!MASK[i]) addr[i] = SRC[i]", + "Store LEN bytes from SRC into memory at EA (unaligned).", { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; @@ -692,12 +1076,14 @@ DEF_MACRO(fSTOREMMVNQU_AL, if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 1, fUSE_LOOKUP_ADDRESS()); - mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, SRC.ub, &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVNQU, +DEF_MACRO(fSTOREMMVNQU, (EA, SRC, MASK), + "*EA = SRC", + "Store vector SRC to memory at EA (unaligned).", { thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0; 
@@ -716,127 +1102,446 @@ DEF_MACRO(fSTOREMMVNQU, -DEF_MACRO(fVFOREACH, +DEF_MACRO(fVFOREACH,(WIDTH, VAR), + "for (VAR = 0; VAR < VELEM(WIDTH); VAR++)", + "For VAR in each WIDTH-bit vector index", for (VAR = 0; VAR < fVELEM(WIDTH); VAR++), /* NOTHING */ ) -DEF_MACRO(fVARRAY_ELEMENT_ACCESS, +DEF_MACRO(fVARRAY_ELEMENT_ACCESS, (ARRAY, TYPE, INDEX), + "ARRAY.TYPE[INDEX]", + "Access element of type TYPE at position INDEX of flattened ARRAY", ARRAY.v[(INDEX) / (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))], () ) -DEF_MACRO(fVNEWCANCEL, +DEF_MACRO(fVNEWCANCEL,(REGNUM), + "Ignore current value for register REGNUM", + "Ignore current value for register REGNUM", do { THREAD2STRUCT->VRegs_select &= ~(1<<(REGNUM)); } while (0), () ) -DEF_MACRO(fTMPVDATA, +DEF_MACRO(fTMPVDATA,(), + "Data from .tmp load", + "Data from .tmp load and clear tmp status", mmvec_vtmp_data(thread), - (A_CVI) + (A_CVI,A_CVI_REQUIRES_TMPLOAD) ) -DEF_MACRO(fVSATDW, +DEF_MACRO(fVSATDW, (U,V), + "usat_32(U:V)", + "Use 32-bits of U as MSW and 32-bits of V as LSW and saturate the resultant 64-bits to 32 bits", fVSATW( ( ( ((long long)U)<<32 ) | fZXTN(32,64,V) ) ), /* attribs */ ) -DEF_MACRO(fVASL_SATHI, +DEF_MACRO(fVASL_SATHI, (U,V), + "uasl_sathi(U:V)", + "Use 32-bits of U as MSW and 32-bits of V as LSW, left shift by 1 and saturate the result and take high word", fVSATW(((U)<<1) | ((V)>>31)), /* attribs */ ) -DEF_MACRO(fVUADDSAT, +DEF_MACRO(fVUADDSAT,(WIDTH,U,V), + "usat_##WIDTH(U+V)", + "Add WIDTH-bit values U and V with saturation", fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)), /* attribs */ ) -DEF_MACRO(fVSADDSAT, - fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)), +DEF_MACRO(fVSADDSAT,(WIDTH,U,V), + "sat_##WIDTH(U+V)", + "Add WIDTH-bit values U and V with saturation", + ({size8s_t tmp5 = fSXTN(WIDTH, 2*WIDTH, U); + size8s_t tmp6 = fSXTN(WIDTH, 2*WIDTH, V); + size8s_t tmp7 = tmp5 + tmp6; + fVSATN( WIDTH, tmp7); + }), /* 
attribs */ ) -DEF_MACRO(fVUSUBSAT, +DEF_MACRO(fVUSUBSAT,(WIDTH,U,V), + "usat_##WIDTH(U-V)", + "sub WIDTH-bit values U and V with saturation", fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)), /* attribs */ ) -DEF_MACRO(fVSSUBSAT, +DEF_MACRO(fVSSUBSAT,(WIDTH,U,V), + "sat_##WIDTH(U-V)", + "sub WIDTH-bit values U and V with saturation", fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)), /* attribs */ ) -DEF_MACRO(fVAVGU, +DEF_MACRO(fVAVGU,(WIDTH,U,V), + "(U+V)/2", + "average WIDTH-bit values U and V with saturation", ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVAVGURND, +DEF_MACRO(fVAVGURND,(WIDTH,U,V), + "(U+V+1)/2", + "average WIDTH-bit values U and V with saturation", ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)+1)>>1), /* attribs */ ) -DEF_MACRO(fVNAVGU, +DEF_MACRO(fVNAVGU,(WIDTH,U,V), + "(U-V)/2", + "average WIDTH-bit values U and V with saturation", ((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVNAVGURNDSAT, +DEF_MACRO(fVNAVGURNDSAT,(WIDTH,U,V), + "(U-V+1)/2", + "average WIDTH-bit values U and V with saturation", fVSATUN(WIDTH,((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)+1)>>1)), /* attribs */ ) -DEF_MACRO(fVAVGS, +DEF_MACRO(fVAVGS,(WIDTH,U,V), + "(U+V)/2", + "average WIDTH-bit values U and V with saturation", ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVAVGSRND, +DEF_MACRO(fVAVGSRND,(WIDTH,U,V), + "(U+V+1)/2", + "average WIDTH-bit values U and V with saturation", ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)+1)>>1), /* attribs */ ) -DEF_MACRO(fVNAVGS, +DEF_MACRO(fVNAVGS,(WIDTH,U,V), + "(U-V)/2", + "average WIDTH-bit values U and V with saturation", ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVNAVGSRND, +DEF_MACRO(fVNAVGSRND,(WIDTH,U,V), + "(U-V+1)/2", + "average WIDTH-bit values U and negative V followed by rounding", 
((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1), /* attribs */ ) -DEF_MACRO(fVNAVGSRNDSAT, +DEF_MACRO(fVNAVGSRNDSAT,(WIDTH,U,V), + "(U-V+1)/2", + "average WIDTH-bit values U and V with saturation", fVSATN(WIDTH,((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)), /* attribs */ ) -DEF_MACRO(fVNOROUND, +DEF_MACRO(fVNOROUND,(VAL,SHAMT), + "VAL", + "VAL", VAL, /* NOTHING */ ) -DEF_MACRO(fVNOSAT, +DEF_MACRO(fVNOSAT,(VAL), + "VAL", + "VAL", VAL, /* NOTHING */ ) -DEF_MACRO(fVROUND, +DEF_MACRO(fVROUND,(VAL,SHAMT), + "VAL + (1<<(SHAMT-1))", + "VAL + RNDBIT", ((VAL) + (((SHAMT)>0)?(1LL<<((SHAMT)-1)):0)), /* NOTHING */ ) -DEF_MACRO(fCARRY_FROM_ADD32, +DEF_MACRO(fCARRY_FROM_ADD32,(A,B,C), + "carry_from(A,B,C)", + "carry_from(A,B,C)", (((fZXTN(32,64,A)+fZXTN(32,64,B)+C) >> 32) & 1), /* NOTHING */ ) -DEF_MACRO(fUARCH_NOTE_PUMP_4X, +DEF_MACRO(fUARCH_NOTE_PUMP_4X,(), + "", + "", , - () + (A_CVI_PUMP_4X) ) -DEF_MACRO(fUARCH_NOTE_PUMP_2X, +DEF_MACRO(fUARCH_NOTE_PUMP_2X,(), + "", + "", , + (A_CVI_PUMP_2X) +) + +DEF_MACRO(fVDOCHKPAGECROSS,(BASE,SUM), + "", + "", + if (UNLIKELY(thread->timing_on)) { + thread->mem_access[slot].check_page_crosses = 1; + thread->mem_access[slot].page_cross_base = BASE; + thread->mem_access[slot].page_cross_sum = SUM; + }, + (A_EA_PAGECROSS) +) + +/* FP instructions */ +/*Qfloat Macros for muls*/ +DEF_MACRO(fPARSEQF32,(A), + "A", + "Parsing QF32 to extract exp/sig", + parse_qf32(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATQF32,(A,B,C), + "rnd_sat(A,B,C)", + "Rnd/Sat/Norm of Vector Multiply of two QF32 inputs", + rnd_sat_qf32(A,B,C), + (A_HVX_FLT) +) + +DEF_MACRO(fPARSEQF16,(A), + "A", + "Parsing QF16 to extract exp/sig", + parse_qf16(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATQF16,(A,B,C), + "rnd_sat(A,B,C)", + "Rnd/Sat/Norm of Vector Multiply of two QF16 inputs", + rnd_sat_qf16(A,B,C), () ) +/*Qfloat Macros for others*/ +DEF_MACRO(fPARSESF,(A), + "A", + "Parsing IEEE SF to extract sign/exp/sig", + parse_sf(A), + (A_HVX_FLT) +) + 
+DEF_MACRO(fRNDSATSF,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector Multiply of two IEEE SF inputs", + rnd_sat_sf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fPARSEHF,(A), + "A", + "Parsing IEEE HF to extract sign/exp/sig", + parse_hf(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATHF,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector Multiply of two IEEE HF inputs", + rnd_sat_hf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATW,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of W inputs", + rnd_sat_w(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATUW,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of UW inputs", + rnd_sat_uw(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATH,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of H inputs", + rnd_sat_h(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATUH,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of UH inputs", + rnd_sat_uh(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATB,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of B inputs", + rnd_sat_b(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATUB,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of UB inputs", + rnd_sat_ub(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fNEGQF32,(A), + "-(A)", + "Take Ones complement", + negate32(A), + (A_HVX_FLT) +) + +DEF_MACRO(fNEGQF16,(A), + "-(A)", + "Take Ones complement", + negate16(A), + (A_HVX_FLT) +) + +DEF_MACRO(fNEGSF,(A), + "-(A)", + "Change sign", + negate_sf(A), + (A_HVX_FLT) +) +DEF_MACRO(fNEGHF,(A), + "-(A)", + "Change sign", + negate_hf(A), + (A_HVX_FLT) +) + +//FP vector compare +DEF_MACRO(fCMPGT_QF32,(A,B), + "(A > B)", + "Vector compare of QF32 format", + cmpgt_qf32(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_QF16,(A,B), + "(A > B)", + "Vector compare of QF16 format", + cmpgt_qf16(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_SF,(A,B), + "(A > B)", + "Vector compare of SF format", + cmpgt_sf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_HF,(A,B), + "(A > B)", + "Vector compare of HF format", + cmpgt_hf(A,B), + 
(A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_BF,(A,B), + "(A > B)", + "Vector compare of BF format", + cmpgt_sf(((int)A) << 16,((int)B) << 16), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_QF32_SF,(A,B), + "(A > B)", + "Vector compare of QF32/SF format", + cmpgt_qf32_sf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_QF16_HF,(A,B), + "(A > B)", + "Vector compare of QF16/HF format", + cmpgt_qf16_hf(A,B), + (A_HVX_FLT) +) + +//VMAX/VMIN_QF32/QF16 +DEF_MACRO(fMAX_QF32,(X,Y), + "max(X,Y)", + "", + max_qf32(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF32,(X,Y), + "min(X,Y)", + "", + min_qf32(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_QF32_SF,(X,Y), + "max(X,Y)", + "", + max_qf32_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF32_SF,(X,Y), + "min(X,Y)", + "", + min_qf32_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_QF16,(X,Y), + "max(X,Y)", + "", + max_qf16(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF16,(X,Y), + "min(X,Y)", + "", + min_qf16(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_QF16_HF,(X,Y), + "max(X,Y)", + "", + max_qf16_hf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF16_HF,(X,Y), + "min(X,Y)", + "", + min_qf16_hf(X,Y), + (A_HVX_FLT) +) + +//MAX/MIN_SF/HF +DEF_MACRO(fMAX_SF,(X,Y), + "max(X,Y)", + "", + max_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_SF,(X,Y), + "min(X,Y)", + "", + min_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_HF,(X,Y), + "max(X,Y)", + "", + max_hf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_HF,(X,Y), + "min(X,Y)", + "", + min_hf(X,Y), + (A_HVX_FLT) +) + diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h index bcd4a1e8973c3..645ec9280972a 100644 --- a/target/hexagon/mmvec/macros.h +++ b/target/hexagon/mmvec/macros.h @@ -354,3 +354,16 @@ } while (0); #endif + +#define fPARSEHF(A) parse_hf(A) +#define fPARSESF(A) parse_sf(A) +#define fPARSEQF16(A) parse_qf16(A) +#define fPARSEQF32(A) parse_qf32(A) + +#define fRNDSATHF(A,B) rnd_sat_hf(A,B) +#define fRNDSATSF(A,B) rnd_sat_sf(A,B) +#define fRNDSATQF16(A,B,C) rnd_sat_qf16(A,B,C) +#define fRNDSATQF32(A,B,C) rnd_sat_qf32(A,B,C) + +#define 
fNEGQF16(A) negate16(A) +#define fNEGQF32(A) negate32(A) From 57680086033320c3923fccff43bea08b9e2059e2 Mon Sep 17 00:00:00 2001 From: Marco Liebel <mliebel@quicinc.com> Date: Wed, 5 Mar 2025 12:54:22 -0800 Subject: [PATCH 120/126] target/hexagon: Update encoding of vunpackob Signed-off-by: Marco Liebel <mliebel@quicinc.com> --- target/hexagon/imported/mmvec/encode_ext.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def index 402438f566c17..6982998421639 100644 --- a/target/hexagon/imported/mmvec/encode_ext.def +++ b/target/hexagon/imported/mmvec/encode_ext.def @@ -647,7 +647,7 @@ DEF_ENC(V6_vsubububb_sat, ICLASS_CJ" 1 110 101 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyewuh_64, ICLASS_CJ" 1 110 101 vvvvv PP 0 uuuuu 110 ddddd") DEF_FIELDROW_DESC32( ICLASS_CJ" 1 110 --0 ----- PP 1 ----- ----- ---","Vx32=Vu32") -DEF_ENC(V6_vunpackob, ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 000 xxxxx") // +DEF_ENC(V6_vunpackob, ICLASS_CJ" 1 110 --0 --000 PP 1 uuuuu 000 xxxxx") // DEF_ENC(V6_vunpackoh, ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 001 xxxxx") // //DEF_ENC(V6_vunpackow, ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 010 xxxxx") // From b1ae06a6faf2f963e67b45fe4256afe216c5bec0 Mon Sep 17 00:00:00 2001 From: Marco Liebel <mliebel@quicinc.com> Date: Wed, 5 Mar 2025 11:55:40 -0800 Subject: [PATCH 121/126] target/hexagon: Add encodings Signed-off-by: Marco Liebel <mliebel@quicinc.com> --- target/hexagon/imported/mmvec/encode_ext.def | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def index 6982998421639..9df9204764419 100644 --- a/target/hexagon/imported/mmvec/encode_ext.def +++ b/target/hexagon/imported/mmvec/encode_ext.def @@ -804,5 +804,31 @@ DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 
ddddd") DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd") +DEF_ENC(V6_vadd_hf,"00011111011vvvvvPP1uuuuu011ddddd") +DEF_ENC(V6_vadd_sf,"00011111101vvvvvPP1uuuuu001ddddd") +DEF_ENC(V6_vadd_qf16,"00011111011vvvvvPP1uuuuu010ddddd") +DEF_ENC(V6_vadd_qf16_mix,"00011111011vvvvvPP1uuuuu100ddddd") +DEF_ENC(V6_vadd_qf32,"00011111101vvvvvPP1uuuuu000ddddd") +DEF_ENC(V6_vadd_qf32_mix,"00011111101vvvvvPP1uuuuu010ddddd") + +DEF_ENC(V6_vconv_hf_qf16,"00011110--0--100PP1uuuuu011ddddd") +DEF_ENC(V6_vconv_hf_qf32,"00011110--0--100PP1uuuuu110ddddd") +DEF_ENC(V6_vconv_sf_qf32,"00011110--0--100PP1uuuuu000ddddd") + +DEF_ENC(V6_vmpy_qf16,"00011111111vvvvvPP1uuuuu011ddddd") +DEF_ENC(V6_vmpy_qf16_hf,"00011111111vvvvvPP1uuuuu100ddddd") +DEF_ENC(V6_vmpy_qf16_mix_hf,"00011111111vvvvvPP1uuuuu101ddddd") +DEF_ENC(V6_vmpy_qf32,"00011111111vvvvvPP1uuuuu000ddddd") +DEF_ENC(V6_vmpy_qf32_hf,"00011111111vvvvvPP1uuuuu111ddddd") +DEF_ENC(V6_vmpy_qf32_mix_hf,"00011111100vvvvvPP1uuuuu000ddddd") +DEF_ENC(V6_vmpy_qf32_qf16,"00011111111vvvvvPP1uuuuu110ddddd") +DEF_ENC(V6_vmpy_qf32_sf,"00011111111vvvvvPP1uuuuu001ddddd") + +DEF_ENC(V6_vsub_hf,"00011111011vvvvvPP1uuuuu110ddddd") +DEF_ENC(V6_vsub_sf,"00011111101vvvvvPP1uuuuu100ddddd") +DEF_ENC(V6_vsub_qf32,"00011111101vvvvvPP1uuuuu011ddddd") +DEF_ENC(V6_vsub_qf32_mix,"00011111101vvvvvPP1uuuuu101ddddd") +DEF_ENC(V6_vsub_qf16,"00011111011vvvvvPP1uuuuu101ddddd") +DEF_ENC(V6_vsub_qf16_mix,"00011111011vvvvvPP1uuuuu111ddddd") #endif /* NO MMVEC */ From 2a8242a4bc3962ef9258827d3fb5d1b6ac99be8e Mon Sep 17 00:00:00 2001 From: Marco Liebel <mliebel@quicinc.com> Date: Wed, 5 Mar 2025 11:03:59 -0800 Subject: [PATCH 122/126] target/hexagon: Add simple qfloat test Signed-off-by: Marco Liebel <mliebel@quicinc.com> --- tests/tcg/hexagon/hvx_misc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index 90c3733da0711..319d7c0dd052e 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ 
b/tests/tcg/hexagon/hvx_misc.c @@ -495,6 +495,28 @@ void test_store_new() check_output_w(__LINE__, 1); } +void test_qfloat() +{ + asm volatile( + "r0 = #0xf\n" + "v0 = vsplat(r0)\n" + "v1 = vsplat(r0)\n" + "{\n" + " v2.qf16 = vadd(v0.qf16, v1.qf16)\n" + "}\n" + "vmem(%0) = v2\n" + : + : "r"(&output[0]) + : "r0", "v0", "v1", "v2", "memory" + ); + + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[0].w[i] = 0x10010; + } + + check_output_w(__LINE__, 1); +} + int main() { init_buffers(); @@ -538,6 +560,8 @@ int main() test_store_new(); + test_qfloat(); + puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; } From 3f0034c19d3ca2211bd3e06dad71cc00b982b3de Mon Sep 17 00:00:00 2001 From: Sid Manning <sidneym@quicinc.com> Date: Fri, 7 Mar 2025 12:22:42 -0800 Subject: [PATCH 123/126] Merge qtmr_rg0/rg1 into just qtmr_region A single mapping is made by qct-qtimer.c and the extraneous region caused confusion. Signed-off-by: Sid Manning <sidneym@quicinc.com> --- hw/hexagon/hexagon_dsp.c | 1 + hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc | 3 +-- hw/hexagon/machine_cfg_v66g_1024.h.inc | 3 +-- hw/hexagon/machine_cfg_v68n_1024.h.inc | 3 +-- hw/hexagon/virt.c | 4 ++-- include/hw/hexagon/hexagon.h | 3 +-- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c index 348977a542fa4..f4440de80ce03 100644 --- a/hw/hexagon/hexagon_dsp.c +++ b/hw/hexagon/hexagon_dsp.c @@ -122,6 +122,7 @@ static void hexagon_common_init(MachineState *machine, Rev_t rev, qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0)); qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base); qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase); + qdev_prop_set_uint32(DEVICE(cpu), "qtimer-base-addr", m_cfg->qtmr_region); qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts", m_cfg->cfgtable.ext_contexts); qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries", diff --git a/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc 
b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc index d8fa961f6d091..70b1eabfe9617 100644 --- a/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc +++ b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc @@ -5,8 +5,7 @@ static hexagon_machine_config SA8775P_cdsp0 = { .l2vic_base = 0x26300000 + 0x90000, .l2vic_size = 0x00001000, .csr_base = 0x26300000, - .qtmr_rg0 = 0x26300000 + 0xA1000, - .qtmr_rg1 = 0x26300000 + 0xA2000, + .qtmr_region = 0x26300000 + 0xA1000, .cfgtable = { .l2tcm_base = 0x00002400, .reserved0 = 0x00000000, diff --git a/hw/hexagon/machine_cfg_v66g_1024.h.inc b/hw/hexagon/machine_cfg_v66g_1024.h.inc index 604cc7777cbf0..8f2a593bb8602 100644 --- a/hw/hexagon/machine_cfg_v66g_1024.h.inc +++ b/hw/hexagon/machine_cfg_v66g_1024.h.inc @@ -5,8 +5,7 @@ static hexagon_machine_config v66g_1024 = { .l2vic_base = 0xfc910000, .l2vic_size = 0x00001000, .csr_base = 0xfc900000, - .qtmr_rg0 = 0xfc921000, - .qtmr_rg1 = 0xfc922000, + .qtmr_region = 0xfc921000, .cfgtable = { .l2tcm_base = 0x0000d800, .reserved0 = 0x0000d400, diff --git a/hw/hexagon/machine_cfg_v68n_1024.h.inc b/hw/hexagon/machine_cfg_v68n_1024.h.inc index 60eb112a1199c..257c133df8f34 100644 --- a/hw/hexagon/machine_cfg_v68n_1024.h.inc +++ b/hw/hexagon/machine_cfg_v68n_1024.h.inc @@ -5,8 +5,7 @@ static hexagon_machine_config v68n_1024 = { .l2vic_base = 0xfc910000, .l2vic_size = 0x00001000, .csr_base = 0xfc900000, - .qtmr_rg0 = 0xfc921000, - .qtmr_rg1 = 0xfc922000, + .qtmr_region = 0xfc921000, .cfgtable = { .l2tcm_base = 0x0000d800, .reserved0 = 0x00000000, diff --git a/hw/hexagon/virt.c b/hw/hexagon/virt.c index b991bc94a838f..1e7ac4e5b70b6 100644 --- a/hw/hexagon/virt.c +++ b/hw/hexagon/virt.c @@ -257,8 +257,7 @@ static void create_qtimer(HexagonVirtMachineState *vms, sysbus_realize_and_unref(SYS_BUS_DEVICE(qtimer), errp); - sysbus_mmio_map(SYS_BUS_DEVICE(qtimer), 0, m_cfg->qtmr_rg1); - sysbus_mmio_map(SYS_BUS_DEVICE(qtimer), 1, m_cfg->qtmr_rg0); + sysbus_mmio_map(SYS_BUS_DEVICE(qtimer), 1, m_cfg->qtmr_region); 
sysbus_connect_irq(SYS_BUS_DEVICE(qtimer), 0, qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR0])); sysbus_connect_irq(SYS_BUS_DEVICE(qtimer), 1, @@ -354,6 +353,7 @@ static void virt_init(MachineState *ms) m_cfg->cfgtable.ext_contexts); qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase); qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base); + qdev_prop_set_uint32(DEVICE(cpu), "qtimer-base-addr", m_cfg->qtmr_region); qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries", m_cfg->cfgtable.jtlb_size_entries); diff --git a/include/hw/hexagon/hexagon.h b/include/hw/hexagon/hexagon.h index 0afaac3b1f85a..ce356325fcd77 100644 --- a/include/hw/hexagon/hexagon.h +++ b/include/hw/hexagon/hexagon.h @@ -143,8 +143,7 @@ typedef struct { uint32_t l2vic_size; /* QTimer csr base */ uint32_t csr_base; - uint32_t qtmr_rg0; - uint32_t qtmr_rg1; + uint32_t qtmr_region; hexagon_config_table cfgtable; } hexagon_machine_config; From e6d5f634e95341d2e71c0dce470a0e0c321d6ec6 Mon Sep 17 00:00:00 2001 From: Brian Cain <brian.cain@oss.qualcomm.com> Date: Sat, 8 Mar 2025 20:42:51 -0800 Subject: [PATCH 124/126] FIXME: Add unimplemented DMA instructions * TODO: forward the instruction tag to the unimp log? * TODO: why do we need_env() for these? * TODO: filter out some attributes? These instructions are unimplemented for now, they are used by h2. 
--- target/hexagon/hex_common.py | 1 + target/hexagon/imported/encode_pp.def | 12 +++++++- target/hexagon/imported/system.idef | 41 ++++++++++++++++++++++++++- target/hexagon/macros.h | 2 ++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 6e4dd8d1c8aa2..4ce275363acfa 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -253,6 +253,7 @@ def is_hvx_insn(tag): def need_env(tag): return ("A_STORE" in attribdict[tag] or "A_LOAD" in attribdict[tag] or + "A_DMA" in attribdict[tag] or "A_CVI_GATHER" in attribdict[tag] or "A_CVI_SCATTER" in attribdict[tag] or "A_IMPLICIT_WRITES_USR" in attribdict[tag] or diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 41e4ab9e3a268..2c45388ab6290 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -788,6 +788,17 @@ DEF_ENC32(Y6_diag, ICLASS_CR" 0010 010sssss PP------ 001-----") DEF_ENC32(Y6_diag0, ICLASS_CR" 0010 010sssss PP-ttttt 010-----") DEF_ENC32(Y6_diag1, ICLASS_CR" 0010 010sssss PP-ttttt 011-----") +DEF_ENC32(Y6_dmcfgrd,"10101000000sssssPP------101ddddd") +DEF_ENC32(Y6_dmcfgwr,"10101000000sssssPP-ttttt110-----") +DEF_ENC32(Y6_dmlink,"10100110000sssssPP-ttttt010-----") +DEF_ENC32(Y6_dmpause,"10101000000-----PP------011ddddd") +DEF_ENC32(Y6_dmpoll,"10101000000-----PP------010ddddd") +DEF_ENC32(Y6_dmresume,"10100110000sssssPP------100-----") +DEF_ENC32(Y6_dmstart,"10100110000sssssPP------001-----") +DEF_ENC32(Y6_dmsyncht,"10101000000-----PP-----0111ddddd") +DEF_ENC32(Y6_dmtlbsynch,"10101000000-----PP-----1111ddddd") +DEF_ENC32(Y6_dmwait,"10101000000-----PP------001ddddd") + DEF_FIELDROW_DESC32( ICLASS_CR" 0011 -------- PP------ --------","[#3] Cdd=Rss ") DEF_ENC32(A4_tfrpcp, ICLASS_CR" 0011 001sssss PP------ ---ddddd") DEF_ENC32(G4_tfrgpcp, ICLASS_CR" 0011 000sssss PP------ ---ddddd") @@ -2230,4 +2241,3 @@ DEF_ENC32(M4_mpyri_addr_u2, 
ICLASS_ALU64" 1111 0ii sssss PPiddddd iiiuuuuu") DEF_ENC32(M4_mpyri_addr, ICLASS_ALU64" 1111 1ii sssss PPiddddd iiiuuuuu") - diff --git a/target/hexagon/imported/system.idef b/target/hexagon/imported/system.idef index fd7ef18b3e340..aa57149a1ceb2 100644 --- a/target/hexagon/imported/system.idef +++ b/target/hexagon/imported/system.idef @@ -256,6 +256,46 @@ Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_P 0); /*extra attrib flags*/ }) +Q6INSN(Y6_dmstart,"dmstart(Rs32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Start", { + fUNIMP(); +}) + +Q6INSN(Y6_dmlink,"dmlink(Rs32,Rt32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Link", { + fUNIMP(); +}) + +Q6INSN(Y6_dmpoll,"Rd32=dmpoll",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Poll", { + fUNIMP(); +}) + +Q6INSN(Y6_dmwait,"Rd32=dmwait",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Wait", { + fUNIMP(); +}) + +Q6INSN(Y6_dmsyncht,"Rd32=dmsyncht",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA SynchT",{ + fUNIMP(); +}) +Q6INSN(Y6_dmtlbsynch,"Rd32=dmtlbsynch",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA TLB Synch",{ + fUNIMP(); +}) + +Q6INSN(Y6_dmcfgrd,"Rd32=dmcfgrd(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG), + "DMA Config Read", { + fUNIMP(); +}) + +Q6INSN(Y6_dmcfgwr,"dmcfgwr(Rs32,Rt32)",ATTRIBS(A_NOTE_PRIV,A_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG), + "DMA Config Write", { + fUNIMP(); +}) + +Q6INSN(Y6_dmpause,"Rd32=dmpause",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Pause",{ + fUNIMP(); 
+}) + +Q6INSN(Y6_dmresume,"dmresume(Rs32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Resume",{ + fUNIMP(); +}) Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch", @@ -283,4 +323,3 @@ Q6INSN(Y5_l2gcleaninv,"l2gcleaninv",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A Q6INSN(Y6_l2gcleanpa,"l2gclean(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean by PA Range",{fL2CLEANPA(RttV);}) Q6INSN(Y6_l2gcleaninvpa,"l2gcleaninv(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate by PA Range",{fL2CLEANINVPA(RttV);}) - diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 06c1dd2f407e6..01469a28a0cce 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -687,3 +687,5 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #endif #define fPREDUSE_TIMING() + +#define fUNIMP() qemu_log_mask(LOG_UNIMP, "Unimplemented instruction\n") From 78f3f550cb8b0b4909e075cbdbf07a0f475f7566 Mon Sep 17 00:00:00 2001 From: Sid Manning <sidneym@quicinc.com> Date: Mon, 10 Mar 2025 10:50:31 -0700 Subject: [PATCH 125/126] fixup! 
hw/intc: Add l2vic interrupt controller Signed-off-by: Sid Manning <sidneym@quicinc.com> --- hw/intc/l2vic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/intc/l2vic.c b/hw/intc/l2vic.c index 9df657521407d..1c450179dd6d6 100644 --- a/hw/intc/l2vic.c +++ b/hw/intc/l2vic.c @@ -18,7 +18,7 @@ #define L2VICA(s, n) (s[(n) >> 2]) #define TYPE_L2VIC "l2vic" -#define L2VIC(obj) OBJECT_CHECK(L2VICState, (obj), TYPE_L2VIC) +OBJECT_DECLARE_SIMPLE_TYPE(L2VICState, L2VIC) #define SLICE_MAX (L2VIC_INTERRUPT_MAX / 32) From d324787edebf401808d6f6e8a4e103d64e2fe106 Mon Sep 17 00:00:00 2001 From: Sid Manning <sidneym@quicinc.com> Date: Thu, 13 Mar 2025 07:38:06 -0700 Subject: [PATCH 126/126] fixup! target/hexagon: Add guest, system reg number defs Signed-off-by: Sid Manning <sidneym@quicinc.com> --- target/hexagon/cpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index 8b334068e295b..70ed3d5ba7b9e 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -21,9 +21,9 @@ #include "fpu/softfloat-types.h" #define NUM_GREGS 32 -#define GREG_WRITES_MAX 32 +#define GREG_WRITES_MAX 2 #define NUM_SREGS 64 -#define SREG_WRITES_MAX 64 +#define SREG_WRITES_MAX 2 #include "cpu-qom.h" #include "exec/cpu-defs.h"