diff --git a/MAINTAINERS b/MAINTAINERS
index 5df6020ed545..5d7a5753b593 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -232,14 +232,25 @@ Hexagon TCG CPUs
 M: Brian Cain
 S: Supported
 F: target/hexagon/
+F: hw/intc/l2vic.[ch]
+F: hw/hexagon/
+F: hw/timer/qct-qtimer.c
+F: include/hw/hexagon/
+F: include/hw/timer/qct-qtimer.h
 X: target/hexagon/idef-parser/
 X: target/hexagon/gen_idef_parser_funcs.py
 F: linux-user/hexagon/
 F: tests/tcg/hexagon/
 F: disas/hexagon.c
 F: configs/targets/hexagon-linux-user/default.mak
+F: configs/devices/hexagon-softmmu/default.mak
 F: docker/dockerfiles/debian-hexagon-cross.docker
 F: gdb-xml/hexagon*.xml
+F: docs/system/target-hexagon.rst
+F: docs/devel/hexagon-sys.rst
+F: docs/devel/hexagon-l2vic.rst
+F: tests/functional/test_hexagon_minivm.py
+F: docs/devel/hexagon-vm.rst
 T: git https://github.com/quic/qemu.git hex-next
 
 Hexagon idef-parser
diff --git a/configs/devices/hexagon-softmmu/default.mak b/configs/devices/hexagon-softmmu/default.mak
new file mode 100644
index 000000000000..37b4f9f3237a
--- /dev/null
+++ b/configs/devices/hexagon-softmmu/default.mak
@@ -0,0 +1,8 @@
+# Default configuration for hexagon-softmmu
+
+# Uncomment the following lines to disable these optional devices:
+
+# Boards are selected by default, uncomment to keep out of the build.
+# CONFIG_HEX_VIRT=n
+# CONFIG_HEX_DSP=n
+# CONFIG_L2VIC=n
diff --git a/configs/targets/hexagon-softmmu.mak b/configs/targets/hexagon-softmmu.mak
new file mode 100644
index 000000000000..03cf1306a348
--- /dev/null
+++ b/configs/targets/hexagon-softmmu.mak
@@ -0,0 +1,10 @@
+# Default configuration for hexagon-softmmu
+
+TARGET_ARCH=hexagon
+TARGET_SUPPORTS_MTTCG=y
+TARGET_XML_FILES=gdb-xml/hexagon-core.xml gdb-xml/hexagon-hvx.xml gdb-xml/hexagon-sys.xml
+TARGET_LONG_BITS=32
+TARGET_NEED_FDT=y
+CONFIG_SEMIHOSTING=y
+CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y
+CONFIG_SEMIHOSTING_USE_STDIO=y
diff --git a/docs/devel/hexagon-l2vic.rst b/docs/devel/hexagon-l2vic.rst
new file mode 100644
index 000000000000..088563627445
--- /dev/null
+++ b/docs/devel/hexagon-l2vic.rst
@@ -0,0 +1,59 @@
+Hexagon L2 Vectored Interrupt Controller
+========================================
+
+
+.. code-block:: none
+
+              +-------+
+              |       |         +----------------+
+              | l2vic |         |  hexagon core  |
+              |       |         |                |
+              | +-----|         |                |
+      ------> |VID0  >------------->irq2 -\      |
+      ------> |      ||         |         |      |
+       ...  > |      ||         |         |      |
+      ------> |      ||         |                |
+              | +-----|         |  / | | \       |
+              |  ...  |         |  | | | |       |
+              | +-----|         | t0 t1 t2 t3 ...|
+      ------> |VIDN   |         |                |
+      ------> |       |         |                |
+      ------> |       |         |                |
+      ------> |       |         |                |
+              | +-----|         |                |
+              |       |         |Global SREG File|
+              | State |         |                |
+              |  [ ]  |<============>[VID ]      |
+              |  [ ]  |<============>[VID1]      |
+              |  [ ]  |         |                |
+              |  [ ]  |         |                |
+              |       |         |                |
+              +-------+         +----------------+
+
+L2VIC/Core Integration
+----------------------
+
+* hexagon core supports 8 external interrupt sources
+* l2vic supports 1024 input interrupts mapped among 4 output interrupts
+* l2vic has four output signals: { VID0, VID1, VID2, VID3 }
+* The l2vic device has a bank of registers per VID that can be used to query
+  the status or assert new interrupts.
+* Interrupts are 'steered' to threads based on { thread priority, 'EX' state,
+  thread interrupt mask, thread interrupt enable, global interrupt enable,
+  etc. }.
+* Any hardware thread could conceivably handle any input interrupt, dependent
+  on state.
+* The system register transfer instruction can read the VID0-VID3 values from
+  the l2vic when reading from hexagon core system registers "VID" and "VID1".
+* When l2vic VID0 has multiple active interrupts, it pulses the VID0 output
+  IRQ and stores the IRQ number for the VID0 register field. Only after this
+  interrupt is cleared can the l2vic pulse the VID0 output IRQ again and
+  provide the next interrupt number on the VID0 register.
+* The ``ciad`` instruction clears the l2vic input interrupt and re-enables the
+  core interrupt. If an l2vic VID0 interrupt is pending when this occurs,
+  the next interrupt should fire and any subsequent reads of the VID register
+  should reflect the newly raised interrupt.
+* In QEMU, on an external interrupt or an unmasked-pending interrupt,
+  all vCPUs are triggered (has_work==true) and each will grab the IO lock
+  while evaluating the steering logic to determine whether it is the thread
+  that must handle the interrupt.
diff --git a/docs/devel/hexagon-sys.rst b/docs/devel/hexagon-sys.rst
new file mode 100644
index 000000000000..3972261a2bbe
--- /dev/null
+++ b/docs/devel/hexagon-sys.rst
@@ -0,0 +1,106 @@
+.. _Hexagon-System-arch:
+
+Hexagon System Architecture
+===========================
+
+The hexagon architecture has some unique elements which are described here.
+
+Interrupts
+----------
+When interrupts arrive at a Hexagon DSP core, they are priority-steered to
+be handled by an eligible hardware thread with the lowest priority.
+
+Memory
+------
+Each hardware thread has an ``SSR.ASID`` field that contains its Address
+Space Identifier. This value is concatenated with a 32-bit virtual address;
+the MMU can then resolve this extended virtual address to a physical address.
+
+TLBs
+----
+The format of a TLB entry is shown below.
+
+.. note::
+    The Small Core DSPs have a different TLB format which is not yet
+    supported.
+
+.. admonition:: Diagram
+
+    .. code:: text
+
+          6                   5                   4               3
+        3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |v|g|x|A|A|       |                                             |
+       |a|l|P|1|0| ASID  |              Virtual Page                   |
+       |l|b| | | |       |                                             |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+          3                   2                   1                 0
+        1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       | | | | |       |                                             | |
+       |x|w|r|u|Cacheab|             Physical Page                   |S|
+       | | | | |       |                                             | |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+* ASID: the address-space identifier.
+* A1, A0: the behavior of these cache line attributes is not modeled by QEMU.
+* xP: the extra-physical bit is the most significant physical address bit.
+* S: the S bit and the LSBs of the physical page indicate the page size.
+* val: the 'valid' bit; when set, it indicates that page matching
+  should consider this entry.
+
+.. list-table:: Page sizes
+   :widths: 25 25 50
+   :header-rows: 1
+
+   * - S-bit
+     - Phys page LSBs
+     - Page size
+   * - 1
+     - N/A
+     - 4KB
+   * - 0
+     - 0b1
+     - 16KB
+   * - 0
+     - 0b10
+     - 64KB
+   * - 0
+     - 0b100
+     - 256KB
+   * - 0
+     - 0b1000
+     - 1MB
+   * - 0
+     - 0b10000
+     - 4MB
+   * - 0
+     - 0b100000
+     - 16MB
+
+* glb: if the global bit is set, the ASID is not considered when matching
+  TLBs.
+* Cacheab: the cacheability attributes of TLBs are not modeled; these bits
+  are ignored.
+* RWX: read-, write-, and execute-enable bits. They indicate whether user
+  programs are permitted to read/write/execute the given page.
+* U: indicates whether user programs can access this page.
+
+Scheduler
+---------
+The Hexagon system architecture has a feature to assist the guest OS
+task scheduler.
The guest OS can enable this feature by setting +``SCHEDCFG.EN``. The ``BESTWAIT`` register is programmed by the guest OS +to indicate the priority of the highest priority task waiting to run on a +hardware thread. The reschedule interrupt is triggered when any hardware +thread's priority in ``STID.PRIO`` is worse than the ``BESTWAIT``. When +it is triggered, the ``BESTWAIT.PRIO`` value is reset to 0x1ff. + +HVX Coprocessor +--------------- +The Supervisor Status Register field ``SSR.XA`` binds a DSP hardware thread +to one of the eight possible HVX contexts. The guest OS is responsible for +managing this resource. diff --git a/docs/devel/hexagon-vm.rst b/docs/devel/hexagon-vm.rst new file mode 100644 index 000000000000..fb16d56d59de --- /dev/null +++ b/docs/devel/hexagon-vm.rst @@ -0,0 +1,190 @@ +Hexagon Virtual Machine +======================= + +The hexagon virtual machine is a hypervisor that can partition a single +Hexagon DSP among multiple guest operating systems, and abstracts the +specific details of a DSP architectural revision for the sake of consistency +among generations. + +Events +------ + +The guest operating system should register the Guest Event Vector Base +via the ``vmsetvec`` virtual instruction at system startup. The vector table +and handlers are determined by the guest OS. + +Guests return from event handlers with ``vmrte``. This instruction will restore +the mode (user versus guest), interrupt enable state, PC, SP. + +.. list-table:: Event types + :header-rows: 1 + + * - Number + - Name + - Description + - Maskable + - Detail + * - 0 + - Reserved + - + - + - + * - 1 + - Machine check event + - unrecoverable VM state + - No + - execution terminates if unhandled + * - 2 + - General exception + - internal hardware or software exception + - No + - + * - 3-4 + - Reserved + - + - + - + * - 5 + - ``trap0`` + - ``trap0`` instruction + - No + - + * - 6 + - Reserved + - + - + - + * - 7 + - Interrupt + - external interrupts + - Yes + - increasing interrupt numbers have descending priority + +Startup +------- +In order to transition to user-mode, the guest OS must set the ``UM`` bit in +the guest status register and specify the address to start executing in +user mode in the guest event link register. + +Virtual Instructions +-------------------- + +.. list-table:: Virtual Instructions + :header-rows: 1 + + * - Instruction + - Behavior + - Operand + - Input + - Output + * - vmversion + - returns the VM version + - 0x0 + - requested VM version + - provided VM version + * - vmrte + - return from event + - 0x1 + - Event info in g3:0 + - N/A + * - vmsetvec + - set event vector + - 0x2 + - r0 is set to vector table addr + - r0 is 0 on success, 1 otherwise + * - vmsetie + - set interrupt enabled + - 0x3 + - r0 is set to 1 to enable, 0 to disable + - previous IE bit is stored as LSB of r0 + * - vmgetie + - get interrupt enabled + - 0x4 + - N/A + - current IE bit is stored as LSB of r0 + * - vmintop + - interrupt operation + - 0x5 + - r0 = Interrupt Op, r1-r4: Depends on Op + - r0 - value depends on operation + * - vmclrmap + - clear virtual memory map + - 0xa + - r0 = Interrupt Op, r1-r4: Depends on Op + - r0 - value depends on operation + * - vmnewmap + - set new virtual memory map + - 0xb + - + r0 contains logical address of new segment table + + r1 = type of translations: 0 indicates a logical address of a zero-terminated linear list, 1 indicates a set of page tables. 
+ - r0 contains 0 on success, otherwise negative error code + * - vmcache + - VM cache control: not modeled + - 0xd + - + r0 contains the operation to be performed + + r1 = Starting virtual address + + r2 contains the length in bytes + - r0 contains 0 on success, otherwise -1. Cache behavior is not modeled so this operation always succeeds. + * - vmgettime + - Get virtual machine time + - 0xe + - N/A + - r0 contains the least significant 32 bits of timestamp, r1 contains the most significant 32 bits of timestamp + * - vmsettime + - Set virtual machine time + - 0xf + - r0 contains the least significant 32 bits of timestamp, r1 contains the most significant 32 bits of timestamp + - N/A + * - vmwait + - wait for interrupt + - 0x10 + - N/A + - r0 contains the interrupt number of the interrupt waking the guest + * - vmyield + - voluntarily yield VM task + - 0x11 + - N/A + - N/A + * - vmstart + - Create new virtual processor instance + - 0x12 + - r0 contains the starting execution address, r1 contains the starting stack pointer + - r0 contains the Virtual processor number of new virtual processor on success, otherwise -1 + * - vmstop + - terminate current virtual processor instance + - 0x13 + - N/A + - N/A + * - vmvpid + - get the virtual processor ID + - 0x14 + - N/A + - r0 contains the virtual processor number of virtual processor executing the instruction + * - vmsetregs + - Set guest registers + - 0x15 + - r0-3 hold g0-3 values + - N/A + * - vmgetregs + - Get guest registers + - 0x16 + - N/A + - r0-3 hold g0-3 values + * - vmtimerop + - perform an operation on a system timer + - 0x18 + - + getfreq = 0 + + getres = 1 + + gettime = 2 + + gettimeout = 3 + + settimeout = 4 + + deltatimeout = 5 + - r0 contains result of the timer operation call + * - vmgetinfo + - Get system info + - 0x1a + - Index of the system info parameter: + + + build_id = 0 + + info_boot_flags = 1 + - value of the indicated system info parameter diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst index 7a0678cbdd3a..82f788682bb4 100644 --- a/docs/devel/index-internals.rst +++ b/docs/devel/index-internals.rst @@ -14,6 +14,9 @@ Details about QEMU's various subsystems including how to add features to them. block-coroutine-wrapper clocks ebpf_rss + hexagon-sys + hexagon-l2vic + hexagon-vm migration/index multi-process reset diff --git a/docs/system/hexagon/cdsp.rst b/docs/system/hexagon/cdsp.rst new file mode 100644 index 000000000000..f755fbe0a5ba --- /dev/null +++ b/docs/system/hexagon/cdsp.rst @@ -0,0 +1,10 @@ +Compute DSP +=========== + +A Hexagon CDSP is designed as a computation offload device for an SoC. The +``V66G_1024`` machine contains: + +* L2VIC interrupt controller +* QTimer timer device + +This machine will support any Hexagon CPU, but will default to ``v66``. diff --git a/docs/system/hexagon/emulation.rst b/docs/system/hexagon/emulation.rst new file mode 100644 index 000000000000..03a6092a1281 --- /dev/null +++ b/docs/system/hexagon/emulation.rst @@ -0,0 +1,16 @@ +.. _Hexagon Emulation: + +Hexagon CPU architecture support +================================ + +QEMU's TCG emulation includes support for v65, v66, v67, v68, v69, v71, v73. +It also has support for the following architecture extensions: + +- HVX (Hexagon Vector eXtensions) + +For information on the specifics of the HVX extension, please refer +to the `Qualcomm Hexagon V69 HVX Programmer's Reference Manual +`_. + +.. 
diff --git a/docs/system/target-hexagon.rst b/docs/system/target-hexagon.rst
new file mode 100644
index 000000000000..894337a533cd
--- /dev/null
+++ b/docs/system/target-hexagon.rst
@@ -0,0 +1,112 @@
+.. _Hexagon-System-emulator:
+
+Hexagon System emulator
+-----------------------
+
+Use the ``qemu-system-hexagon`` executable to simulate a 32-bit Hexagon
+machine.
+
+Hexagon Machines
+================
+
+Hexagon DSPs are suited to various functions and generally appear in a
+"DSP subsystem" of a larger system-on-chip (SoC).
+
+Hexagon DSPs are often included in a subsystem that looks like the diagram
+below. Instructions are loaded into DDR before the DSP is brought out of
+reset and the first instructions are fetched from DDR via the EVB/reset
+vector.
+
+In a real system, a TBU/SMMU would normally arbitrate AXI accesses, but we
+do not need to model that in QEMU.
+
+Hexagon DSP cores use simultaneous multithreading (SMT) with as many as 8
+hardware threads.
+
+.. admonition:: Diagram
+
+  .. code:: text
+
+                  AHB (local) bus                  AXI (global) bus
+                        │                                │
+                        │                                │
+     ┌─────────┐        │    ┌─────────────────┐         │
+     │  L2VIC  ├────────┤    │                 │         │
+     │         ├──┼────────► │   Hexagon DSP   ├─────────┤
+     └─────▲───┘        │    │                 │         │   ┌─────┐
+           │            │    │    N threads    │         │   │ DDR │
+           │            ├────┤                 │         │   │     │
+     ┌─────┴─┐          │    │                 ├─────────┤   │     │
+     │QTimer ├──────────┤    │                 │         │   │     │
+     │       │          │    │   ┌─────────┐   │         │   │     │
+     └───────┘          │    │  ┌─────────┐│   │         │   │     │
+     ┌───────┐          │    │  │ HVX xM  ││   │         │   │     │
+     │QDSP6SS├──────────┤    │  │         │┘   │         │   │     │
+     └───────┘          │    │  └─────────┘    │         │   └─────┘
+                        │    └─────────────────┘         │
+     ┌───────┐          │                                │
+     │  CSR  ├──────────┤    ┌──────┐   ┌───────────┐    │
+     └───────┘          │    │ TCM  │   │   VTCM    ├────┤
+                        │    │      │   │           │    │
+                             └──────┘   │           │    │
+                                        │           │    │
+                                        └───────────┘    │
+
+Components
+----------
+Other than l2vic and HVX, the components below are not implemented in QEMU.
+
+* L2VIC: the L2 vectored interrupt controller. Supports 1024 input
+  interrupts, edge- or level-triggered. The core ISA has system registers
+  ``VID``, ``VID1`` which read through to the L2VIC device.
+* QTimer: ARMSSE-based programmable timer device. Its interrupts are
+  wired to the L2VIC. System registers ``TIMER``, ``UTIMER`` read
+  through to the QTimer device.
+* QDSP6SS: DSP subsystem features, accessible to the entire SoC, including
+  DSP NMI, watchdog, reset, etc.
+* CSR: Configuration/Status Registers.
+* TCM: DSP-exclusive tightly-coupled memory. This memory can be used by
+  DSPs when isolated from DDR and in some bootstrapping modes.
+* VTCM: DSP-exclusive vector tightly-coupled memory. This memory is accessed
+  by some HVX instructions.
+* HVX: the vector coprocessor supports 64- and 128-byte vector registers.
+  64-byte mode is not implemented in QEMU.
+
+
+Bootstrapping
+-------------
+Hexagon systems do not generally have access to a block device, so the
+typical QEMU use case involves loading a binary or ELF file into memory
+and executing from the indicated start address::
+
+   $ qemu-system-hexagon -kernel ./prog -append 'arg1 arg2'
+
+Semihosting
+-----------
+Hexagon supports a semihosting interface similar to other architectures'.
+The ``trap0`` instruction can activate these semihosting calls so that the
+guest software can access the host console and filesystem. Semihosting
+is not yet implemented in QEMU hexagon.
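+
+The build selects ``CONFIG_ARM_COMPATIBLE_SEMIHOSTING``, so the eventual
+guest interface is expected to follow the ARM semihosting convention: an
+operation number plus a pointer to a parameter block. The sketch below is
+illustrative only; ``semihost_call`` and the trap sequence it would wrap
+are hypothetical, not an implemented ABI.
+
+.. code-block:: c
+
+   #define SYS_OPEN   0x01  /* open a file on the host */
+   #define SYS_WRITE0 0x04  /* write a NUL-terminated string to the console */
+
+   /* Parameter block layout for SYS_OPEN, per the ARM semihosting spec. */
+   struct sh_open_args {
+       const char *name;    /* host filename */
+       unsigned long mode;  /* 0 == "r", 4 == "w", ... */
+       unsigned long len;   /* strlen(name) */
+   };
+
+   /* Hypothetical shim: would issue the agreed-upon trap0 encoding. */
+   extern long semihost_call(long op, void *argblock);
+
+   long open_host_file(const char *name, unsigned long mode,
+                       unsigned long len)
+   {
+       struct sh_open_args args = { name, mode, len };
+       return semihost_call(SYS_OPEN, &args);
+   }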
+
+Hexagon Virtual Machine
+-----------------------
+
+The hexagon virtual machine is a hypervisor that can partition a single
+Hexagon DSP among multiple guest operating systems, and abstracts the
+specific details of a DSP architectural revision for the sake of consistency
+among generations.
+
+`minivm <https://github.com/quic/hexagonMVM>`_ is a reference implementation
+of this VM interface.
+
+
+Hexagon Features
+================
+.. toctree::
+   hexagon/emulation
+   hexagon/cdsp
+
diff --git a/docs/system/targets.rst b/docs/system/targets.rst
index 224fadae71c4..e6dcdb9d4161 100644
--- a/docs/system/targets.rst
+++ b/docs/system/targets.rst
@@ -29,3 +29,4 @@ Contents:
    target-sparc64
    target-i386
    target-xtensa
+   target-hexagon
diff --git a/gdb-xml/hexagon-sys.xml b/gdb-xml/hexagon-sys.xml
new file mode 100644
index 000000000000..1d9c21172253
--- /dev/null
+++ b/gdb-xml/hexagon-sys.xml
@@ -0,0 +1,116 @@
+<!-- GDB XML feature description of the Hexagon system registers
+     (the XML element content was lost in extraction and is not
+     reproduced here) -->
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index c59cd6637b97..ecdbdf623d11 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -196,7 +196,8 @@ SRST
 ERST
 
 #if defined(TARGET_I386) || defined(TARGET_SH4) || defined(TARGET_SPARC) || \
-    defined(TARGET_PPC) || defined(TARGET_XTENSA) || defined(TARGET_M68K)
+    defined(TARGET_PPC) || defined(TARGET_XTENSA) || defined(TARGET_M68K) || \
+    defined(TARGET_HEXAGON)
     {
         .name       = "tlb",
         .args_type  = "",
diff --git a/hw/Kconfig b/hw/Kconfig
index 9a86a6a28a64..4dc7914c13f5 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -67,6 +67,7 @@ source sparc/Kconfig
 source sparc64/Kconfig
 source tricore/Kconfig
 source xtensa/Kconfig
+source hexagon/Kconfig
 
 # Symbols used by multiple targets
 config TEST_DEVICES
diff --git a/hw/hexagon/Kconfig b/hw/hexagon/Kconfig
new file mode 100644
index 000000000000..9a2369974e09
--- /dev/null
+++ b/hw/hexagon/Kconfig
@@ -0,0 +1,15 @@
+config HEX_DSP
+    bool
+    default y
+    depends on HEXAGON && TCG
+    imply PTIMER
+    select L2VIC # Vector PIC
+    select ARM_COMPATIBLE_SEMIHOSTING
+
+config HEX_VIRT
+    bool
+    default y
+    depends on HEX_DSP && FDT
+    select DEVICE_TREE
+    select VIRTIO_MMIO
+    select PL011
diff --git a/hw/hexagon/hexagon_dsp.c b/hw/hexagon/hexagon_dsp.c
new file mode 100644
index 000000000000..f4440de80ce0
--- /dev/null
+++ b/hw/hexagon/hexagon_dsp.c
@@ -0,0 +1,206 @@
+/*
+ * Hexagon DSP Subsystem emulation. This represents a generic DSP
+ * subsystem with few peripherals, like the Compute DSP.
+ *
+ * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "exec/address-spaces.h" +#include "hw/hw.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "hw/hexagon/hexagon.h" +#include "hw/loader.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "elf.h" +#include "cpu.h" +#include "include/migration/cpu.h" +#include "include/system/system.h" +#include "target/hexagon/internal.h" +#include "system/reset.h" +#include "include/semihosting/semihost.h" + +#include "machine_cfg_v66g_1024.h.inc" + +static hwaddr isdb_secure_flag; +static hwaddr isdb_trusted_flag; +static void hex_symbol_callback(const char *st_name, int st_info, + uint64_t st_value, uint64_t st_size) +{ + if (!g_strcmp0("isdb_secure_flag", st_name)) { + isdb_secure_flag = st_value; + } + if (!g_strcmp0("isdb_trusted_flag", st_name)) { + isdb_trusted_flag = st_value; + } +} + +/* Board init. */ +static struct hexagon_board_boot_info hexagon_binfo; + +static void hexagon_load_kernel(HexagonCPU *cpu) +{ + uint64_t pentry; + long kernel_size; + + kernel_size = load_elf_ram_sym(hexagon_binfo.kernel_filename, NULL, NULL, + NULL, &pentry, NULL, NULL, + &hexagon_binfo.kernel_elf_flags, 0, EM_HEXAGON, 0, 0, + &address_space_memory, false, hex_symbol_callback); + + if (kernel_size <= 0) { + error_report("no kernel file '%s'", + hexagon_binfo.kernel_filename); + exit(1); + } + + qdev_prop_set_uint32(DEVICE(cpu), "exec-start-addr", pentry); +} + +static void hexagon_init_bootstrap(MachineState *machine, HexagonCPU *cpu) +{ + if (machine->kernel_filename) { + hexagon_load_kernel(cpu); + uint32_t mem = 1; + if (isdb_secure_flag) { + cpu_physical_memory_write(isdb_secure_flag, &mem, sizeof(mem)); + } + if (isdb_trusted_flag) { + cpu_physical_memory_write(isdb_trusted_flag, &mem, sizeof(mem)); + } + } +} + +static void do_cpu_reset(void *opaque) +{ + HexagonCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + cpu_reset(cs); +} + +static void hexagon_common_init(MachineState *machine, Rev_t rev, + hexagon_machine_config *m_cfg) +{ + memset(&hexagon_binfo, 0, sizeof(hexagon_binfo)); + if (machine->kernel_filename) { + hexagon_binfo.ram_size = machine->ram_size; + hexagon_binfo.kernel_filename = machine->kernel_filename; + } + + machine->enable_graphics = 0; + + MemoryRegion *address_space = get_system_memory(); + + MemoryRegion *config_table_rom = g_new(MemoryRegion, 1); + memory_region_init_rom(config_table_rom, NULL, "config_table.rom", + sizeof(m_cfg->cfgtable), &error_fatal); + memory_region_add_subregion(address_space, m_cfg->cfgbase, + config_table_rom); + + MemoryRegion *sram = g_new(MemoryRegion, 1); + memory_region_init_ram(sram, NULL, "ddr.ram", + machine->ram_size, &error_fatal); + memory_region_add_subregion(address_space, 0x0, sram); + + Error **errp = NULL; + + for (int i = 0; i < machine->smp.cpus; i++) { + HexagonCPU *cpu = HEXAGON_CPU(object_new(machine->cpu_type)); + CPUHexagonState *env = &cpu->env; + qemu_register_reset(do_cpu_reset, cpu); + + /* + * CPU #0 is the only CPU running at boot, others must be + * explicitly enabled via start instruction. 
+ */ + qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0)); + qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base); + qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase); + qdev_prop_set_uint32(DEVICE(cpu), "qtimer-base-addr", m_cfg->qtmr_region); + qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts", + m_cfg->cfgtable.ext_contexts); + qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries", + m_cfg->cfgtable.jtlb_size_entries); + + + if (i == 0) { + hexagon_init_bootstrap(machine, cpu); + if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) { + return; + } + DeviceState *l2vic_dev; + l2vic_dev = sysbus_create_varargs("l2vic", m_cfg->l2vic_base, + /* IRQ#, Evnt#,CauseCode */ + qdev_get_gpio_in(DEVICE(cpu), 0), + qdev_get_gpio_in(DEVICE(cpu), 1), + qdev_get_gpio_in(DEVICE(cpu), 2), + qdev_get_gpio_in(DEVICE(cpu), 3), + qdev_get_gpio_in(DEVICE(cpu), 4), + qdev_get_gpio_in(DEVICE(cpu), 5), + qdev_get_gpio_in(DEVICE(cpu), 6), + qdev_get_gpio_in(DEVICE(cpu), 7), + NULL); + sysbus_mmio_map(SYS_BUS_DEVICE(l2vic_dev), 1, + m_cfg->cfgtable.fastl2vic_base << 16); + } else if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) { + env->dir_list = NULL; + return; + } + + } + + rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable, + sizeof(m_cfg->cfgtable), m_cfg->cfgbase, + &address_space_memory); +} + +static void init_mc(MachineClass *mc) +{ + mc->block_default_type = IF_SD; + mc->default_ram_size = 4 * GiB; + mc->no_parallel = 1; + mc->no_floppy = 1; + mc->no_cdrom = 1; + mc->no_serial = 1; + mc->is_default = false; + mc->max_cpus = 8; + qemu_semihosting_enable(); +} + +/* ----------------------------------------------------------------- */ +/* Core-specific configuration settings are defined below this line. */ +/* Config table values defined in machine_configs.h.inc */ +/* ----------------------------------------------------------------- */ + +static void v66g_1024_config_init(MachineState *machine) +{ + hexagon_common_init(machine, v66_rev, &v66g_1024); +} + +static void v66g_1024_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "Hexagon V66G_1024"; + mc->init = v66g_1024_config_init; + init_mc(mc); + mc->is_default = true; + mc->default_cpu_type = TYPE_HEXAGON_CPU_V66; + mc->default_cpus = 4; +} + +static const TypeInfo hexagon_machine_types[] = { + { + .name = MACHINE_TYPE_NAME("V66G_1024"), + .parent = TYPE_MACHINE, + .class_init = v66g_1024_init, + }, +}; + +DEFINE_TYPES(hexagon_machine_types) diff --git a/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc new file mode 100644 index 000000000000..70b1eabfe961 --- /dev/null +++ b/hw/hexagon/machine_cfg_sa8775_cdsp0.h.inc @@ -0,0 +1,63 @@ + +static hexagon_machine_config SA8775P_cdsp0 = { + .cfgbase = 0x24000000 + 0x180000, + .l2tcm_size = 0x00000000, + .l2vic_base = 0x26300000 + 0x90000, + .l2vic_size = 0x00001000, + .csr_base = 0x26300000, + .qtmr_region = 0x26300000 + 0xA1000, + .cfgtable = { + .l2tcm_base = 0x00002400, + .reserved0 = 0x00000000, + .subsystem_base = 0x00002638, + .etm_base = 0x00002419, + .l2cfg_base = 0x0000241a, + .reserved1 = 0x0000241b, + .l1s0_base = 0x00002500, + .axi2_lowaddr = 0x00000000, + .streamer_base = 0x00000000, + .reserved2 = 0x00000000, + .fastl2vic_base = 0x0000241e, + .jtlb_size_entries = 0x00000080, + .coproc_present = 0x00000001, + .ext_contexts = 0x00000004, + .vtcm_base = 0x00002500, + .vtcm_size_kb = 0x00002000, + .l2tag_size = 0x00000400, + .l2ecomem_size = 0x00000000, + 
.thread_enable_mask = 0x0000003f, + .eccreg_base = 0x0000241f, + .l2line_size = 0x00000080, + .tiny_core = 0x00000000, + .l2itcm_size = 0x00000000, + .l2itcm_base = 0x00002400, + .reserved3 = 0x00000000, + .dtm_present = 0x00000000, + .dma_version = 0x00000003, + .hvx_vec_log_length = 0x00000007, + .core_id = 0x00000000, + .core_count = 0x00000000, + .coproc2_reg0 = 0x00000040, + .coproc2_reg1 = 0x00000020, + .v2x_mode = 0x00000001, + .coproc2_reg2 = 0x00000008, + .coproc2_reg3 = 0x00000020, + .coproc2_reg4 = 0x00000000, + .coproc2_reg5 = 0x00000002, + .coproc2_reg6 = 0x00000016, + .coproc2_reg7 = 0x00000006, + .acd_preset = 0x00000001, + .mnd_preset = 0x00000000, + .l1d_size_kb = 0x00000010, + .l1i_size_kb = 0x00000020, + .l1d_write_policy = 0x00000002, + .vtcm_bank_width = 0x00000080, + .reserved3 = 0x00000001, + .reserved4 = 0x00000000, + .reserved5 = 0x00000003, + .coproc2_cvt_mpy_size = 0x0000000a, + .consistency_domain = 0x000000e0, + .capacity_domain = 0x00000080, + .axi3_lowaddr = 0x00000000, + }, +}; diff --git a/hw/hexagon/machine_cfg_v66g_1024.h.inc b/hw/hexagon/machine_cfg_v66g_1024.h.inc new file mode 100644 index 000000000000..8f2a593bb860 --- /dev/null +++ b/hw/hexagon/machine_cfg_v66g_1024.h.inc @@ -0,0 +1,63 @@ + +static hexagon_machine_config v66g_1024 = { + .cfgbase = 0xd8180000, + .l2tcm_size = 0x00000000, + .l2vic_base = 0xfc910000, + .l2vic_size = 0x00001000, + .csr_base = 0xfc900000, + .qtmr_region = 0xfc921000, + .cfgtable = { + .l2tcm_base = 0x0000d800, + .reserved0 = 0x0000d400, + .subsystem_base = 0x0000fc90, + .etm_base = 0x0000d805, + .l2cfg_base = 0x0000d81a, + .reserved1 = 0x00000000, + .l1s0_base = 0x0000d820, + .axi2_lowaddr = 0x00003000, + .streamer_base = 0x00000000, + .reserved2 = 0x0000d819, + .fastl2vic_base = 0x0000d81e, + .jtlb_size_entries = 0x00000080, + .coproc_present = 0x00000001, + .ext_contexts = 0x00000004, + .vtcm_base = 0x0000d820, + .vtcm_size_kb = 0x00000100, + .l2tag_size = 0x00000400, + .l2ecomem_size = 0x00000400, + .thread_enable_mask = 0x0000000f, + .eccreg_base = 0x0000d81f, + .l2line_size = 0x00000080, + .tiny_core = 0x00000000, + .l2itcm_size = 0x00000000, + .l2itcm_base = 0x0000d820, + .reserved3 = 0x00000000, + .dtm_present = 0x00000000, + .dma_version = 0x00000000, + .hvx_vec_log_length = 0x00000080, + .core_id = 0x00000000, + .core_count = 0x00000000, + .coproc2_reg0 = 0x00000000, + .coproc2_reg1 = 0x00000000, + .v2x_mode = 0x00000000, + .coproc2_reg2 = 0x00000000, + .coproc2_reg3 = 0x00000000, + .coproc2_reg4 = 0x00000000, + .coproc2_reg5 = 0x00000000, + .coproc2_reg6 = 0x00000000, + .coproc2_reg7 = 0x00000000, + .acd_preset = 0x00000000, + .mnd_preset = 0x00000000, + .l1d_size_kb = 0x00000000, + .l1i_size_kb = 0x00000000, + .l1d_write_policy = 0x00000000, + .vtcm_bank_width = 0x00000000, + .reserved3 = 0x00000000, + .reserved4 = 0x00000000, + .reserved5 = 0x00000000, + .coproc2_cvt_mpy_size = 0x00000000, + .consistency_domain = 0x00000000, + .capacity_domain = 0x00000000, + .axi3_lowaddr = 0x00000000, + }, +}; diff --git a/hw/hexagon/machine_cfg_v68n_1024.h.inc b/hw/hexagon/machine_cfg_v68n_1024.h.inc new file mode 100644 index 000000000000..257c133df8f3 --- /dev/null +++ b/hw/hexagon/machine_cfg_v68n_1024.h.inc @@ -0,0 +1,64 @@ + +static hexagon_machine_config v68n_1024 = { + .cfgbase = 0xde000000, + .l2tcm_size = 0x00000000, + .l2vic_base = 0xfc910000, + .l2vic_size = 0x00001000, + .csr_base = 0xfc900000, + .qtmr_region = 0xfc921000, + .cfgtable = { + .l2tcm_base = 0x0000d800, + .reserved0 = 0x00000000, + .subsystem_base = 
0x0000fc90, + .etm_base = 0x0000d819, + .l2cfg_base = 0x0000d81a, + .reserved1 = 0x00000000, + .l1s0_base = 0x0000d840, + .axi2_lowaddr = 0x00003000, + .streamer_base = 0x0000d81c, + .reserved2 = 0x0000d81d, + .fastl2vic_base = 0x0000d81e, + .jtlb_size_entries = 0x00000080, + .coproc_present = 0x00000001, + .ext_contexts = 0x00000004, + .vtcm_base = 0x0000d840, + .vtcm_size_kb = 0x00001000, + .l2tag_size = 0x00000400, + .l2ecomem_size = 0x00000400, + .thread_enable_mask = 0x0000003f, + .eccreg_base = 0x0000d81f, + .l2line_size = 0x00000080, + .tiny_core = 0x00000000, + .l2itcm_size = 0x00000000, + .l2itcm_base = 0x0000d820, + .reserved3 = 0x00000000, + .dtm_present = 0x00000000, + .dma_version = 0x00000001, + .hvx_vec_log_length = 0x00000007, + .core_id = 0x00000000, + .core_count = 0x00000000, + .coproc2_reg0 = 0x00000040, + .coproc2_reg1 = 0x00000020, + .v2x_mode = 0x1f1f1f1f, + .coproc2_reg2 = 0x1f1f1f1f, + .coproc2_reg3 = 0x1f1f1f1f, + .coproc2_reg4 = 0x1f1f1f1f, + .coproc2_reg5 = 0x1f1f1f1f, + .coproc2_reg6 = 0x1f1f1f1f, + .coproc2_reg7 = 0x1f1f1f1f, + .acd_preset = 0x1f1f1f1f, + .mnd_preset = 0x1f1f1f1f, + .l1d_size_kb = 0x1f1f1f1f, + .l1i_size_kb = 0x1f1f1f1f, + .l1d_write_policy = 0x1f1f1f1f, + .vtcm_bank_width = 0x1f1f1f1f, + .reserved3 = 0x1f1f1f1f, + .reserved4 = 0x1f1f1f1f, + .reserved5 = 0x1f1f1f1f, + .coproc2_cvt_mpy_size = 0x1f1f1f1f, + .consistency_domain = 0x1f1f1f1f, + .capacity_domain = 0x1f1f1f1f, + .axi3_lowaddr = 0x1f1f1f1f, + }, +}; + diff --git a/hw/hexagon/meson.build b/hw/hexagon/meson.build new file mode 100644 index 000000000000..649ad6dc02b3 --- /dev/null +++ b/hw/hexagon/meson.build @@ -0,0 +1,7 @@ +hexagon_ss = ss.source_set() +hexagon_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('hexagon_dsp.c',)) + +hw_arch += {'hexagon': hexagon_ss} + +hexagon_ss.add(when: 'CONFIG_HEX_VIRT', if_true: files('virt.c',)) + diff --git a/hw/hexagon/virt.c b/hw/hexagon/virt.c new file mode 100644 index 000000000000..1e7ac4e5b70b --- /dev/null +++ b/hw/hexagon/virt.c @@ -0,0 +1,415 @@ +/* + * Hexagon virt emulation + * + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "exec/address-spaces.h" +#include "hw/char/pl011.h" +#include "hw/core/sysbus-fdt.h" +#include "hw/hexagon/hexagon.h" +#include "hw/hexagon/virt.h" +#include "hw/loader.h" +#include "hw/qdev-properties.h" +#include "hw/register.h" +#include "hw/timer/qct-qtimer.h" +#include "qemu/error-report.h" +#include "qemu/guest-random.h" +#include "qemu/units.h" +#include "elf.h" +#include "machine_cfg_v68n_1024.h.inc" +#include "system/device_tree.h" +#include "system/reset.h" +#include "system/system.h" +#include + +static const int VIRTIO_DEV_COUNT = 2; + +static const MemMapEntry base_memmap[] = { + [VIRT_UART0] = { 0x10000000, 0x00000200 }, + [VIRT_MMIO] = { 0x11000000, 0x1000000, }, + [VIRT_GPT] = { 0xab000000, 0x00001000 }, + [VIRT_FDT] = { 0x99900000, 0x00000200 }, +}; + +static const int irqmap[] = { + [VIRT_MMIO] = 18, /* ...to 18 + VIRTIO_DEV_COUNT - 1 */ + [VIRT_GPT] = 12, + [VIRT_UART0] = 15, + [VIRT_QTMR0] = 2, + [VIRT_QTMR1] = 4, +}; + + +static void create_fdt(HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + void *fdt = create_device_tree(&vms->fdt_size); + + if (!fdt) { + error_report("create_device_tree() failed"); + exit(1); + } + + ms->fdt = fdt; + + qemu_fdt_setprop_string(fdt, "/", "compatible", "linux,hexagon-virt"); + qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2); + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x1); + qemu_fdt_setprop_string(fdt, "/", "model", "linux,hexagon-virt"); + + qemu_fdt_setprop_string(fdt, "/", "model", "hexagon-virt,qemu"); + qemu_fdt_setprop_string(fdt, "/", "compatible", "qcom,sm8150"); + + qemu_fdt_add_subnode(fdt, "/soc"); + qemu_fdt_setprop_cell(fdt, "/soc", "#address-cells", 0x2); + qemu_fdt_setprop_cell(fdt, "/soc", "#size-cells", 0x1); + qemu_fdt_setprop(fdt, "/soc", "ranges", NULL, 0); + + qemu_fdt_add_subnode(fdt, "/chosen"); + + uint8_t rng_seed[32]; + qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed)); + qemu_fdt_setprop(fdt, "/chosen", "rng-seed", rng_seed, sizeof(rng_seed)); +} + +static void fdt_add_hvx(HexagonVirtMachineState *vms, + const hexagon_machine_config *m_cfg, Error **errp) +{ + const MachineState *ms = MACHINE(vms); + uint32_t vtcm_size_bytes = m_cfg->cfgtable.vtcm_size_kb * 1024; + if (vtcm_size_bytes > 0) { + memory_region_init_ram(&vms->vtcm, NULL, "vtcm.ram", vtcm_size_bytes, + errp); + memory_region_add_subregion(vms->sys, m_cfg->cfgtable.vtcm_base << 16, + &vms->vtcm); + + qemu_fdt_add_subnode(ms->fdt, "/soc/vtcm"); + qemu_fdt_setprop_string(ms->fdt, "/soc/vtcm", "compatible", + "qcom,hexagon_vtcm"); + + assert(sizeof(m_cfg->cfgtable.vtcm_base) == sizeof(uint32_t)); + qemu_fdt_setprop_cells(ms->fdt, "/soc/vtcm", "reg", 0, + m_cfg->cfgtable.vtcm_base << 16, + vtcm_size_bytes); + } + + if (m_cfg->cfgtable.ext_contexts > 0) { + qemu_fdt_add_subnode(ms->fdt, "/soc/hvx"); + qemu_fdt_setprop_string(ms->fdt, "/soc/hvx", "compatible", + "qcom,hexagon-hvx"); + qemu_fdt_setprop_cells(ms->fdt, "/soc/hvx", "qcom,hvx-max-ctxts", + m_cfg->cfgtable.ext_contexts); + qemu_fdt_setprop_cells(ms->fdt, "/soc/hvx", "qcom,hvx-vlength", + m_cfg->cfgtable.hvx_vec_log_length); + } +} + +static int32_t irq_hvm_ic_phandle = -1; +static void fdt_add_hvm_pic_node(HexagonVirtMachineState *vms, + const hexagon_machine_config *m_cfg) +{ + MachineState *ms = MACHINE(vms); + irq_hvm_ic_phandle = qemu_fdt_alloc_phandle(ms->fdt); + + qemu_fdt_setprop_cell(ms->fdt, "/soc", "interrupt-parent", + irq_hvm_ic_phandle); + + 
qemu_fdt_add_subnode(ms->fdt, "/soc/interrupt-controller"); + qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller", + "#address-cells", 2); + qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller", + "#interrupt-cells", 2); + qemu_fdt_setprop_string(ms->fdt, "/soc/interrupt-controller", "compatible", + "qcom,h2-pic,hvm-pic"); + qemu_fdt_setprop(ms->fdt, "/soc/interrupt-controller", + "interrupt-controller", NULL, 0); + qemu_fdt_setprop_cell(ms->fdt, "/soc/interrupt-controller", "phandle", + irq_hvm_ic_phandle); + + sysbus_mmio_map(SYS_BUS_DEVICE(vms->l2vic), 1, + m_cfg->cfgtable.fastl2vic_base << 16); +} + + +static void fdt_add_gpt_node(HexagonVirtMachineState *vms) +{ + g_autofree char *name = NULL; + MachineState *ms = MACHINE(vms); + + name = g_strdup_printf("/soc/gpt@%" PRIx64, + (int64_t)base_memmap[VIRT_GPT].base); + qemu_fdt_add_subnode(ms->fdt, name); + qemu_fdt_setprop_string(ms->fdt, name, "compatible", + "qcom,h2-timer,hvm-timer"); + qemu_fdt_setprop_cells(ms->fdt, name, "interrupts", irqmap[VIRT_GPT], 0); + qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0x0, + base_memmap[VIRT_GPT].base, + base_memmap[VIRT_GPT].size); +} + +static int32_t clock_phandle = -1; +static void fdt_add_clocks(const HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + clock_phandle = qemu_fdt_alloc_phandle(ms->fdt); + qemu_fdt_add_subnode(ms->fdt, "/apb-pclk"); + qemu_fdt_setprop_string(ms->fdt, "/apb-pclk", "compatible", "fixed-clock"); + qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "#clock-cells", 0x0); + qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "clock-frequency", 24000000); + qemu_fdt_setprop_string(ms->fdt, "/apb-pclk", "clock-output-names", + "clk24mhz"); + qemu_fdt_setprop_cell(ms->fdt, "/apb-pclk", "phandle", clock_phandle); +} + +static void fdt_add_uart(const HexagonVirtMachineState *vms, int uart) +{ + char *nodename; + hwaddr base = base_memmap[uart].base; + hwaddr size = base_memmap[uart].size; + assert(uart == 0); + int irq = irqmap[VIRT_UART0 + uart]; + const char compat[] = "arm,pl011\0arm,primecell"; + const char clocknames[] = "uartclk\0apb_pclk"; + MachineState *ms = MACHINE(vms); + + pl011_create(base, qdev_get_gpio_in(vms->l2vic, irq), serial_hd(0)); + + nodename = g_strdup_printf("/pl011@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); + + /* Note that we can't use setprop_string because of the embedded NUL */ + qemu_fdt_setprop(ms->fdt, nodename, "compatible", compat, sizeof(compat)); + qemu_fdt_setprop_cells(ms->fdt, nodename, "reg", 0, base, size); + qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", 32 + irq, 0); + qemu_fdt_setprop_cells(ms->fdt, nodename, "clocks", clock_phandle, + clock_phandle); + qemu_fdt_setprop(ms->fdt, nodename, "clock-names", clocknames, + sizeof(clocknames)); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + irq_hvm_ic_phandle); + + qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", nodename); + qemu_fdt_add_subnode(ms->fdt, "/aliases"); + qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", nodename); + + g_free(nodename); +} + +static void fdt_add_cpu_nodes(const HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + qemu_fdt_add_subnode(ms->fdt, "/cpus"); + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", 0x1); + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0); + + /* cpu nodes */ + for (int num = ms->smp.cpus - 1; num >= 0; num--) { + char *nodename = g_strdup_printf("/cpus/cpu@%d", num); + qemu_fdt_add_subnode(ms->fdt, nodename); + 
qemu_fdt_setprop_string(ms->fdt, nodename, "device_type", "cpu"); + qemu_fdt_setprop_cell(ms->fdt, nodename, "reg", num); + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); + g_free(nodename); + } +} + + +static void fdt_add_virtio_devices(const HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + /* VirtIO MMIO devices */ + for (int i = 0; i < VIRTIO_DEV_COUNT; i++) { + char *nodename; + int irq = irqmap[VIRT_MMIO] + i; + size_t size = base_memmap[VIRT_MMIO].size; + hwaddr base = base_memmap[VIRT_MMIO].base + i * size; + + nodename = g_strdup_printf("/virtio_mmio@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); + qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "virtio,mmio"); + qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", 2, base, 1, + size); + qemu_fdt_setprop_cells(ms->fdt, nodename, "interrupts", irq, 0); + qemu_fdt_setprop_cell(ms->fdt, nodename, "interrupt-parent", + irq_hvm_ic_phandle); + + sysbus_create_simple( + "virtio-mmio", base, + qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_MMIO] + i)); + + g_free(nodename); + } +} + +static void create_qtimer(HexagonVirtMachineState *vms, + const hexagon_machine_config *m_cfg) +{ + Error **errp = NULL; + QCTQtimerState *qtimer = QCT_QTIMER(qdev_new(TYPE_QCT_QTIMER)); + + object_property_set_uint(OBJECT(qtimer), "nr_frames", 2, errp); + object_property_set_uint(OBJECT(qtimer), "nr_views", 1, errp); + object_property_set_uint(OBJECT(qtimer), "cnttid", 0x111, errp); + sysbus_realize_and_unref(SYS_BUS_DEVICE(qtimer), errp); + + + sysbus_mmio_map(SYS_BUS_DEVICE(qtimer), 1, m_cfg->qtmr_region); + sysbus_connect_irq(SYS_BUS_DEVICE(qtimer), 0, + qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR0])); + sysbus_connect_irq(SYS_BUS_DEVICE(qtimer), 1, + qdev_get_gpio_in(vms->l2vic, irqmap[VIRT_QTMR1])); +} + +static void virt_instance_init(Object *obj) +{ + HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(obj); + + create_fdt(vms); +} + +void hexagon_load_fdt(const HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + hwaddr fdt_addr = base_memmap[VIRT_FDT].base; + uint32_t fdtsize = vms->fdt_size; + + /* copy in the device tree */ + rom_add_blob_fixed_as("fdt", ms->fdt, fdtsize, fdt_addr, + &address_space_memory); + qemu_register_reset_nosnapshotload( + qemu_fdt_randomize_seeds, + rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); +} + +static uint64_t load_kernel(const HexagonVirtMachineState *vms) +{ + MachineState *ms = MACHINE(vms); + uint64_t entry = 0; + if (load_elf_ram_sym(ms->kernel_filename, NULL, NULL, NULL, &entry, NULL, + NULL, NULL, 0, EM_HEXAGON, 0, 0, &address_space_memory, + false, NULL) > 0) { + return entry; + } + error_report("error loading '%s'", ms->kernel_filename); + exit(1); +} + +static void do_cpu_reset(void *opaque) +{ + HexagonCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + cpu_reset(cs); +} + +static void virt_init(MachineState *ms) +{ + HexagonVirtMachineState *vms = HEXAGON_VIRT_MACHINE(ms); + Error **errp = NULL; + const hexagon_machine_config *m_cfg = &v68n_1024; + + qemu_fdt_setprop_string(ms->fdt, "/chosen", "bootargs", ms->kernel_cmdline); + + vms->sys = get_system_memory(); + + memory_region_init_ram(&vms->ram, NULL, "ddr.ram", ms->ram_size, errp); + memory_region_add_subregion(vms->sys, 0x0, &vms->ram); + + if (m_cfg->l2tcm_size) { + memory_region_init_ram(&vms->tcm, NULL, "tcm.ram", m_cfg->l2tcm_size, + errp); + memory_region_add_subregion(vms->sys, m_cfg->cfgtable.l2tcm_base << 16, + &vms->tcm); + } + + 
memory_region_init_rom(&vms->cfgtable, NULL, "config_table.rom", + sizeof(m_cfg->cfgtable), errp); + memory_region_add_subregion(vms->sys, m_cfg->cfgbase, &vms->cfgtable); + fdt_add_hvx(vms, m_cfg, errp); + const char *cpu_model = ms->cpu_type; + + if (!cpu_model) { + cpu_model = HEXAGON_CPU_TYPE_NAME("v73"); + } + + HexagonCPU *cpu_0 = NULL; + for (int i = 0; i < ms->smp.cpus; i++) { + HexagonCPU *cpu = HEXAGON_CPU(object_new(ms->cpu_type)); + qemu_register_reset(do_cpu_reset, cpu); + + if (i == 0) { + cpu_0 = cpu; + if (ms->kernel_filename) { + uint64_t entry = load_kernel(vms); + + qdev_prop_set_uint32(DEVICE(cpu_0), "exec-start-addr", entry); + } + } + qdev_prop_set_bit(DEVICE(cpu), "start-powered-off", (i != 0)); + qdev_prop_set_uint32(DEVICE(cpu), "hvx-contexts", + m_cfg->cfgtable.ext_contexts); + qdev_prop_set_uint32(DEVICE(cpu), "config-table-addr", m_cfg->cfgbase); + qdev_prop_set_uint32(DEVICE(cpu), "l2vic-base-addr", m_cfg->l2vic_base); + qdev_prop_set_uint32(DEVICE(cpu), "qtimer-base-addr", m_cfg->qtmr_region); + qdev_prop_set_uint32(DEVICE(cpu), "jtlb-entries", + m_cfg->cfgtable.jtlb_size_entries); + + if (!qdev_realize_and_unref(DEVICE(cpu), NULL, errp)) { + return; + } + } + vms->l2vic = sysbus_create_varargs( + "l2vic", m_cfg->l2vic_base, qdev_get_gpio_in(DEVICE(cpu_0), 0), + qdev_get_gpio_in(DEVICE(cpu_0), 1), qdev_get_gpio_in(DEVICE(cpu_0), 2), + qdev_get_gpio_in(DEVICE(cpu_0), 3), qdev_get_gpio_in(DEVICE(cpu_0), 4), + qdev_get_gpio_in(DEVICE(cpu_0), 5), qdev_get_gpio_in(DEVICE(cpu_0), 6), + qdev_get_gpio_in(DEVICE(cpu_0), 7), NULL); + + fdt_add_hvm_pic_node(vms, m_cfg); + fdt_add_virtio_devices(vms); + fdt_add_cpu_nodes(vms); + fdt_add_clocks(vms); + fdt_add_uart(vms, VIRT_UART0); + fdt_add_gpt_node(vms); + create_qtimer(vms, m_cfg); + + rom_add_blob_fixed_as("config_table.rom", &m_cfg->cfgtable, + sizeof(m_cfg->cfgtable), m_cfg->cfgbase, + &address_space_memory); + + + hexagon_load_fdt(vms); +} + + +static void virt_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->init = virt_init; + mc->default_cpu_type = HEXAGON_CPU_TYPE_NAME("v73"); + mc->default_ram_size = 4 * GiB; + mc->max_cpus = 8; + mc->default_cpus = 8; + mc->is_default = false; + mc->default_kernel_irqchip_split = false; + mc->block_default_type = IF_VIRTIO; + mc->default_boot_order = NULL; + mc->no_cdrom = 1; + mc->numa_mem_supported = false; + mc->default_nic = "virtio-mmio-bus"; +} + + +static const TypeInfo virt_machine_types[] = { { + .name = TYPE_HEXAGON_VIRT_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(HexagonVirtMachineState), + .class_init = virt_class_init, + .instance_init = virt_instance_init, +} }; + +DEFINE_TYPES(virt_machine_types) diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig index 7547528f2c27..a5b136e2fa72 100644 --- a/hw/intc/Kconfig +++ b/hw/intc/Kconfig @@ -8,6 +8,9 @@ config I8259 config PL190 bool +config L2VIC + bool + config IOAPIC bool select I8259 diff --git a/hw/intc/l2vic.c b/hw/intc/l2vic.c new file mode 100644 index 000000000000..1c450179dd6d --- /dev/null +++ b/hw/intc/l2vic.c @@ -0,0 +1,417 @@ +/* + * QEMU L2VIC Interrupt Controller + * + * Arm PrimeCell PL190 Vector Interrupt Controller was used as a reference. + * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "migration/vmstate.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/intc/l2vic.h"
+#include "trace.h"
+
+#define L2VICA(s, n) (s[(n) >> 2])
+
+#define TYPE_L2VIC "l2vic"
+OBJECT_DECLARE_SIMPLE_TYPE(L2VICState, L2VIC)
+
+#define SLICE_MAX (L2VIC_INTERRUPT_MAX / 32)
+
+typedef struct L2VICState {
+    SysBusDevice parent_obj;
+
+    QemuMutex active;
+    MemoryRegion iomem;
+    MemoryRegion fast_iomem;
+    uint32_t level;
+    /*
+     * offset 0: vid group 0, etc.; 10 bits in each group
+     * are used:
+     */
+    uint32_t vid_group[4];
+    uint32_t vid0;
+    /* Clear Status of Active Edge interrupt, not used: */
+    uint32_t int_clear[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Enable interrupt source */
+    uint32_t int_enable[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Clear (set to 0) corresponding bit in int_enable */
+    uint32_t int_enable_clear;
+    /* Set (to 1) corresponding bit in int_enable */
+    uint32_t int_enable_set;
+    /* Present for debugging, not used */
+    uint32_t int_pending[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Generate an interrupt */
+    uint32_t int_soft;
+    /* Which enabled interrupt is active */
+    uint32_t int_status[SLICE_MAX] QEMU_ALIGNED(16);
+    /* Edge or Level interrupt */
+    uint32_t int_type[SLICE_MAX] QEMU_ALIGNED(16);
+    /* L2 interrupt group 0-3 0x600-0x7FF */
+    uint32_t int_group_n0[SLICE_MAX] QEMU_ALIGNED(16);
+    uint32_t int_group_n1[SLICE_MAX] QEMU_ALIGNED(16);
+    uint32_t int_group_n2[SLICE_MAX] QEMU_ALIGNED(16);
+    uint32_t int_group_n3[SLICE_MAX] QEMU_ALIGNED(16);
+    qemu_irq irq[8];
+} L2VICState;
+
+
+/*
+ * Find out if this irq is associated with a group other than
+ * the default group
+ */
+static uint32_t *get_int_group(L2VICState *s, int irq)
+{
+    int n = irq & 0x1f;
+    if (n < 8) {
+        return s->int_group_n0;
+    }
+    if (n < 16) {
+        return s->int_group_n1;
+    }
+    if (n < 24) {
+        return s->int_group_n2;
+    }
+    return s->int_group_n3;
+}
+
+static int find_slice(int irq)
+{
+    return irq / 32;
+}
+
+static int get_vid(L2VICState *s, int irq)
+{
+    uint32_t *group = get_int_group(s, irq);
+    uint32_t slice = group[find_slice(irq)];
+    /* Mask with 0x7 to remove the GRP:EN bit */
+    uint32_t val = slice >> ((irq & 0x7) * 4);
+    if (val & 0x8) {
+        return val & 0x7;
+    } else {
+        return 0;
+    }
+}
+
+static inline bool vid_active(L2VICState *s)
+{
+    /* scan all 1024 bits in the int_status array */
+    const int size = sizeof(s->int_status) * CHAR_BIT;
+    const int active_irq = find_first_bit((unsigned long *)s->int_status, size);
+    return active_irq != size;
+}
+
+static bool l2vic_update(L2VICState *s, int irq)
+{
+    if (vid_active(s)) {
+        return true;
+    }
+
+    bool pending = test_bit(irq, (unsigned long *)s->int_pending);
+    bool enable = test_bit(irq, (unsigned long *)s->int_enable);
+    if (pending && enable) {
+        int vid = get_vid(s, irq);
+        set_bit(irq, (unsigned long *)s->int_status);
+        clear_bit(irq, (unsigned long *)s->int_pending);
+        clear_bit(irq, (unsigned long *)s->int_enable);
+        /* ensure the irq line goes low after going high */
+        s->vid0 = irq;
+        s->vid_group[get_vid(s, irq)] = irq;
+
+        /* already low: now call pulse */
+        /* pulse: calls qemu_irq_raise() and then qemu_irq_lower() */
+        qemu_irq_pulse(s->irq[vid + 2]);
+        trace_l2vic_delivered(irq, vid);
+        return true;
+    }
+    return false;
+}
+
+static void l2vic_update_all(L2VICState *s)
+{
+    for (int i = 0; i < L2VIC_INTERRUPT_MAX; i++) {
+        if (l2vic_update(s, i) == true) {
+            /* once vid is active, no-one else can set it until ciad */
+            return;
+        }
+    }
+}
+
+static void l2vic_set_irq(void *opaque, int irq, int level)
+{
+    L2VICState *s = (L2VICState *)opaque;
+    if (level) {
+        qemu_mutex_lock(&s->active);
+        set_bit(irq, (unsigned long *)s->int_pending);
+        qemu_mutex_unlock(&s->active);
+    }
+    l2vic_update(s, irq);
+}
+
+static void l2vic_write(void *opaque, hwaddr offset, uint64_t val,
+                        unsigned size)
+{
+    L2VICState *s = (L2VICState *)opaque;
+    qemu_mutex_lock(&s->active);
+    trace_l2vic_reg_write((unsigned)offset, (uint32_t)val);
+
+    if (offset == L2VIC_VID_0) {
+        if ((int)val != L2VIC_CIAD_INSTRUCTION) {
+            s->vid0 = val;
+        } else {
+            /* ciad issued: clear int_status */
+            clear_bit(s->vid0, (unsigned long *)s->int_status);
+        }
+    } else if (offset >= L2VIC_INT_ENABLEn &&
+               offset < (L2VIC_INT_ENABLE_CLEARn)) {
+        L2VICA(s->int_enable, offset - L2VIC_INT_ENABLEn) = val;
+    } else if (offset >= L2VIC_INT_ENABLE_CLEARn &&
+               offset < L2VIC_INT_ENABLE_SETn) {
+        L2VICA(s->int_enable, offset - L2VIC_INT_ENABLE_CLEARn) &= ~val;
+    } else if (offset >= L2VIC_INT_ENABLE_SETn && offset < L2VIC_INT_TYPEn) {
+        L2VICA(s->int_enable, offset - L2VIC_INT_ENABLE_SETn) |= val;
+    } else if (offset >= L2VIC_INT_TYPEn && offset < L2VIC_INT_TYPEn + 0x80) {
+        L2VICA(s->int_type, offset - L2VIC_INT_TYPEn) = val;
+    } else if (offset >= L2VIC_INT_STATUSn && offset < L2VIC_INT_CLEARn) {
+        L2VICA(s->int_status, offset - L2VIC_INT_STATUSn) = val;
+    } else if (offset >= L2VIC_INT_CLEARn && offset < L2VIC_SOFT_INTn) {
+        L2VICA(s->int_clear, offset - L2VIC_INT_CLEARn) = val;
+    } else if (offset >= L2VIC_INT_PENDINGn &&
+               offset < L2VIC_INT_PENDINGn + 0x80) {
+        L2VICA(s->int_pending, offset - L2VIC_INT_PENDINGn) = val;
+    } else if (offset >= L2VIC_SOFT_INTn && offset < L2VIC_INT_PENDINGn) {
+        L2VICA(s->int_enable, offset - L2VIC_SOFT_INTn) |= val;
+        /*
+         * Need to reverse engineer the actual irq number.
+ */ + int irq = find_first_bit((unsigned long *)&val, + sizeof(s->int_enable[0]) * CHAR_BIT); + hwaddr byteoffset = offset - L2VIC_SOFT_INTn; + g_assert(irq != sizeof(s->int_enable[0]) * CHAR_BIT); + irq += byteoffset * 8; + + /* The soft-int interface only works with edge-triggered interrupts */ + if (test_bit(irq, (unsigned long *)s->int_type)) { + qemu_mutex_unlock(&s->active); + l2vic_set_irq(opaque, irq, 1); + qemu_mutex_lock(&s->active); + } + } else if (offset >= L2VIC_INT_GRPn_0 && offset < L2VIC_INT_GRPn_1) { + L2VICA(s->int_group_n0, offset - L2VIC_INT_GRPn_0) = val; + } else if (offset >= L2VIC_INT_GRPn_1 && offset < L2VIC_INT_GRPn_2) { + L2VICA(s->int_group_n1, offset - L2VIC_INT_GRPn_1) = val; + } else if (offset >= L2VIC_INT_GRPn_2 && offset < L2VIC_INT_GRPn_3) { + L2VICA(s->int_group_n2, offset - L2VIC_INT_GRPn_2) = val; + } else if (offset >= L2VIC_INT_GRPn_3 && offset < L2VIC_INT_GRPn_3 + 0x80) { + L2VICA(s->int_group_n3, offset - L2VIC_INT_GRPn_3) = val; + } else { + qemu_log_mask(LOG_UNIMP, "%s: offset %x unimplemented\n", __func__, + (int)offset); + } + l2vic_update_all(s); + qemu_mutex_unlock(&s->active); + return; +} + +static uint64_t l2vic_read(void *opaque, hwaddr offset, unsigned size) +{ + uint64_t value; + L2VICState *s = (L2VICState *)opaque; + qemu_mutex_lock(&s->active); + + if (offset == L2VIC_VID_GRP_0) { + value = s->vid_group[0]; + } else if (offset == L2VIC_VID_GRP_1) { + value = s->vid_group[1]; + } else if (offset == L2VIC_VID_GRP_2) { + value = s->vid_group[2]; + } else if (offset == L2VIC_VID_GRP_3) { + value = s->vid_group[3]; + } else if (offset == L2VIC_VID_0) { + value = s->vid0; + } else if (offset >= L2VIC_INT_ENABLEn && + offset < L2VIC_INT_ENABLE_CLEARn) { + value = L2VICA(s->int_enable, offset - L2VIC_INT_ENABLEn); + } else if (offset >= L2VIC_INT_ENABLE_CLEARn && + offset < L2VIC_INT_ENABLE_SETn) { + value = 0; + } else if (offset >= L2VIC_INT_ENABLE_SETn && offset < L2VIC_INT_TYPEn) { + value = 0; + } else if (offset >= L2VIC_INT_TYPEn && offset < L2VIC_INT_TYPEn + 0x80) { + value = L2VICA(s->int_type, offset - L2VIC_INT_TYPEn); + } else if (offset >= L2VIC_INT_STATUSn && offset < L2VIC_INT_CLEARn) { + value = L2VICA(s->int_status, offset - L2VIC_INT_STATUSn); + } else if (offset >= L2VIC_INT_CLEARn && offset < L2VIC_SOFT_INTn) { + value = L2VICA(s->int_clear, offset - L2VIC_INT_CLEARn); + } else if (offset >= L2VIC_SOFT_INTn && offset < L2VIC_INT_PENDINGn) { + value = 0; + } else if (offset >= L2VIC_INT_PENDINGn && + offset < L2VIC_INT_PENDINGn + 0x80) { + value = L2VICA(s->int_pending, offset - L2VIC_INT_PENDINGn); + } else if (offset >= L2VIC_INT_GRPn_0 && offset < L2VIC_INT_GRPn_1) { + value = L2VICA(s->int_group_n0, offset - L2VIC_INT_GRPn_0); + } else if (offset >= L2VIC_INT_GRPn_1 && offset < L2VIC_INT_GRPn_2) { + value = L2VICA(s->int_group_n1, offset - L2VIC_INT_GRPn_1); + } else if (offset >= L2VIC_INT_GRPn_2 && offset < L2VIC_INT_GRPn_3) { + value = L2VICA(s->int_group_n2, offset - L2VIC_INT_GRPn_2); + } else if (offset >= L2VIC_INT_GRPn_3 && offset < L2VIC_INT_GRPn_3 + 0x80) { + value = L2VICA(s->int_group_n3, offset - L2VIC_INT_GRPn_3); + } else { + value = 0; + qemu_log_mask(LOG_GUEST_ERROR, "L2VIC: %s: offset 0x%x\n", __func__, + (int)offset); + } + + trace_l2vic_reg_read((unsigned)offset, value); + qemu_mutex_unlock(&s->active); + + return value; +} + +static const MemoryRegionOps l2vic_ops = { + .read = l2vic_read, + .write = l2vic_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 4, + 
+    .valid.max_access_size = 4,
+    .valid.unaligned = false,
+};
+
+#define FASTL2VIC_ENABLE 0x0
+#define FASTL2VIC_DISABLE 0x1
+#define FASTL2VIC_INT 0x2
+
+static void fastl2vic_write(void *opaque, hwaddr offset, uint64_t val,
+                            unsigned size)
+{
+    if (offset == 0) {
+        uint32_t cmd = (val >> 16) & 0x3;
+        uint32_t irq = val & 0x3ff;
+        uint32_t slice = (irq / 32) * 4;
+        val = 1 << (irq % 32);
+
+        if (cmd == FASTL2VIC_ENABLE) {
+            l2vic_write(opaque, L2VIC_INT_ENABLE_SETn + slice, val, size);
+            return;
+        } else if (cmd == FASTL2VIC_DISABLE) {
+            l2vic_write(opaque, L2VIC_INT_ENABLE_CLEARn + slice, val, size);
+            return;
+        } else if (cmd == FASTL2VIC_INT) {
+            l2vic_write(opaque, L2VIC_SOFT_INTn + slice, val, size);
+            return;
+        }
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write cmd %" PRIu32 "\n",
+                      __func__, cmd);
+        return;
+    }
+    qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid write offset 0x%08" HWADDR_PRIx
+                  "\n", __func__, offset);
+}
+
+static const MemoryRegionOps fastl2vic_ops = {
+    .write = fastl2vic_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .valid.unaligned = false,
+};
+
+static void l2vic_reset_hold(Object *obj, G_GNUC_UNUSED ResetType res_type)
+{
+    L2VICState *s = L2VIC(obj);
+    memset(s->int_clear, 0, sizeof(s->int_clear));
+    memset(s->int_enable, 0, sizeof(s->int_enable));
+    memset(s->int_pending, 0, sizeof(s->int_pending));
+    memset(s->int_status, 0, sizeof(s->int_status));
+    memset(s->int_type, 0, sizeof(s->int_type));
+    memset(s->int_group_n0, 0, sizeof(s->int_group_n0));
+    memset(s->int_group_n1, 0, sizeof(s->int_group_n1));
+    memset(s->int_group_n2, 0, sizeof(s->int_group_n2));
+    memset(s->int_group_n3, 0, sizeof(s->int_group_n3));
+    s->int_soft = 0;
+    s->vid0 = 0;
+
+    l2vic_update_all(s);
+}
+
+
+static void reset_irq_handler(void *opaque, int irq, int level)
+{
+    L2VICState *s = (L2VICState *)opaque;
+    Object *obj = OBJECT(opaque);
+    if (level) {
+        l2vic_reset_hold(obj, RESET_TYPE_COLD);
+    }
+    l2vic_update_all(s);
+}
+
+static void l2vic_init(Object *obj)
+{
+    DeviceState *dev = DEVICE(obj);
+    L2VICState *s = L2VIC(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    int i;
+
+    memory_region_init_io(&s->iomem, obj, &l2vic_ops, s, "l2vic", 0x1000);
+    sysbus_init_mmio(sbd, &s->iomem);
+    memory_region_init_io(&s->fast_iomem, obj, &fastl2vic_ops, s, "fast",
+                          0x10000);
+    sysbus_init_mmio(sbd, &s->fast_iomem);
+
+    qdev_init_gpio_in(dev, l2vic_set_irq, L2VIC_INTERRUPT_MAX);
+    qdev_init_gpio_in_named(dev, reset_irq_handler, "reset", 1);
+    for (i = 0; i < 8; i++) {
+        sysbus_init_irq(sbd, &s->irq[i]);
+    }
+    qemu_mutex_init(&s->active); /* TODO: remove this; it is an experiment */
+}
+
+static const VMStateDescription vmstate_l2vic = {
+    .name = "l2vic",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields =
+        (VMStateField[]){
+            VMSTATE_UINT32(level, L2VICState),
+            VMSTATE_UINT32_ARRAY(vid_group, L2VICState, 4),
+            VMSTATE_UINT32(vid0, L2VICState),
+            VMSTATE_UINT32_ARRAY(int_enable, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32(int_enable_clear, L2VICState),
+            VMSTATE_UINT32(int_enable_set, L2VICState),
+            VMSTATE_UINT32_ARRAY(int_type, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_status, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_clear, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32(int_soft, L2VICState),
+            VMSTATE_UINT32_ARRAY(int_pending, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n0, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n1, L2VICState, SLICE_MAX),
+            VMSTATE_UINT32_ARRAY(int_group_n2, L2VICState, SLICE_MAX),
VMSTATE_UINT32_ARRAY(int_group_n3, L2VICState, SLICE_MAX), + VMSTATE_END_OF_LIST() } +}; + +static void l2vic_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); + + dc->vmsd = &vmstate_l2vic; + rc->phases.hold = l2vic_reset_hold; +} + +static const TypeInfo l2vic_info = { + .name = TYPE_L2VIC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(L2VICState), + .instance_init = l2vic_init, + .class_init = l2vic_class_init, +}; + +static void l2vic_register_types(void) +{ + type_register_static(&l2vic_info); +} + +type_init(l2vic_register_types) diff --git a/hw/intc/meson.build b/hw/intc/meson.build index 602da304b02d..35f4a7bad5ef 100644 --- a/hw/intc/meson.build +++ b/hw/intc/meson.build @@ -67,6 +67,8 @@ specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('xics_spapr.c', 'spapr_xi specific_ss.add(when: 'CONFIG_XIVE', if_true: files('xive.c')) specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_XIVE'], if_true: files('spapr_xive_kvm.c')) + +specific_ss.add(when: 'CONFIG_L2VIC', if_true: files('l2vic.c')) specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c')) specific_ss.add(when: 'CONFIG_LOONGSON_IPI_COMMON', if_true: files('loongson_ipi_common.c')) specific_ss.add(when: 'CONFIG_LOONGSON_IPI', if_true: files('loongson_ipi.c')) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 3dcf14719833..bc66260fc0cb 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -303,6 +303,10 @@ sh_intc_register(const char *s, int id, unsigned short v, int c, int m) "%s %u - sh_intc_read(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " -> 0x%lx" sh_intc_write(unsigned size, uint64_t offset, unsigned long val) "size %u 0x%" PRIx64 " <- 0x%lx" sh_intc_set(int id, int enable) "setting interrupt group %d to %d" +# l2vic.c +l2vic_reg_write(unsigned int addr, uint32_t value) "addr: 0x%03x value: 0x%08"PRIx32 +l2vic_reg_read(unsigned int addr, uint32_t value) "addr: 0x%03x value: 0x%08"PRIx32 +l2vic_delivered(int irq, int vid) "l2vic: delivered %d (vid %d)" # loongson_ipi.c loongson_ipi_read(unsigned size, uint64_t addr, uint64_t val) "size: %u addr: 0x%"PRIx64 "val: 0x%"PRIx64 diff --git a/hw/meson.build b/hw/meson.build index b91f761fe08a..6aaf469f95e4 100644 --- a/hw/meson.build +++ b/hw/meson.build @@ -66,3 +66,4 @@ subdir('sparc') subdir('sparc64') subdir('tricore') subdir('xtensa') +subdir('hexagon') diff --git a/hw/timer/meson.build b/hw/timer/meson.build index f5f9eed2d0a9..6c30bf602226 100644 --- a/hw/timer/meson.build +++ b/hw/timer/meson.build @@ -34,3 +34,5 @@ specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_timer.c')) system_ss.add(when: 'CONFIG_SIFIVE_PWM', if_true: files('sifive_pwm.c')) specific_ss.add(when: 'CONFIG_AVR_TIMER16', if_true: files('avr_timer16.c')) + +specific_ss.add(when: 'CONFIG_HEX_DSP', if_true: files('qct-qtimer.c')) diff --git a/hw/timer/qct-qtimer.c b/hw/timer/qct-qtimer.c new file mode 100644 index 000000000000..413f7249eef0 --- /dev/null +++ b/hw/timer/qct-qtimer.c @@ -0,0 +1,519 @@ +/* + * Qualcomm QCT QTimer + * + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/timer/qct-qtimer.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/timer.h"
+
+/* Common timer implementation. */
+
+#define QTIMER_MEM_SIZE_BYTES 0x1000
+#define QTIMER_MEM_REGION_SIZE_BYTES 0x1000
+#define QTIMER_DEFAULT_FREQ_HZ 19200000ULL
+#define QTMR_TIMER_INDEX_MASK (0xf000)
+#define HIGH_32(val) (0x0ffffffffULL & ((val) >> 32))
+#define LOW_32(val) (0x0ffffffffULL & (val))
+
+/*
+ * QTimer version reg:
+ *
+ *  3                   2                   1
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |     Major     |     Minor     |             Step              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+static unsigned int TIMER_VERSION = 0x20020000;
+
+/*
+ * qct_qtimer_read/write:
+ * if offset < 0x1000 read restricted registers:
+ * QCT_QTIMER_AC_CNTFREQ/CNTSR/CNTTID/CNTACR/CNTOFF_(LO/HI)/QCT_QTIMER_VERSION
+ */
+static uint64_t qct_qtimer_read(void *opaque, hwaddr offset, unsigned size)
+{
+ QCTQtimerState *s = (QCTQtimerState *)opaque;
+ uint32_t frame = 0;
+
+ switch (offset) {
+ case QCT_QTIMER_AC_CNTFRQ:
+ return s->freq;
+ case QCT_QTIMER_AC_CNTSR:
+ return s->secure;
+ case QCT_QTIMER_AC_CNTTID:
+ return s->cnttid;
+ case QCT_QTIMER_AC_CNTACR_START ... QCT_QTIMER_AC_CNTACR_END:
+ frame = (offset - QCT_QTIMER_AC_CNTACR_START) / 0x4;
+ if (frame >= s->nr_frames) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n", __func__,
+ (int)offset);
+ return 0x0;
+ }
+ return s->timer[frame].cnt_ctrl;
+ case QCT_QTIMER_VERSION:
+ return TIMER_VERSION;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n",
+ __func__, (int)offset);
+ return 0x0;
+ }
+}
+
+static void qct_qtimer_write(void *opaque, hwaddr offset, uint64_t value,
+ unsigned size)
+{
+ QCTQtimerState *s = (QCTQtimerState *)opaque;
+ uint32_t frame = 0;
+
+ if (offset < 0x1000) {
+ switch (offset) {
+ case QCT_QTIMER_AC_CNTFRQ:
+ s->freq = value;
+ return;
+ case QCT_QTIMER_AC_CNTSR:
+ if (value > 0xFF) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: QCT_QTIMER_AC_CNTSR: Bad value %x\n",
+ __func__, (int)value);
+ } else {
+ s->secure = value;
+ }
+ return;
+ case QCT_QTIMER_AC_CNTACR_START ... 
QCT_QTIMER_AC_CNTACR_END:
+ frame = (offset - QCT_QTIMER_AC_CNTACR_START) / 0x4;
+ if (frame >= s->nr_frames) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n",
+ __func__, (int)offset);
+ return;
+ }
+ s->timer[frame].cnt_ctrl = value;
+ return;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: QCT_QTIMER_AC_CNT: Bad offset %x\n", __func__,
+ (int)offset);
+ return;
+ }
+ } else {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__,
+ (int)offset);
+ }
+}
+
+static const MemoryRegionOps qct_qtimer_ops = {
+ .read = qct_qtimer_read,
+ .write = qct_qtimer_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_qct_qtimer = {
+ .name = "qct-qtimer",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]){ VMSTATE_END_OF_LIST() }
+};
+
+static void qct_qtimer_init(Object *obj)
+{
+ QCTQtimerState *s = QCT_QTIMER(obj);
+
+ object_property_add_uint32_ptr(obj, "secure", &s->secure,
+ OBJ_PROP_FLAG_READ);
+ object_property_add_uint32_ptr(obj, "frame_id", &s->frame_id,
+ OBJ_PROP_FLAG_READ);
+}
+
+static void hex_timer_update(QCTHextimerState *s)
+{
+ /* Update interrupts. */
+ int level = s->int_level && (s->control & QCT_QTIMER_CNTP_CTL_ENABLE);
+ qemu_set_irq(s->irq, level);
+}
+
+static MemTxResult hex_timer_read(void *opaque, hwaddr offset, uint64_t *data,
+ unsigned size, MemTxAttrs attrs)
+{
+ QCTQtimerState *qct_s = (QCTQtimerState *)opaque;
+ uint32_t slot_nr = (offset & 0xF000) >> 12;
+ uint32_t reg_offset = offset & 0xFFF;
+ uint32_t view = slot_nr % qct_s->nr_views;
+ uint32_t frame = slot_nr / qct_s->nr_views;
+
+ if (frame >= qct_s->nr_frames) {
+ *data = 0;
+ return MEMTX_ACCESS_ERROR;
+ }
+ QCTHextimerState *s = &qct_s->timer[frame];
+
+ /*
+ * This is the case where we have 2 views, but the second one is not
+ * implemented. 
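+ *
+ * (Editorial aside.) Worked decode, assuming nr_views == 2: each slot is
+ * a 4 KiB page, so slot_nr = offset >> 12, view = slot_nr % 2 and
+ * frame = slot_nr / 2; offset 0x3010 is slot 3, i.e. frame 1, view 1,
+ * reg_offset 0x10 (QCT_QTIMER_CNT_FREQ). A second view is implemented
+ * only when bit 2 of the frame's cnttid nibble,
+ * (cnttid >> (frame * 4)) & 0x4, is set.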
+ */ + if (view && !(qct_s->cnttid & (0x4 << (frame * 4)))) { + *data = 0; + return MEMTX_OK; + } + + switch (reg_offset) { + case (QCT_QTIMER_CNT_FREQ): /* Ticks/Second */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RFRQ)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !((s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN) || + (s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0VCTEN))) { + return MEMTX_ACCESS_ERROR; + } + + *data = s->freq; + return MEMTX_OK; + case (QCT_QTIMER_CNTP_CVAL_LO): /* TimerLoad */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + *data = LOW_32((s->cntval)); + return MEMTX_OK; + case (QCT_QTIMER_CNTP_CVAL_HI): /* TimerLoad */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + *data = HIGH_32((s->cntval)); + return MEMTX_OK; + case QCT_QTIMER_CNTPCT_LO: + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RPCT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN)) { + return MEMTX_ACCESS_ERROR; + } + + *data = LOW_32((s->cntpct + (ptimer_get_count(s->timer)))); + return MEMTX_OK; + case QCT_QTIMER_CNTPCT_HI: + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RPCT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0PCTEN)) { + return MEMTX_ACCESS_ERROR; + } + + *data = HIGH_32((s->cntpct + (ptimer_get_count(s->timer)))); + return MEMTX_OK; + case (QCT_QTIMER_CNTP_TVAL): /* CVAL - CNTP */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + *data = + (s->cntval - (HIGH_32((s->cntpct + (ptimer_get_count(s->timer)))) + + LOW_32((s->cntpct + (ptimer_get_count(s->timer)))))); + return MEMTX_OK; + case (QCT_QTIMER_CNTP_CTL): /* TimerMIS */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + *data = s->int_level; + return MEMTX_OK; + case QCT_QTIMER_CNTPL0ACR: + if (view) { + *data = 0; + } else { + *data = s->cntpl0acr; + } + return MEMTX_OK; + + case QCT_QTIMER_VERSION: + *data = TIMER_VERSION; + return MEMTX_OK; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__, + (int)offset); + *data = 0; + return MEMTX_ACCESS_ERROR; + } +} + +/* + * Reset the timer limit after settings have changed. + * May only be called from inside a ptimer transaction block. + */ +static void hex_timer_recalibrate(QCTHextimerState *s, int reload) +{ + uint64_t limit; + /* Periodic. */ + limit = s->limit; + ptimer_set_limit(s->timer, limit, reload); +} + +static MemTxResult hex_timer_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size, MemTxAttrs attrs) +{ + QCTQtimerState *qct_s = (QCTQtimerState *)opaque; + uint32_t slot_nr = (offset & 0xF000) >> 12; + uint32_t reg_offset = offset & 0xFFF; + uint32_t view = slot_nr % qct_s->nr_views; + uint32_t frame = slot_nr / qct_s->nr_views; + + if (frame >= qct_s->nr_frames) { + return MEMTX_ACCESS_ERROR; + } + QCTHextimerState *s = &qct_s->timer[frame]; + + /* + * This is the case where we have 2 views, but the second one is not + * implemented. 
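+ *
+ * (Editorial aside, illustrative guest code.) CNTPCT is exposed as two
+ * 32-bit halves, so a guest would use the usual hi/lo/hi retry loop to
+ * avoid torn reads (readl() standing in for its 32-bit MMIO load):
+ *
+ *     uint32_t hi, lo;
+ *     do {
+ *         hi = readl(frame_base + QCT_QTIMER_CNTPCT_HI);
+ *         lo = readl(frame_base + QCT_QTIMER_CNTPCT_LO);
+ *     } while (hi != readl(frame_base + QCT_QTIMER_CNTPCT_HI));
+ *     uint64_t now = ((uint64_t)hi << 32) | lo;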
+ */ + if (view && !(qct_s->cnttid & (0x4 << (frame * 4)))) { + return MEMTX_OK; + } + + switch (reg_offset) { + case (QCT_QTIMER_CNTP_CVAL_LO): /* TimerLoad */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + + s->int_level = 0; + s->cntval = value; + ptimer_transaction_begin(s->timer); + if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) { + /* + * Pause the timer if it is running. This may cause some + * inaccuracy due to rounding, but avoids other issues. + */ + ptimer_stop(s->timer); + } + hex_timer_recalibrate(s, 1); + if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) { + ptimer_run(s->timer, 0); + } + ptimer_transaction_commit(s->timer); + break; + case (QCT_QTIMER_CNTP_CVAL_HI): + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + break; + case (QCT_QTIMER_CNTP_CTL): /* Timer control register */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + ptimer_transaction_begin(s->timer); + if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) { + /* + * Pause the timer if it is running. This may cause some + * inaccuracy due to rounding, but avoids other issues. + */ + ptimer_stop(s->timer); + } + s->control = value; + hex_timer_recalibrate(s, s->control & QCT_QTIMER_CNTP_CTL_ENABLE); + ptimer_set_freq(s->timer, s->freq); + ptimer_set_period(s->timer, 1); + if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) { + ptimer_run(s->timer, 0); + } + ptimer_transaction_commit(s->timer); + break; + case (QCT_QTIMER_CNTP_TVAL): /* CVAL - CNTP */ + if (!(s->cnt_ctrl & QCT_QTIMER_AC_CNTACR_RWPT)) { + return MEMTX_ACCESS_ERROR; + } + + if (view && !(s->cntpl0acr & QCT_QTIMER_CNTPL0ACR_PL0CTEN)) { + return MEMTX_ACCESS_ERROR; + } + + ptimer_transaction_begin(s->timer); + if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) { + /* + * Pause the timer if it is running. This may cause some + * inaccuracy due to rounding, but avoids other issues. 
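+ *
+ * (Editorial aside.) CNTP_TVAL is a relative count: the assignment below
+ * arms the compare at CVAL = CNTPCT + value, so with cntpct == 1000 a
+ * write of 500 fires the timer at count 1500, i.e. 500 ticks from now;
+ * reads return CVAL - CNTPCT, counting down toward the match.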
+ */ + ptimer_stop(s->timer); + } + s->cntval = s->cntpct + value; + ptimer_set_freq(s->timer, s->freq); + ptimer_set_period(s->timer, 1); + if (s->control & QCT_QTIMER_CNTP_CTL_ENABLE) { + ptimer_run(s->timer, 0); + } + ptimer_transaction_commit(s->timer); + break; + case QCT_QTIMER_CNTPL0ACR: + if (view) { + break; + } + + s->cntpl0acr = value; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset %x\n", __func__, + (int)offset); + return MEMTX_ACCESS_ERROR; + } + hex_timer_update(s); + return MEMTX_OK; +} + +static void hex_timer_tick(void *opaque) +{ + QCTHextimerState *s = (QCTHextimerState *)opaque; + if ((s->cntpct >= s->cntval) && (s->int_level != 1)) { + s->int_level = 1; + hex_timer_update(s); + return; + } + s->cntpct += s->limit; +} + +static const MemoryRegionOps hex_timer_ops = { + .read_with_attrs = hex_timer_read, + .write_with_attrs = hex_timer_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_hex_timer = { + .name = "hex_timer", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]){ VMSTATE_UINT32(control, QCTHextimerState), + VMSTATE_UINT32(cnt_ctrl, QCTHextimerState), + VMSTATE_UINT64(cntpct, QCTHextimerState), + VMSTATE_UINT64(cntval, QCTHextimerState), + VMSTATE_UINT64(limit, QCTHextimerState), + VMSTATE_UINT32(int_level, QCTHextimerState), + VMSTATE_PTIMER(timer, QCTHextimerState), + VMSTATE_END_OF_LIST() } +}; + +static void qct_qtimer_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + QCTQtimerState *s = QCT_QTIMER(dev); + unsigned int i; + + if (s->nr_frames > QCT_QTIMER_TIMER_FRAME_ELTS) { + error_setg(errp, "nr_frames too high"); + return; + } + + if (s->nr_views > QCT_QTIMER_TIMER_VIEW_ELTS) { + error_setg(errp, "nr_views too high"); + return; + } + + memory_region_init_io(&s->iomem, OBJECT(sbd), &qct_qtimer_ops, s, "qutimer", + QTIMER_MEM_SIZE_BYTES); + sysbus_init_mmio(sbd, &s->iomem); + + memory_region_init_io(&s->view_iomem, OBJECT(sbd), &hex_timer_ops, s, + "qutimer_views", + QTIMER_MEM_SIZE_BYTES * s->nr_frames * s->nr_views); + sysbus_init_mmio(sbd, &s->view_iomem); + + for (i = 0; i < s->nr_frames; i++) { + s->timer[i].limit = 1; + s->timer[i].control = QCT_QTIMER_CNTP_CTL_ENABLE; + s->timer[i].cnt_ctrl = + (QCT_QTIMER_AC_CNTACR_RWPT | QCT_QTIMER_AC_CNTACR_RWVT | + QCT_QTIMER_AC_CNTACR_RVOFF | QCT_QTIMER_AC_CNTACR_RFRQ | + QCT_QTIMER_AC_CNTACR_RPVCT | QCT_QTIMER_AC_CNTACR_RPCT); + s->timer[i].qtimer = s; + s->timer[i].freq = QTIMER_DEFAULT_FREQ_HZ; + + s->secure |= (1 << i); + + sysbus_init_irq(sbd, &(s->timer[i].irq)); + + (s->timer[i]).timer = + ptimer_init(hex_timer_tick, &s->timer[i], PTIMER_POLICY_LEGACY); + vmstate_register(NULL, VMSTATE_INSTANCE_ID_ANY, &vmstate_hex_timer, + &s->timer[i]); + } +} + +static const Property qct_qtimer_properties[] = { + DEFINE_PROP_UINT32("freq", QCTQtimerState, freq, QTIMER_DEFAULT_FREQ_HZ), + DEFINE_PROP_UINT32("nr_frames", QCTQtimerState, nr_frames, 2), + DEFINE_PROP_UINT32("nr_views", QCTQtimerState, nr_views, 1), + DEFINE_PROP_UINT32("cnttid", QCTQtimerState, cnttid, 0x11), +}; + +static void qct_qtimer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + + device_class_set_props(k, qct_qtimer_properties); + k->realize = qct_qtimer_realize; + k->vmsd = &vmstate_qct_qtimer; +} + +static const TypeInfo qct_qtimer_info = { + .name = TYPE_QCT_QTIMER, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(QCTQtimerState), + .instance_init = qct_qtimer_init, + 
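/*
+ * (Editorial aside, illustrative only; addresses and IRQ wiring are
+ * hypothetical.) Board code would typically instantiate this device along
+ * the following lines:
+ *
+ *     DeviceState *dev = qdev_new(TYPE_QCT_QTIMER);
+ *     qdev_prop_set_uint32(dev, "nr_frames", 2);
+ *     qdev_prop_set_uint32(dev, "nr_views", 1);
+ *     sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ *     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, csr_base);
+ *     sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, frame_base);
+ *     sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, l2vic_input_irq);
+ */
+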
.class_init = qct_qtimer_class_init, +}; + +static void qct_qtimer_register_types(void) +{ + type_register_static(&qct_qtimer_info); +} + +type_init(qct_qtimer_register_types) diff --git a/include/hw/hexagon/hexagon.h b/include/hw/hexagon/hexagon.h new file mode 100644 index 000000000000..ce356325fcd7 --- /dev/null +++ b/include/hw/hexagon/hexagon.h @@ -0,0 +1,150 @@ +/* + * Hexagon Baseboard System emulation. + * + * Copyright (c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#ifndef HW_HEXAGON_H +#define HW_HEXAGON_H + +#include "exec/memory.h" + +struct hexagon_board_boot_info { + uint64_t ram_size; + const char *kernel_filename; + uint32_t kernel_elf_flags; +}; + +typedef enum { + unknown_rev = 0, + v66_rev = 0xa666, + v67_rev = 0x2667, + v68_rev = 0x8d68, + v69_rev = 0x8c69, + v71_rev = 0x8c71, + v73_rev = 0x8c73, + v73m_rev = 0xcc73, +} Rev_t; +#define HEXAGON_LATEST_REV v73 +#define HEXAGON_LATEST_REV_UPPER V73 + +/* + * Config table address bases represent bits [35:16]. + */ +#define HEXAGON_CFG_ADDR_BASE(addr) (((addr) >> 16) & 0x0fffff) + +#define HEXAGON_CFGSPACE_ENTRIES (128) + +typedef union { + struct { + /* Base address of L2TCM space */ + uint32_t l2tcm_base; + uint32_t reserved0; + /* Base address of subsystem space */ + uint32_t subsystem_base; + /* Base address of ETM space */ + uint32_t etm_base; + /* Base address of L2 configuration space */ + uint32_t l2cfg_base; + uint32_t reserved1; + /* Base address of L1S */ + uint32_t l1s0_base; + /* Base address of AXI2 */ + uint32_t axi2_lowaddr; + /* Base address of streamer base */ + uint32_t streamer_base; + uint32_t reserved2; + /* Base address of fast L2VIC */ + uint32_t fastl2vic_base; + /* Number of entries in JTLB */ + uint32_t jtlb_size_entries; + /* Coprocessor type */ + uint32_t coproc_present; + /* Number of extension execution contexts available */ + uint32_t ext_contexts; + /* Base address of Hexagon Vector Tightly Coupled Memory (VTCM) */ + uint32_t vtcm_base; + /* Size of VTCM (in KB) */ + uint32_t vtcm_size_kb; + /* L2 tag size */ + uint32_t l2tag_size; + /* Amount of physical L2 memory in released version */ + uint32_t l2ecomem_size; + /* Hardware threads available on the core */ + uint32_t thread_enable_mask; + /* Base address of the ECC registers */ + uint32_t eccreg_base; + /* L2 line size */ + uint32_t l2line_size; + /* Small Core processor (also implies audio extension) */ + uint32_t tiny_core; + /* Size of L2TCM */ + uint32_t l2itcm_size; + /* Base address of L2-ITCM */ + uint32_t l2itcm_base; + uint32_t reserved3; + /* DTM is present */ + uint32_t dtm_present; + /* Version of the DMA */ + uint32_t dma_version; + /* Native HVX vector length in log of bytes */ + uint32_t hvx_vec_log_length; + /* Core ID of the multi-core */ + uint32_t core_id; + /* Number of multi-core cores */ + uint32_t core_count; + uint32_t coproc2_reg0; + uint32_t coproc2_reg1; + /* Supported HVX vector length */ + uint32_t v2x_mode; + uint32_t coproc2_reg2; + uint32_t coproc2_reg3; + uint32_t coproc2_reg4; + uint32_t coproc2_reg5; + uint32_t coproc2_reg6; + uint32_t coproc2_reg7; + /* Voltage droop mitigation technique parameter */ + uint32_t acd_preset; + /* Voltage droop mitigation technique parameter */ + uint32_t mnd_preset; + /* L1 data cache size (in KB) */ + uint32_t l1d_size_kb; + /* L1 instruction cache size in (KB) */ + uint32_t l1i_size_kb; + /* L1 data cache write policy: see HexagonL1WritePolicy */ + uint32_t l1d_write_policy; + /* VTCM bank width 
*/ + uint32_t vtcm_bank_width; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; + uint32_t coproc2_cvt_mpy_size; + uint32_t consistency_domain; + uint32_t capacity_domain; + uint32_t axi3_lowaddr; + uint32_t coproc2_int8_subcolumns; + uint32_t corecfg_present; + uint32_t coproc2_fp16_acc_exp; + uint32_t AXIM2_secondary_base; + }; + uint32_t raw[HEXAGON_CFGSPACE_ENTRIES]; +} hexagon_config_table; + +typedef struct { + /* Base address of config table */ + uint32_t cfgbase; + /* Size of L2 TCM */ + uint32_t l2tcm_size; + /* Base address of L2VIC */ + uint32_t l2vic_base; + /* Size of L2VIC region */ + uint32_t l2vic_size; + /* QTimer csr base */ + uint32_t csr_base; + uint32_t qtmr_region; + hexagon_config_table cfgtable; +} hexagon_machine_config; + +#endif diff --git a/include/hw/hexagon/virt.h b/include/hw/hexagon/virt.h new file mode 100644 index 000000000000..0c165a786d30 --- /dev/null +++ b/include/hw/hexagon/virt.h @@ -0,0 +1,41 @@ +/* + * Definitions for hexagon virt board. + * + * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_HEXAGONVIRT_H +#define HW_HEXAGONVIRT_H + +#include "hw/boards.h" +#include "target/hexagon/cpu.h" + +struct HexagonVirtMachineState { + /*< private >*/ + MachineState parent_obj; + + int fdt_size; + MemoryRegion *sys; + MemoryRegion cfgtable; + MemoryRegion ram; + MemoryRegion tcm; + MemoryRegion vtcm; + DeviceState *l2vic; +}; + +void hexagon_load_fdt(const struct HexagonVirtMachineState *vms); + +enum { + VIRT_UART0, + VIRT_QTMR0, + VIRT_QTMR1, + VIRT_GPT, + VIRT_MMIO, + VIRT_FDT, +}; + +#define TYPE_HEXAGON_VIRT_MACHINE MACHINE_TYPE_NAME("virt") +OBJECT_DECLARE_SIMPLE_TYPE(HexagonVirtMachineState, HEXAGON_VIRT_MACHINE) + +#endif /* HW_HEXAGONVIRT_H */ diff --git a/include/hw/intc/l2vic.h b/include/hw/intc/l2vic.h new file mode 100644 index 000000000000..ed8ccf33b1f8 --- /dev/null +++ b/include/hw/intc/l2vic.h @@ -0,0 +1,37 @@ +/* + * QEMU L2VIC Interrupt Controller + * + * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#define L2VIC_VID_GRP_0 0x0 /* Read */ +#define L2VIC_VID_GRP_1 0x4 /* Read */ +#define L2VIC_VID_GRP_2 0x8 /* Read */ +#define L2VIC_VID_GRP_3 0xC /* Read */ +#define L2VIC_VID_0 0x10 /* Read SOFTWARE DEFINED */ +#define L2VIC_VID_1 0x14 /* Read SOFTWARE DEFINED NOT YET USED */ +#define L2VIC_INT_ENABLEn 0x100 /* Read/Write */ +#define L2VIC_INT_ENABLE_CLEARn 0x180 /* Write */ +#define L2VIC_INT_ENABLE_SETn 0x200 /* Write */ +#define L2VIC_INT_TYPEn 0x280 /* Read/Write */ +#define L2VIC_INT_STATUSn 0x380 /* Read */ +#define L2VIC_INT_CLEARn 0x400 /* Write */ +#define L2VIC_SOFT_INTn 0x480 /* Write */ +#define L2VIC_INT_PENDINGn 0x500 /* Read */ +#define L2VIC_INT_GRPn_0 0x600 /* Read/Write */ +#define L2VIC_INT_GRPn_1 0x680 /* Read/Write */ +#define L2VIC_INT_GRPn_2 0x700 /* Read/Write */ +#define L2VIC_INT_GRPn_3 0x780 /* Read/Write */ + +#define L2VIC_INTERRUPT_MAX 1024 +#define L2VIC_CIAD_INSTRUCTION -1 +/* + * Note about l2vic groups: + * Each interrupt to L2VIC can be configured to associate with one of + * four groups. 
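+ * (Editorial aside.) Following the per-group mapping listed below: an
+ * input configured for group 2 is latched into VID group register 2, read
+ * back at L2VIC_VID_GRP_2 (offset 0x8), and delivered on core IRQ4, so the
+ * handler's SSR cause code is 0xC2 + group = 0xC4.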
+ * Group 0 interrupts go to IRQ2 via VID 0 (SSR: 0xC2, the default) + * Group 1 interrupts go to IRQ3 via VID 1 (SSR: 0xC3) + * Group 2 interrupts go to IRQ4 via VID 2 (SSR: 0xC4) + * Group 3 interrupts go to IRQ5 via VID 3 (SSR: 0xC5) + */ diff --git a/include/hw/timer/qct-qtimer.h b/include/hw/timer/qct-qtimer.h new file mode 100644 index 000000000000..90f7981ccf8d --- /dev/null +++ b/include/hw/timer/qct-qtimer.h @@ -0,0 +1,85 @@ +/* + * Qualcomm QCT QTimer + * + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef TIMER_QCT_QTIMER_H +#define TIMER_QCT_QTIMER_H + +#include "hw/ptimer.h" +#include "hw/sysbus.h" + +#define TYPE_QCT_QTIMER "qct-qtimer" +#define TYPE_QCT_HEXTIMER "qct-hextimer" +OBJECT_DECLARE_SIMPLE_TYPE(QCTQtimerState, QCT_QTIMER) +OBJECT_DECLARE_SIMPLE_TYPE(QCTHextimerState, QCT_HEXTIMER) + +struct QCTHextimerState { + QCTQtimerState *qtimer; + ptimer_state *timer; + uint64_t cntval; /* + * Physical timer compare value interrupt when cntpct > + * cntval + */ + uint64_t cntpct; /* Physical counter */ + uint32_t control; + uint32_t cnt_ctrl; + uint32_t cntpl0acr; + uint64_t limit; + uint32_t freq; + uint32_t int_level; + qemu_irq irq; +}; + +#define QCT_QTIMER_TIMER_FRAME_ELTS (8) +#define QCT_QTIMER_TIMER_VIEW_ELTS (2) +struct QCTQtimerState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + MemoryRegion view_iomem; + uint32_t secure; + struct QCTHextimerState timer[QCT_QTIMER_TIMER_FRAME_ELTS]; + uint32_t frame_id; + uint32_t freq; + uint32_t nr_frames; + uint32_t nr_views; + uint32_t cnttid; +}; + +#define QCT_QTIMER_AC_CNTFRQ (0x000) +#define QCT_QTIMER_AC_CNTSR (0x004) +#define QCT_QTIMER_AC_CNTSR_NSN_1 (1 << 0) +#define QCT_QTIMER_AC_CNTSR_NSN_2 (1 << 1) +#define QCT_QTIMER_AC_CNTSR_NSN_3 (1 << 2) +#define QCT_QTIMER_AC_CNTTID (0x08) +#define QCT_QTIMER_AC_CNTACR_0 (0x40) +#define QCT_QTIMER_AC_CNTACR_1 (0x44) +#define QCT_QTIMER_AC_CNTACR_2 (0x48) +#define QCT_QTIMER_AC_CNTACR_RWPT (1 << 5) /* R/W of CNTP_* regs */ +#define QCT_QTIMER_AC_CNTACR_RWVT (1 << 4) /* R/W of CNTV_* regs */ +#define QCT_QTIMER_AC_CNTACR_RVOFF (1 << 3) /* R/W of CNTVOFF register */ +#define QCT_QTIMER_AC_CNTACR_RFRQ (1 << 2) /* R/W of CNTFRQ register */ +#define QCT_QTIMER_AC_CNTACR_RPVCT (1 << 1) /* R/W of CNTVCT register */ +#define QCT_QTIMER_AC_CNTACR_RPCT (1 << 0) /* R/W of CNTPCT register */ +#define QCT_QTIMER_VERSION (0x0fd0) +#define QCT_QTIMER_CNTPCT_LO (0x000) +#define QCT_QTIMER_CNTPCT_HI (0x004) +#define QCT_QTIMER_CNT_FREQ (0x010) +#define QCT_QTIMER_CNTPL0ACR (0x014) +#define QCT_QTIMER_CNTPL0ACR_PL0CTEN (1 << 9) +#define QCT_QTIMER_CNTPL0ACR_PL0TVEN (1 << 8) +#define QCT_QTIMER_CNTPL0ACR_PL0VCTEN (1 << 1) +#define QCT_QTIMER_CNTPL0ACR_PL0PCTEN (1 << 0) +#define QCT_QTIMER_CNTP_CVAL_LO (0x020) +#define QCT_QTIMER_CNTP_CVAL_HI (0x024) +#define QCT_QTIMER_CNTP_TVAL (0x028) +#define QCT_QTIMER_CNTP_CTL (0x02c) +#define QCT_QTIMER_CNTP_CTL_ISTAT (1 << 2) +#define QCT_QTIMER_CNTP_CTL_INTEN (1 << 1) +#define QCT_QTIMER_CNTP_CTL_ENABLE (1 << 0) +#define QCT_QTIMER_AC_CNTACR_START 0x40 +#define QCT_QTIMER_AC_CNTACR_END 0x5C + +#endif /* TIMER_QCT_QTIMER_H */ diff --git a/include/semihosting/common-semi.h b/include/semihosting/common-semi.h index 0a91db7c4149..58dfb99d7a5b 100644 --- a/include/semihosting/common-semi.h +++ b/include/semihosting/common-semi.h @@ -34,6 +34,7 @@ #ifndef COMMON_SEMI_H #define COMMON_SEMI_H +void common_semi_cb(CPUState *cs, uint64_t ret, int err); void 
do_common_semihosting(CPUState *cs); #endif /* COMMON_SEMI_H */ diff --git a/include/semihosting/semihost.h b/include/semihosting/semihost.h index 97d2a2ba996d..6e0776610651 100644 --- a/include/semihosting/semihost.h +++ b/include/semihosting/semihost.h @@ -51,6 +51,11 @@ static inline const char *semihosting_get_cmdline(void) { return NULL; }
+
+static inline const char *semihosting_get_usefs(void)
+{
+ return NULL;
+}
 #else /* !CONFIG_USER_ONLY */ /** * semihosting_enabled: @@ -63,6 +68,7 @@ SemihostingTarget semihosting_get_target(void); const char *semihosting_get_arg(int i); int semihosting_get_argc(void); const char *semihosting_get_cmdline(void); +const char *semihosting_get_usefs(void); void semihosting_arg_fallback(const char *file, const char *cmd); /* for vl.c hooks */ void qemu_semihosting_enable(void); diff --git a/include/semihosting/syscalls.h b/include/semihosting/syscalls.h index 6627c45fb281..dec2ee0ad4ac 100644 --- a/include/semihosting/syscalls.h +++ b/include/semihosting/syscalls.h @@ -75,4 +75,6 @@ void semihost_sys_gettimeofday(CPUState *cs, gdb_syscall_complete_cb complete, void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete, int fd, GIOCondition cond, int timeout); +void semihost_sys_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete, + int fd, off_t len); #endif /* SEMIHOSTING_SYSCALLS_H */ diff --git a/qapi/machine.json b/qapi/machine.json index a6b8795b09ed..a7070bad4d52 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -33,7 +33,7 @@ # Since: 3.0 ## { 'enum' : 'SysEmuTarget', - 'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'hppa', 'i386', + 'data' : [ 'aarch64', 'alpha', 'arm', 'avr', 'hexagon', 'hppa', 'i386', 'loongarch64', 'm68k', 'microblaze', 'microblazeel', 'mips', 'mips64', 'mips64el', 'mipsel', 'or1k', 'ppc', 'ppc64', 'riscv32', 'riscv64', 'rx', 's390x', 'sh4', diff --git a/qemu-options.hx b/qemu-options.hx index dc694a99a30a..888b3092bef7 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -5110,7 +5110,7 @@ ERST DEF("semihosting", 0, QEMU_OPTION_semihosting, "-semihosting semihosting mode\n", QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | - QEMU_ARCH_MIPS | QEMU_ARCH_RISCV) + QEMU_ARCH_MIPS | QEMU_ARCH_RISCV | QEMU_ARCH_HEXAGON) SRST ``-semihosting`` Enable :ref:`Semihosting` mode (ARM, M68K, Xtensa, MIPS, RISC-V only). @@ -5126,11 +5126,11 @@ DEF("semihosting-config", HAS_ARG, QEMU_OPTION_semihosting_config, "-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]\n" \ " semihosting configuration\n", QEMU_ARCH_ARM | QEMU_ARCH_M68K | QEMU_ARCH_XTENSA | -QEMU_ARCH_MIPS | QEMU_ARCH_RISCV) +QEMU_ARCH_MIPS | QEMU_ARCH_RISCV | QEMU_ARCH_HEXAGON) SRST -``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,arg=str[,...]]`` - Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V - only). +``-semihosting-config [enable=on|off][,target=native|gdb|auto][,chardev=id][,userspace=on|off][,usefs=path][,arg=str[,...]]`` + Enable and configure :ref:`Semihosting` (ARM, M68K, Xtensa, MIPS, RISC-V,
+ Hexagon only).
 .. warning:: Note that this allows guest direct access to the host filesystem, so @@ -5152,6 +5152,11 @@ SRST only be used if all guest code is trusted (for example, in bare-metal test case code).
+ ``usefs=path``
+ Sets a fallback directory for the ``open`` semihosting call. If the
+ requested file is not found, QEMU retries the open with the given
+ directory prepended. 
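+
+ For example (paths are hypothetical), with
+ ``-semihosting-config enable=on,usefs=/srv/hexagon-root`` a guest
+ ``open("lib/cal.bin", ...)`` that fails with ``ENOENT`` is retried as
+ ``/srv/hexagon-root/lib/cal.bin``.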
+ ``arg=str1,arg=str2,...`` Allows the user to pass input arguments, and can be used multiple times to build up a list. The old-style diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c index 86e5260e504b..e4825a866718 100644 --- a/semihosting/arm-compat-semi.c +++ b/semihosting/arm-compat-semi.c @@ -85,7 +85,30 @@ #define O_BINARY 0 #endif -static int gdb_open_modeflags[12] = { +struct semihosting_opt_callbacks { + void (*set_err)(CPUState *cs, target_ulong err); + void (*prepare_for_read)(CPUState *cs, target_ulong fd, target_ulong buf, + target_ulong len); +} opt_callbacks; + +#define SEMIHOSTING_REGISTER_OPT_CALLBACKS(callbacks) \ + struct semihosting_opt_callbacks opt_callbacks = callbacks; + +#define CALL_OPT_CALLBACK(FN, ARGS...) do { \ + if (opt_callbacks.FN) { \ + opt_callbacks.FN(ARGS); \ + } \ +} while (0) + +#include "common-semi-target.h" + +#ifdef SEMIHOSTING_EXT_OPEN_MODES +#define GDB_OPEN_MODES_NR 14 +#else +#define GDB_OPEN_MODES_NR 12 +#endif + +static int gdb_open_modeflags[GDB_OPEN_MODES_NR] = { GDB_O_RDONLY, GDB_O_RDONLY, GDB_O_RDWR, @@ -98,6 +121,10 @@ static int gdb_open_modeflags[12] = { GDB_O_WRONLY | GDB_O_CREAT | GDB_O_APPEND, GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND, GDB_O_RDWR | GDB_O_CREAT | GDB_O_APPEND, +#ifdef SEMIHOSTING_EXT_OPEN_MODES + GDB_O_RDWR | GDB_O_CREAT, + GDB_O_RDWR | GDB_O_CREAT | GDB_O_EXCL, +#endif }; #ifndef CONFIG_USER_ONLY @@ -180,17 +207,10 @@ static LayoutInfo common_semi_find_bases(CPUState *cs) * error indication (0 on success, non-0 for error) which the caller * should check. */ - -#define GET_ARG(n) do { \ - if (is_64bit_semihosting(env)) { \ - if (get_user_u64(arg ## n, args + (n) * 8)) { \ - goto do_fault; \ - } \ - } else { \ - if (get_user_u32(arg ## n, args + (n) * 4)) { \ - goto do_fault; \ - } \ - } \ +#define GET_ARG(n) do { \ + if (common_semi_read_arg_word(env, &arg ## n, args, n)) { \ + goto do_fault; \ + } \ } while (0) #define SET_ARG(n, val) \ @@ -223,7 +243,7 @@ static inline uint32_t get_swi_errno(CPUState *cs) #endif } -static void common_semi_cb(CPUState *cs, uint64_t ret, int err) +void common_semi_cb(CPUState *cs, uint64_t ret, int err) { if (err) { #ifdef CONFIG_USER_ONLY @@ -231,6 +251,7 @@ static void common_semi_cb(CPUState *cs, uint64_t ret, int err) ts->swi_errno = err; #else syscall_err = err; + CALL_OPT_CALLBACK(set_err, cs, err); #endif } common_semi_set_ret(cs, ret); @@ -386,7 +407,7 @@ void do_common_semihosting(CPUState *cs) if (!s) { goto do_fault; } - if (arg1 >= 12) { + if (arg1 >= GDB_OPEN_MODES_NR) { unlock_user(s, arg0, 0); common_semi_cb(cs, -1, EINVAL); break; @@ -466,6 +487,7 @@ void do_common_semihosting(CPUState *cs) GET_ARG(0); GET_ARG(1); GET_ARG(2); + CALL_OPT_CALLBACK(prepare_for_read, cs, arg0, arg1, arg2); semihost_sys_read(cs, common_semi_rw_cb, arg0, arg1, arg2); break; diff --git a/semihosting/config.c b/semihosting/config.c index 56283b5c3c38..a64a8dfd27da 100644 --- a/semihosting/config.c +++ b/semihosting/config.c @@ -46,6 +46,9 @@ QemuOptsList qemu_semihosting_config_opts = { }, { .name = "arg", .type = QEMU_OPT_STRING, + }, { + .name = "usefs", + .type = QEMU_OPT_STRING, }, { /* end of list */ } }, @@ -58,6 +61,7 @@ typedef struct SemihostingConfig { char **argv; int argc; const char *cmdline; /* concatenated argv */ + const char *usefs; } SemihostingConfig; static SemihostingConfig semihosting; @@ -94,6 +98,11 @@ const char *semihosting_get_cmdline(void) return semihosting.cmdline; } +const char *semihosting_get_usefs(void) +{ + return semihosting.usefs; 
+} + static int add_semihosting_arg(void *opaque, const char *name, const char *val, Error **errp) @@ -144,6 +153,8 @@ int qemu_semihosting_config_options(const char *optstr) true); semihosting.userspace_enabled = qemu_opt_get_bool(opts, "userspace", false); + semihosting.usefs = qemu_opt_get(opts, "usefs"); + const char *target = qemu_opt_get(opts, "target"); /* setup of chardev is deferred until they are initialised */ semihost_chardev = qemu_opt_get(opts, "chardev"); diff --git a/semihosting/guestfd.c b/semihosting/guestfd.c index d3241434c516..4d846f4e5d10 100644 --- a/semihosting/guestfd.c +++ b/semihosting/guestfd.c @@ -23,6 +23,18 @@ GuestFD console_in_gf; GuestFD console_out_gf; #endif +static void semihosting_use_stdio(void) +{ + console_in_gf.type = GuestFDHost; + console_in_gf.hostfd = 0; + console_out_gf.type = GuestFDHost; + console_out_gf.hostfd = 1; + guestfd_array = g_array_set_size(guestfd_array, 3); + associate_guestfd(0, 0); + associate_guestfd(1, 1); + associate_guestfd(2, 2); +} + void qemu_semihosting_guestfd_init(void) { /* New entries zero-initialized, i.e. type GuestFDUnused */ @@ -36,8 +48,12 @@ void qemu_semihosting_guestfd_init(void) console_out_gf.type = GuestFDGDB; console_out_gf.hostfd = 2; } else { +#ifdef CONFIG_SEMIHOSTING_USE_STDIO + semihosting_use_stdio(); +#else console_in_gf.type = GuestFDConsole; console_out_gf.type = GuestFDConsole; +#endif } #else /* Otherwise, the stdio file descriptors apply. */ diff --git a/semihosting/syscalls.c b/semihosting/syscalls.c index f6451d9bb0e6..e790c79efe85 100644 --- a/semihosting/syscalls.c +++ b/semihosting/syscalls.c @@ -13,6 +13,7 @@ #include "semihosting/guestfd.h" #include "semihosting/syscalls.h" #include "semihosting/console.h" +#include "semihosting/semihost.h" #ifdef CONFIG_USER_ONLY #include "qemu.h" #else @@ -261,7 +262,8 @@ static void host_open(CPUState *cs, gdb_syscall_complete_cb complete, { CPUArchState *env G_GNUC_UNUSED = cpu_env(cs); char *p; - int ret, host_flags = O_BINARY; + int ret, err, host_flags = O_BINARY; + const char *usefs = semihosting_get_usefs(); ret = validate_lock_user_string(&p, cs, fname, fname_len); if (ret < 0) { @@ -287,9 +289,17 @@ static void host_open(CPUState *cs, gdb_syscall_complete_cb complete, } ret = open(p, host_flags, mode); + err = errno; + if (ret < 0 && err == ENOENT && usefs) { + g_autoptr(GString) usefs_fname = g_string_new(NULL); + g_string_append_printf(usefs_fname, "%s/%s", usefs, p); + ret = open(usefs_fname->str, host_flags, mode); + err = errno; + } + if (ret < 0) { qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to open %s\n", __func__, p); - complete(cs, -1, errno); + complete(cs, -1, err); } else { int guestfd = alloc_guestfd(); associate_guestfd(guestfd, ret); @@ -542,6 +552,13 @@ static void host_poll_one(CPUState *cs, gdb_syscall_complete_cb complete, } #endif +static void host_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete, + GuestFD *gf, off_t len) +{ + int err = ftruncate(gf->hostfd, len); + complete(cs, err, err < 0 ? errno : 0); +} + /* * Static file semihosting syscall implementations. 
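+ *
+ * (Editorial aside, illustrative only.) A target's semihosting handler
+ * would typically dispatch to the ftruncate helper above with the common
+ * completion callback, e.g.:
+ *
+ *     semihost_sys_ftruncate(cs, common_semi_cb, fd, len);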
*/ @@ -983,3 +1000,22 @@ void semihost_sys_poll_one(CPUState *cs, gdb_syscall_complete_cb complete, } } #endif
+
+void semihost_sys_ftruncate(CPUState *cs, gdb_syscall_complete_cb complete,
+ int fd, off_t len)
+{
+ GuestFD *gf = get_guestfd(fd);
+ if (!gf) {
+ complete(cs, -1, EBADF);
+ return;
+ }
+
+ switch (gf->type) {
+ case GuestFDHost:
+ host_ftruncate(cs, complete, gf, len);
+ break;
+ default:
+ fprintf(stderr,
+ "ftruncate call not implemented for this semihosting mode.\n");
+ g_assert_not_reached();
+ }
+} diff --git a/target/Kconfig b/target/Kconfig index d0c7b59d9c71..37781146b9bb 100644 --- a/target/Kconfig +++ b/target/Kconfig @@ -16,6 +16,7 @@ source sh4/Kconfig source sparc/Kconfig source tricore/Kconfig source xtensa/Kconfig +source hexagon/Kconfig config TARGET_BIG_ENDIAN bool diff --git a/target/arm/common-semi-target.h b/target/arm/common-semi-target.h index da51f2d7f540..69429a45c652 100644 --- a/target/arm/common-semi-target.h +++ b/target/arm/common-semi-target.h @@ -12,6 +12,17 @@ #include "target/arm/cpu-qom.h"
+static inline bool common_semi_read_arg_word(CPUArchState *env,
+ target_ulong *save_to,
+ target_ulong args_addr,
+ int arg_num)
+{
+ if (is_64bit_semihosting(env)) {
+ return get_user_u64(*save_to, args_addr + (arg_num) * 8);
+ }
+ return get_user_u32(*save_to, args_addr + (arg_num) * 4);
+}
+ static inline target_ulong common_semi_arg(CPUState *cs, int argno) { ARMCPU *cpu = ARM_CPU(cs); diff --git a/target/hexagon/Kconfig b/target/hexagon/Kconfig new file mode 100644 index 000000000000..7e556f350633 --- /dev/null +++ b/target/hexagon/Kconfig @@ -0,0 +1,2 @@ +config HEXAGON + bool diff --git a/target/hexagon/arch.c b/target/hexagon/arch.c index d053d6848715..87c2f6a53f6c 100644 --- a/target/hexagon/arch.c +++ b/target/hexagon/arch.c @@ -208,6 +208,11 @@ void arch_fpop_start(CPUHexagonState *env) * model it in qemu user mode. */ #define RAISE_FP_EXCEPTION do {} while (0) +#else + /* + * To be implemented. + */ +#define RAISE_FP_EXCEPTION do { g_assert_not_reached(); } while (0) #endif #define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \ diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 9e3a05f88281..e6523a739b10 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -19,20 +19,41 @@ DEF_ATTRIB(AA_DUMMY, "Dummy Zeroth Attribute", "", "") /* Misc */ +DEF_ATTRIB(FAKEINSN, "Not a real instruction", "", "") +DEF_ATTRIB(MAPPING, "Not real -- asm mapped", "", "") +DEF_ATTRIB(CONDMAPPING, "Not real -- mapped based on values", "", "") DEF_ATTRIB(EXTENSION, "Extension instruction", "", "") +DEF_ATTRIB(SHARED_EXTENSION, "Shared extension instruction", "", "") +DEF_ATTRIB(CABAC,
+ "Cabac Instruction. 
Used in conjunction with QDSP6_CABAC_PRESENT", "",
+ "")
+DEF_ATTRIB(EXPERIMENTAL, "This may not work correctly; not supported by RTL.",
+ "", "")
 DEF_ATTRIB(PRIV, "Not available in user or guest mode", "", "") DEF_ATTRIB(GUEST, "Not available in user mode", "", "") DEF_ATTRIB(FPOP, "Floating Point Operation", "", "")
+DEF_ATTRIB(FPDOUBLE, "Double-precision Floating Point Operation", "", "")
+DEF_ATTRIB(FPSINGLE, "Single-precision Floating Point Operation", "", "")
+DEF_ATTRIB(SFMAKE, "Single Float Make", "", "")
+DEF_ATTRIB(DFMAKE, "Double Float Make", "", "")
+
+DEF_ATTRIB(NO_TIMING_LOG, "Does not get logged to the timing model", "", "")
 DEF_ATTRIB(EXTENDABLE, "Immediate may be extended", "", "")
+DEF_ATTRIB(EXT_UPPER_IMMED, "Extend upper case immediate", "", "")
+DEF_ATTRIB(EXT_LOWER_IMMED, "Extend lower case immediate", "", "")
+DEF_ATTRIB(MUST_EXTEND, "Immediate must be extended", "", "")
+DEF_ATTRIB(NA_NT, "Non-Allocating Non-Temporal instruction", "", "")
+DEF_ATTRIB(INVPRED, "The predicate is inverted for true/false sense", "", "")
 DEF_ATTRIB(ARCHV2, "V2 architecture", "", "") DEF_ATTRIB(ARCHV3, "V3 architecture", "", "") DEF_ATTRIB(ARCHV4, "V4 architecture", "", "") DEF_ATTRIB(ARCHV5, "V5 architecture", "", "")
+DEF_ATTRIB(PACKED, "Packable instruction", "", "")
 DEF_ATTRIB(SUBINSN, "sub-instruction", "", "") /* Load and Store attributes */ @@ -46,21 +67,48 @@ DEF_ATTRIB(MEMSIZE_4B, "Memory width is 4 bytes", "", "") DEF_ATTRIB(MEMSIZE_8B, "Memory width is 8 bytes", "", "") DEF_ATTRIB(SCALAR_LOAD, "Load is scalar", "", "") DEF_ATTRIB(SCALAR_STORE, "Store is scalar", "", "") -DEF_ATTRIB(REGWRSIZE_1B, "Memory width is 1 byte", "", "") -DEF_ATTRIB(REGWRSIZE_2B, "Memory width is 2 bytes", "", "") -DEF_ATTRIB(REGWRSIZE_4B, "Memory width is 4 bytes", "", "") -DEF_ATTRIB(REGWRSIZE_8B, "Memory width is 8 bytes", "", "") +DEF_ATTRIB(REGWRSIZE_1B, "ETM Memory width is 1 byte", "", "") +DEF_ATTRIB(REGWRSIZE_2B, "ETM Memory width is 2 bytes", "", "") +DEF_ATTRIB(REGWRSIZE_4B, "ETM Memory width is 4 bytes", "", "") +DEF_ATTRIB(REGWRSIZE_8B, "ETM Memory width is 8 bytes", "", "") DEF_ATTRIB(MEMLIKE, "Memory-like instruction", "", "") DEF_ATTRIB(MEMLIKE_PACKET_RULES, "follows Memory-like packet rules", "", "")
+DEF_ATTRIB(CACHEOP, "Cache operation", "", "")
+DEF_ATTRIB(COPBYADDRESS, "Cache operation by address", "", "")
+DEF_ATTRIB(COPBYIDX, "Cache operation by index", "", "")
 DEF_ATTRIB(RELEASE, "Releases a lock", "", "") DEF_ATTRIB(ACQUIRE, "Acquires a lock", "", "")
+DEF_ATTRIB(LLSC, "load-locked/store-conditional instruction", "", "")
 DEF_ATTRIB(RLS_INNER, "Store release inner visibility", "", "")
+DEF_ATTRIB(RLS_OUTER, "Store release outer visibility", "", "")
 DEF_ATTRIB(RLS_ALL_THREAD, "Store release among all threads", "", "") DEF_ATTRIB(RLS_SAME_THREAD, "Store release with the same thread", "", "")
+/* Load and Store Addressing Mode Attributes */
+DEF_ATTRIB(EA_REG_ONLY, "EA = input register only", "", "")
+DEF_ATTRIB(EA_IMM_ONLY, "EA = immediate only", "", "")
+DEF_ATTRIB(EA_REG_PLUS_IMM, "EA = register plus immediate", "", "")
+DEF_ATTRIB(EA_REG_PLUS_REGSCALED, "EA = register plus scaled register", "", "")
+DEF_ATTRIB(EA_IMM_PLUS_REGSCALED, "EA = immediate plus scaled register", "", "")
+DEF_ATTRIB(EA_BREV_REG, "EA = bit-reversed input register", "", "")
+DEF_ATTRIB(EA_GP_IMM, "EA = GP plus immediate (unless extended)", "", "")
+DEF_ATTRIB(EA_PAGECROSS, "EA calculation can have a Page Cross Stall", "", "")
+
+DEF_ATTRIB(PM_ANY, "Post Modify", "", "")
+DEF_ATTRIB(PM_I, "Post Modify by 
Immediate", "", "") +DEF_ATTRIB(PM_M, "Post Modify by M register", "", "") +DEF_ATTRIB(PM_CIRI, "Post Modify with Circular Addressing by immediate", "", "") +DEF_ATTRIB(PM_CIRR, "Post Modify with Circular Addressing by I field", "", "") + +DEF_ATTRIB(VMEM, "VMEM-type", "", "") +DEF_ATTRIB(VBUF, "Touches the VBUF", "", "") +DEF_ATTRIB(VDBG, "Vector debugging instruction", "", "") + /* V6 Vector attributes */ DEF_ATTRIB(CVI, "Executes on the HVX extension", "", "") +DEF_ATTRIB(NT_VMEM, "Non-temporal memory access", "", "") +DEF_ATTRIB(VMEMU, "Unaligned memory access", "", "") DEF_ATTRIB(CVI_NEW, "New value memory instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VM, "Memory instruction executes on HVX", "", "") @@ -69,109 +117,415 @@ DEF_ATTRIB(CVI_VP_VS, "Double vector permute/shft insn executes on HVX", "", "") DEF_ATTRIB(CVI_VX, "Multiply instruction executes on HVX", "", "") DEF_ATTRIB(CVI_VX_DV, "Double vector multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VS, "Shift instruction executes on HVX", "", "") -DEF_ATTRIB(CVI_VS_3SRC, "This shift needs to borrow a source register", "", "") +DEF_ATTRIB( + CVI_VS_3SRC, + "This shift instruction needs to borrow a source register from the VP slot", + "", "") DEF_ATTRIB(CVI_VS_VX, "Permute/shift and multiply insn executes on HVX", "", "") DEF_ATTRIB(CVI_VA, "ALU instruction executes on HVX", "", "") +DEF_ATTRIB(CVI_VA_2SRC, + "This alu instruction executes on multimedia vector engine and " + "requires two vectro sources", + "", "") DEF_ATTRIB(CVI_VA_DV, "Double vector alu instruction executes on HVX", "", "") DEF_ATTRIB(CVI_4SLOT, "Consumes all the vector execution resources", "", "") DEF_ATTRIB(CVI_TMP, "Transient Memory Load not written to register", "", "") DEF_ATTRIB(CVI_REMAP, "Register Renaming not written to register file", "", "") +DEF_ATTRIB(CVI_TMP_SRC, "Transient reassign", "", "") +DEF_ATTRIB(CVI_EXTRACT, "HVX Extract Instruction that goes through L2", "", "") +DEF_ATTRIB(CVI_EARLY, "HVX instructions that require early sources", "", "") +DEF_ATTRIB(CVI_LATE, "HVX insn that always require late sources", "", "") +DEF_ATTRIB(CVI_VV_LATE, "HVX insn that always require late Vv source", "", "") +DEF_ATTRIB(CVI_REQUIRES_TMPLOAD, ".tmp load must be included in packet", "", "") +DEF_ATTRIB(CVI_PUMP_2X, "Goes through the pipeline twice", "", "") +DEF_ATTRIB(CVI_PUMP_4X, "Goes through the pipeline four times", "", "") DEF_ATTRIB(CVI_GATHER, "CVI Gather operation", "", "") DEF_ATTRIB(CVI_SCATTER, "CVI Scatter operation", "", "") DEF_ATTRIB(CVI_SCATTER_RELEASE, "CVI Store Release for scatter", "", "") +DEF_ATTRIB(CVI_GATHER_RELEASE, "CVI Store Release for gather", "", "") DEF_ATTRIB(CVI_TMP_DST, "CVI instruction that doesn't write a register", "", "") +DEF_ATTRIB(CVI_SCATTER_WORD_ACC, "CVI Scatter Word Accum (second pass)", "", "") +DEF_ATTRIB(CVI_SCATTER_ACC, "CVI Scatter Accumulate", "", "") +DEF_ATTRIB(CVI_VX_VSRC0_IS_DST, + "For the assembler to handle the special case of non-linear " + "instructions with Vxx specified both as src and dst in syntax ", + "", "") + +DEF_ATTRIB(CVI_VX_ACC_FWD, "VX Accumulator Forwarding", "", "") + +DEF_ATTRIB(CVI_VX_NO_TMP_LD, + "VX Accumulator renaming not allowed from tmp load instruction", "", + "") + +DEF_ATTRIB(RESTRICT_CVI_NOVP, + "Instructions with this attribute are assigned to the original " + "shift unit and can not be assigned to the shift/permute unit", + "", "") + +DEF_ATTRIB(CVI_GATHER_ADDR_2B, "CVI Scatter/Gather address is halfword", "", "") +DEF_ATTRIB(CVI_GATHER_ADDR_4B, "CVI 
Scatter/Gather address is word", "", "")
+
+DEF_ATTRIB(VFETCH, "memory fetch op to L2 for a single vector", "", "")
+
 DEF_ATTRIB(CVI_SLOT23, "Can execute in slot 2 or slot 3 (HVX)", "", "") -DEF_ATTRIB(VTCM_ALLBANK_ACCESS, "Allocates in all VTCM schedulers.", "", "") +DEF_ATTRIB(HVX_FLT, "This is a floating point HVX instruction.", "", "")
+
+DEF_ATTRIB(
+ VTCM_ALLBANK_ACCESS,
+ "This instruction allocates in all VTCM schedulers due to a region access.",
+ "", "")
+DEF_ATTRIB(XUMINOR, "XU minor SMTable instruction", "", "")
+
+DEF_ATTRIB(SYNC_MARKER, "This instruction needs a sync marker.", "", "")
+
 /* Change-of-flow attributes */ DEF_ATTRIB(JUMP, "Jump-type instruction", "", "")
+DEF_ATTRIB(DIRECT, "Uses a PC-relative immediate field", "", "")
 DEF_ATTRIB(INDIRECT, "Absolute register jump", "", "")
+DEF_ATTRIB(CJUMP, "Conditional jump", "", "")
 DEF_ATTRIB(CALL, "Function call instruction", "", "")
+DEF_ATTRIB(RET, "Function return instruction", "", "")
+DEF_ATTRIB(PERM, "Permute instruction", "", "")
 DEF_ATTRIB(COF, "Change-of-flow instruction", "", "") DEF_ATTRIB(HINTED_COF, "This instruction is a hinted change-of-flow", "", "") DEF_ATTRIB(CONDEXEC, "May be cancelled by a predicate", "", "")
+DEF_ATTRIB(DOTOLD, "Uses a predicate generated in a previous packet", "", "")
+DEF_ATTRIB(DOTNEW, "Uses a predicate generated in the same packet", "", "")
 DEF_ATTRIB(DOTNEWVALUE, "Uses a register value generated in this pkt", "", "") DEF_ATTRIB(NEWCMPJUMP, "Compound compare and jump", "", "") DEF_ATTRIB(NVSTORE, "New-value store", "", "") DEF_ATTRIB(MEMOP, "memop", "", "") -DEF_ATTRIB(ROPS_2, "Compound instruction worth 2 RISC-ops", "", "") -DEF_ATTRIB(ROPS_3, "Compound instruction worth 3 RISC-ops", "", "") +DEF_ATTRIB(ROPS_2, "Compound instruction worth 2 wimpy RISC-ops", "", "") +DEF_ATTRIB(ROPS_3, "Compound instruction worth 3 wimpy RISC-ops", "", "")
+
 /* access to implicit registers */ DEF_ATTRIB(IMPLICIT_WRITES_LR, "Writes the link register", "", "UREG.LR")
+DEF_ATTRIB(IMPLICIT_READS_LR, "Reads the link register", "UREG.LR", "")
+DEF_ATTRIB(IMPLICIT_READS_LC0, "Reads loop count for loop 0", "UREG.LC0", "")
+DEF_ATTRIB(IMPLICIT_READS_LC1, "Reads loop count for loop 1", "UREG.LC1", "")
+DEF_ATTRIB(IMPLICIT_READS_SA0, "Reads start address for loop 0", "UREG.SA0", "")
+DEF_ATTRIB(IMPLICIT_READS_SA1, "Reads start address for loop 1", "UREG.SA1", "")
+DEF_ATTRIB(IMPLICIT_WRITES_PC, "Writes the program counter", "", "UREG.PC")
+DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the program counter", "UREG.PC", "")
 DEF_ATTRIB(IMPLICIT_WRITES_SP, "Writes the stack pointer", "", "UREG.SP")
+DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the stack pointer", "UREG.SP", "")
 DEF_ATTRIB(IMPLICIT_WRITES_FP, "Writes the frame pointer", "", "UREG.FP")
+DEF_ATTRIB(IMPLICIT_READS_FP, "Reads the frame pointer", "UREG.FP", "")
+DEF_ATTRIB(IMPLICIT_WRITES_GP, "Writes the GP register", "", "UREG.GP")
+DEF_ATTRIB(IMPLICIT_READS_GP, "Reads the GP register", "UREG.GP", "")
 DEF_ATTRIB(IMPLICIT_WRITES_LC0, "Writes loop count for loop 0", "", "UREG.LC0") DEF_ATTRIB(IMPLICIT_WRITES_LC1, "Writes loop count for loop 1", "", "UREG.LC1") DEF_ATTRIB(IMPLICIT_WRITES_SA0, "Writes start addr for loop 0", "", "UREG.SA0") DEF_ATTRIB(IMPLICIT_WRITES_SA1, "Writes start addr for loop 1", "", "UREG.SA1")
+DEF_ATTRIB(IMPLICIT_WRITES_R00, "Writes Register 0", "", "UREG.R00")
 DEF_ATTRIB(IMPLICIT_WRITES_P0, "Writes Predicate 0", "", "UREG.P0") DEF_ATTRIB(IMPLICIT_WRITES_P1, "Writes Predicate 1", "", "UREG.P1") DEF_ATTRIB(IMPLICIT_WRITES_P2, "Writes Predicate 1", "", 
"UREG.P2") DEF_ATTRIB(IMPLICIT_WRITES_P3, "May write Predicate 3", "", "UREG.P3") -DEF_ATTRIB(IMPLICIT_READS_PC, "Reads the PC register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P0, "Reads the P0 register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "") -DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "") +DEF_ATTRIB(IMPLICIT_READS_R00, "Reads Register 0", "UREG.R00", "") +DEF_ATTRIB(IMPLICIT_READS_P0, "Reads Predicate 0", "UREG.P0", "") +DEF_ATTRIB(IMPLICIT_READS_P1, "Reads Predicate 1", "UREG.P1", "") +DEF_ATTRIB(IMPLICIT_READS_P3, "Reads Predicate 3", "UREG.P3", "") +DEF_ATTRIB(IMPLICIT_READS_Q3, "Reads Vector Predicate 3", "UREG.Q3", "") +DEF_ATTRIB(IMPLICIT_READS_CS, "Reads the CS/M register", "UREG.CS", "") +DEF_ATTRIB(IMPLICIT_READS_FRAMEKEY, "Reads FRAMEKEY", "UREG.FRAMEKEY", "") +DEF_ATTRIB(IMPLICIT_READS_FRAMELIMIT, "Reads FRAMELIMIT", "UREG.FRAMELIMIT", "") +DEF_ATTRIB(IMPLICIT_READS_ELR, "Reads the ELR register", "MREG.ELR", "") +DEF_ATTRIB(IMPLICIT_READS_SGP0, "Reads the SGP0 register", "MREG.SGP0", "") +DEF_ATTRIB(IMPLICIT_READS_SGP1, "Reads the SGP1 register", "MREG.SGP1", "") +DEF_ATTRIB(IMPLICIT_WRITES_SGP0, "Reads the SGP0 register", "", "MREG.SGP0") +DEF_ATTRIB(IMPLICIT_WRITES_SGP1, "Reads the SGP1 register", "", "MREG.SGP1") +DEF_ATTRIB(IMPLICIT_WRITES_STID_PRIO_ANYTHREAD, "Reads", "", "MREG.STID.PRIO") +DEF_ATTRIB(IMPLICIT_WRITES_SRBIT, "Writes the OVF bit", "", "UREG.SR.OVF") +DEF_ATTRIB(IMPLICIT_WRITES_FPFLAGS, "May write FP flags", "", "UREG.SR.FPFLAGS") +DEF_ATTRIB(IMPLICIT_WRITES_LPCFG, "Writes the loop config", "", "UREG.SR.LPCFG") +DEF_ATTRIB(IMPLICIT_WRITES_CVBITS, "Writes the CV flags", "", "UREG.SR.CV") +DEF_ATTRIB(IMPLICIT_READS_FPRND, "May read FP rnd mode", "UREG.SR.FPRND", "") +DEF_ATTRIB(IMPLICIT_READS_SSR, "May read SSR values", "MREG.SSR", "") +DEF_ATTRIB(IMPLICIT_READS_CCR, "May read CCR values", "MREG.CCR", "") +DEF_ATTRIB(IMPLICIT_WRITES_CCR, "May write CCR values", "", "MREG.CCR") +DEF_ATTRIB(IMPLICIT_WRITES_SSR, "May write SSR values", "", "MREG.SSR") +DEF_ATTRIB(IMPLICIT_READS_GELR, "May read GELR values", "GREG.GELR", "") +DEF_ATTRIB(IMPLICIT_READS_GEVB, "May read GEVB values", "MREG.GEVB", "") +DEF_ATTRIB(IMPLICIT_READS_GSR, "May read GSR values", "GREG.GSR", "") +DEF_ATTRIB(IMPLICIT_READS_GOSP, "May read GOSP values", "GREG.GOSP", "") +DEF_ATTRIB(IMPLICIT_WRITES_GELR, "May write GELR values", "", "GREG.GELR") +DEF_ATTRIB(IMPLICIT_WRITES_GSR, "May write GSR values", "", "GREG.GSR") +DEF_ATTRIB(IMPLICIT_WRITES_GOSP, "May write GOSP values", "", "GREG.GOSP") +DEF_ATTRIB(IMPLICIT_READS_IPENDAD_IPEND, "May read", "MREG.IPENDAD.IPEND", "") +DEF_ATTRIB(IMPLICIT_WRITES_IPENDAD_IPEND, "May write", "", "MREG.IPENDAD.IPEND") +DEF_ATTRIB(IMPLICIT_READS_IPENDAD_IAD, "May read", "MREG.IPENDAD.IAD", "") +DEF_ATTRIB(IMPLICIT_WRITES_IPENDAD_IAD, "May write", "", "MREG.IPENDAD.IAD") +DEF_ATTRIB(IMPLICIT_WRITES_IMASK_ANYTHREAD, "May write", "", "MREG.IMASK") +DEF_ATTRIB(IMPLICIT_READS_IMASK_ANYTHREAD, "May read", "MREG.IMASK", "") +DEF_ATTRIB(IMPLICIT_READS_SYSCFG_K0LOCK, "May read", "MREG.SYSCFG.K0LOCK", "") +DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_K0LOCK, "May write", "", "MREG.SYSCFG.K0LOCK") +DEF_ATTRIB(IMPLICIT_READS_SYSCFG_TLBLOCK, "May read", "MREG.SYSCFG.TLBLOCK", "") +DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_TLBLOCK, "May wr", "", "MREG.SYSCFG.TLBLOCK") +DEF_ATTRIB(IMPLICIT_WRITES_SYSCFG_GCA, "May write", "", "MREG.SYSCFG.GCA") +DEF_ATTRIB(IMPLICIT_READS_SYSCFG_GCA, "May 
read", "MREG.SYSCFG.GCA", "") DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "") -DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the SP register", "", "") + +/* Other things the instruction does */ +DEF_ATTRIB(ACC, "Has a multiply", "", "") +DEF_ATTRIB(MPY, "Has a multiply", "", "") +DEF_ATTRIB(SATURATE, "Does signed saturation", "", "") +DEF_ATTRIB(USATURATE, "Does unsigned saturation", "", "") +DEF_ATTRIB(CIRCADDR, "Uses circular addressing mode", "", "") +DEF_ATTRIB(BREVADDR, "Uses bit reverse addressing mode", "", "") +DEF_ATTRIB(BIDIRSHIFTL, "Uses a bidirectional shift left", "", "") +DEF_ATTRIB(BIDIRSHIFTR, "Uses a bidirectional shift right", "", "") +DEF_ATTRIB(BRANCHADDER, "Contains a PC-plus-immediate operation.", "", "") +DEF_ATTRIB(CRSLOT23, "Can execute in slot 2 or slot 3 (CR)", "", "") DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "") DEF_ATTRIB(DEALLOCRET, "dealloc_return", "", "") DEF_ATTRIB(DEALLOCFRAME, "deallocframe", "", "") -DEF_ATTRIB(CRSLOT23, "Can execute in slot 2 or slot 3 (CR)", "", "") +/* Instruction Types */ + +DEF_ATTRIB(IT_ALU, "ALU type", "", "") +DEF_ATTRIB(IT_ALU_ADDSUB, "ALU add or subtract type", "", "") +DEF_ATTRIB(IT_ALU_MINMAX, "ALU MIN or MAX type", "", "") +DEF_ATTRIB(IT_ALU_MOVE, "ALU data movement type", "", "") +DEF_ATTRIB(IT_ALU_LOGICAL, "ALU logical operation type", "", "") +DEF_ATTRIB(IT_ALU_SHIFT, "ALU shift operation type", "", "") +DEF_ATTRIB(IT_ALU_SHIFT_AND_OP, "ALU shift and additional op type", "", "") +DEF_ATTRIB(IT_ALU_CMP, "ALU compare operation type", "", "") + +DEF_ATTRIB(IT_LOAD, "Loads from memory", "", "") +DEF_ATTRIB(IT_STORE, "Stores to memory", "", "") + +DEF_ATTRIB(IT_MPY, "Multiply type", "", "") +DEF_ATTRIB(IT_MPY_32, "32-bit Multiply type", "", "") + +DEF_ATTRIB(IT_COF, "Change-of-flow type", "", "") +DEF_ATTRIB(IT_HWLOOP, "Sets up hardware loop registers", "", "") + +DEF_ATTRIB(IT_MISC, "misc instruction type", "", "") + DEF_ATTRIB(IT_NOP, "nop instruction", "", "") DEF_ATTRIB(IT_EXTENDER, "constant extender instruction", "", "") +/* Exceptions the instruction can generate */ + +DEF_ATTRIB(EXCEPTION_TLB, "Can generate a TLB Miss Exception", "", "") +DEF_ATTRIB(EXCEPTION_ACCESS, "Can generate Access Violation Exception", "", "") +DEF_ATTRIB(EXCEPTION_SWI, "Software Interrupt (trap) exception", "", "") + + +/* Documentation Notes */ +DEF_ATTRIB(NOTE_ARCHV2, "Only available in the V2 architecture", "", "") + +DEF_ATTRIB(NOTE_PACKET_PC, "The PC is the addr of the start of the pkt", "", "") + +DEF_ATTRIB(NOTE_PACKET_NPC, "Next PC is the address following pkt", "", "") + +DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "") + +DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "") + +DEF_ATTRIB(NOTE_RELATIVE_ADDRESS, "A PC-relative address is formed", "", "") + +DEF_ATTRIB(NOTE_LA_RESTRICT, "Cannot be in the last pkt of a HW loop", "", "") + +DEF_ATTRIB(NOTE_OOBVSHIFT, "Possible shift overflow", "", "") +DEF_ATTRIB(NOTE_BIDIRSHIFT, "Bidirectional shift", "", "") + +DEF_ATTRIB(NOTE_CVFLAGS, "Sets the Carry and Overflow flags in USR.", "", "") +DEF_ATTRIB(NOTE_SR_OVF_WHEN_SATURATING, "Might set OVF bit", "", "") +DEF_ATTRIB(NOTE_STNT, + "Non Temporal Data. The :nt appendix is a hint to the " + "microarchitecture indicating that the life of the cache line is " + "short. 
+           "make replacement and allocation decisions.",
+           "", "")
+DEF_ATTRIB(NOTE_PRIV, "Monitor-level feature", "", "")
+DEF_ATTRIB(NOTE_GUEST, "Guest-level feature", "", "")
+DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "")
+DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "")
+DEF_ATTRIB(NOTE_NOSLOT1, "Packet with this insn must have slot 1 empty", "", "")
+DEF_ATTRIB(NOTE_SLOT1_AOK, "Packet must have slot 1 empty or ALU32", "", "")
+DEF_ATTRIB(NOTE_NOSLOT01, "Packet must have both slot 0 and 1 empty", "", "")
+DEF_ATTRIB(NOTE_NEEDS_MEMLD, "Must be grouped with a memory load", "", "")
+DEF_ATTRIB(NOTE_LATEPRED, "The predicate cannot be used as a .new", "", "")
+DEF_ATTRIB(NOTE_COMPAT_ACCURACY, "In the future accuracy may increase", "", "")
+DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "")
+DEF_ATTRIB(NOTE_DEPRECATED, "Will be deprecated in a future version.", "", "")
+DEF_ATTRIB(NOTE_NONAPALIV1, "May not work correctly in Napali V1.", "", "")
+DEF_ATTRIB(NOTE_NOLAHAINAV1, "This may not work correctly in Lahaina V1.", "",
+           "")
+DEF_ATTRIB(NOTE_BADTAG_UNDEF, "Undefined if a tag is non-present", "", "")
+DEF_ATTRIB(NOTE_NOSLOT2_MPY, "Packet cannot have a slot 2 multiply", "", "")
+DEF_ATTRIB(NOTE_HVX_ONLY, "Only available on a core with HVX.", "", "")
+
+DEF_ATTRIB(NOTE_NOCOF_RESTRICT, "Cannot be grouped with any COF", "", "")
+DEF_ATTRIB(NOTE_BRANCHADDER_MAX1, "One PC-plus-offset calculation", "", "")
+
+DEF_ATTRIB(NOTE_CRSLOT23, "Execute on either slot2 or slot3 (CR)", "", "")
+DEF_ATTRIB(NOTE_EXTENSION_AUDIO, "Hexagon audio extensions", "", "")
+DEF_ATTRIB(NOTE_FETCHNT,
+           "Non Temporal Data Cache Prefetch. The :nt appendix is a hint to "
+           "the microarchitecture indicating that the life of the cache line "
+           "fetched is short. This information is used throughout the cache "
+           "hierarchy to make replacement and allocation decisions.",
+           "", "")
+DEF_ATTRIB(NOTE_VECX_V67, "This instruction is only available on V67", "", "")
+
+DEF_ATTRIB(NOTE_NOVP,
+           "This instruction cannot be paired with an HVX permute instruction",
+           "", "")
+DEF_ATTRIB(NOTE_VA_UNARY,
+           "If a packet contains this instruction and an HVX ALU op then the "
+           "ALU op must be unary.",
+           "", "")
+
+
+/* V6 MMVector Notes for Documentation */
+DEF_ATTRIB(NOTE_ANY_RESOURCE, "Can use any HVX resource.", "", "")
+DEF_ATTRIB(NOTE_ANY2_RESOURCE, "Uses any pair of the HVX resources", "", "")
+DEF_ATTRIB(NOTE_PERMUTE_RESOURCE, "Uses the HVX permute resource.", "", "")
+DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "")
+DEF_ATTRIB(NOTE_MPY_RESOURCE, "Uses an HVX multiply resource.", "", "")
+DEF_ATTRIB(NOTE_MPYDV_RESOURCE, "Uses both HVX multiply resources.", "", "")
+DEF_ATTRIB(NOTE_NT_VMEM, "Non-temporal hint to the micro-architecture", "", "")
+DEF_ATTRIB(NOTE_ALL_RESOURCE, "Uses all HVX resources.", "", "")
+DEF_ATTRIB(NOTE_VMEM, "Immediates are in multiples of vector length.", "", "")
+DEF_ATTRIB(NOTE_ANY_VS_VX_RESOURCE, "Consumes two resources", "", "")
+
+DEF_ATTRIB(NOTE_RT8, "Input scalar register Rt is limited to R0-R7", "", "")
+
+DEF_ATTRIB(NOTE_MX, "This is an in-memory matrix multiply instruction.", "", "")
+DEF_ATTRIB(NOTE_VX_ACC_FWD,
+           "The accumulator (Vxx) source of this instruction must be generated "
+           "in the previous packet to avoid a stall. The accumulator cannot "
+           "come from a .tmp operation.",
+           "", "")
+DEF_ATTRIB(NOTE_TMP_NO_VX,
+           "The tmp load instruction destination register cannot be an "
+           "accumulator register.",
+           "", "")
+
+DEF_ATTRIB(
+    NOTE_NO_ECC,
+    "ECC is not supported for scatter and gather instructions. Enabling ECC "
+    "with unprotected access instructions results in undetermined behavior.",
+    "", "")
+
+/* FP8 instructions */
+DEF_ATTRIB(HVX_FP8, "HVX FP8 extension instruction", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_8, "HVX IEEE FP extension instruction: 8-bit output",
+           "", "")
+
 /* Restrictions to make note of */
+DEF_ATTRIB(RESTRICT_LOOP_LA, "Cannot be in the last packet of a loop", "", "")
+DEF_ATTRIB(RESTRICT_NEEDS_MEMLD, "Must be grouped with a load", "", "")
 DEF_ATTRIB(RESTRICT_COF_MAX1, "One change-of-flow per packet", "", "")
 DEF_ATTRIB(RESTRICT_NOPACKET, "Not allowed in a packet", "", "")
+DEF_ATTRIB(RESTRICT_NOSRMOVE, "Do not write SR in the same packet", "", "")
 DEF_ATTRIB(RESTRICT_SLOT0ONLY, "Must execute on slot0", "", "")
 DEF_ATTRIB(RESTRICT_SLOT1ONLY, "Must execute on slot1", "", "")
 DEF_ATTRIB(RESTRICT_SLOT2ONLY, "Must execute on slot2", "", "")
 DEF_ATTRIB(RESTRICT_SLOT3ONLY, "Must execute on slot3", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT2_MPY, "A packet cannot have a slot 2 mpy", "", "")
 DEF_ATTRIB(RESTRICT_NOSLOT1, "No slot 1 instruction in parallel", "", "")
+DEF_ATTRIB(RESTRICT_SLOT1_AOK, "Slot 1 insn must be empty or A-type", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT01, "No slot 0 or 1 instructions in parallel", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "")
+DEF_ATTRIB(RESTRICT_NOSLOT0_LOAD, "Packet must not have a slot 0 load", "", "")
+DEF_ATTRIB(RESTRICT_NOCOF, "Cannot be grouped with any COF", "", "")
+DEF_ATTRIB(RESTRICT_BRANCHADDER_MAX1, "One PC-plus-offset calculation", "", "")
 DEF_ATTRIB(RESTRICT_PREFERSLOT0, "Try to encode into slot 0", "", "")
+DEF_ATTRIB(RESTRICT_SINGLE_MEM_FIRST, "Single memory op must be last", "", "")
 DEF_ATTRIB(RESTRICT_PACKET_AXOK, "May exist with A-type or X-type", "", "")
+DEF_ATTRIB(RESTRICT_PACKET_SOMEREGS_OK, "Relaxed grouping rules", "", "")
+DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate cannot be used as a .new.", "", "")
+
+DEF_ATTRIB(PAIR_1OF2, "For assembler", "", "")
+DEF_ATTRIB(PAIR_2OF2, "For assembler", "", "")
+DEF_ATTRIB(NOTE_MX_PAIR,
+           "Weights and Activations need to be paired in a packet.", "", "")
+DEF_ATTRIB(NOTE_RESTRICT_CVI_NOVP,
+           "This instruction cannot use the permute/shift resource", "", "")
+
+/* Performance-based preferences */
+DEF_ATTRIB(PREFER_SLOT3, "Complex XU preferring slot3", "", "")
+
+DEF_ATTRIB(RELAX_COF_1ST, "COF can be first in assembly order", "", "")
+DEF_ATTRIB(RELAX_COF_2ND, "COF can be second in assembly order", "", "")
 DEF_ATTRIB(ICOP, "Instruction cache op", "", "")
+DEF_ATTRIB(INTRINSIC_RETURNS_UNSIGNED, "Intrinsic returns an unsigned", "", "")
+
+DEF_ATTRIB(PRED_BIT_1, "The branch uses bit 1 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_4, "The branch uses bit 4 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_8, "The branch uses bit 8 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_12, "The branch uses bit 12 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_13, "The branch uses bit 13 as the prediction bit", "", "")
+DEF_ATTRIB(PRED_BIT_7, "The branch uses bit 7 as the prediction bit", "", "")
+DEF_ATTRIB(HWLOOP0_SETUP, "Sets up HW loop0", "", "")
+DEF_ATTRIB(HWLOOP1_SETUP, "Sets up HW loop1", "", "")
 DEF_ATTRIB(HWLOOP0_END, "Ends HW loop0", "", "")
 DEF_ATTRIB(HWLOOP1_END, "Ends HW loop1", "", "")
 DEF_ATTRIB(RET_TYPE, "return type", "", "")
+DEF_ATTRIB(HINTJR, "hintjr type", "", "")
 DEF_ATTRIB(DCZEROA, "dczeroa type", "", "")
+DEF_ATTRIB(ICTAGOP, "ictag op type", "", "")
 DEF_ATTRIB(ICFLUSHOP, "icflush op type", "", "")
 DEF_ATTRIB(DCFLUSHOP, "dcflush op type", "", "")
+DEF_ATTRIB(DCTAGOP, "dctag op type", "", "")
 DEF_ATTRIB(L2FLUSHOP, "l2flush op type", "", "")
+DEF_ATTRIB(L2TAGOP, "l2tag op type", "", "")
 DEF_ATTRIB(DCFETCH, "dcfetch type", "", "")
+DEF_ATTRIB(BIMODAL_BRANCH, "Updates the bimodal branch predictor", "", "")
+DEF_ATTRIB(VECINSN, "Long Vector Instruction", "", "")
+DEF_ATTRIB(MEMSIZE_32B, "Memory width is 32 bytes", "", "")
+DEF_ATTRIB(FOUR_PHASE, "Four Phase Instruction", "", "")
 DEF_ATTRIB(L2FETCH, "Instruction is l2fetch type", "", "")
+DEF_ATTRIB(PREDUSE_BSB, "Instructions need back-skip-back scheduling", "", "")
 DEF_ATTRIB(ICINVA, "icinva", "", "")
 DEF_ATTRIB(DCCLEANINVA, "dccleaninva", "", "")
+DEF_ATTRIB(EXTENSION_AUDIO, "audio extension", "", "")
+
+DEF_ATTRIB(MEMCPY, "memcpy or dma-type instruction", "", "")
 DEF_ATTRIB(NO_INTRINSIC, "Don't generate an intrinsic", "", "")
-/* Documentation Notes */
-DEF_ATTRIB(NOTE_CONDITIONAL, "can be conditionally executed", "", "")
-DEF_ATTRIB(NOTE_NEWVAL_SLOT0, "New-value oprnd must execute on slot 0", "", "")
-DEF_ATTRIB(NOTE_PRIV, "Monitor-level feature", "", "")
-DEF_ATTRIB(NOTE_NOPACKET, "solo instruction", "", "")
-DEF_ATTRIB(NOTE_AXOK, "May only be grouped with ALU32 or non-FP XTYPE.", "", "")
-DEF_ATTRIB(NOTE_LATEPRED, "The predicate can not be used as a .new", "", "")
-DEF_ATTRIB(NOTE_NVSLOT0, "Can execute only in slot 0 (ST)", "", "")
-DEF_ATTRIB(NOTE_NOVP, "Cannot be paired with a HVX permute instruction", "", "")
-DEF_ATTRIB(NOTE_VA_UNARY, "Combined with HVX ALU op (must be unary)", "", "")
+DEF_ATTRIB(NO_XML, "Don't generate XML docs for this instruction", "", "")
-/* V6 MMVector Notes for Documentation */
-DEF_ATTRIB(NOTE_SHIFT_RESOURCE, "Uses the HVX shift resource.", "", "")
-/* Restrictions to make note of */
-DEF_ATTRIB(RESTRICT_NOSLOT1_STORE, "Packet must not have slot 1 store", "", "")
-DEF_ATTRIB(RESTRICT_LATEPRED, "Predicate can not be used as a .new.", "", "")
+DEF_ATTRIB(DMA, "User-DMA instruction", "", "")
+DEF_ATTRIB(VERIF_DMASTEP,
+           "Hiphop needs to step dma prior to executing this packet", "", "")
+DEF_ATTRIB(VERIF_DMATICK,
+           "DMA gets a tick in verif mode for this instruction after a commit",
+           "", "")
+
+DEF_ATTRIB(HVX_IEEE_FP, "HVX IEEE FP extension instruction", "", "")
+DEF_ATTRIB(NOTE_HVX_IEEE_FP,
+           "Only supported on the HVX cores with the IEEE FP extension", "", "")
+
+DEF_ATTRIB(HVX_IEEE_FP_DV_ONE,
+           "HVX IEEE FP extension instruction - dual pipes: P2 and P3 - output "
+           "only on P2",
+           "", "")
+DEF_ATTRIB(HVX_IEEE_FP_ACC, "HVX IEEE FP accumulate instruction", "", "")
+DEF_ATTRIB(HVX_IEEE_BF,
+           "HVX IEEE BF extension instruction: 16-bit bfloat input", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_BF,
+           "HVX IEEE FP extension instruction: 16-bit bfloat output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_16,
+           "HVX IEEE FP extension instruction: 16-bit output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_OUT_32,
+           "HVX IEEE FP extension instruction: 32-bit output", "", "")
+DEF_ATTRIB(HVX_IEEE_FP_BINARY_LATE,
+           "HVX IEEE FP extension instruction: Both inputs can arrive late", "",
+           "")
 /* Keep this as the last attribute: */
 DEF_ATTRIB(ZZ_LASTATTRIB, "Last attribute in the file", "", "")
diff --git a/target/hexagon/common-semi-target.h 
b/target/hexagon/common-semi-target.h new file mode 100644 index 000000000000..759aaeba905f --- /dev/null +++ b/target/hexagon/common-semi-target.h @@ -0,0 +1,87 @@ +/* + * Target-specific parts of semihosting/arm-compat-semi.c. + * + * Copyright(c) 2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TARGET_HEXAGON_COMMON_SEMI_TARGET_H +#define TARGET_HEXAGON_COMMON_SEMI_TARGET_H + +#include "cpu.h" +#include "cpu_helper.h" +#include "qemu/log.h" +#include "semihosting/uaccess.h" + +static inline bool common_semi_read_arg_word(CPUArchState *env, + target_ulong *save_to, + target_ulong args_addr, + int arg_num) +{ + hexagon_read_memory(env, args_addr + (arg_num) * 4, 4, save_to, 0); + return false; +} + +static inline target_ulong common_semi_arg(CPUState *cs, int argno) +{ + CPUHexagonState *env = cpu_env(cs); + return arch_get_thread_reg(env, HEX_REG_R00 + argno); +} + +static inline void common_semi_set_ret(CPUState *cs, target_ulong ret) +{ + CPUHexagonState *env = cpu_env(cs); + arch_set_thread_reg(env, HEX_REG_R00, ret); +} + +static inline void hex_semi_set_err(CPUState *cs, target_ulong err) +{ + CPUHexagonState *env = cpu_env(cs); + arch_set_thread_reg(env, HEX_REG_R01, err); +} + +static inline bool common_semi_sys_exit_extended(CPUState *cs, int nr) +{ + return false; +} + +static inline bool is_64bit_semihosting(CPUArchState *env) +{ + return false; +} + +static inline target_ulong common_semi_stack_bottom(CPUState *cs) +{ + CPUHexagonState *env = cpu_env(cs); + return arch_get_thread_reg(env, HEX_REG_SP); +} + +static inline bool common_semi_has_synccache(CPUArchState *env) +{ + return false; +} + +static inline void hex_prepare_for_read(CPUState *cs, target_ulong fd, + target_ulong buf, target_ulong len) +{ + CPUHexagonState *env = cpu_env(cs); + /* + * Need to make sure the page we are going to write to is available. + * The file pointer advances with the read. If the write to bufaddr + * faults the swi function will be restarted but the file pointer + * will be wrong. + */ + hexagon_touch_memory(env, buf, len, 0); +} + +const struct semihosting_opt_callbacks hex_opt_callbacks = { + .prepare_for_read = hex_prepare_for_read, + .set_err = hex_semi_set_err, +}; + +SEMIHOSTING_REGISTER_OPT_CALLBACKS(hex_opt_callbacks) + +#define SEMIHOSTING_EXT_OPEN_MODES + +#endif diff --git a/target/hexagon/cpu-param.h b/target/hexagon/cpu-param.h index 45ee7b46409c..d414ca89d690 100644 --- a/target/hexagon/cpu-param.h +++ b/target/hexagon/cpu-param.h @@ -18,9 +18,18 @@ #ifndef HEXAGON_CPU_PARAM_H #define HEXAGON_CPU_PARAM_H +#ifdef CONFIG_USER_ONLY #define TARGET_PAGE_BITS 16 /* 64K pages */ +#else +#define TARGET_PAGE_BITS 12 /* 4K pages */ +#endif #define TARGET_PHYS_ADDR_SPACE_BITS 36 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +/* + * Hexagon processors have a strong memory model. + */ +#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL) + #endif diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index a9beb9a17572..f90e8f726a88 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,13 +26,32 @@ #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" #include "exec/gdbstub.h" +#include "cpu_helper.h" +#include "max.h" +#include "hex_mmu.h" +#include "hw/hexagon/hexagon.h" + +#ifndef CONFIG_USER_ONLY +#include "macros.h" +#include "sys_macros.h" +#include "qemu/main-loop.h" +#include "hex_interrupts.h" +#include "hexswi.h" +#endif + +#define DEFINE_STD_CPU_INIT_FUNC(REV) \ + static void hexagon_##REV##_cpu_init(Object *obj) \ + { \ + HexagonCPU *cpu = HEXAGON_CPU(obj); \ + cpu->rev_reg = REV##_rev; \ + } -static void hexagon_v66_cpu_init(Object *obj) { } -static void hexagon_v67_cpu_init(Object *obj) { } -static void hexagon_v68_cpu_init(Object *obj) { } -static void hexagon_v69_cpu_init(Object *obj) { } -static void hexagon_v71_cpu_init(Object *obj) { } -static void hexagon_v73_cpu_init(Object *obj) { } +DEFINE_STD_CPU_INIT_FUNC(v66) +DEFINE_STD_CPU_INIT_FUNC(v67) +DEFINE_STD_CPU_INIT_FUNC(v68) +DEFINE_STD_CPU_INIT_FUNC(v69) +DEFINE_STD_CPU_INIT_FUNC(v71) +DEFINE_STD_CPU_INIT_FUNC(v73) static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) { @@ -50,6 +69,18 @@ static ObjectClass *hexagon_cpu_class_by_name(const char *cpu_model) } static const Property hexagon_cpu_properties[] = { +#if !defined(CONFIG_USER_ONLY) + DEFINE_PROP_UINT32("jtlb-entries", HexagonCPU, num_tlbs, MAX_TLB_ENTRIES), + DEFINE_PROP_UINT32("l2vic-base-addr", HexagonCPU, l2vic_base_addr, + 0xffffffffULL), + DEFINE_PROP_UINT32("qtimer-base-addr", HexagonCPU, qtimer_base_addr, + 0xffffffffULL), + DEFINE_PROP_UINT32("hvx-contexts", HexagonCPU, hvx_contexts, 0), + DEFINE_PROP_UINT32("exec-start-addr", HexagonCPU, boot_addr, 0xffffffffULL), + DEFINE_PROP_UINT64("config-table-addr", HexagonCPU, config_table_addr, + 0xffffffffULL), +#endif + DEFINE_PROP_UINT32("dsp-rev", HexagonCPU, rev_reg, 0), DEFINE_PROP_BOOL("lldb-compat", HexagonCPU, lldb_compat, false), DEFINE_PROP_UNSIGNED("lldb-stack-adjust", HexagonCPU, lldb_stack_adjust, 0, qdev_prop_uint32, target_ulong), @@ -62,11 +93,41 @@ const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS] = { "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", "sa0", "lc0", "sa1", "lc1", "p3_0", "c5", "m0", "m1", - "usr", "pc", "ugp", "gp", "cs0", "cs1", "c14", "c15", - "c16", "c17", "c18", "c19", "pkt_cnt", "insn_cnt", "hvx_cnt", "c23", - "c24", "c25", "c26", "c27", "c28", "c29", "c30", "c31", + "usr", "pc", "ugp", "gp", "cs0", "cs1", "upcyclelo", "upcyclehi", + "framelimit", "framekey", "pktcountlo", "pktcounthi", "upmucnt0", + "upmucnt1", "upmucnt2", "upmucnt3", "upmucnt4", "upmucnt5", "upmucnt6", + "upmucnt7", "c28", "c29", "utimerlo", "utimerhi", }; +#ifndef CONFIG_USER_ONLY +const char * const hexagon_sregnames[] = { + "sgp0", "sgp1", "stid", "elr", "badva0", + "badva1", "ssr", "ccr", "htid", "badva", + "imask", "gevb", "vwctrl", "s13", "s14", + "s15", "evb", "modectl", "syscfg", "segment", + "ipendad", "vid", "vid1", "bestwait", "s24", + "schedcfg", "s26", "cfgbase", "diag", "rev", + "pcyclelo", "pcyclehi", "isdbst", "isdbcfg0", "isdbcfg1", + "livelock", "brkptpc0", "brkptcfg0", "brkptpc1", "brkptcfg1", + "isdbmbxin", "isdbmbxout", "isdben", "isdbgpr", "pmucnt4", + "pmucnt5", "pmucnt6", "pmucnt7", "pmucnt0", "pmucnt1", + "pmucnt2", "pmucnt3", "pmuevtcfg", "pmustid0", "pmuevtcfg1", + "pmustid1", "timerlo", "timerhi", "pmucfg", "rgdr2", + 
"rgdr", "turkey", "duck", "chicken", +}; + +G_STATIC_ASSERT(NUM_SREGS == ARRAY_SIZE(hexagon_sregnames)); + +const char * const hexagon_gregnames[] = { + "gelr", "gsr", "gosp", "gbadva", "gcommit1t", + "gcommit2t", "gcommit3t", "gcommit4t", "gcommit5t", "gcommit6t", + "gpcycle1t", "gpcycle2t", "gpcycle3t", "gpcycle4t", "gpcycle5t", + "gpcycle6t", "gpmucnt4", "gpmucnt5", "gpmucnt6", "gpmucnt7", + "gcommit7t", "gcommit8t", "gpcycle7t", "gpcycle8t", "gpcyclelo", + "gpcyclehi", "gpmucnt0", "gpmucnt1", "gpmucnt2", "gpmucnt3", + "g30", "g31", +}; +#endif /* * One of the main debugging techniques is to use "-d cpu" and compare against * LLDB output when single stepping. However, the target and qemu put the @@ -176,7 +237,7 @@ void hexagon_debug_qreg(CPUHexagonState *env, int regnum) print_qreg(stdout, env, regnum, false); } -static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) +void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) { HexagonCPU *cpu = env_archcpu(env); @@ -216,8 +277,7 @@ static void hexagon_dump(CPUHexagonState *env, FILE *f, int flags) qemu_fprintf(f, " cs0 = 0x00000000\n"); qemu_fprintf(f, " cs1 = 0x00000000\n"); #else - print_reg(f, env, HEX_REG_CAUSE); - print_reg(f, env, HEX_REG_BADVA); + print_reg(f, env, HEX_SREG_BADVA); print_reg(f, env, HEX_REG_CS0); print_reg(f, env, HEX_REG_CS1); #endif @@ -262,9 +322,28 @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs, cpu_env(cs)->gpr[HEX_REG_PC] = tb->pc; } +#ifndef CONFIG_USER_ONLY +bool hexagon_thread_is_enabled(CPUHexagonState *env) +{ + target_ulong modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + bool E_bit = thread_enabled_mask & (0x1 << env->threadId); + + return E_bit; +} +#endif + static bool hexagon_cpu_has_work(CPUState *cs) { +#ifndef CONFIG_USER_ONLY + CPUHexagonState *env = cpu_env(cs); + + return hexagon_thread_is_enabled(env) && + (cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI + | CPU_INTERRUPT_K0_UNLOCK | CPU_INTERRUPT_TLB_UNLOCK)); +#else return true; +#endif } static void hexagon_restore_state_to_opc(CPUState *cs, @@ -274,6 +353,29 @@ static void hexagon_restore_state_to_opc(CPUState *cs, cpu_env(cs)->gpr[HEX_REG_PC] = data[0]; } + +#ifndef CONFIG_USER_ONLY +static void mmu_reset(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + if (cs->cpu_index == 0) { + memset(env->hex_tlb, 0, sizeof(*env->hex_tlb)); + } +} + +void hexagon_cpu_soft_reset(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + arch_set_system_reg(env, HEX_SREG_SSR, 0); + hexagon_ssr_set_cause(env, HEX_CAUSE_RESET); + + target_ulong evb = arch_get_system_reg(env, HEX_SREG_EVB); + arch_set_thread_reg(env, HEX_REG_PC, evb); +} +#endif + + +#define HEXAGON_CFG_ADDR_BASE(addr) (((addr) >> 16) & 0x0fffff) static void hexagon_cpu_reset_hold(Object *obj, ResetType type) { CPUState *cs = CPU(obj); @@ -288,6 +390,45 @@ static void hexagon_cpu_reset_hold(Object *obj, ResetType type) set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); /* Default NaN value: sign bit set, all frac bits set */ set_float_default_nan_pattern(0b11111111, &env->fp_status); + +#ifndef CONFIG_USER_ONLY + HexagonCPU *cpu = HEXAGON_CPU(cs); + + if (cs->cpu_index == 0) { + memset(env->g_sreg, 0, sizeof(target_ulong) * NUM_SREGS); + } + memset(env->t_sreg, 0, sizeof(target_ulong) * NUM_SREGS); + memset(env->greg, 0, sizeof(target_ulong) * NUM_GREGS); + + if (cs->cpu_index == 0) { + arch_set_system_reg(env, HEX_SREG_REV, cpu->rev_reg); + 
arch_set_system_reg(env, HEX_SREG_MODECTL, 0x1); + *(env->g_pcycle_base) = 0; + } + + memset(env->gpr, 0, sizeof(target_ulong) * TOTAL_PER_THREAD_REGS); + memset(env->pred, 0, sizeof(target_ulong) * NUM_PREGS); + memset(env->VRegs, 0, sizeof(MMVector) * NUM_VREGS); + memset(env->QRegs, 0, sizeof(MMQReg) * NUM_QREGS); + memset(env->vstore_pending, 0, sizeof(target_ulong) * VSTORES_MAX); + env->t_cycle_count = 0; + env->vtcm_pending = false; + + mmu_reset(env); + arch_set_system_reg(env, HEX_SREG_HTID, cs->cpu_index); + hexagon_cpu_soft_reset(env); + env->threadId = cs->cpu_index; + env->tlb_lock_state = HEX_LOCK_UNLOCKED; + env->k0_lock_state = HEX_LOCK_UNLOCKED; + env->tlb_lock_count = 0; + env->k0_lock_count = 0; + env->next_PC = 0; + env->wait_next_pc = 0; + env->cause_code = -1; + arch_set_thread_reg(env, HEX_REG_PC, cpu->boot_addr); + arch_set_system_reg(env, HEX_SREG_CFGBASE, + HEXAGON_CFG_ADDR_BASE(cpu->config_table_addr)); +#endif } static void hexagon_cpu_disas_set_info(CPUState *s, disassemble_info *info) @@ -308,29 +449,291 @@ static void hexagon_cpu_realize(DeviceState *dev, Error **errp) return; } +#ifndef CONFIG_USER_ONLY + HexagonCPU *cpu = HEXAGON_CPU(cs); + if (cpu->num_tlbs > MAX_TLB_ENTRIES) { + error_setg(errp, "Number of TLBs selected is invalid"); + return; + } +#endif + gdb_register_coprocessor(cs, hexagon_hvx_gdb_read_register, hexagon_hvx_gdb_write_register, gdb_find_static_feature("hexagon-hvx.xml"), 0); +#ifndef CONFIG_USER_ONLY + gdb_register_coprocessor(cs, hexagon_sys_gdb_read_register, + hexagon_sys_gdb_write_register, + gdb_find_static_feature("hexagon-sys.xml"), 0); +#endif + qemu_init_vcpu(cs); - cpu_reset(cs); + CPUHexagonState *env = cpu_env(cs); +#ifndef CONFIG_USER_ONLY + hex_mmu_realize(env); + if (cs->cpu_index == 0) { + env->g_sreg = g_new0(target_ulong, NUM_SREGS); + } else { + CPUState *cpu0 = qemu_get_cpu(0); + CPUHexagonState *env0 = cpu_env(cpu0); + env->g_sreg = env0->g_sreg; + } +#endif + if (cs->cpu_index == 0) { + env->g_pcycle_base = g_malloc0(sizeof(*env->g_pcycle_base)); + } else { + CPUState *cpu0 = qemu_get_cpu(0); + env->g_pcycle_base = cpu_env(cpu0)->g_pcycle_base; + } mcc->parent_realize(dev, errp); } +#if !defined(CONFIG_USER_ONLY) +static void hexagon_cpu_set_irq(void *opaque, int irq, int level) +{ + HexagonCPU *cpu = HEXAGON_CPU(opaque); + CPUState *cs = CPU(cpu); + CPUHexagonState *env = cpu_env(cs); + + switch (irq) { + case HEXAGON_CPU_IRQ_0 ... 
HEXAGON_CPU_IRQ_7: + qemu_log_mask(CPU_LOG_INT, "%s: irq %d, level %d\n", + __func__, irq, level); + if (level) { + hex_raise_interrupts(env, 1 << irq, CPU_INTERRUPT_HARD); + } + break; + default: + g_assert_not_reached(); + } +} +#endif + + static void hexagon_cpu_init(Object *obj) { +#if !defined(CONFIG_USER_ONLY) + HexagonCPU *cpu = HEXAGON_CPU(obj); + qdev_init_gpio_in(DEVICE(cpu), hexagon_cpu_set_irq, 8); +#endif } #include "accel/tcg/cpu-ops.h" +#if !defined(CONFIG_USER_ONLY) +static bool get_physical_address(CPUHexagonState *env, hwaddr *phys, int *prot, + int *size, int32_t *excp, target_ulong address, + MMUAccessType access_type, int mmu_idx) + +{ + if (hexagon_cpu_mmu_enabled(env)) { + return hex_tlb_find_match(env, address, access_type, phys, prot, size, + excp, mmu_idx); + } else { + *phys = address & 0xFFFFFFFF; + *prot = PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC; + *size = TARGET_PAGE_SIZE; + return true; + } +} + +/* qemu seems to only want to know about TARGET_PAGE_SIZE pages */ +static void find_qemu_subpage(vaddr *addr, hwaddr *phys, int page_size) +{ + vaddr page_start = *addr & ~((vaddr)(page_size - 1)); + vaddr offset = ((*addr - page_start) / TARGET_PAGE_SIZE) * TARGET_PAGE_SIZE; + *addr = page_start + offset; + *phys += offset; +} + +static hwaddr hexagon_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +{ + CPUHexagonState *env = cpu_env(cs); + hwaddr phys_addr; + int prot; + int page_size = 0; + int32_t excp = 0; + int mmu_idx = MMU_KERNEL_IDX; + + if (get_physical_address(env, &phys_addr, &prot, &page_size, &excp, + addr, 0, mmu_idx)) { + find_qemu_subpage(&addr, &phys_addr, page_size); + return phys_addr; + } + + return -1; +} + + +#define INVALID_BADVA 0xbadabada + +static void set_badva_regs(CPUHexagonState *env, target_ulong VA, int slot, + MMUAccessType access_type) +{ + arch_set_system_reg(env, HEX_SREG_BADVA, VA); + + if (access_type == MMU_INST_FETCH || slot == 0) { + arch_set_system_reg(env, HEX_SREG_BADVA0, VA); + arch_set_system_reg(env, HEX_SREG_BADVA1, INVALID_BADVA); + SET_SSR_FIELD(env, SSR_V0, 1); + SET_SSR_FIELD(env, SSR_V1, 0); + SET_SSR_FIELD(env, SSR_BVS, 0); + } else if (slot == 1) { + arch_set_system_reg(env, HEX_SREG_BADVA0, INVALID_BADVA); + arch_set_system_reg(env, HEX_SREG_BADVA1, VA); + SET_SSR_FIELD(env, SSR_V0, 0); + SET_SSR_FIELD(env, SSR_V1, 1); + SET_SSR_FIELD(env, SSR_BVS, 1); + } else { + g_assert_not_reached(); + } +} + +static void raise_tlbmiss_exception(CPUState *cs, target_ulong VA, int slot, + MMUAccessType access_type) +{ + CPUHexagonState *env = cpu_env(cs); + + set_badva_regs(env, VA, slot, access_type); + + switch (access_type) { + case MMU_INST_FETCH: + cs->exception_index = HEX_EVENT_TLB_MISS_X; + if ((VA & ~TARGET_PAGE_MASK) == 0) { + env->cause_code = HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE; + } else { + env->cause_code = HEX_CAUSE_TLBMISSX_CAUSE_NORMAL; + } + break; + case MMU_DATA_LOAD: + cs->exception_index = HEX_EVENT_TLB_MISS_RW; + env->cause_code = HEX_CAUSE_TLBMISSRW_CAUSE_READ; + break; + case MMU_DATA_STORE: + cs->exception_index = HEX_EVENT_TLB_MISS_RW; + env->cause_code = HEX_CAUSE_TLBMISSRW_CAUSE_WRITE; + break; + } +} + +static void raise_perm_exception(CPUState *cs, target_ulong VA, int slot, + MMUAccessType access_type, int32_t excp) +{ + CPUHexagonState *env = cpu_env(cs); + + set_badva_regs(env, VA, slot, access_type); + cs->exception_index = excp; +} + +static const char *access_type_names[] = { "MMU_DATA_LOAD ", "MMU_DATA_STORE", + "MMU_INST_FETCH" }; + +static const char *mmu_idx_names[] = { 
"MMU_USER_IDX", "MMU_GUEST_IDX", + "MMU_KERNEL_IDX" }; + +static bool hexagon_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, bool probe, + uintptr_t retaddr) +{ + CPUHexagonState *env = cpu_env(cs); + static int slot = 0 /* This is always zero for now */; + hwaddr phys; + int prot = 0; + int page_size = 0; + int32_t excp = 0; + bool ret = 0; + + qemu_log_mask( + CPU_LOG_MMU, + "%s: tid = 0x%x, pc = 0x%08" PRIx32 ", vaddr = 0x%08" VADDR_PRIx + ", size = %d, %s,\tprobe = %d, %s\n", + __func__, env->threadId, env->gpr[HEX_REG_PC], address, size, + access_type_names[access_type], probe, mmu_idx_names[mmu_idx]); + ret = get_physical_address(env, &phys, &prot, &page_size, &excp, address, + access_type, mmu_idx); + if (ret) { + if (!excp) { + find_qemu_subpage(&address, &phys, page_size); + tlb_set_page(cs, address, phys, prot, mmu_idx, TARGET_PAGE_SIZE); + return ret; + } else { + raise_perm_exception(cs, address, slot, access_type, excp); + do_raise_exception(env, cs->exception_index, env->gpr[HEX_REG_PC], + retaddr); + } + } + if (probe) { + return false; + } + raise_tlbmiss_exception(cs, address, slot, access_type); + do_raise_exception(env, cs->exception_index, env->gpr[HEX_REG_PC], retaddr); +} + + +#include "hw/core/sysemu-cpu-ops.h" + +static const struct SysemuCPUOps hexagon_sysemu_ops = { + .get_phys_page_debug = hexagon_cpu_get_phys_page_debug, +}; + +static bool hexagon_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + CPUHexagonState *env = cpu_env(cs); + if (interrupt_request & CPU_INTERRUPT_TLB_UNLOCK) { + cs->halted = false; + cpu_reset_interrupt(cs, CPU_INTERRUPT_TLB_UNLOCK); + return true; + } + if (interrupt_request & CPU_INTERRUPT_K0_UNLOCK) { + cs->halted = false; + cpu_reset_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK); + return true; + } + if (interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI)) { + return hex_check_interrupts(env); + } + return false; +} + +#endif + static const TCGCPUOps hexagon_tcg_ops = { .initialize = hexagon_translate_init, .translate_code = hexagon_translate_code, .synchronize_from_tb = hexagon_cpu_synchronize_from_tb, .restore_state_to_opc = hexagon_restore_state_to_opc, +#if !defined(CONFIG_USER_ONLY) + .cpu_exec_interrupt = hexagon_cpu_exec_interrupt, + .tlb_fill = hexagon_tlb_fill, + .cpu_exec_halt = hexagon_cpu_has_work, + .do_interrupt = hexagon_cpu_do_interrupt, +#endif /* !CONFIG_USER_ONLY */ }; +static int hexagon_cpu_mmu_index(CPUState *cs, bool ifetch) +{ +#ifndef CONFIG_USER_ONLY + BQL_LOCK_GUARD(); + CPUHexagonState *env = cpu_env(cs); + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + uint8_t mmuen = GET_SYSCFG_FIELD(SYSCFG_MMUEN, syscfg); + if (!mmuen) { + return MMU_KERNEL_IDX; + } + + int cpu_mode = get_cpu_mode(env); + if (cpu_mode == HEX_CPU_MODE_MONITOR) { + return MMU_KERNEL_IDX; + } else if (cpu_mode == HEX_CPU_MODE_GUEST) { + return MMU_GUEST_IDX; + } +#endif + + return MMU_USER_IDX; +} + + static void hexagon_cpu_class_init(ObjectClass *c, void *data) { HexagonCPUClass *mcc = HEXAGON_CPU_CLASS(c); @@ -347,6 +750,7 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->class_by_name = hexagon_cpu_class_by_name; cc->has_work = hexagon_cpu_has_work; + cc->mmu_index = hexagon_cpu_mmu_index; cc->dump_state = hexagon_dump_state; cc->set_pc = hexagon_cpu_set_pc; cc->get_pc = hexagon_cpu_get_pc; @@ -355,9 +759,39 @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data) cc->gdb_stop_before_watchpoint = true; cc->gdb_core_xml_file = 
"hexagon-core.xml"; cc->disas_set_info = hexagon_cpu_disas_set_info; +#ifndef CONFIG_USER_ONLY + cc->sysemu_ops = &hexagon_sysemu_ops; + dc->vmsd = &vmstate_hexagon_cpu; +#endif +#ifdef CONFIG_TCG cc->tcg_ops = &hexagon_tcg_ops; +#endif } +#ifndef CONFIG_USER_ONLY +uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg) +{ + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + int ssr_ce = GET_SSR_FIELD(SSR_CE, ssr); + + if (reg <= HEX_GREG_G3) { + return env->greg[reg]; + } + switch (reg) { + case HEX_GREG_GPCYCLELO: + return ssr_ce ? hexagon_get_sys_pcycle_count_low(env) : 0; + + case HEX_GREG_GPCYCLEHI: + return ssr_ce ? hexagon_get_sys_pcycle_count_high(env) : 0; + + default: + qemu_log_mask(LOG_UNIMP, "reading greg %" PRId32 + " not yet supported.\n", reg); + return 0; + } +} +#endif + #define DEFINE_CPU(type_name, initfn) \ { \ .name = type_name, \ diff --git a/target/hexagon/cpu.h b/target/hexagon/cpu.h index f78c8f9c2a00..70ed3d5ba7b9 100644 --- a/target/hexagon/cpu.h +++ b/target/hexagon/cpu.h @@ -20,14 +20,21 @@ #include "fpu/softfloat-types.h" +#define NUM_GREGS 32 +#define GREG_WRITES_MAX 2 +#define NUM_SREGS 64 +#define SREG_WRITES_MAX 2 + #include "cpu-qom.h" #include "exec/cpu-defs.h" +#include "exec/cpu-common.h" #include "hex_regs.h" #include "mmvec/mmvec.h" #include "hw/registerfields.h" #ifndef CONFIG_USER_ONLY -#error "Hexagon does not support system emulation" +#include "reg_fields.h" +typedef struct CPUHexagonTLBContext CPUHexagonTLBContext; #endif #define NUM_PREGS 4 @@ -38,10 +45,47 @@ #define REG_WRITES_MAX 32 #define PRED_WRITES_MAX 5 /* 4 insns + endloop */ #define VSTORES_MAX 2 +#define THREADS_MAX 8 +#define VECTOR_UNIT_MAX 8 -#define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU +#ifndef CONFIG_USER_ONLY +#define CPU_INTERRUPT_SWI CPU_INTERRUPT_TGT_INT_0 +#define CPU_INTERRUPT_K0_UNLOCK CPU_INTERRUPT_TGT_INT_1 +#define CPU_INTERRUPT_TLB_UNLOCK CPU_INTERRUPT_TGT_INT_2 + +#define HEX_CPU_MODE_USER 1 +#define HEX_CPU_MODE_GUEST 2 +#define HEX_CPU_MODE_MONITOR 3 + +#define HEX_EXE_MODE_OFF 1 +#define HEX_EXE_MODE_RUN 2 +#define HEX_EXE_MODE_WAIT 3 +#define HEX_EXE_MODE_DEBUG 4 +#endif -#define MMU_USER_IDX 0 +#define MMU_USER_IDX 0 +#ifndef CONFIG_USER_ONLY +#define MMU_GUEST_IDX 1 +#define MMU_KERNEL_IDX 2 + +#define HEXAGON_CPU_IRQ_0 0 +#define HEXAGON_CPU_IRQ_1 1 +#define HEXAGON_CPU_IRQ_2 2 +#define HEXAGON_CPU_IRQ_3 3 +#define HEXAGON_CPU_IRQ_4 4 +#define HEXAGON_CPU_IRQ_5 5 +#define HEXAGON_CPU_IRQ_6 6 +#define HEXAGON_CPU_IRQ_7 7 + +typedef enum { + HEX_LOCK_UNLOCKED = 0, + HEX_LOCK_WAITING = 1, + HEX_LOCK_OWNER = 2, + HEX_LOCK_QUEUED = 3 +} hex_lock_state_t; +#endif + +#define CPU_RESOLVING_TYPE TYPE_HEXAGON_CPU typedef struct { target_ulong va; @@ -75,12 +119,33 @@ typedef struct { typedef struct CPUArchState { target_ulong gpr[TOTAL_PER_THREAD_REGS]; target_ulong pred[NUM_PREGS]; + target_ulong cause_code; /* For comparing with LLDB on target - see adjust_stack_ptrs function */ target_ulong last_pc_dumped; target_ulong stack_start; uint8_t slot_cancelled; + uint64_t t_cycle_count; + uint64_t *g_pcycle_base; +#ifndef CONFIG_USER_ONLY + /* Some system registers are per thread and some are global. 
*/ + target_ulong t_sreg[NUM_SREGS]; + target_ulong *g_sreg; + + target_ulong greg[NUM_GREGS]; + target_ulong wait_next_pc; + + /* This alias of CPUState.cpu_index is used by imported sources: */ + target_ulong threadId; + hex_lock_state_t tlb_lock_state; + hex_lock_state_t k0_lock_state; + target_ulong tlb_lock_count; + target_ulong k0_lock_count; + CPUHexagonTLBContext *hex_tlb; + GList *dir_list; +#endif + target_ulong next_PC; target_ulong new_value_usr; MemLog mem_log_stores[STORES_MAX]; @@ -123,19 +188,48 @@ struct ArchCPU { CPUHexagonState env; + uint32_t rev_reg; bool lldb_compat; target_ulong lldb_stack_adjust; bool short_circuit; +#ifndef CONFIG_USER_ONLY + uint32_t num_tlbs; + uint32_t l2vic_base_addr; + uint32_t qtimer_base_addr; + uint32_t hvx_contexts; + uint32_t boot_addr; + uint64_t config_table_addr; +#endif }; #include "cpu_bits.h" FIELD(TB_FLAGS, IS_TIGHT_LOOP, 0, 1) +FIELD(TB_FLAGS, MMU_INDEX, 1, 3) +FIELD(TB_FLAGS, PCYCLE_ENABLED, 4, 1) G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc); +#ifndef CONFIG_USER_ONLY +/* + * @return true if the @a thread_env hardware thread is + * not stopped. + */ +bool hexagon_thread_is_enabled(CPUHexagonState *thread_env); +uint32_t hexagon_greg_read(CPUHexagonState *env, uint32_t reg); +uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg); +void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val); +void hexagon_cpu_soft_reset(CPUHexagonState *env); +#endif + +#include "exec/cpu-all.h" + +#ifndef CONFIG_USER_ONLY +#include "cpu_helper.h" +#endif + static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, uint64_t *cs_base, uint32_t *flags) { @@ -145,10 +239,27 @@ static inline void cpu_get_tb_cpu_state(CPUHexagonState *env, vaddr *pc, if (*pc == env->gpr[HEX_REG_SA0]) { hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP, 1); } - *flags = hex_flags; if (*pc & PCALIGN_MASK) { hexagon_raise_exception_err(env, HEX_CAUSE_PC_NOT_ALIGNED, 0); } +#ifndef CONFIG_USER_ONLY + target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + + bool pcycle_enabled = extract32(syscfg, + reg_field_info[SYSCFG_PCYCLEEN].offset, + reg_field_info[SYSCFG_PCYCLEEN].width); + + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, + cpu_mmu_index(env_cpu(env), false)); + + if (pcycle_enabled) { + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, 1); + } +#else + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, PCYCLE_ENABLED, true); + hex_flags = FIELD_DP32(hex_flags, TB_FLAGS, MMU_INDEX, MMU_USER_IDX); +#endif + *flags = hex_flags; } typedef HexagonCPU ArchCPU; @@ -156,7 +267,4 @@ typedef HexagonCPU ArchCPU; void hexagon_translate_init(void); void hexagon_translate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc); - -#include "exec/cpu-all.h" - #endif /* HEXAGON_CPU_H */ diff --git a/target/hexagon/cpu_bits.h b/target/hexagon/cpu_bits.h index ff596e2a94c9..c7cc426ec888 100644 --- a/target/hexagon/cpu_bits.h +++ b/target/hexagon/cpu_bits.h @@ -24,19 +24,88 @@ #define PCALIGN_MASK (PCALIGN - 1) enum hex_event { - HEX_EVENT_NONE = -1, - HEX_EVENT_TRAP0 = 0x008, + HEX_EVENT_NONE = -1, + HEX_EVENT_RESET = 0x0, + HEX_EVENT_IMPRECISE = 0x1, + HEX_EVENT_PRECISE = 0x2, + HEX_EVENT_TLB_MISS_X = 0x4, + HEX_EVENT_TLB_MISS_RW = 0x6, + HEX_EVENT_TRAP0 = 0x8, + HEX_EVENT_TRAP1 = 0x9, + HEX_EVENT_FPTRAP = 0xb, + HEX_EVENT_DEBUG = 0xc, + HEX_EVENT_INT0 = 0x10, + HEX_EVENT_INT1 = 0x11, + HEX_EVENT_INT2 = 0x12, + HEX_EVENT_INT3 = 0x13, 
+ HEX_EVENT_INT4 = 0x14, + HEX_EVENT_INT5 = 0x15, + HEX_EVENT_INT6 = 0x16, + HEX_EVENT_INT7 = 0x17, + HEX_EVENT_INT8 = 0x18, + HEX_EVENT_INT9 = 0x19, + HEX_EVENT_INTA = 0x1a, + HEX_EVENT_INTB = 0x1b, + HEX_EVENT_INTC = 0x1c, + HEX_EVENT_INTD = 0x1d, + HEX_EVENT_INTE = 0x1e, + HEX_EVENT_INTF = 0x1f, }; enum hex_cause { HEX_CAUSE_NONE = -1, - HEX_CAUSE_TRAP0 = 0x172, - HEX_CAUSE_FETCH_NO_UPAGE = 0x012, - HEX_CAUSE_INVALID_PACKET = 0x015, - HEX_CAUSE_INVALID_OPCODE = 0x015, - HEX_CAUSE_PC_NOT_ALIGNED = 0x01e, - HEX_CAUSE_PRIV_NO_UREAD = 0x024, - HEX_CAUSE_PRIV_NO_UWRITE = 0x025, + HEX_CAUSE_RESET = 0x000, + HEX_CAUSE_BIU_PRECISE = 0x001, + HEX_CAUSE_UNSUPORTED_HVX_64B = 0x002, /* QEMU-specific */ + HEX_CAUSE_DOUBLE_EXCEPT = 0x003, + HEX_CAUSE_TRAP0 = 0x008, + HEX_CAUSE_TRAP1 = 0x009, + HEX_CAUSE_FETCH_NO_XPAGE = 0x011, + HEX_CAUSE_FETCH_NO_UPAGE = 0x012, + HEX_CAUSE_INVALID_PACKET = 0x015, + HEX_CAUSE_INVALID_OPCODE = 0x015, + HEX_CAUSE_NO_COPROC_ENABLE = 0x016, + HEX_CAUSE_NO_COPROC2_ENABLE = 0x018, + HEX_CAUSE_PRIV_USER_NO_GINSN = 0x01a, + HEX_CAUSE_PRIV_USER_NO_SINSN = 0x01b, + HEX_CAUSE_REG_WRITE_CONFLICT = 0x01d, + HEX_CAUSE_PC_NOT_ALIGNED = 0x01e, + HEX_CAUSE_MISALIGNED_LOAD = 0x020, + HEX_CAUSE_MISALIGNED_STORE = 0x021, + HEX_CAUSE_PRIV_NO_READ = 0x022, + HEX_CAUSE_PRIV_NO_WRITE = 0x023, + HEX_CAUSE_PRIV_NO_UREAD = 0x024, + HEX_CAUSE_PRIV_NO_UWRITE = 0x025, + HEX_CAUSE_COPROC_LDST = 0x026, + HEX_CAUSE_STACK_LIMIT = 0x027, + HEX_CAUSE_VWCTRL_WINDOW_MISS = 0x029, + HEX_CAUSE_IMPRECISE_NMI = 0x043, + HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH = 0x044, + HEX_CAUSE_TLBMISSX_CAUSE_NORMAL = 0x060, + HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE = 0x061, + HEX_CAUSE_TLBMISSRW_CAUSE_READ = 0x070, + HEX_CAUSE_TLBMISSRW_CAUSE_WRITE = 0x071, + HEX_CAUSE_DEBUG_SINGLESTEP = 0x80, + HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT = 0x0bf, + HEX_CAUSE_INT0 = 0x0c0, + HEX_CAUSE_INT1 = 0x0c1, + HEX_CAUSE_INT2 = 0x0c2, + HEX_CAUSE_INT3 = 0x0c3, + HEX_CAUSE_INT4 = 0x0c4, + HEX_CAUSE_INT5 = 0x0c5, + HEX_CAUSE_INT6 = 0x0c6, + HEX_CAUSE_INT7 = 0x0c7, + HEX_CAUSE_VIC0 = 0x0c2, + HEX_CAUSE_VIC1 = 0x0c3, + HEX_CAUSE_VIC2 = 0x0c4, + HEX_CAUSE_VIC3 = 0x0c5, +}; + +enum data_cache_state { + HEX_DC_STATE_INVALID = 0x0, + HEX_DC_STATE_VALID = 0x1, + HEX_DC_STATE_RESERVED = 0x2, + HEX_DC_STATE_UNUSED_WT = 0x3, }; #define PACKET_WORDS_MAX 4 diff --git a/target/hexagon/cpu_helper.c b/target/hexagon/cpu_helper.c new file mode 100644 index 000000000000..5a651de0514f --- /dev/null +++ b/target/hexagon/cpu_helper.c @@ -0,0 +1,582 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu_helper.h" +#include "system/cpus.h" +#ifdef CONFIG_USER_ONLY +#include "qemu.h" +#include "exec/helper-proto.h" +#else +#include "hw/boards.h" +#include "hw/hexagon/hexagon.h" +#include "hex_interrupts.h" +#include "hex_mmu.h" +#endif +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/log.h" +#include "tcg/tcg-op.h" +#include "internal.h" +#include "macros.h" +#include "sys_macros.h" +#include "arch.h" + + +#ifndef CONFIG_USER_ONLY + +static bool hexagon_read_memory_small(CPUHexagonState *env, target_ulong addr, + int byte_count, unsigned char *dstbuf, + int mmu_idx, uintptr_t retaddr) + { + /* handle small sizes */ + switch (byte_count) { + case 1: + *dstbuf = cpu_ldub_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + + case 2: + if (QEMU_IS_ALIGNED(addr, 2)) { + *(unsigned short *)dstbuf = + cpu_lduw_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + } + break; + + case 4: + if (QEMU_IS_ALIGNED(addr, 4)) { + *(uint32_t *)dstbuf = + cpu_ldl_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + } + break; + + case 8: + if (QEMU_IS_ALIGNED(addr, 8)) { + *(uint64_t *)dstbuf = + cpu_ldq_mmuidx_ra(env, addr, mmu_idx, retaddr); + return true; + } + break; + + default: + /* larger request, handle elsewhere */ + return false; + } + + /* not aligned, copy bytes */ + for (int i = 0; i < byte_count; ++i) { + *dstbuf++ = cpu_ldub_mmuidx_ra(env, addr++, mmu_idx, retaddr); + } + return true; +} + +void hexagon_read_memory(CPUHexagonState *env, target_ulong vaddr, int size, + void *retptr, uintptr_t retaddr) +{ + BQL_LOCK_GUARD(); + CPUState *cs = env_cpu(env); + unsigned mmu_idx = cpu_mmu_index(cs, false); + if (!hexagon_read_memory_small(env, vaddr, size, retptr, mmu_idx, retaddr)) { + cpu_abort(cs, "%s: ERROR: bad size = %d!\n", __func__, size); + } +} + +static bool hexagon_write_memory_small(CPUHexagonState *env, target_ulong addr, + int byte_count, unsigned char *srcbuf, + int mmu_idx, uintptr_t retaddr) +{ + /* handle small sizes */ + switch (byte_count) { + case 1: + cpu_stb_mmuidx_ra(env, addr, *srcbuf, mmu_idx, retaddr); + return true; + + case 2: + if (QEMU_IS_ALIGNED(addr, 2)) { + cpu_stw_mmuidx_ra(env, addr, *(uint16_t *)srcbuf, mmu_idx, retaddr); + return true; + } + break; + + case 4: + if (QEMU_IS_ALIGNED(addr, 4)) { + cpu_stl_mmuidx_ra(env, addr, *(uint32_t *)srcbuf, mmu_idx, retaddr); + return true; + } + break; + + case 8: + if (QEMU_IS_ALIGNED(addr, 8)) { + cpu_stq_mmuidx_ra(env, addr, *(uint64_t *)srcbuf, mmu_idx, retaddr); + return true; + } + break; + + default: + /* larger request, handle elsewhere */ + return false; + } + + /* not aligned, copy bytes */ + for (int i = 0; i < byte_count; ++i) { + cpu_stb_mmuidx_ra(env, addr++, *srcbuf++, mmu_idx, retaddr); + } + + return true; +} + +void hexagon_write_memory(CPUHexagonState *env, target_ulong vaddr, + int size, uint64_t data, uintptr_t retaddr) +{ + CPUState *cs = env_cpu(env); + unsigned mmu_idx = cpu_mmu_index(cs, false); + if (!hexagon_write_memory_small(env, vaddr, size, (unsigned char *)&data, + mmu_idx, retaddr)) { + cpu_abort(cs, "%s: ERROR: bad size = %d!\n", __func__, size); + } +} + +static inline uint32_t page_start(uint32_t addr) +{ + uint32_t page_align = ~(TARGET_PAGE_SIZE - 1); + return addr & page_align; +} + +void hexagon_touch_memory(CPUHexagonState *env, uint32_t start_addr, + uint32_t length, uintptr_t retaddr) +{ + unsigned int warm; + uint32_t first = 
page_start(start_addr);
+    uint32_t last = page_start(start_addr + length - 1);
+    for (uint32_t page = first; page <= last; page += TARGET_PAGE_SIZE) {
+        hexagon_read_memory(env, page, 1, &warm, retaddr);
+    }
+}
+
+uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index)
+{
+    g_assert_not_reached();
+}
+
+uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg)
+{
+    if (reg == HEX_SREG_PCYCLELO) {
+        return hexagon_get_sys_pcycle_count_low(env);
+    } else if (reg == HEX_SREG_PCYCLEHI) {
+        return hexagon_get_sys_pcycle_count_high(env);
+    }
+
+    g_assert(reg < NUM_SREGS);
+    return reg < HEX_SREG_GLB_START ? env->t_sreg[reg] : env->g_sreg[reg];
+}
+
+#endif
+
+uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env)
+{
+    uint64_t cycles = 0;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        cycles += env_->t_cycle_count;
+    }
+    return *(env->g_pcycle_base) + cycles;
+}
+
+uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env)
+{
+    return hexagon_get_sys_pcycle_count(env) >> 32;
+}
+
+uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env)
+{
+    return extract64(hexagon_get_sys_pcycle_count(env), 0, 32);
+}
+
+void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env,
+                                       uint32_t cycles_hi)
+{
+    uint64_t cur_cycles = hexagon_get_sys_pcycle_count(env);
+    uint64_t cycles =
+        ((uint64_t)cycles_hi << 32) | extract64(cur_cycles, 0, 32);
+    hexagon_set_sys_pcycle_count(env, cycles);
+}
+
+void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env,
+                                      uint32_t cycles_lo)
+{
+    uint64_t cur_cycles = hexagon_get_sys_pcycle_count(env);
+    /* Keep the high word in place while replacing the low word */
+    uint64_t cycles = (extract64(cur_cycles, 32, 32) << 32) | cycles_lo;
+    hexagon_set_sys_pcycle_count(env, cycles);
+}
+
+void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t cycles)
+{
+    *(env->g_pcycle_base) = cycles;
+
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        env_->t_cycle_count = 0;
+    }
+}
+
+#ifndef CONFIG_USER_ONLY
+
+static void set_wait_mode(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+
+    const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl);
+    thread_wait_mask |= 0x1 << env->threadId;
+    SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask);
+}
+
+void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC)
+{
+    g_assert(bql_locked());
+
+    if (qemu_loglevel_mask(LOG_GUEST_ERROR) &&
+        (env->k0_lock_state != HEX_LOCK_UNLOCKED ||
+         env->tlb_lock_state != HEX_LOCK_UNLOCKED)) {
+        qemu_log("WARNING: executing wait() with an acquired lock "
+                 "may lead to deadlock\n");
+    }
+    g_assert(get_exe_mode(env) != HEX_EXE_MODE_WAIT);
+
+    CPUState *cs = env_cpu(env);
+    /*
+     * The addition of cpu_has_work is borrowed from arm's wfi helper
+     * and is critical for our stability.
+     */
+    if ((cs->exception_index != HEX_EVENT_NONE) ||
+        (cpu_has_work(cs))) {
+        qemu_log_mask(CPU_LOG_INT,
+                      "%s: thread %d skipping WAIT mode, have some work\n",
+                      __func__, env->threadId);
+        return;
+    }
+    set_wait_mode(env);
+    env->wait_next_pc = PC + 4;
+
+    cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+}
+
+static void hexagon_resume_thread(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    clear_wait_mode(env);
+    /*
+     * The wait instruction keeps the PC pointing to itself
+     * so that it has an opportunity to check for interrupts.
+     *
+     * When we come out of wait mode, adjust the PC to the
+     * next executable instruction.
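+     * (wait_next_pc was recorded as PC + 4 by hexagon_wait_thread()
+     * when this thread entered wait mode.)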
+     */
+    env->gpr[HEX_REG_PC] = env->wait_next_pc;
+    ASSERT_DIRECT_TO_GUEST_UNSET(env, cs->exception_index);
+    cs->halted = false;
+    cs->exception_index = HEX_EVENT_NONE;
+    qemu_cpu_kick(cs);
+}
+
+void hexagon_resume_threads(CPUHexagonState *current_env, uint32_t mask)
+{
+    CPUState *cs;
+    CPUHexagonState *env;
+
+    g_assert(bql_locked());
+    CPU_FOREACH(cs) {
+        env = cpu_env(cs);
+        g_assert(env->threadId < THREADS_MAX);
+        if ((mask & (0x1 << env->threadId))) {
+            if (get_exe_mode(env) == HEX_EXE_MODE_WAIT) {
+                hexagon_resume_thread(env);
+            }
+        }
+    }
+}
+
+
+static MMVector VRegs[VECTOR_UNIT_MAX][NUM_VREGS];
+static MMQReg QRegs[VECTOR_UNIT_MAX][NUM_QREGS];
+
+/*
+ * EXT_CONTEXTS
+ * SSR.XA  2              4              6              8
+ * 000     HVX Context 0  HVX Context 0  HVX Context 0  HVX Context 0
+ * 001     HVX Context 1  HVX Context 1  HVX Context 1  HVX Context 1
+ * 010     HVX Context 0  HVX Context 2  HVX Context 2  HVX Context 2
+ * 011     HVX Context 1  HVX Context 3  HVX Context 3  HVX Context 3
+ * 100     HVX Context 0  HVX Context 0  HVX Context 4  HVX Context 4
+ * 101     HVX Context 1  HVX Context 1  HVX Context 5  HVX Context 5
+ * 110     HVX Context 0  HVX Context 2  HVX Context 2  HVX Context 6
+ * 111     HVX Context 1  HVX Context 3  HVX Context 3  HVX Context 7
+ */
+static int parse_context_idx(CPUHexagonState *env, uint8_t XA)
+{
+    int ret;
+    HexagonCPU *cpu = env_archcpu(env);
+    if (cpu->hvx_contexts == 6 && XA >= 6) {
+        ret = XA - 6 + 2;
+    } else {
+        ret = XA % cpu->hvx_contexts;
+    }
+    g_assert(ret >= 0 && ret < VECTOR_UNIT_MAX);
+    return ret;
+}
+
+static void check_overcommitted_hvx(CPUHexagonState *env, uint32_t ssr)
+{
+    if (!GET_FIELD(SSR_XE, ssr)) {
+        return;
+    }
+
+    uint8_t XA = GET_SSR_FIELD(SSR_XA, ssr);
+
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *env_ = cpu_env(cs);
+        if (env_ == env) {
+            continue;
+        }
+        /* Check if another thread has the XE bit set and same XA */
+        uint32_t ssr_ = arch_get_system_reg(env_, HEX_SREG_SSR);
+        if (GET_SSR_FIELD(SSR_XE2, ssr_) && GET_FIELD(SSR_XA, ssr_) == XA) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "setting SSR.XA '%d' on thread %d but thread"
+                          " %d has same extension active\n", XA, env->threadId,
+                          env_->threadId);
+        }
+    }
+}
+
+void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old)
+{
+    g_assert(bql_locked());
+
+    bool old_EX = GET_SSR_FIELD(SSR_EX, old);
+    bool old_UM = GET_SSR_FIELD(SSR_UM, old);
+    bool old_GM = GET_SSR_FIELD(SSR_GM, old);
+    bool old_IE = GET_SSR_FIELD(SSR_IE, old);
+    uint8_t old_XA = GET_SSR_FIELD(SSR_XA, old);
+    bool new_EX = GET_SSR_FIELD(SSR_EX, new);
+    bool new_UM = GET_SSR_FIELD(SSR_UM, new);
+    bool new_GM = GET_SSR_FIELD(SSR_GM, new);
+    bool new_IE = GET_SSR_FIELD(SSR_IE, new);
+    uint8_t new_XA = GET_SSR_FIELD(SSR_XA, new);
+
+    if ((old_EX != new_EX) ||
+        (old_UM != new_UM) ||
+        (old_GM != new_GM)) {
+        hex_mmu_mode_change(env);
+    }
+
+    uint8_t old_asid = GET_SSR_FIELD(SSR_ASID, old);
+    uint8_t new_asid = GET_SSR_FIELD(SSR_ASID, new);
+    if (new_asid != old_asid) {
+        CPUState *cs = env_cpu(env);
+        tlb_flush(cs);
+    }
+
+    if (old_XA != new_XA) {
+        int old_unit = parse_context_idx(env, old_XA);
+        int new_unit = parse_context_idx(env, new_XA);
+
+        /* Ownership exchange */
+        memcpy(VRegs[old_unit], env->VRegs, sizeof(env->VRegs));
+        memcpy(QRegs[old_unit], env->QRegs, sizeof(env->QRegs));
+        memcpy(env->VRegs, VRegs[new_unit], sizeof(env->VRegs));
+        memcpy(env->QRegs, QRegs[new_unit], sizeof(env->QRegs));
+
+        check_overcommitted_hvx(env, new);
+    }
+
+    /* See if the interrupts have been enabled or we have exited EX mode */
+    if ((new_IE && 
!old_IE) || + (!new_EX && old_EX)) { + hex_interrupt_update(env); + } +} + +void clear_wait_mode(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl); + thread_wait_mask &= ~(0x1 << env->threadId); + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_W, thread_wait_mask); +} + +void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause) +{ + g_assert(bql_locked()); + + const uint32_t old = arch_get_system_reg(env, HEX_SREG_SSR); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_EX, 1); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_CAUSE, cause); + const uint32_t new = arch_get_system_reg(env, HEX_SREG_SSR); + + hexagon_modify_ssr(env, new, old); +} + + +int get_exe_mode(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + target_ulong modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + bool E_bit = thread_enabled_mask & (0x1 << env->threadId); + uint32_t thread_wait_mask = GET_FIELD(MODECTL_W, modectl); + bool W_bit = thread_wait_mask & (0x1 << env->threadId); + target_ulong isdbst = arch_get_system_reg(env, HEX_SREG_ISDBST); + uint32_t debugmode = GET_FIELD(ISDBST_DEBUGMODE, isdbst); + bool D_bit = debugmode & (0x1 << env->threadId); + + /* Figure 4-2 */ + if (!D_bit && !W_bit && !E_bit) { + return HEX_EXE_MODE_OFF; + } + if (!D_bit && !W_bit && E_bit) { + return HEX_EXE_MODE_RUN; + } + if (!D_bit && W_bit && E_bit) { + return HEX_EXE_MODE_WAIT; + } + if (D_bit && !W_bit && E_bit) { + return HEX_EXE_MODE_DEBUG; + } + g_assert_not_reached(); +} + +static void set_enable_mask(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + thread_enabled_mask |= 0x1 << env->threadId; + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_E, thread_enabled_mask); +} + +static uint32_t clear_enable_mask(CPUHexagonState *env) +{ + g_assert(bql_locked()); + + const uint32_t modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + uint32_t thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + thread_enabled_mask &= ~(0x1 << env->threadId); + SET_SYSTEM_FIELD(env, HEX_SREG_MODECTL, MODECTL_E, thread_enabled_mask); + return thread_enabled_mask; +} +static void do_start_thread(CPUState *cs, run_on_cpu_data tbd) +{ + BQL_LOCK_GUARD(); + + CPUHexagonState *env = cpu_env(cs); + + hexagon_cpu_soft_reset(env); + + set_enable_mask(env); + + cs->halted = 0; + cs->exception_index = HEX_EVENT_NONE; + cpu_resume(cs); +} + +void hexagon_start_threads(CPUHexagonState *current_env, uint32_t mask) +{ + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *env = cpu_env(cs); + if (!(mask & (0x1 << env->threadId))) { + continue; + } + + if (current_env->threadId != env->threadId) { + async_safe_run_on_cpu(cs, do_start_thread, RUN_ON_CPU_NULL); + } + } +} + +/* + * When we have all threads stopped, the return + * value to the shell is register 2 from thread 0. 
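+ * The guest runtime is expected to have placed the exit status in r2
+ * before stopping the last thread; hexagon_stop_thread() passes that
+ * value straight to exit().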
+ */
+static target_ulong get_thread0_r2(void)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *thread = cpu_env(cs);
+        if (thread->threadId == 0) {
+            return thread->gpr[2];
+        }
+    }
+    g_assert_not_reached();
+}
+
+void hexagon_stop_thread(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+
+    uint32_t thread_enabled_mask = clear_enable_mask(env);
+    CPUState *cs = env_cpu(env);
+    cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    if (!thread_enabled_mask) {
+        /* All threads are stopped, exit */
+        exit(get_thread0_r2());
+    }
+}
+
+static bool sys_in_monitor_mode_ssr(uint32_t ssr)
+{
+    return GET_SSR_FIELD(SSR_EX, ssr) != 0 || GET_SSR_FIELD(SSR_UM, ssr) == 0;
+}
+
+static bool sys_in_guest_mode_ssr(uint32_t ssr)
+{
+    return GET_SSR_FIELD(SSR_EX, ssr) == 0 &&
+           GET_SSR_FIELD(SSR_UM, ssr) != 0 &&
+           GET_SSR_FIELD(SSR_GM, ssr) != 0;
+}
+
+static bool sys_in_user_mode_ssr(uint32_t ssr)
+{
+    return GET_SSR_FIELD(SSR_EX, ssr) == 0 &&
+           GET_SSR_FIELD(SSR_UM, ssr) != 0 &&
+           GET_SSR_FIELD(SSR_GM, ssr) == 0;
+}
+
+int get_cpu_mode(CPUHexagonState *env)
+{
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+
+    if (sys_in_monitor_mode_ssr(ssr)) {
+        return HEX_CPU_MODE_MONITOR;
+    } else if (sys_in_guest_mode_ssr(ssr)) {
+        return HEX_CPU_MODE_GUEST;
+    } else if (sys_in_user_mode_ssr(ssr)) {
+        return HEX_CPU_MODE_USER;
+    }
+    return HEX_CPU_MODE_MONITOR;
+}
+
+#endif
diff --git a/target/hexagon/cpu_helper.h b/target/hexagon/cpu_helper.h
new file mode 100644
index 000000000000..f86f5e744fd4
--- /dev/null
+++ b/target/hexagon/cpu_helper.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEXAGON_CPU_HELPER_H
+#define HEXAGON_CPU_HELPER_H
+
+void hexagon_read_memory(CPUHexagonState *env, target_ulong vaddr, int size,
+                         void *retptr, uintptr_t retaddr);
+void hexagon_write_memory(CPUHexagonState *env, target_ulong vaddr,
+                          int size, uint64_t data, uintptr_t retaddr);
+void hexagon_touch_memory(CPUHexagonState *env, uint32_t start_addr,
+                          uint32_t length, uintptr_t retaddr);
+uint32_t hexagon_get_pmu_counter(CPUHexagonState *cur_env, int index);
+uint64_t hexagon_get_sys_pcycle_count(CPUHexagonState *env);
+uint32_t hexagon_get_sys_pcycle_count_low(CPUHexagonState *env);
+uint32_t hexagon_get_sys_pcycle_count_high(CPUHexagonState *env);
+void hexagon_set_sys_pcycle_count(CPUHexagonState *env, uint64_t);
+void hexagon_set_sys_pcycle_count_low(CPUHexagonState *env, uint32_t);
+void hexagon_set_sys_pcycle_count_high(CPUHexagonState *env, uint32_t);
+void hexagon_modify_ssr(CPUHexagonState *env, uint32_t new, uint32_t old);
+int get_cpu_mode(CPUHexagonState *env);
+int get_exe_mode(CPUHexagonState *env);
+void clear_wait_mode(CPUHexagonState *env);
+void hexagon_ssr_set_cause(CPUHexagonState *env, uint32_t cause);
+void hexagon_start_threads(CPUHexagonState *env, uint32_t mask);
+void hexagon_stop_thread(CPUHexagonState *env);
+void hexagon_wait_thread(CPUHexagonState *env, target_ulong PC);
+void hexagon_resume_threads(CPUHexagonState *env, uint32_t mask);
+
+static inline void arch_set_thread_reg(CPUHexagonState *env, uint32_t reg,
+                                       uint32_t val)
+{
+    g_assert(reg < TOTAL_PER_THREAD_REGS);
+    env->gpr[reg] = val;
+}
+
+static inline uint32_t arch_get_thread_reg(CPUHexagonState *env, uint32_t reg)
+{
+    g_assert(reg < TOTAL_PER_THREAD_REGS);
+    return env->gpr[reg];
+}
+
+#ifndef CONFIG_USER_ONLY
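+/*
+ * System registers below HEX_SREG_GLB_START are per-thread state
+ * (t_sreg); the rest are chip-wide and live in the g_sreg array that
+ * every vCPU shares (CPU 0 allocates it in hexagon_cpu_realize() and
+ * the other threads alias it).
+ */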
+static inline void arch_set_system_reg(CPUHexagonState *env, uint32_t reg,
+                                       uint32_t val)
+{
+    g_assert(reg < NUM_SREGS);
+    if (reg < HEX_SREG_GLB_START) {
+        env->t_sreg[reg] = val;
+    } else {
+        env->g_sreg[reg] = val;
+    }
+}
+#endif
+
+uint32_t arch_get_system_reg(CPUHexagonState *env, uint32_t reg);
+
+#endif
diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c
index 23deba2426f8..41bf03c9b513 100644
--- a/target/hexagon/decode.c
+++ b/target/hexagon/decode.c
@@ -193,6 +193,8 @@ static bool decode_opcode_can_jump(int opcode)
     if ((GET_ATTRIB(opcode, A_JUMP)) ||
         (GET_ATTRIB(opcode, A_CALL)) ||
         (opcode == J2_trap0) ||
+        (opcode == J2_trap1) ||
+        (opcode == J2_rte) ||
         (opcode == J2_pause)) {
         /* Exception to A_JUMP attribute */
         if (opcode == J4_hintjumpr) {
@@ -236,9 +238,9 @@ static void decode_set_insn_attr_fields(Packet *pkt)
         if (GET_ATTRIB(opcode, A_SCALAR_STORE) &&
             !GET_ATTRIB(opcode, A_MEMSIZE_0B)) {
             if (pkt->insn[i].slot == 0) {
-                pkt->pkt_has_store_s0 = true;
+                pkt->pkt_has_scalar_store_s0 = true;
             } else {
-                pkt->pkt_has_store_s1 = true;
+                pkt->pkt_has_scalar_store_s1 = true;
             }
         }
     }
@@ -371,6 +373,18 @@ static void decode_shuffle_for_execution(Packet *packet)
             break;
         }
     }
+    /*
+     * And at the very end, move any RTEs, since they update
+     * user/supervisor mode.
+     */
+#if !defined(CONFIG_USER_ONLY)
+    for (i = 0; i < last_insn; i++) {
+        if (packet->insn[i].opcode == J2_rte) {
+            decode_send_insn_to(packet, i, last_insn);
+            break;
+        }
+    }
+#endif
 }
 
 static void
diff --git a/target/hexagon/gdbstub.c b/target/hexagon/gdbstub.c
index 12d6b3bbcbb1..8476199b753e 100644
--- a/target/hexagon/gdbstub.c
+++ b/target/hexagon/gdbstub.c
@@ -76,6 +76,51 @@ int hexagon_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
     g_assert_not_reached();
 }
 
+#ifndef CONFIG_USER_ONLY
+int hexagon_sys_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
+{
+    CPUHexagonState *env = cpu_env(cs);
+
+    if (n < NUM_SREGS) {
+        return gdb_get_regl(mem_buf, hexagon_sreg_read(env, n));
+    }
+    n -= NUM_SREGS;
+
+    if (n < NUM_GREGS) {
+        return gdb_get_regl(mem_buf, hexagon_greg_read(env, n));
+    }
+    n -= NUM_GREGS;
+
+    n -= TOTAL_PER_THREAD_REGS;
+
+    if (n < NUM_PREGS) {
+        /* A read must copy out of env, not store into it */
+        return gdb_get_reg8(mem_buf, env->pred[n] & 0xff);
+    }
+
+    n -= NUM_PREGS;
+
+    g_assert_not_reached();
+}
+
+int hexagon_sys_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n)
+{
+    CPUHexagonState *env = cpu_env(cs);
+
+    if (n < NUM_SREGS) {
+        hexagon_gdb_sreg_write(env, n, ldtul_p(mem_buf));
+        return sizeof(target_ulong);
+    }
+    n -= NUM_SREGS;
+
+    if (n < NUM_GREGS) {
+        env->greg[n] = ldtul_p(mem_buf);
+        /* Return the number of bytes consumed, not the value written */
+        return sizeof(target_ulong);
+    }
+    n -= NUM_GREGS;
+
+    g_assert_not_reached();
+}
+#endif
+
 static int gdb_get_vreg(CPUHexagonState *env, GByteArray *mem_buf, int n)
 {
     int total = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 3ac7cc2cfe57..dfdf5f3b87ba 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -22,6 +22,8 @@ import string
 import hex_common
 
+def has_analyze_func(reg, mode):
+    return callable(getattr(reg, f"analyze_{mode}", None))
 
 ##
 ## Generate the code to analyze the instruction
@@ -42,6 +44,14 @@ def gen_analyze_func(f, tag, regs, imms):
     f.write(f"static void analyze_{tag}(DisasContext *ctx)\n")
     f.write("{\n")
 
+    if hex_common.tag_ignore(tag):
+        f.write("}\n\n")
+        return
+
+    if ("A_PRIV" in hex_common.attribdict[tag] or
+        "A_GUEST" in hex_common.attribdict[tag]):
+        f.write("#ifndef CONFIG_USER_ONLY\n")
+
     f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
 
     if (hex_common.is_hvx_insn(tag)):
         if hex_common.has_hvx_helper(tag):
@@ -58,22 +68,27 @@ def gen_analyze_func(f, tag, regs, imms):
     for regno, register in enumerate(regs):
         reg_type, reg_id = register
         reg = hex_common.get_register(tag, reg_type, reg_id)
-        reg.decl_reg_num(f, regno)
+        if has_analyze_func(reg, "read") or has_analyze_func(reg, "write"):
+            reg.decl_reg_num(f, regno)
 
     ## Analyze the register reads
     for regno, register in enumerate(regs):
         reg_type, reg_id = register
         reg = hex_common.get_register(tag, reg_type, reg_id)
-        if reg.is_read():
+        if reg.is_read() and has_analyze_func(reg, "read"):
             reg.analyze_read(f, regno)
 
     ## Analyze the register writes
     for regno, register in enumerate(regs):
         reg_type, reg_id = register
         reg = hex_common.get_register(tag, reg_type, reg_id)
-        if reg.is_written():
+        if reg.is_written() and has_analyze_func(reg, "write"):
             reg.analyze_write(f, tag, regno)
 
+    if ("A_PRIV" in hex_common.attribdict[tag] or
+        "A_GUEST" in hex_common.attribdict[tag]):
+        f.write("#endif /* !CONFIG_USER_ONLY */\n")
+
     f.write("}\n\n")
diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py
index c1f806ac4b25..32e3bac74625 100755
--- a/target/hexagon/gen_helper_funcs.py
+++ b/target/hexagon/gen_helper_funcs.py
@@ -69,7 +69,7 @@ def gen_helper_function(f, tag, tagregs, tagimms):
     if hex_common.need_slot(tag):
         if "A_LOAD" in hex_common.attribdict[tag]:
             f.write(hex_common.code_fmt(f"""\
-                bool pkt_has_store_s1 = slotval & 0x1;
+                bool pkt_has_scalar_store_s1 = slotval & 0x1;
             """))
         f.write(hex_common.code_fmt(f"""\
             uint32_t slot = slotval >> 1;
@@ -109,26 +109,23 @@ def main():
     tagimms = hex_common.get_tagimms()
 
     with open(args.out, "w") as f:
-        for tag in hex_common.tags:
-            ## Skip the priv instructions
-            if "A_PRIV" in hex_common.attribdict[tag]:
+        for tag in hex_common.get_user_tags():
+            if hex_common.tag_ignore(tag):
                 continue
-            ## Skip the guest instructions
-            if "A_GUEST" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the diag instructions
-            if tag == "Y6_diag":
-                continue
-            if tag == "Y6_diag0":
+            if hex_common.skip_qemu_helper(tag):
                 continue
-            if tag == "Y6_diag1":
+            if hex_common.is_idef_parser_enabled(tag):
                 continue
+            gen_helper_function(f, tag, tagregs, tagimms)
+
+        f.write("#if !defined(CONFIG_USER_ONLY)\n")
+        for tag in hex_common.get_sys_tags():
             if hex_common.skip_qemu_helper(tag):
                 continue
             if hex_common.is_idef_parser_enabled(tag):
                 continue
             gen_helper_function(f, tag, tagregs, tagimms)
+        f.write("#endif\n")
 
 
 if __name__ == "__main__":
diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py
index 77f8e0a6a322..59c8bdd05c0f 100755
--- a/target/hexagon/gen_helper_protos.py
+++ b/target/hexagon/gen_helper_protos.py
@@ -59,27 +59,28 @@ def main():
     tagimms = hex_common.get_tagimms()
 
     with open(args.out, "w") as f:
-        for tag in hex_common.tags:
-            ## Skip the priv instructions
-            if "A_PRIV" in hex_common.attribdict[tag]:
+        for tag in hex_common.get_user_tags():
+            if hex_common.tag_ignore(tag):
                 continue
-            ## Skip the guest instructions
-            if "A_GUEST" in hex_common.attribdict[tag]:
-                continue
-            ## Skip the diag instructions
-            if tag == "Y6_diag":
-                continue
-            if tag == "Y6_diag0":
+
+            if hex_common.skip_qemu_helper(tag):
                 continue
-            if tag == "Y6_diag1":
+            if hex_common.is_idef_parser_enabled(tag):
                 continue
+            gen_helper_prototype(f, tag, tagregs, tagimms)
+
+        f.write("#if !defined(CONFIG_USER_ONLY)\n")
+        for tag in hex_common.get_sys_tags():
+            if hex_common.tag_ignore(tag):
+                continue
if hex_common.skip_qemu_helper(tag): continue if hex_common.is_idef_parser_enabled(tag): continue gen_helper_prototype(f, tag, tagregs, tagimms) + f.write("#endif\n") if __name__ == "__main__": diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index 2f6e826f76d6..32bce9b00286 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -60,6 +60,8 @@ def main(): f.write('#include "macros.h.inc"\n\n') for tag in hex_common.tags: + if hex_common.tag_ignore(tag): + continue ## Skip the priv instructions if "A_PRIV" in hex_common.attribdict[tag]: continue diff --git a/target/hexagon/gen_op_attribs.py b/target/hexagon/gen_op_attribs.py index bbbb02df3a23..94dd1f876b21 100755 --- a/target/hexagon/gen_op_attribs.py +++ b/target/hexagon/gen_op_attribs.py @@ -38,7 +38,7 @@ def main(): ## Generate all the attributes associated with each instruction ## with open(args.out, "w") as f: - for tag in hex_common.tags: + for tag in hex_common.get_all_tags(): f.write( f"OP_ATTRIB({tag},ATTRIBS(" f'{",".join(sorted(hex_common.attribdict[tag]))}))\n' diff --git a/target/hexagon/gen_opcodes_def.py b/target/hexagon/gen_opcodes_def.py index 94a19ff412e2..17ba3f9db95e 100755 --- a/target/hexagon/gen_opcodes_def.py +++ b/target/hexagon/gen_opcodes_def.py @@ -37,7 +37,10 @@ def main(): ## Generate a list of all the opcodes ## with open(args.out, "w") as f: - for tag in hex_common.tags: + for tag in hex_common.get_user_tags(): + f.write(f"OPCODE({tag}),\n") + + for tag in hex_common.get_sys_tags(): f.write(f"OPCODE({tag}),\n") diff --git a/target/hexagon/gen_semantics.c b/target/hexagon/gen_semantics.c index 4a2bdd70e9cc..ed66ae4ec241 100644 --- a/target/hexagon/gen_semantics.c +++ b/target/hexagon/gen_semantics.c @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) /* * Process the macros for HVX */ -#define DEF_MACRO(MNAME, BEH, ATTRS) \ +#define DEF_MACRO(MNAME, PARAMS, SDESC, LDESC, BEH, ATTRS) \ fprintf(outfile, "MACROATTRIB( \\\n" \ " \"%s\", \\\n" \ " \"\"\"%s\"\"\", \\\n" \ diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 8a3b801287c7..146aadc73764 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -488,6 +488,7 @@ /* dczeroa clears the 32 byte cache line at the address given */ #define fGEN_TCG_Y2_dczeroa(SHORTCODE) SHORTCODE +#define fGEN_TCG_Y2_dczeroa_nt(SHORTCODE) SHORTCODE /* In linux-user mode, these are not modelled, suppress compiler warning */ #define fGEN_TCG_Y2_dcinva(SHORTCODE) \ @@ -1133,6 +1134,9 @@ RdV, tcg_constant_tl(0)); \ } while (0) +#define fGEN_TCG_Y2_break(SHORTCODE) +#define fGEN_TCG_J2_unpause(SHORTCODE) + #define fGEN_TCG_J2_pause(SHORTCODE) \ do { \ uiV = uiV; \ @@ -1342,6 +1346,11 @@ RsV = RsV; \ uiV = uiV; \ } while (0) +#define fGEN_TCG_Y2_dcfetchbo_nt(SHORTCODE) \ + do { \ + RsV = RsV; \ + uiV = uiV; \ + } while (0) #define fGEN_TCG_L2_loadw_aq(SHORTCODE) SHORTCODE #define fGEN_TCG_L4_loadd_aq(SHORTCODE) SHORTCODE @@ -1361,13 +1370,6 @@ #define fGEN_TCG_S2_storew_rl_st_vi(SHORTCODE) SHORTCODE #define fGEN_TCG_S4_stored_rl_st_vi(SHORTCODE) SHORTCODE -#define fGEN_TCG_J2_trap0(SHORTCODE) \ - do { \ - uiV = uiV; \ - tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->pkt->pc); \ - TCGv excp = tcg_constant_tl(HEX_EVENT_TRAP0); \ - gen_helper_raise_exception(tcg_env, excp); \ - } while (0) #endif #define fGEN_TCG_A2_nop(SHORTCODE) do { } while (0) diff --git a/target/hexagon/gen_tcg_func_table.py b/target/hexagon/gen_tcg_func_table.py index 
299a39b1aa02..70c8db5c44c8 100755 --- a/target/hexagon/gen_tcg_func_table.py +++ b/target/hexagon/gen_tcg_func_table.py @@ -41,19 +41,9 @@ def main(): f.write("#define HEXAGON_FUNC_TABLE_H\n\n") f.write("const SemanticInsn opcode_genptr[XX_LAST_OPCODE] = {\n") + for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: - continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - if tag == "Y6_diag": - continue - if tag == "Y6_diag0": - continue - if tag == "Y6_diag1": + if hex_common.tag_ignore(tag): continue f.write(f" [{tag}] = generate_{tag},\n") diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index c2ba91ddc044..65bfa046b867 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -21,7 +21,7 @@ import re import string import hex_common - +from textwrap import dedent ## ## Generate the TCG code to call the helper @@ -50,6 +50,18 @@ def gen_tcg_func(f, tag, regs, imms): f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n") + if "A_PRIV" in hex_common.attribdict[tag]: + f.write(dedent("""\ +#ifdef CONFIG_USER_ONLY + hex_gen_exception_end_tb(ctx, HEX_CAUSE_PRIV_USER_NO_SINSN); +#else +""")) + if "A_GUEST" in hex_common.attribdict[tag]: + f.write(dedent("""\ +#ifdef CONFIG_USER_ONLY + hex_gen_exception_end_tb(ctx, HEX_CAUSE_PRIV_USER_NO_GINSN); +#else +""")) if hex_common.need_ea(tag): f.write(" TCGv EA G_GNUC_UNUSED = tcg_temp_new();\n") @@ -97,6 +109,11 @@ def gen_tcg_func(f, tag, regs, imms): if reg.is_written(): reg.log_write(f, tag) + if ( + "A_PRIV" in hex_common.attribdict[tag] + or "A_GUEST" in hex_common.attribdict[tag] + ): + f.write("#endif /* CONFIG_USER_ONLY */\n") f.write("}\n\n") @@ -121,18 +138,7 @@ def main(): f.write('#include "idef-generated-emitter.h.inc"\n\n') for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: - continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - if tag == "Y6_diag": - continue - if tag == "Y6_diag0": - continue - if tag == "Y6_diag1": + if hex_common.tag_ignore(tag): continue gen_def_tcg_func(f, tag, tagregs, tagimms) diff --git a/target/hexagon/gen_tcg_sys.h b/target/hexagon/gen_tcg_sys.h new file mode 100644 index 000000000000..e56553462fb0 --- /dev/null +++ b/target/hexagon/gen_tcg_sys.h @@ -0,0 +1,128 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_GEN_TCG_SYS_H +#define HEXAGON_GEN_TCG_SYS_H + +/* System mode instructions */ +#define fGEN_TCG_Y2_swi(SHORTCODE) \ + gen_helper_swi(tcg_env, RsV) + +#define fGEN_TCG_Y2_cswi(SHORTCODE) \ + gen_helper_cswi(tcg_env, RsV) + +#define fGEN_TCG_Y2_ciad(SHORTCODE) \ + gen_helper_ciad(tcg_env, RsV) + +#define fGEN_TCG_Y4_siad(SHORTCODE) \ + gen_helper_siad(tcg_env, RsV) + +#define fGEN_TCG_Y2_iassignw(SHORTCODE) \ + gen_helper_iassignw(tcg_env, RsV) + +#define fGEN_TCG_Y2_iassignr(SHORTCODE) \ + gen_helper_iassignr(RdV, tcg_env, RsV) + +#define fGEN_TCG_Y2_getimask(SHORTCODE) \ + gen_helper_getimask(RdV, tcg_env, RsV) + +#define fGEN_TCG_Y2_setimask(SHORTCODE) \ + gen_helper_setimask(tcg_env, PtV, RsV) + +#define fGEN_TCG_Y2_setprio(SHORTCODE) \ + gen_helper_setprio(tcg_env, PtV, RsV) + +#define fGEN_TCG_Y2_crswap0(SHORTCODE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_mov_tl(tmp, RxV); \ + tcg_gen_mov_tl(RxV, hex_t_sreg[HEX_SREG_SGP0]); \ + tcg_gen_mov_tl(ctx->t_sreg_new_value[HEX_SREG_SGP0], tmp); \ + } while (0) + +#define fGEN_TCG_Y4_crswap1(SHORTCODE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_mov_tl(tmp, RxV); \ + tcg_gen_mov_tl(RxV, hex_t_sreg[HEX_SREG_SGP1]); \ + tcg_gen_mov_tl(ctx->t_sreg_new_value[HEX_SREG_SGP1], tmp); \ + } while (0) + +#define fGEN_TCG_Y4_crswap10(SHORTCODE) \ + do { \ + g_assert_not_reached(); \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + tcg_gen_mov_i64(tmp, RxxV); \ + tcg_gen_concat_i32_i64(RxxV, \ + hex_t_sreg[HEX_SREG_SGP0], \ + hex_t_sreg[HEX_SREG_SGP1]); \ + tcg_gen_extrl_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP0], tmp); \ + tcg_gen_extrh_i64_i32(ctx->t_sreg_new_value[HEX_SREG_SGP1], tmp); \ + } while (0) + +#define fGEN_TCG_Y2_wait(SHORTCODE) \ + do { \ + RsV = RsV; \ + gen_helper_wait(tcg_env, tcg_constant_tl(ctx->pkt->pc)); \ + } while (0) + +#define fGEN_TCG_Y2_resume(SHORTCODE) \ + gen_helper_resume(tcg_env, RsV) + +#define fGEN_TCG_Y2_start(SHORTCODE) \ + gen_helper_start(tcg_env, RsV) + +#define fGEN_TCG_Y2_stop(SHORTCODE) \ + do { \ + RsV = RsV; \ + gen_helper_stop(tcg_env); \ + } while (0) + +#define fGEN_TCG_Y2_tfrscrr(SHORTCODE) \ + tcg_gen_mov_tl(RdV, SsV) + +#define fGEN_TCG_Y2_tfrsrcr(SHORTCODE) \ + tcg_gen_mov_tl(SdV, RsV) + +#define fGEN_TCG_Y4_tfrscpp(SHORTCODE) \ + tcg_gen_mov_i64(RddV, SssV) + +#define fGEN_TCG_Y4_tfrspcp(SHORTCODE) \ + tcg_gen_mov_i64(SddV, RssV) + +#define fGEN_TCG_G4_tfrgcrr(SHORTCODE) \ + tcg_gen_mov_tl(RdV, GsV) + +#define fGEN_TCG_G4_tfrgrcr(SHORTCODE) \ + tcg_gen_mov_tl(GdV, RsV) + +#define fGEN_TCG_G4_tfrgcpp(SHORTCODE) \ + tcg_gen_mov_i64(RddV, GssV) + +#define fGEN_TCG_G4_tfrgpcp(SHORTCODE) \ + tcg_gen_mov_i64(GddV, RssV) + + +/* + * rte (return from exception) + * Clear the EX bit in SSR + * Jump to ELR + */ +#define fGEN_TCG_J2_rte(SHORTCODE) \ + do { \ + TCGv new_ssr = tcg_temp_new(); \ + tcg_gen_deposit_tl(new_ssr, hex_t_sreg[HEX_SREG_SSR], \ + tcg_constant_tl(0), \ + reg_field_info[SSR_EX].offset, \ + reg_field_info[SSR_EX].width); \ + gen_log_sreg_write(ctx, HEX_SREG_SSR, new_ssr); \ + gen_jumpr(ctx, hex_t_sreg[HEX_SREG_ELR]); \ + } while (0) + +#define fGEN_TCG_Y4_nmi(SHORTCODE) \ + gen_helper_nmi(tcg_env, RsV) + +#endif diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index 2c5e15cfcf6f..1dde04529bbe 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -23,6 +23,7 @@ #include "exec/helper-gen.h" #include "insn.h" #include "opcodes.h" +#include "sys_macros.h" #include "translate.h" #define 
QEMU_GENERATE /* Used internally by macros.h */ #include "macros.h" @@ -30,6 +31,10 @@ #undef QEMU_GENERATE #include "gen_tcg.h" #include "gen_tcg_hvx.h" +#ifndef CONFIG_USER_ONLY +#include "gen_tcg_sys.h" +#endif + #include "genptr.h" TCGv gen_read_reg(TCGv result, int num) @@ -128,6 +133,164 @@ TCGv get_result_pred(DisasContext *ctx, int pnum) } } +#ifndef CONFIG_USER_ONLY +G_GNUC_UNUSED +static bool greg_writable(int rnum, bool pair) +{ + if (pair) { + if (rnum < HEX_GREG_G3) { + return true; + } + qemu_log_mask(LOG_UNIMP, + "Warning: ignoring write to guest register pair G%d:%d\n", + rnum + 1, rnum); + } else { + if (rnum <= HEX_GREG_G3) { + return true; + } + qemu_log_mask(LOG_UNIMP, + "Warning: ignoring write to guest register G%d\n", rnum); + } + return false; +} + +G_GNUC_UNUSED +static void check_greg_impl(int rnum, bool pair) +{ + if (pair && (!greg_implemented(rnum) || !greg_implemented(rnum + 1))) { + qemu_log_mask(LOG_UNIMP, + "Warning: guest register pair G%d:%d is unimplemented or " + "reserved. Read will yield 0.\n", + rnum + 1, rnum); + } else if (!pair && !greg_implemented(rnum)) { + qemu_log_mask(LOG_UNIMP, + "Warning: guest register G%d is unimplemented or reserved." + " Read will yield 0.\n", rnum); + } +} + +G_GNUC_UNUSED +static inline void gen_log_greg_write(DisasContext *ctx, int rnum, TCGv val) +{ + tcg_gen_mov_tl(ctx->greg_new_value[rnum], val); +} + +G_GNUC_UNUSED +static void gen_log_greg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) +{ + TCGv val32 = tcg_temp_new(); + + /* Low word */ + tcg_gen_extrl_i64_i32(val32, val); + gen_log_greg_write(ctx, rnum, val32); + + /* High word */ + tcg_gen_extrh_i64_i32(val32, val); + gen_log_greg_write(ctx, rnum + 1, val32); +} + +static const target_ulong sreg_immut_masks[NUM_SREGS] = { + [HEX_SREG_STID] = 0xff00ff00, + [HEX_SREG_ELR] = 0x00000003, + [HEX_SREG_SSR] = 0x00008000, + [HEX_SREG_CCR] = 0x10e0ff24, + [HEX_SREG_HTID] = IMMUTABLE, + [HEX_SREG_IMASK] = 0xffff0000, + [HEX_SREG_GEVB] = 0x000000ff, + [HEX_SREG_EVB] = 0x000000ff, + [HEX_SREG_MODECTL] = IMMUTABLE, + [HEX_SREG_SYSCFG] = 0x80001c00, + [HEX_SREG_IPENDAD] = IMMUTABLE, + [HEX_SREG_VID] = 0xfc00fc00, + [HEX_SREG_VID1] = 0xfc00fc00, + [HEX_SREG_BESTWAIT] = 0xfffffe00, + [HEX_SREG_SCHEDCFG] = 0xfffffef0, + [HEX_SREG_CFGBASE] = IMMUTABLE, + [HEX_SREG_REV] = IMMUTABLE, + [HEX_SREG_ISDBST] = IMMUTABLE, + [HEX_SREG_ISDBCFG0] = 0xe0000000, + [HEX_SREG_BRKPTPC0] = 0x00000003, + [HEX_SREG_BRKPTCFG0] = 0xfc007000, + [HEX_SREG_BRKPTPC1] = 0x00000003, + [HEX_SREG_BRKPTCFG1] = 0xfc007000, + [HEX_SREG_ISDBMBXIN] = IMMUTABLE, + [HEX_SREG_ISDBEN] = 0xfffffffe, + [HEX_SREG_TIMERLO] = IMMUTABLE, + [HEX_SREG_TIMERHI] = IMMUTABLE, +}; + +G_GNUC_UNUSED +static void gen_log_sreg_write(DisasContext *ctx, int rnum, TCGv val) +{ + const target_ulong reg_mask = sreg_immut_masks[rnum]; + + if (reg_mask != IMMUTABLE) { + if (rnum < HEX_SREG_GLB_START) { + gen_masked_reg_write(val, hex_t_sreg[rnum], reg_mask); + tcg_gen_mov_tl(ctx->t_sreg_new_value[rnum], val); + } else { + gen_masked_reg_write(val, hex_g_sreg[rnum], reg_mask); + gen_helper_sreg_write(tcg_env, tcg_constant_i32(rnum), val); + } + } +} + +G_GNUC_UNUSED +static void gen_log_sreg_write_pair(DisasContext *ctx, int rnum, TCGv_i64 val) +{ + TCGv val32 = tcg_temp_new(); + + /* Low word */ + tcg_gen_extrl_i64_i32(val32, val); + gen_log_sreg_write(ctx, rnum, val32); + + /* High word */ + tcg_gen_extrh_i64_i32(val32, val); + gen_log_sreg_write(ctx, rnum + 1, val32); +} + +G_GNUC_UNUSED +static void gen_read_sreg(TCGv dst, int 
reg_num) +{ + if (reg_num >= HEX_SREG_GLB_START || reg_num == HEX_SREG_BADVA) { + gen_helper_sreg_read(dst, tcg_env, tcg_constant_i32(reg_num)); + } else { + tcg_gen_mov_tl(dst, hex_t_sreg[reg_num]); + } +} + +G_GNUC_UNUSED +static void gen_read_sreg_pair(TCGv_i64 dst, int reg_num) +{ + if (reg_num < HEX_SREG_GLB_START) { + if (reg_num + 1 == HEX_SREG_BADVA) { + TCGv badva = tcg_temp_new(); + gen_helper_sreg_read(badva, tcg_env, + tcg_constant_tl(HEX_SREG_BADVA)); + tcg_gen_concat_i32_i64(dst, hex_t_sreg[reg_num], badva); + } else { + tcg_gen_concat_i32_i64(dst, hex_t_sreg[reg_num], + hex_t_sreg[reg_num + 1]); + } + } else { + gen_helper_sreg_read_pair(dst, tcg_env, tcg_constant_tl(reg_num)); + } +} + +G_GNUC_UNUSED +static void gen_read_greg(TCGv dst, int reg_num) +{ + gen_helper_greg_read(dst, tcg_env, tcg_constant_tl(reg_num)); +} + +G_GNUC_UNUSED +static void gen_read_greg_pair(TCGv_i64 dst, int reg_num) +{ + gen_helper_greg_read_pair(dst, tcg_env, tcg_constant_tl(reg_num)); +} +#endif + + void gen_log_pred_write(DisasContext *ctx, int pnum, TCGv val) { TCGv pred = get_result_pred(ctx, pnum); @@ -183,6 +346,11 @@ static inline void gen_read_ctrl_reg(DisasContext *ctx, const int reg_num, } else if (reg_num == HEX_REG_QEMU_HVX_CNT) { tcg_gen_addi_tl(dest, hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); + } else if ((reg_num == HEX_REG_PKTCNTLO) + || (reg_num == HEX_REG_PKTCNTHI) + || (reg_num == HEX_REG_UTIMERLO) + || (reg_num == HEX_REG_UTIMERHI)) { + gen_helper_creg_read(dest, tcg_env, tcg_constant_tl(reg_num)); } else { tcg_gen_mov_tl(dest, hex_gpr[reg_num]); } @@ -211,6 +379,10 @@ static inline void gen_read_ctrl_reg_pair(DisasContext *ctx, const int reg_num, tcg_gen_addi_tl(hvx_cnt, hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns); tcg_gen_concat_i32_i64(dest, hvx_cnt, hex_gpr[reg_num + 1]); + } else if ((reg_num == HEX_REG_PKTCNTLO) + || (reg_num == HEX_REG_UTIMERLO) + || (reg_num == HEX_REG_UPCYCLELO)) { + gen_helper_creg_read_pair(dest, tcg_env, tcg_constant_i32(reg_num)); } else { tcg_gen_concat_i32_i64(dest, hex_gpr[reg_num], @@ -395,7 +567,8 @@ static inline void gen_store_conditional8(DisasContext *ctx, #ifndef CONFIG_HEXAGON_IDEF_PARSER static TCGv gen_slotval(DisasContext *ctx) { - int slotval = (ctx->pkt->pkt_has_store_s1 & 1) | (ctx->insn->slot << 1); + int slotval = + (ctx->pkt->pkt_has_scalar_store_s1 & 1) | (ctx->insn->slot << 1); return tcg_constant_tl(slotval); } #endif @@ -471,14 +644,15 @@ static void gen_write_new_pc_addr(DisasContext *ctx, TCGv addr, tcg_gen_brcondi_tl(cond, pred, 0, pred_false); } + TCGv PC_wr = ctx->need_next_pc ? 
hex_next_PC : hex_gpr[HEX_REG_PC]; if (ctx->pkt->pkt_has_multi_cof) { /* If there are multiple branches in a packet, ignore the second one */ - tcg_gen_movcond_tl(TCG_COND_NE, hex_gpr[HEX_REG_PC], + tcg_gen_movcond_tl(TCG_COND_NE, PC_wr, ctx->branch_taken, tcg_constant_tl(0), - hex_gpr[HEX_REG_PC], addr); + PC_wr, addr); tcg_gen_movi_tl(ctx->branch_taken, 1); } else { - tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], addr); + tcg_gen_mov_tl(PC_wr, addr); } if (cond != TCG_COND_ALWAYS) { diff --git a/target/hexagon/helper.h b/target/hexagon/helper.h index f8baa599c88c..b381e0e116b3 100644 --- a/target/hexagon/helper.h +++ b/target/hexagon/helper.h @@ -18,7 +18,7 @@ #include "internal.h" #include "helper_protos_generated.h.inc" -DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32) +DEF_HELPER_FLAGS_3(raise_exception, TCG_CALL_NO_RETURN, noreturn, env, i32, i32) DEF_HELPER_2(commit_store, void, env, int) DEF_HELPER_3(gather_store, void, env, i32, int) DEF_HELPER_1(commit_hvx_stores, void, env) @@ -107,3 +107,31 @@ DEF_HELPER_4(probe_noshuf_load, void, env, i32, int, int) DEF_HELPER_2(probe_pkt_scalar_store_s0, void, env, int) DEF_HELPER_2(probe_hvx_stores, void, env, int) DEF_HELPER_2(probe_pkt_scalar_hvx_stores, void, env, int) + +DEF_HELPER_2(creg_read, i32, env, i32) +DEF_HELPER_2(creg_read_pair, i64, env, i32) +#if !defined(CONFIG_USER_ONLY) +DEF_HELPER_2(swi, void, env, i32) +DEF_HELPER_2(cswi, void, env, i32) +DEF_HELPER_2(ciad, void, env, i32) +DEF_HELPER_2(siad, void, env, i32) +DEF_HELPER_2(iassignw, void, env, i32) +DEF_HELPER_2(iassignr, i32, env, i32) +DEF_HELPER_2(getimask, i32, env, i32) +DEF_HELPER_3(setimask, void, env, i32, i32) +DEF_HELPER_2(sreg_read, i32, env, i32) +DEF_HELPER_2(sreg_read_pair, i64, env, i32) +DEF_HELPER_2(greg_read, i32, env, i32) +DEF_HELPER_2(greg_read_pair, i64, env, i32) +DEF_HELPER_3(sreg_write, void, env, i32, i32) +DEF_HELPER_3(sreg_write_pair, void, env, i32, i64) +DEF_HELPER_3(setprio, void, env, i32, i32) +DEF_HELPER_2(start, void, env, i32) +DEF_HELPER_1(stop, void, env) +DEF_HELPER_2(wait, void, env, i32) +DEF_HELPER_2(resume, void, env, i32) +DEF_HELPER_2(nmi, void, env, i32) +DEF_HELPER_1(resched, void, env) +DEF_HELPER_3(modify_ssr, void, env, i32, i32) +DEF_HELPER_1(pending_interrupt, void, env) +#endif diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 758e5fd12dfe..4ce275363acf 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -33,6 +33,41 @@ overrides = {} # tags with helper overrides idef_parser_enabled = {} # tags enabled for idef-parser + +def is_sysemu_tag(tag): + return "A_PRIV" in attribdict[tag] or "A_GUEST" in attribdict[tag] + + +def tag_ignore(tag): + tag_skips = ( + "Y6_diag", + "Y6_diag0", + "Y6_diag1", + ) + attr_skips = ( + "A_FAKEINSN", + "A_MAPPING", + ) + return tag in tag_skips or \ + any(attr in attribdict[tag] for attr in attr_skips) + + +def get_sys_tags(): + return sorted( + tag for tag in frozenset(tags) if is_sysemu_tag(tag) + ) + + +def get_user_tags(): + return sorted( + tag for tag in frozenset(tags) if not is_sysemu_tag(tag) + ) + + +def get_all_tags(): + return get_user_tags() + get_sys_tags() + + # We should do this as a hash for performance, # but to keep order let's keep it as a list. 
def uniquify(seq): @@ -93,8 +128,13 @@ def calculate_attribs(): add_qemu_macro_attrib("fTRAP", "A_IMPLICIT_READS_PC") add_qemu_macro_attrib("fSET_OVERFLOW", "A_IMPLICIT_WRITES_USR") add_qemu_macro_attrib("fSET_LPCFG", "A_IMPLICIT_WRITES_USR") + add_qemu_macro_attrib("fLOAD_LOCKED", "A_LLSC") + add_qemu_macro_attrib("fSTORE_LOCKED", "A_LLSC") + add_qemu_macro_attrib("fCLEAR_RTE_EX", "A_IMPLICIT_WRITES_SSR") add_qemu_macro_attrib("fLOAD", "A_SCALAR_LOAD") add_qemu_macro_attrib("fSTORE", "A_SCALAR_STORE") + add_qemu_macro_attrib("fSET_K0_LOCK", "A_IMPLICIT_READS_PC") + add_qemu_macro_attrib("fSET_TLB_LOCK", "A_IMPLICIT_READS_PC") add_qemu_macro_attrib('fLSBNEW0', 'A_IMPLICIT_READS_P0') add_qemu_macro_attrib('fLSBNEW0NOT', 'A_IMPLICIT_READS_P0') add_qemu_macro_attrib('fREAD_P0', 'A_IMPLICIT_READS_P0') @@ -213,9 +253,12 @@ def is_hvx_insn(tag): def need_env(tag): return ("A_STORE" in attribdict[tag] or "A_LOAD" in attribdict[tag] or + "A_DMA" in attribdict[tag] or "A_CVI_GATHER" in attribdict[tag] or "A_CVI_SCATTER" in attribdict[tag] or - "A_IMPLICIT_WRITES_USR" in attribdict[tag]) + "A_IMPLICIT_WRITES_USR" in attribdict[tag] or + "A_PRIV" in attribdict[tag] or + "J2_trap" in tag) def need_slot(tag): @@ -224,6 +267,9 @@ def need_slot(tag): and "A_CVI_GATHER" not in attribdict[tag] and ("A_STORE" in attribdict[tag] or "A_LOAD" in attribdict[tag]) + and tag != "L4_loadw_phys" + and tag != "L6_memcpy" + and tag != "Y6_dmlink" ): return 1 else: @@ -247,7 +293,11 @@ def need_next_PC(tag): def need_pkt_has_multi_cof(tag): - return "A_COF" in attribdict[tag] + if "A_JUMP" in attribdict[tag] or "A_CALL" in attribdict[tag]: + if tag == "J4_hintjumpr": + return False + return True + return False def need_pkt_need_commit(tag): @@ -366,12 +416,16 @@ def helper_proto_type(self): return "s32" def helper_arg_type(self): return "int32_t" + def is_pair(self): + return False class Pair(Scalar): def helper_proto_type(self): return "s64" def helper_arg_type(self): return "int64_t" + def is_pair(self): + return True class Hvx: def is_scalar_reg(self): @@ -1009,6 +1063,120 @@ def analyze_write(self, f, tag, regno): ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper); """)) +class GuestRegister(Register): + def gen_check_impl(self, f, regno): + if self.is_written(): + f.write(code_fmt(f"""\ + if (!greg_writable(insn->regno[{regno}], + {str(self.is_pair()).lower()})) {{ + return; + }} + """)) + else: + f.write(code_fmt(f"""\ +check_greg_impl(insn->regno[{regno}], {str(self.is_pair()).lower()}); + """)) + +class GuestDest(GuestRegister, Single, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_greg({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_greg_write(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_greg_write(ctx, {self.reg_num}); + """)) + +class GuestSource(GuestRegister, Single, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno); + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_greg({self.reg_tcg()}, {self.reg_num}); + """)) + +class GuestPairDest(GuestRegister, Pair, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + 
gen_read_greg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_greg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_greg_write_pair(ctx, {self.reg_num}); + """)) + +class GuestPairSource(GuestRegister, Pair, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + self.gen_check_impl(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_greg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + +class SystemDest(Register, Single, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_sreg({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_sreg_write(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_sreg_write(ctx, {self.reg_num}); + """)) + +class SystemSource(Register, Single, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno); + f.write(code_fmt(f"""\ + TCGv {self.reg_tcg()} = tcg_temp_new(); + gen_read_sreg({self.reg_tcg()}, {self.reg_num}); + """)) + +class SystemPairDest(Register, Pair, Dest): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_sreg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + def log_write(self, f, tag): + f.write(code_fmt(f"""\ + gen_log_sreg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()}); + """)) + def analyze_write(self, f, tag, regno): + f.write(code_fmt(f"""\ + ctx_log_sreg_write_pair(ctx, {self.reg_num}); + """)) + +class SystemPairSource(Register, Pair, OldSource): + def decl_tcg(self, f, tag, regno): + self.decl_reg_num(f, regno) + f.write(code_fmt(f"""\ + TCGv_i64 {self.reg_tcg()} = tcg_temp_new_i64(); + gen_read_sreg_pair({self.reg_tcg()}, {self.reg_num}); + """)) + def init_registers(): regs = { GprDest("R", "d"), @@ -1055,6 +1223,16 @@ def init_registers(): QRegSource("Q", "u"), QRegSource("Q", "v"), QRegReadWrite("Q", "x"), + + # system regs + GuestDest("G", "d"), + GuestSource("G", "s"), + GuestPairDest("G", "dd"), + GuestPairSource("G", "ss"), + SystemDest("S", "d"), + SystemSource("S", "s"), + SystemPairDest("S", "dd"), + SystemPairSource("S", "ss"), } for reg in regs: registers[f"{reg.regtype}{reg.regid}"] = reg @@ -1070,11 +1248,18 @@ def init_registers(): for reg in new_regs: new_registers[f"{reg.regtype}{reg.regid}"] = reg +def is_new_reg(tag, regid): + if regid[0] in "NO": + return True + return regid[0] == "P" and \ + f"{regid}N" in semdict[tag] and \ + f"{regid}V" not in semdict[tag] + def get_register(tag, regtype, regid): - if f"{regtype}{regid}V" in semdict[tag]: - return registers[f"{regtype}{regid}"] - else: - return new_registers[f"{regtype}{regid}"] + regid = f"{regtype}{regid}" + is_new = is_new_reg(tag, regid) + reg = new_registers[regid] if is_new else registers[regid] + return reg def helper_ret_type(tag, regs): ## If there is a scalar result, it is the return type @@ -1187,6 +1372,7 @@ def parse_common_args(desc): parser.add_argument("semantics", help="semantics file") parser.add_argument("overrides", help="overrides file") parser.add_argument("overrides_vec", help="vector overrides file") + parser.add_argument("overrides_sys", help="system overrides file") 
parser.add_argument("out", help="output file") parser.add_argument("--idef-parser", help="file of instructions translated by idef-parser") @@ -1194,6 +1380,7 @@ def parse_common_args(desc): read_semantics_file(args.semantics) read_overrides_file(args.overrides) read_overrides_file(args.overrides_vec) + read_overrides_file(args.overrides_sys) if args.idef_parser: read_idef_parser_enabled_file(args.idef_parser) calculate_attribs() diff --git a/target/hexagon/hex_interrupts.c b/target/hexagon/hex_interrupts.c new file mode 100644 index 000000000000..fd00bcfb9a57 --- /dev/null +++ b/target/hexagon/hex_interrupts.c @@ -0,0 +1,324 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "hex_interrupts.h" +#include "macros.h" +#include "sys_macros.h" +#include "system/cpus.h" + +static bool hex_is_qualified_for_int(CPUHexagonState *env, int int_num); + +static bool get_syscfg_gie(CPUHexagonState *env) +{ + target_ulong syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + return GET_SYSCFG_FIELD(SYSCFG_GIE, syscfg); +} + +static bool get_ssr_ex(CPUHexagonState *env) +{ + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + return GET_SSR_FIELD(SSR_EX, ssr); +} + +static bool get_ssr_ie(CPUHexagonState *env) +{ + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + return GET_SSR_FIELD(SSR_IE, ssr); +} + +/* Do these together so we only have to call hexagon_modify_ssr once */ +static void set_ssr_ex_cause(CPUHexagonState *env, int ex, uint32_t cause) +{ + target_ulong old = arch_get_system_reg(env, HEX_SREG_SSR); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_EX, ex); + SET_SYSTEM_FIELD(env, HEX_SREG_SSR, SSR_CAUSE, cause); + target_ulong new = arch_get_system_reg(env, HEX_SREG_SSR); + hexagon_modify_ssr(env, new, old); +} + +static bool get_iad_bit(CPUHexagonState *env, int int_num) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong iad = GET_FIELD(IPENDAD_IAD, ipendad); + return extract32(iad, int_num, 1); +} + +static void set_iad_bit(CPUHexagonState *env, int int_num, int val) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong iad = GET_FIELD(IPENDAD_IAD, ipendad); + iad = deposit32(iad, int_num, 1, val); + fSET_FIELD(ipendad, IPENDAD_IAD, iad); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static uint32_t get_ipend(CPUHexagonState *env) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + return GET_FIELD(IPENDAD_IPEND, ipendad); +} + +static inline bool get_ipend_bit(CPUHexagonState *env, int int_num) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + return extract32(ipend, int_num, 1); +} + +static void clear_ipend(CPUHexagonState *env, uint32_t mask) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + ipend &= ~mask; + fSET_FIELD(ipendad, IPENDAD_IPEND, ipend); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static void set_ipend(CPUHexagonState *env, uint32_t mask) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + ipend |= mask; + fSET_FIELD(ipendad, IPENDAD_IPEND, ipend); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} 
+ +static void set_ipend_bit(CPUHexagonState *env, int int_num, int val) +{ + target_ulong ipendad = arch_get_system_reg(env, HEX_SREG_IPENDAD); + target_ulong ipend = GET_FIELD(IPENDAD_IPEND, ipendad); + ipend = deposit32(ipend, int_num, 1, val); + fSET_FIELD(ipendad, IPENDAD_IPEND, ipend); + arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad); +} + +static bool get_imask_bit(CPUHexagonState *env, int int_num) +{ + target_ulong imask = arch_get_system_reg(env, HEX_SREG_IMASK); + return extract32(imask, int_num, 1); +} + +static uint32_t get_prio(CPUHexagonState *env) +{ + target_ulong stid = arch_get_system_reg(env, HEX_SREG_STID); + return extract32(stid, reg_field_info[STID_PRIO].offset, + reg_field_info[STID_PRIO].width); +} + +static void set_elr(CPUHexagonState *env, target_ulong val) +{ + arch_set_system_reg(env, HEX_SREG_ELR, val); +} + +static bool get_schedcfgen(CPUHexagonState *env) +{ + target_ulong schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG); + return extract32(schedcfg, reg_field_info[SCHEDCFG_EN].offset, + reg_field_info[SCHEDCFG_EN].width); +} + +static bool is_lowest_prio(CPUHexagonState *env, int int_num) +{ + uint32_t my_prio = get_prio(env); + CPUState *cs; + + CPU_FOREACH(cs) { + CPUHexagonState *hex_env = cpu_env(cs); + if (!hex_is_qualified_for_int(hex_env, int_num)) { + continue; + } + + /* Note that lower values indicate *higher* priority */ + if (my_prio < get_prio(hex_env)) { + return false; + } + } + return true; +} + +static bool hex_is_qualified_for_int(CPUHexagonState *env, int int_num) +{ + bool syscfg_gie = get_syscfg_gie(env); + bool iad = get_iad_bit(env, int_num); + bool ssr_ie = get_ssr_ie(env); + bool ssr_ex = get_ssr_ex(env); + bool imask = get_imask_bit(env, int_num); + + return syscfg_gie && !iad && ssr_ie && !ssr_ex && !imask; +} + +static void clear_pending_locks(CPUHexagonState *env) +{ + g_assert(bql_locked()); + if (env->k0_lock_state == HEX_LOCK_WAITING) { + env->k0_lock_state = HEX_LOCK_UNLOCKED; + } + if (env->tlb_lock_state == HEX_LOCK_WAITING) { + env->tlb_lock_state = HEX_LOCK_UNLOCKED; + } +} + +static bool should_not_exec(CPUHexagonState *env) +{ + return (get_exe_mode(env) == HEX_EXE_MODE_WAIT); +} + +static void restore_state(CPUHexagonState *env, bool int_accepted) +{ + CPUState *cs = env_cpu(env); + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD | CPU_INTERRUPT_SWI); + if (!int_accepted && should_not_exec(env)) { + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + } +} + +static void hex_accept_int(CPUHexagonState *env, int int_num) +{ + CPUState *cs = env_cpu(env); + target_ulong evb = arch_get_system_reg(env, HEX_SREG_EVB); + const int exe_mode = get_exe_mode(env); + const bool in_wait_mode = exe_mode == HEX_EXE_MODE_WAIT; + + set_ipend_bit(env, int_num, 0); + set_iad_bit(env, int_num, 1); + set_ssr_ex_cause(env, 1, HEX_CAUSE_INT0 | int_num); + cs->exception_index = HEX_EVENT_INT0 + int_num; + env->cause_code = HEX_EVENT_INT0 + int_num; + clear_pending_locks(env); + if (in_wait_mode) { + qemu_log_mask(CPU_LOG_INT, + "%s: thread %d resuming, exiting WAIT mode\n", + __func__, env->threadId); + set_elr(env, env->wait_next_pc); + clear_wait_mode(env); + cs->halted = false; + } else if (env->k0_lock_state == HEX_LOCK_WAITING) { + g_assert_not_reached(); + } else { + set_elr(env, env->gpr[HEX_REG_PC]); + } + env->gpr[HEX_REG_PC] = evb | (cs->exception_index << 2); + if (get_ipend(env) == 0) { + restore_state(env, true); + } +} + + +bool hex_check_interrupts(CPUHexagonState *env) +{ + CPUState *cs = env_cpu(env); + bool int_handled = 
false;
+    bool ssr_ex = get_ssr_ex(env);
+    int max_ints = 32;
+    bool schedcfgen;
+
+    /* Early exit if nothing pending */
+    if (get_ipend(env) == 0) {
+        restore_state(env, false);
+        return false;
+    }
+
+    BQL_LOCK_GUARD();
+    /* Only check priorities when schedcfgen is set */
+    schedcfgen = get_schedcfgen(env);
+    for (int i = 0; i < max_ints; i++) {
+        if (!get_iad_bit(env, i) && get_ipend_bit(env, i)) {
+            qemu_log_mask(CPU_LOG_INT,
+                          "%s: thread[%d] pc = 0x%x found int %d\n", __func__,
+                          env->threadId, env->gpr[HEX_REG_PC], i);
+            if (hex_is_qualified_for_int(env, i) &&
+                (!schedcfgen || is_lowest_prio(env, i))) {
+                qemu_log_mask(CPU_LOG_INT, "%s: thread[%d] int %d handled\n",
+                              __func__, env->threadId, i);
+                hex_accept_int(env, i);
+                int_handled = true;
+                break;
+            }
+            bool syscfg_gie = get_syscfg_gie(env);
+            bool iad = get_iad_bit(env, i);
+            bool ssr_ie = get_ssr_ie(env);
+            bool imask = get_imask_bit(env, i);
+
+            qemu_log_mask(CPU_LOG_INT,
+                          "%s: thread[%d] int %d not handled, qualified: %d, "
+                          "schedcfg_en: %d, low prio %d\n",
+                          __func__, env->threadId, i,
+                          hex_is_qualified_for_int(env, i), schedcfgen,
+                          is_lowest_prio(env, i));
+
+            qemu_log_mask(CPU_LOG_INT,
+                          "%s: thread[%d] int %d not handled, GIE %d, iad %d, "
+                          "SSR:IE %d, SSR:EX: %d, imask bit %d\n",
+                          __func__, env->threadId, i, syscfg_gie, iad, ssr_ie,
+                          ssr_ex, imask);
+        }
+    }
+
+    /*
+     * If we didn't handle the interrupt and it wasn't
+     * because we were in EX state, then we won't be able
+     * to execute the interrupt on this CPU unless something
+     * changes in the CPU state.  Clear the interrupt_request bits
+     * while preserving the IPEND bits, and we can re-assert the
+     * interrupt_request bit(s) when we execute one of those instructions.
+     */
+    if (!int_handled && !ssr_ex) {
+        restore_state(env, int_handled);
+    } else if (int_handled) {
+        assert(!cs->halted);
+    }
+
+    return int_handled;
+}
+
+void hex_clear_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type)
+{
+    if (mask == 0) {
+        return;
+    }
+
+    /*
+     * Clear the interrupt(s), then let all CPUs re-evaluate their
+     * pending-interrupt state
+     */
+    BQL_LOCK_GUARD();
+    clear_ipend(env, mask);
+    hex_interrupt_update(env);
+}
+
+void hex_raise_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type)
+{
+    g_assert(bql_locked());
+    if (mask == 0) {
+        return;
+    }
+
+    /*
+     * Notify all CPUs that the interrupt has happened
+     */
+    set_ipend(env, mask);
+    hex_interrupt_update(env);
+}
+
+void hex_interrupt_update(CPUHexagonState *env)
+{
+    CPUState *cs;
+
+    g_assert(bql_locked());
+    if (get_ipend(env) != 0) {
+        CPU_FOREACH(cs) {
+            CPUHexagonState *hex_env = cpu_env(cs);
+            const int exe_mode = get_exe_mode(hex_env);
+            if (exe_mode != HEX_EXE_MODE_OFF) {
+                cs->interrupt_request |= CPU_INTERRUPT_SWI;
+                cpu_resume(cs);
+            }
+        }
+    }
+}
diff --git a/target/hexagon/hex_interrupts.h b/target/hexagon/hex_interrupts.h
new file mode 100644
index 000000000000..17a243946ce2
--- /dev/null
+++ b/target/hexagon/hex_interrupts.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HEX_INTERRUPTS_H
+#define HEX_INTERRUPTS_H
+
+bool hex_check_interrupts(CPUHexagonState *env);
+void hex_clear_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type);
+void hex_raise_interrupts(CPUHexagonState *env, uint32_t mask, uint32_t type);
+void hex_interrupt_update(CPUHexagonState *env);
+
+#endif
diff --git a/target/hexagon/hex_mmu.c b/target/hexagon/hex_mmu.c
new file mode 100644
index 000000000000..8037528a2ccd
--- /dev/null
+++ b/target/hexagon/hex_mmu.c
@@ -0,0 +1,603 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/qemu-print.h"
+#include "cpu.h"
+#include "system/cpus.h"
+#include "internal.h"
+#include "exec/exec-all.h"
+#include "hex_mmu.h"
+#include "macros.h"
+#include "sys_macros.h"
+#include "reg_fields.h"
+
+#define GET_TLB_FIELD(ENTRY, FIELD) \
+    ((uint64_t)fEXTRACTU_BITS(ENTRY, reg_field_info[FIELD].width, \
+                              reg_field_info[FIELD].offset))
+
+/* PPD (physical page descriptor) */
+static inline uint64_t GET_PPD(uint64_t entry)
+{
+    return GET_TLB_FIELD(entry, PTE_PPD) |
+           (GET_TLB_FIELD(entry, PTE_PA35) << reg_field_info[PTE_PPD].width);
+}
+
+#define NO_ASID (1 << 8)
+
+typedef enum {
+    PGSIZE_4K,
+    PGSIZE_16K,
+    PGSIZE_64K,
+    PGSIZE_256K,
+    PGSIZE_1M,
+    PGSIZE_4M,
+    PGSIZE_16M,
+    PGSIZE_64M,
+    PGSIZE_256M,
+    PGSIZE_1G,
+    NUM_PGSIZE_TYPES
+} tlb_pgsize_t;
+
+static const char *pgsize_str[NUM_PGSIZE_TYPES] = {
+    "4K",
+    "16K",
+    "64K",
+    "256K",
+    "1M",
+    "4M",
+    "16M",
+    "64M",
+    "256M",
+    "1G",
+};
+
+#define INVALID_MASK 0xffffffffLL
+
+static const uint64_t encmask_2_mask[] = {
+    0x0fffLL, /* 4k, 0000 */
+    0x3fffLL, /* 16k, 0001 */
+    0xffffLL, /* 64k, 0010 */
+    0x3ffffLL, /* 256k, 0011 */
+    0xfffffLL, /* 1m, 0100 */
+    0x3fffffLL, /* 4m, 0101 */
+    0xffffffLL, /* 16m, 0110 */
+    0x3ffffffLL, /* 64m, 0111 */
+    0xfffffffLL, /* 256m, 1000 */
+    0x3fffffffLL, /* 1g, 1001 */
+    INVALID_MASK, /* RSVD, 1010 */
+};
+
+/*
+ * @return the page size type from @a entry.
+ */
+static inline tlb_pgsize_t hex_tlb_pgsize_type(uint64_t entry)
+{
+    if (entry == 0) {
+        qemu_log_mask(CPU_LOG_MMU, "%s: Supplied TLB entry was 0!\n", __func__);
+        return 0;
+    }
+    tlb_pgsize_t size = ctz64(entry);
+    g_assert(size < NUM_PGSIZE_TYPES);
+    return size;
+}
+
+/*
+ * @return the page size of @a entry, in bytes.
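+ *
+ * Page sizes scale by 4x per type, i.e. 1 << (TARGET_PAGE_BITS + 2 * type);
+ * e.g. assuming 4K base pages (TARGET_PAGE_BITS == 12), PGSIZE_16K yields
+ * 1 << (12 + 2) == 16K.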
+ */ +static inline uint64_t hex_tlb_page_size_bytes(uint64_t entry) +{ + return 1ull << (TARGET_PAGE_BITS + 2 * hex_tlb_pgsize_type(entry)); +} + +static inline uint64_t hex_tlb_phys_page_num(uint64_t entry) +{ + uint32_t ppd = GET_PPD(entry); + return ppd >> 1; +} + +static inline uint64_t hex_tlb_phys_addr(uint64_t entry) +{ + uint64_t pagemask = encmask_2_mask[hex_tlb_pgsize_type(entry)]; + uint64_t pagenum = hex_tlb_phys_page_num(entry); + uint64_t PA = (pagenum << TARGET_PAGE_BITS) & (~pagemask); + return PA; +} + +static inline uint64_t hex_tlb_virt_addr(uint64_t entry) +{ + return (uint64_t)GET_TLB_FIELD(entry, PTE_VPN) << TARGET_PAGE_BITS; +} + +static bool hex_dump_mmu_entry(FILE *f, uint64_t entry) +{ + if (GET_TLB_FIELD(entry, PTE_V)) { + fprintf(f, "0x%016" PRIx64 ": ", entry); + uint64_t PA = hex_tlb_phys_addr(entry); + uint64_t VA = hex_tlb_virt_addr(entry); + fprintf(f, "V:%" PRId64 " G:%" PRId64 " A1:%" PRId64 " A0:%" PRId64, + GET_TLB_FIELD(entry, PTE_V), GET_TLB_FIELD(entry, PTE_G), + GET_TLB_FIELD(entry, PTE_ATR1), GET_TLB_FIELD(entry, PTE_ATR0)); + fprintf(f, " ASID:0x%02" PRIx64 " VA:0x%08" PRIx64, + GET_TLB_FIELD(entry, PTE_ASID), VA); + fprintf(f, + " X:%" PRId64 " W:%" PRId64 " R:%" PRId64 " U:%" PRId64 + " C:%" PRId64, + GET_TLB_FIELD(entry, PTE_X), GET_TLB_FIELD(entry, PTE_W), + GET_TLB_FIELD(entry, PTE_R), GET_TLB_FIELD(entry, PTE_U), + GET_TLB_FIELD(entry, PTE_C)); + fprintf(f, " PA:0x%09" PRIx64 " SZ:%s (0x%" PRIx64 ")", PA, + pgsize_str[hex_tlb_pgsize_type(entry)], + hex_tlb_page_size_bytes(entry)); + fprintf(f, "\n"); + return true; + } + + /* Not valid */ + return false; +} + +void dump_mmu(CPUHexagonState *env) +{ + HexagonCPU *cpu = env_archcpu(env); + for (uint32_t i = 0; i < cpu->num_tlbs; i++) { + uint64_t entry = env->hex_tlb->entries[i]; + if (GET_TLB_FIELD(entry, PTE_V)) { + qemu_printf("[%03" PRIu32 "] ", i); + qemu_printf("0x%016" PRIx64 ": ", entry); + uint64_t PA = hex_tlb_phys_addr(entry); + uint64_t VA = hex_tlb_virt_addr(entry); + qemu_printf( + "V:%" PRId64 " G:%" PRId64 " A1:%" PRId64 " A0:%" PRId64, + GET_TLB_FIELD(entry, PTE_V), GET_TLB_FIELD(entry, PTE_G), + GET_TLB_FIELD(entry, PTE_ATR1), GET_TLB_FIELD(entry, PTE_ATR0)); + qemu_printf(" ASID:0x%02" PRIx64 " VA:0x%08" PRIx64, + GET_TLB_FIELD(entry, PTE_ASID), VA); + qemu_printf( + " X:%" PRId64 " W:%" PRId64 " R:%" PRId64 " U:%" PRId64 + " C:%" PRId64, + GET_TLB_FIELD(entry, PTE_X), GET_TLB_FIELD(entry, PTE_W), + GET_TLB_FIELD(entry, PTE_R), GET_TLB_FIELD(entry, PTE_U), + GET_TLB_FIELD(entry, PTE_C)); + qemu_printf(" PA:0x%09" PRIx64 " SZ:%s (0x%" PRIx64 ")", PA, + pgsize_str[hex_tlb_pgsize_type(entry)], + hex_tlb_page_size_bytes(entry)); + qemu_printf("\n"); + } + } +} + +static inline void hex_log_tlbw(uint32_t index, uint64_t entry) +{ + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + if (qemu_log_enabled()) { + FILE *logfile = qemu_log_trylock(); + if (logfile) { + fprintf(logfile, "tlbw[%03d]: ", index); + if (!hex_dump_mmu_entry(logfile, entry)) { + fprintf(logfile, "invalid\n"); + } + qemu_log_unlock(logfile); + } + } + } +} + +void hex_tlbw(CPUHexagonState *env, uint32_t index, uint64_t value) +{ + uint32_t myidx = fTLB_NONPOW2WRAP(fTLB_IDXMASK(index)); + bool old_entry_valid = GET_TLB_FIELD(env->hex_tlb->entries[myidx], PTE_V); + if (old_entry_valid && hexagon_cpu_mmu_enabled(env)) { + CPUState *cs = env_cpu(env); + + tlb_flush(cs); + } + env->hex_tlb->entries[myidx] = (value); + hex_log_tlbw(myidx, value); +} + +void hex_mmu_realize(CPUHexagonState *env) +{ + CPUState *cs = 
env_cpu(env);
+    if (cs->cpu_index == 0) {
+        env->hex_tlb = g_malloc0(sizeof(CPUHexagonTLBContext));
+    } else {
+        CPUState *cpu0_s = NULL;
+        CPUHexagonState *env0 = NULL;
+        CPU_FOREACH(cpu0_s) {
+            assert(cpu0_s->cpu_index == 0);
+            env0 = &(HEXAGON_CPU(cpu0_s)->env);
+            break;
+        }
+        env->hex_tlb = env0->hex_tlb;
+    }
+}
+
+void hex_mmu_on(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    qemu_log_mask(CPU_LOG_MMU, "Hexagon MMU turned on!\n");
+    tlb_flush(cs);
+}
+
+void hex_mmu_off(CPUHexagonState *env)
+{
+    CPUState *cs = env_cpu(env);
+    qemu_log_mask(CPU_LOG_MMU, "Hexagon MMU turned off!\n");
+    tlb_flush(cs);
+}
+
+void hex_mmu_mode_change(CPUHexagonState *env)
+{
+    qemu_log_mask(CPU_LOG_MMU, "Hexagon mode change!\n");
+    CPUState *cs = env_cpu(env);
+    tlb_flush(cs);
+}
+
+static inline bool hex_tlb_entry_match_noperm(uint64_t entry, uint32_t asid,
+                                              uint64_t VA)
+{
+    if (GET_TLB_FIELD(entry, PTE_V)) {
+        if (GET_TLB_FIELD(entry, PTE_G)) {
+            /* Global entry - ignore ASID */
+        } else if (asid != NO_ASID) {
+            uint32_t tlb_asid = GET_TLB_FIELD(entry, PTE_ASID);
+            if (tlb_asid != asid) {
+                return false;
+            }
+        }
+
+        uint64_t page_size = hex_tlb_page_size_bytes(entry);
+        uint64_t page_start =
+            ROUND_DOWN(hex_tlb_virt_addr(entry), page_size);
+        if (page_start <= VA && VA < page_start + page_size) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static inline void hex_tlb_entry_get_perm(CPUHexagonState *env, uint64_t entry,
+                                          MMUAccessType access_type,
+                                          int mmu_idx, int *prot,
+                                          int32_t *excp)
+{
+    bool perm_x = GET_TLB_FIELD(entry, PTE_X);
+    bool perm_w = GET_TLB_FIELD(entry, PTE_W);
+    bool perm_r = GET_TLB_FIELD(entry, PTE_R);
+    bool perm_u = GET_TLB_FIELD(entry, PTE_U);
+    bool user_idx = mmu_idx == MMU_USER_IDX;
+
+    if (mmu_idx == MMU_KERNEL_IDX) {
+        *prot = PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+        return;
+    }
+
+    *prot = PAGE_VALID;
+    switch (access_type) {
+    case MMU_INST_FETCH:
+        if (user_idx && !perm_u) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_FETCH_NO_UPAGE;
+        } else if (!perm_x) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_FETCH_NO_XPAGE;
+        }
+        break;
+    case MMU_DATA_LOAD:
+        if (user_idx && !perm_u) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_UREAD;
+        } else if (!perm_r) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_READ;
+        }
+        break;
+    case MMU_DATA_STORE:
+        if (user_idx && !perm_u) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_UWRITE;
+        } else if (!perm_w) {
+            *excp = HEX_EVENT_PRECISE;
+            env->cause_code = HEX_CAUSE_PRIV_NO_WRITE;
+        }
+        break;
+    }
+
+    if (!user_idx || perm_u) {
+        if (perm_x) {
+            *prot |= PAGE_EXEC;
+        }
+        if (perm_r) {
+            *prot |= PAGE_READ;
+        }
+        if (perm_w) {
+            *prot |= PAGE_WRITE;
+        }
+    }
+}
+
+static inline bool hex_tlb_entry_match(CPUHexagonState *env, uint64_t entry,
+                                       uint8_t asid, target_ulong VA,
+                                       MMUAccessType access_type, hwaddr *PA,
+                                       int *prot, int *size, int32_t *excp,
+                                       int mmu_idx)
+{
+    if (hex_tlb_entry_match_noperm(entry, asid, VA)) {
+        hex_tlb_entry_get_perm(env, entry, access_type, mmu_idx, prot, excp);
+        *PA = hex_tlb_phys_addr(entry);
+        *size = hex_tlb_page_size_bytes(entry);
+        return true;
+    }
+    return false;
+}
+
+bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA,
+                        MMUAccessType access_type, hwaddr *PA, int *prot,
+                        int *size, int32_t *excp, int mmu_idx)
+{
+    *PA = 0;
+    *prot = 0;
+    *size = 0;
+    *excp = 0;
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    uint8_t asid = GET_SSR_FIELD(SSR_ASID, ssr);
+    int
i; + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + uint64_t entry = env->hex_tlb->entries[i]; + if (hex_tlb_entry_match(env, entry, asid, VA, access_type, PA, prot, + size, excp, mmu_idx)) { + return true; + } + } + return false; +} + +static uint32_t hex_tlb_lookup_by_asid(CPUHexagonState *env, uint32_t asid, + uint32_t VA) +{ + uint32_t not_found = 0x80000000; + uint32_t idx = not_found; + int i; + + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + uint64_t entry = env->hex_tlb->entries[i]; + if (hex_tlb_entry_match_noperm(entry, asid, VA)) { + if (idx != not_found) { + env->cause_code = HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH; + break; + } + idx = i; + } + } + + if (idx == not_found) { + qemu_log_mask(CPU_LOG_MMU, "%s: 0x%x, 0x%08x => NOT FOUND\n", + __func__, asid, VA); + } else { + qemu_log_mask(CPU_LOG_MMU, "%s: 0x%x, 0x%08x => %d\n", + __func__, asid, VA, idx); + } + + return idx; +} + +/* Called from tlbp instruction */ +uint32_t hex_tlb_lookup(CPUHexagonState *env, uint32_t ssr, uint32_t VA) +{ + return hex_tlb_lookup_by_asid(env, GET_SSR_FIELD(SSR_ASID, ssr), VA); +} + +static bool hex_tlb_is_match(CPUHexagonState *env, + uint64_t entry1, uint64_t entry2, + bool consider_gbit) +{ + bool valid1 = GET_TLB_FIELD(entry1, PTE_V); + bool valid2 = GET_TLB_FIELD(entry2, PTE_V); + uint64_t size1 = hex_tlb_page_size_bytes(entry1); + uint64_t vaddr1 = ROUND_DOWN(hex_tlb_virt_addr(entry1), size1); + uint64_t size2 = hex_tlb_page_size_bytes(entry2); + uint64_t vaddr2 = ROUND_DOWN(hex_tlb_virt_addr(entry2), size2); + int asid1 = GET_TLB_FIELD(entry1, PTE_ASID); + int asid2 = GET_TLB_FIELD(entry2, PTE_ASID); + bool gbit1 = GET_TLB_FIELD(entry1, PTE_G); + bool gbit2 = GET_TLB_FIELD(entry2, PTE_G); + + if (!valid1 || !valid2) { + return false; + } + + if (((vaddr1 <= vaddr2) && (vaddr2 < (vaddr1 + size1))) || + ((vaddr2 <= vaddr1) && (vaddr1 < (vaddr2 + size2)))) { + if (asid1 == asid2) { + return true; + } + if ((consider_gbit && gbit1) || gbit2) { + return true; + } + } + return false; +} + +/* + * Return codes: + * 0 or positive index of match + * -1 multiple matches + * -2 no match + */ +int hex_tlb_check_overlap(CPUHexagonState *env, uint64_t entry, uint64_t index) +{ + int matches = 0; + int last_match = 0; + int i; + + HexagonCPU *cpu = env_archcpu(env); + for (i = 0; i < cpu->num_tlbs; i++) { + if (hex_tlb_is_match(env, entry, env->hex_tlb->entries[i], false)) { + matches++; + last_match = i; + } + } + + if (matches == 1) { + return last_match; + } + if (matches == 0) { + return -2; + } + return -1; +} + +static inline void print_thread(const char *str, CPUState *cs) +{ + g_assert(bql_locked()); + CPUHexagonState *thread = cpu_env(cs); + bool is_stopped = cpu_is_stopped(cs); + int exe_mode = get_exe_mode(thread); + hex_lock_state_t lock_state = thread->tlb_lock_state; + qemu_log_mask(CPU_LOG_MMU, + "%s: threadId = %d: %s, exe_mode = %s, tlb_lock_state = %s\n", + str, + thread->threadId, + is_stopped ? "stopped" : "running", + exe_mode == HEX_EXE_MODE_OFF ? "off" : + exe_mode == HEX_EXE_MODE_RUN ? "run" : + exe_mode == HEX_EXE_MODE_WAIT ? "wait" : + exe_mode == HEX_EXE_MODE_DEBUG ? "debug" : + "unknown", + lock_state == HEX_LOCK_UNLOCKED ? "unlocked" : + lock_state == HEX_LOCK_WAITING ? "waiting" : + lock_state == HEX_LOCK_OWNER ? 
"owner" : + "unknown"); +} + +static inline void print_thread_states(const char *str) +{ + CPUState *cs; + CPU_FOREACH(cs) { + print_thread(str, cs); + } +} + +void hex_tlb_lock(CPUHexagonState *env) +{ + qemu_log_mask(CPU_LOG_MMU, "hex_tlb_lock: %d\n", env->threadId); + BQL_LOCK_GUARD(); + g_assert((env->tlb_lock_count == 0) || (env->tlb_lock_count == 1)); + + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + uint8_t tlb_lock = GET_SYSCFG_FIELD(SYSCFG_TLBLOCK, syscfg); + if (tlb_lock) { + if (env->tlb_lock_state == HEX_LOCK_QUEUED) { + env->next_PC += 4; + env->tlb_lock_count++; + env->tlb_lock_state = HEX_LOCK_OWNER; + SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 1); + return; + } + if (env->tlb_lock_state == HEX_LOCK_OWNER) { + qemu_log_mask(CPU_LOG_MMU | LOG_GUEST_ERROR, + "Double tlblock at PC: 0x%x, thread may hang\n", + env->next_PC); + env->next_PC += 4; + CPUState *cs = env_cpu(env); + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + return; + } + env->tlb_lock_state = HEX_LOCK_WAITING; + CPUState *cs = env_cpu(env); + cpu_interrupt(cs, CPU_INTERRUPT_HALT); + } else { + env->next_PC += 4; + env->tlb_lock_count++; + env->tlb_lock_state = HEX_LOCK_OWNER; + SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 1); + } + + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + qemu_log_mask(CPU_LOG_MMU, "Threads after hex_tlb_lock:\n"); + print_thread_states("\tThread"); + } +} + +void hex_tlb_unlock(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + g_assert((env->tlb_lock_count == 0) || (env->tlb_lock_count == 1)); + + /* Nothing to do if the TLB isn't locked by this thread */ + uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG); + uint8_t tlb_lock = GET_SYSCFG_FIELD(SYSCFG_TLBLOCK, syscfg); + if ((tlb_lock == 0) || + (env->tlb_lock_state != HEX_LOCK_OWNER)) { + qemu_log_mask(LOG_GUEST_ERROR, + "thread %d attempted to tlbunlock without having the " + "lock, tlb_lock state = %d\n", + env->threadId, env->tlb_lock_state); + g_assert(env->tlb_lock_state != HEX_LOCK_WAITING); + return; + } + + env->tlb_lock_count--; + env->tlb_lock_state = HEX_LOCK_UNLOCKED; + SET_SYSCFG_FIELD(env, SYSCFG_TLBLOCK, 0); + + /* Look for a thread to unlock */ + unsigned int this_threadId = env->threadId; + CPUHexagonState *unlock_thread = NULL; + CPUState *cs; + CPU_FOREACH(cs) { + CPUHexagonState *thread = cpu_env(cs); + + /* + * The hardware implements round-robin fairness, so we look for threads + * starting at env->threadId + 1 and incrementing modulo the number of + * threads. + * + * To implement this, we check if thread is a earlier in the modulo + * sequence than unlock_thread. 
+ * if unlock thread is higher than this thread + * thread must be between this thread and unlock_thread + * else + * thread higher than this thread is ahead of unlock_thread + * thread must be lower then unlock thread + */ + if (thread->tlb_lock_state == HEX_LOCK_WAITING) { + if (!unlock_thread) { + unlock_thread = thread; + } else if (unlock_thread->threadId > this_threadId) { + if (this_threadId < thread->threadId && + thread->threadId < unlock_thread->threadId) { + unlock_thread = thread; + } + } else { + if (thread->threadId > this_threadId) { + unlock_thread = thread; + } + if (thread->threadId < unlock_thread->threadId) { + unlock_thread = thread; + } + } + } + } + if (unlock_thread) { + cs = env_cpu(unlock_thread); + print_thread("\tWaiting thread found", cs); + unlock_thread->tlb_lock_state = HEX_LOCK_QUEUED; + SET_SYSCFG_FIELD(unlock_thread, SYSCFG_TLBLOCK, 1); + cpu_interrupt(cs, CPU_INTERRUPT_TLB_UNLOCK); + } + + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + qemu_log_mask(CPU_LOG_MMU, "Threads after hex_tlb_unlock:\n"); + print_thread_states("\tThread"); + } + +} + diff --git a/target/hexagon/hex_mmu.h b/target/hexagon/hex_mmu.h new file mode 100644 index 000000000000..fae8aefcac1d --- /dev/null +++ b/target/hexagon/hex_mmu.h @@ -0,0 +1,30 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_MMU_H +#define HEXAGON_MMU_H + +#include "max.h" + +struct CPUHexagonTLBContext { + uint64_t entries[MAX_TLB_ENTRIES]; +}; + +extern void hex_tlbw(CPUHexagonState *env, uint32_t index, uint64_t value); +extern uint32_t hex_tlb_lookup(CPUHexagonState *env, uint32_t ssr, uint32_t VA); +extern void hex_mmu_realize(CPUHexagonState *env); +extern void hex_mmu_on(CPUHexagonState *env); +extern void hex_mmu_off(CPUHexagonState *env); +extern void hex_mmu_mode_change(CPUHexagonState *env); +extern bool hex_tlb_find_match(CPUHexagonState *env, target_ulong VA, + MMUAccessType access_type, hwaddr *PA, int *prot, + int *size, int32_t *excp, int mmu_idx); +extern int hex_tlb_check_overlap(CPUHexagonState *env, uint64_t entry, + uint64_t index); +extern void hex_tlb_lock(CPUHexagonState *env); +extern void hex_tlb_unlock(CPUHexagonState *env); +void dump_mmu(CPUHexagonState *env); +#endif diff --git a/target/hexagon/hex_regs.h b/target/hexagon/hex_regs.h index bddfc28021c6..ea8c62eba9ce 100644 --- a/target/hexagon/hex_regs.h +++ b/target/hexagon/hex_regs.h @@ -81,4 +81,119 @@ enum { HEX_REG_UTIMERHI = 63, }; +#ifndef CONFIG_USER_ONLY + +#define HEX_GREG_VALUES \ + DECL_HEX_GREG(G0, 0) \ + DECL_HEX_GREG(GELR, 0) \ + DECL_HEX_GREG(G1, 1) \ + DECL_HEX_GREG(GSR, 1) \ + DECL_HEX_GREG(G2, 2) \ + DECL_HEX_GREG(GOSP, 2) \ + DECL_HEX_GREG(G3, 3) \ + DECL_HEX_GREG(GBADVA, 3) \ + DECL_HEX_GREG(GCYCLE_1T, 10) \ + DECL_HEX_GREG(GCYCLE_2T, 11) \ + DECL_HEX_GREG(GCYCLE_3T, 12) \ + DECL_HEX_GREG(GCYCLE_4T, 13) \ + DECL_HEX_GREG(GCYCLE_5T, 14) \ + DECL_HEX_GREG(GCYCLE_6T, 15) \ + DECL_HEX_GREG(GPMUCNT4, 16) \ + DECL_HEX_GREG(GPMUCNT5, 17) \ + DECL_HEX_GREG(GPMUCNT6, 18) \ + DECL_HEX_GREG(GPMUCNT7, 19) \ + DECL_HEX_GREG(GPCYCLELO, 24) \ + DECL_HEX_GREG(GPCYCLEHI, 25) \ + DECL_HEX_GREG(GPMUCNT0, 26) \ + DECL_HEX_GREG(GPMUCNT1, 27) \ + DECL_HEX_GREG(GPMUCNT2, 28) \ + DECL_HEX_GREG(GPMUCNT3, 29) \ + DECL_HEX_GREG_DONE + +#define DECL_HEX_GREG_DONE +#define DECL_HEX_GREG(name, val) HEX_GREG_ ##name = val, +enum hex_greg { + HEX_GREG_VALUES +}; +#undef DECL_HEX_GREG +#undef DECL_HEX_GREG_DONE + +#define DECL_HEX_GREG_DONE 0 
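+/*
+ * Together with the DECL_HEX_GREG_DONE above, the redefinition below
+ * makes HEX_GREG_VALUES expand to an OR-chain bitmap terminated by 0:
+ *
+ *     (1 << 0) | (1 << 0) | (1 << 1) | ... | (1 << 29) | 0
+ *
+ * Aliased names (G0/GELR, G1/GSR, ...) set the same bit twice, which
+ * is harmless under bitwise OR; greg_implemented() then tests a single
+ * bit of the resulting mask.
+ */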
+#define DECL_HEX_GREG(_, val) (1 << val) | +static inline bool greg_implemented(enum hex_greg greg) +{ +#if NUM_GREGS > 32 +#error "NUM_GREGS too large for greg_implemented(): update `impl_bitmap`" +#endif + static int32_t impl_bitmap = HEX_GREG_VALUES; + return impl_bitmap & (1 << greg); +} +#undef DECL_HEX_GREG +#undef DECL_HEX_GREG_DONE + +#endif /* CONFIG_USER_ONLY */ + +enum { + HEX_SREG_SGP0 = 0, + HEX_SREG_SGP1 = 1, + HEX_SREG_STID = 2, + HEX_SREG_ELR = 3, + HEX_SREG_BADVA0 = 4, + HEX_SREG_BADVA1 = 5, + HEX_SREG_SSR = 6, + HEX_SREG_CCR = 7, + HEX_SREG_HTID = 8, + HEX_SREG_BADVA = 9, + HEX_SREG_IMASK = 10, + HEX_SREG_GEVB = 11, + HEX_SREG_GLB_START = 16, + HEX_SREG_EVB = 16, + HEX_SREG_MODECTL = 17, + HEX_SREG_SYSCFG = 18, + HEX_SREG_IPENDAD = 20, + HEX_SREG_VID = 21, + HEX_SREG_VID1 = 22, + HEX_SREG_BESTWAIT = 23, + HEX_SREG_IEL = 24, + HEX_SREG_SCHEDCFG = 25, + HEX_SREG_IAHL = 26, + HEX_SREG_CFGBASE = 27, + HEX_SREG_DIAG = 28, + HEX_SREG_REV = 29, + HEX_SREG_PCYCLELO = 30, + HEX_SREG_PCYCLEHI = 31, + HEX_SREG_ISDBST = 32, + HEX_SREG_ISDBCFG0 = 33, + HEX_SREG_ISDBCFG1 = 34, + HEX_SREG_LIVELOCK = 35, + HEX_SREG_BRKPTPC0 = 36, + HEX_SREG_BRKPTCFG0 = 37, + HEX_SREG_BRKPTPC1 = 38, + HEX_SREG_BRKPTCFG1 = 39, + HEX_SREG_ISDBMBXIN = 40, + HEX_SREG_ISDBMBXOUT = 41, + HEX_SREG_ISDBEN = 42, + HEX_SREG_ISDBGPR = 43, + HEX_SREG_PMUCNT4 = 44, + HEX_SREG_PMUCNT5 = 45, + HEX_SREG_PMUCNT6 = 46, + HEX_SREG_PMUCNT7 = 47, + HEX_SREG_PMUCNT0 = 48, + HEX_SREG_PMUCNT1 = 49, + HEX_SREG_PMUCNT2 = 50, + HEX_SREG_PMUCNT3 = 51, + HEX_SREG_PMUEVTCFG = 52, + HEX_SREG_PMUSTID0 = 53, + HEX_SREG_PMUEVTCFG1 = 54, + HEX_SREG_PMUSTID1 = 55, + HEX_SREG_TIMERLO = 56, + HEX_SREG_TIMERHI = 57, + HEX_SREG_PMUCFG = 58, + HEX_SREG_S59 = 59, + HEX_SREG_S60 = 60, + HEX_SREG_S61 = 61, + HEX_SREG_S62 = 62, + HEX_SREG_S63 = 63, +}; + #endif diff --git a/target/hexagon/hexswi.c b/target/hexagon/hexswi.c new file mode 100644 index 000000000000..a08d7f68917c --- /dev/null +++ b/target/hexagon/hexswi.c @@ -0,0 +1,728 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#ifdef CONFIG_USER_ONLY
+#include "exec/helper-proto.h"
+#include "qemu.h"
+#endif
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "arch.h"
+#include "internal.h"
+#include "macros.h"
+#include "sys_macros.h"
+#include "tcg/tcg-op.h"
+#ifndef CONFIG_USER_ONLY
+#include "hex_mmu.h"
+#include "hexswi.h"
+#include "semihosting/common-semi.h"
+#include "semihosting/syscalls.h"
+#include "semihosting/guestfd.h"
+#endif
+
+#ifndef CONFIG_USER_ONLY
+
+/* non-arm-compatible semihosting calls */
+#define HEXAGON_SPECIFIC_SWI_FLAGS \
+    DEF_SWI_FLAG(EXCEPTION, 0x18) \
+    DEF_SWI_FLAG(READ_CYCLES, 0x40) \
+    DEF_SWI_FLAG(PROF_ON, 0x41) \
+    DEF_SWI_FLAG(PROF_OFF, 0x42) \
+    DEF_SWI_FLAG(WRITECREG, 0x43) \
+    DEF_SWI_FLAG(READ_TCYCLES, 0x44) \
+    DEF_SWI_FLAG(READ_ICOUNT, 0x47) \
+    DEF_SWI_FLAG(PROF_STATSRESET, 0x48) \
+    DEF_SWI_FLAG(DUMP_PMU_STATS, 0x4a) \
+    DEF_SWI_FLAG(READ_PCYCLES, 0x52) \
+    DEF_SWI_FLAG(COREDUMP, 0xCD) \
+    DEF_SWI_FLAG(FTELL, 0x100) \
+    DEF_SWI_FLAG(FSTAT, 0x101) \
+    DEF_SWI_FLAG(STAT, 0x103) \
+    DEF_SWI_FLAG(GETCWD, 0x104) \
+    DEF_SWI_FLAG(ACCESS, 0x105) \
+    DEF_SWI_FLAG(OPENDIR, 0x180) \
+    DEF_SWI_FLAG(CLOSEDIR, 0x181) \
+    DEF_SWI_FLAG(READDIR, 0x182) \
+    DEF_SWI_FLAG(EXEC, 0x185) \
+    DEF_SWI_FLAG(FTRUNC, 0x186)
+
+#define DEF_SWI_FLAG(name, val) HEX_SYS_ ##name = val,
+enum hex_swi_flag {
+    HEXAGON_SPECIFIC_SWI_FLAGS
+};
+#undef DEF_SWI_FLAG
+
+#define DEF_SWI_FLAG(_, val) case val:
+static inline bool is_hexagon_specific_swi_flag(enum hex_swi_flag what_swi)
+{
+    switch (what_swi) {
+    HEXAGON_SPECIFIC_SWI_FLAGS
+        return true;
+    }
+    return false;
+}
+#undef DEF_SWI_FLAG
+
+/* We start from 1 as 0 is used to signal an error from opendir() */
+static const int DIR_INDEX_OFFSET = 1;
+
+static void common_semi_ftell_cb(CPUState *cs, uint64_t ret, int err)
+{
+    if (err) {
+        ret = -1;
+    }
+    common_semi_cb(cs, ret, err);
+}
+
+static void coredump(CPUHexagonState *env)
+{
+    uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR);
+    printf("CRASH!\n");
+    printf("I think the exception was: ");
+    switch (GET_SSR_FIELD(SSR_CAUSE, ssr)) {
+    case 0x43:
+        printf("0x43, NMI");
+        break;
+    case 0x42:
+        printf("0x42, Data abort");
+        break;
+    case 0x44:
+        printf("0x44, Multi TLB match");
+        break;
+    case HEX_CAUSE_BIU_PRECISE:
+        printf("0x%x, Bus Error (Precise BIU error)",
+               HEX_CAUSE_BIU_PRECISE);
+        break;
+    case HEX_CAUSE_DOUBLE_EXCEPT:
+        printf("0x%x, Exception observed when EX = 1 (double exception)",
+               HEX_CAUSE_DOUBLE_EXCEPT);
+        break;
+    case HEX_CAUSE_FETCH_NO_XPAGE:
+        printf("0x%x, Privilege violation: User/Guest mode execute"
+               " to page with no execute permissions",
+               HEX_CAUSE_FETCH_NO_XPAGE);
+        break;
+    case HEX_CAUSE_FETCH_NO_UPAGE:
+        printf("0x%x, Privilege violation: "
+               "User mode execute to page with no user permissions",
+               HEX_CAUSE_FETCH_NO_UPAGE);
+        break;
+    case HEX_CAUSE_INVALID_PACKET:
+        printf("0x%x, Invalid packet",
+               HEX_CAUSE_INVALID_PACKET);
+        break;
+    case HEX_CAUSE_PRIV_USER_NO_GINSN:
+        printf("0x%x, Privilege violation: guest mode insn in user mode",
+               HEX_CAUSE_PRIV_USER_NO_GINSN);
+        break;
+    case HEX_CAUSE_PRIV_USER_NO_SINSN:
+        printf("0x%x, Privilege violation: "
+               "monitor mode insn in user/guest mode",
+               HEX_CAUSE_PRIV_USER_NO_SINSN);
+        break;
+    case HEX_CAUSE_REG_WRITE_CONFLICT:
+        printf("0x%x, Multiple writes to same register",
+               HEX_CAUSE_REG_WRITE_CONFLICT);
+        break;
+    case HEX_CAUSE_PC_NOT_ALIGNED:
+        printf("0x%x, PC not aligned",
+               HEX_CAUSE_PC_NOT_ALIGNED);
+        break;
+    case HEX_CAUSE_MISALIGNED_LOAD:
+        printf("0x%x, Misaligned Load @ 0x%x",
+               HEX_CAUSE_MISALIGNED_LOAD,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_MISALIGNED_STORE:
+        printf("0x%x, Misaligned Store @ 0x%x",
+               HEX_CAUSE_MISALIGNED_STORE,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_READ:
+        printf("0x%x, Privilege violation: "
+               "user/guest read permission @ 0x%x",
+               HEX_CAUSE_PRIV_NO_READ,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_WRITE:
+        printf("0x%x, Privilege violation: "
+               "user/guest write permission @ 0x%x",
+               HEX_CAUSE_PRIV_NO_WRITE,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_UREAD:
+        printf("0x%x, Privilege violation: user read permission @ 0x%x",
+               HEX_CAUSE_PRIV_NO_UREAD,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_PRIV_NO_UWRITE:
+        printf("0x%x, Privilege violation: user write permission @ 0x%x",
+               HEX_CAUSE_PRIV_NO_UWRITE,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_COPROC_LDST:
+        printf("0x%x, Coprocessor VMEM address error @ 0x%x",
+               HEX_CAUSE_COPROC_LDST,
+               arch_get_system_reg(env, HEX_SREG_BADVA));
+        break;
+    case HEX_CAUSE_STACK_LIMIT:
+        printf("0x%x, Stack limit check error", HEX_CAUSE_STACK_LIMIT);
+        break;
+    case HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT:
+        printf("0x%x, Floating-Point: Execution of Floating-Point "
+               "instruction resulted in exception",
+               HEX_CAUSE_FPTRAP_CAUSE_BADFLOAT);
+        break;
+    case HEX_CAUSE_NO_COPROC_ENABLE:
+        printf("0x%x, Illegal Execution of Coprocessor Instruction",
+               HEX_CAUSE_NO_COPROC_ENABLE);
+        break;
+    case HEX_CAUSE_NO_COPROC2_ENABLE:
+        printf("0x%x, "
+               "Illegal Execution of Secondary Coprocessor Instruction",
+               HEX_CAUSE_NO_COPROC2_ENABLE);
+        break;
+    case HEX_CAUSE_UNSUPORTED_HVX_64B:
+        printf("0x%x, "
+               "Unsupported Execution of Coprocessor Instruction with 64-bit Mode On",
+               HEX_CAUSE_UNSUPORTED_HVX_64B);
+        break;
+    case HEX_CAUSE_VWCTRL_WINDOW_MISS:
+        printf("0x%x, "
+               "Thread accessing a region outside VWCTRL window",
+               HEX_CAUSE_VWCTRL_WINDOW_MISS);
+        break;
+    default:
+        printf("Don't know");
+        break;
+    }
+    printf("\nRegister Dump:\n");
+    hexagon_dump(env, stdout, 0);
+}
+
+static void sim_handle_trap0(CPUHexagonState *env)
+{
+    g_assert(bql_locked());
+    target_ulong what_swi = arch_get_thread_reg(env, HEX_REG_R00);
+    target_ulong swi_info = arch_get_thread_reg(env, HEX_REG_R01);
+    uintptr_t retaddr = 0;
+    CPUState *cs = env_cpu(env);
+
+    if (!is_hexagon_specific_swi_flag(what_swi)) {
+        do_common_semihosting(cs);
+        return;
+    }
+
+    switch (what_swi) {
+
+    case HEX_SYS_EXCEPTION:
+        arch_set_system_reg(env, HEX_SREG_MODECTL, 0);
+        exit(arch_get_thread_reg(env, HEX_REG_R02));
+        break;
+
+    case HEX_SYS_WRITECREG:
+        fprintf(stdout, "%c", swi_info);
+        fflush(stdout);
+        common_semi_cb(cs, 0, 0);
+        break;
+
+    case HEX_SYS_STAT:
+    case HEX_SYS_FSTAT:
+    {
+        /*
+         * This must match the caller's definition; it would be in the
+         * caller's angel.h or equivalent header.
+ */ + struct __SYS_STAT { + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint64_t rdev; + uint32_t size; + uint32_t __pad1; + uint32_t atime; + uint32_t mtime; + uint32_t ctime; + uint32_t __pad2; + } sys_stat; + struct stat st_buf; + uint8_t *st_bufptr = (uint8_t *)&sys_stat; + int rc, err = 0; + char filename[BUFSIZ]; + target_ulong physicalFilenameAddr; + target_ulong statBufferAddr; + hexagon_read_memory(env, swi_info, 4, &physicalFilenameAddr, retaddr); + + if (what_swi == HEX_SYS_STAT) { + int i = 0; + do { + hexagon_read_memory(env, physicalFilenameAddr + i, 1, + &filename[i], retaddr); + i++; + } while ((i < BUFSIZ) && filename[i - 1]); + rc = stat(filename, &st_buf); + err = errno; + } else{ + int fd = physicalFilenameAddr; + GuestFD *gf = get_guestfd(fd); + if (gf->type != GuestFDHost) { + fprintf(stderr, "fstat semihosting only implemented for native mode.\n"); + g_assert_not_reached(); + } + rc = fstat(gf->hostfd, &st_buf); + err = errno; + } + if (rc == 0) { + sys_stat.dev = st_buf.st_dev; + sys_stat.ino = st_buf.st_ino; + sys_stat.mode = st_buf.st_mode; + sys_stat.nlink = (uint32_t) st_buf.st_nlink; + sys_stat.rdev = st_buf.st_rdev; + sys_stat.size = (uint32_t) st_buf.st_size; +#if defined(__linux__) + sys_stat.atime = (uint32_t) st_buf.st_atim.tv_sec; + sys_stat.mtime = (uint32_t) st_buf.st_mtim.tv_sec; + sys_stat.ctime = (uint32_t) st_buf.st_ctim.tv_sec; +#elif defined(_WIN32) + sys_stat.atime = st_buf.st_atime; + sys_stat.mtime = st_buf.st_mtime; + sys_stat.ctime = st_buf.st_ctime; +#endif + } + hexagon_read_memory(env, swi_info + 4, 4, &statBufferAddr, retaddr); + + for (int i = 0; i < sizeof(sys_stat); i++) { + hexagon_write_memory(env, statBufferAddr + i, 1, st_bufptr[i], + retaddr); + } + common_semi_cb(cs, rc, err); + } + break; + + case HEX_SYS_FTRUNC: + { + int fd; + off_t size_limit; + hexagon_read_memory(env, swi_info, 4, &fd, retaddr); + hexagon_read_memory(env, swi_info + 4, 8, &size_limit, retaddr); + semihost_sys_ftruncate(cs, common_semi_cb, fd, size_limit); + } + break; + + case HEX_SYS_ACCESS: + { + char filename[BUFSIZ]; + uint32_t FileNameAddr; + uint32_t BufferMode; + int rc, err = 0; + + int i = 0; + + hexagon_read_memory(env, swi_info, 4, &FileNameAddr, retaddr); + do { + hexagon_read_memory(env, FileNameAddr + i, 1, &filename[i], retaddr); + i++; + } while ((i < BUFSIZ) && (filename[i - 1])); + filename[i] = 0; + + hexagon_read_memory(env, swi_info + 4, 4, &BufferMode, retaddr); + + rc = access(filename, BufferMode); + if (rc != 0) { + err = errno; + } + common_semi_cb(cs, rc, err); + } + break; + + case HEX_SYS_GETCWD: + { + char cwdPtr[PATH_MAX]; + uint32_t BufferAddr; + uint32_t BufferSize; + uint32_t rc = 0, err = 0; + + hexagon_read_memory(env, swi_info, 4, &BufferAddr, retaddr); + hexagon_read_memory(env, swi_info + 4, 4, &BufferSize, retaddr); + + if (!getcwd(cwdPtr, PATH_MAX)) { + err = errno; + } else { + size_t cwd_size = strlen(cwdPtr); + if (cwd_size > BufferSize) { + err = ERANGE; + } else { + for (int i = 0; i < cwd_size; i++) { + hexagon_write_memory(env, BufferAddr + i, 1, + (uint64_t)cwdPtr[i], retaddr); + } + rc = BufferAddr; + } + } + common_semi_cb(cs, rc, err); + break; + } + + case HEX_SYS_EXEC: + { + qemu_log_mask(LOG_UNIMP, "SYS_EXEC is deprecated\n"); + common_semi_cb(cs, -1, ENOSYS); + } + break; + + case HEX_SYS_OPENDIR: + { + DIR *dir; + char buf[BUFSIZ]; + int rc = 0, err = 0; + + int i = 0; + do { + hexagon_read_memory(env, swi_info + i, 1, &buf[i], retaddr); + i++; + } while (buf[i - 1]); + + dir = 
opendir(buf); + if (dir != NULL) { + env->dir_list = g_list_append(env->dir_list, dir); + rc = g_list_index(env->dir_list, dir) + DIR_INDEX_OFFSET; + } else { + err = errno; + } + common_semi_cb(cs, rc, err); + break; + } + + case HEX_SYS_READDIR: + { + struct dirent *host_dir_entry = NULL; + int dir_index = swi_info - DIR_INDEX_OFFSET; + DIR *dir = g_list_nth_data(env->dir_list, dir_index); + uint32_t rc = 0, err = 0; + + if (dir) { + errno = 0; + host_dir_entry = readdir(dir); + if (host_dir_entry == NULL) { + err = errno; + } + } else { + err = EBADF; + } + + if (host_dir_entry) { + uint32_t guest_dir_entry = arch_get_thread_reg(env, HEX_REG_R02); + hexagon_write_memory(env, guest_dir_entry, 4, host_dir_entry->d_ino, + retaddr); + for (int i = 0; i < sizeof(host_dir_entry->d_name); i++) { + hexagon_write_memory(env, guest_dir_entry + 4 + i, 1, + host_dir_entry->d_name[i], retaddr); + if (!host_dir_entry->d_name[i]) { + break; + } + } + rc = guest_dir_entry; + } + common_semi_cb(cs, rc, err); + break; + } + + case HEX_SYS_CLOSEDIR: + { + DIR *dir; + int ret = 0, err = 0; + + dir = g_list_nth_data(env->dir_list, swi_info); + if (dir != NULL) { + ret = closedir(dir); + if (ret != 0) { + err = errno; + } + } else { + err = EBADF; + } + common_semi_cb(cs, ret, err); + break; + } + + case HEX_SYS_COREDUMP: + coredump(env); + break; + + case HEX_SYS_FTELL: + { + int fd; + hexagon_read_memory(env, swi_info, 4, &fd, retaddr); + semihost_sys_lseek(cs, common_semi_ftell_cb, fd, 0, GDB_SEEK_CUR); + } + break; + + case HEX_SYS_READ_CYCLES: + case HEX_SYS_READ_TCYCLES: + case HEX_SYS_READ_ICOUNT: + { + arch_set_thread_reg(env, HEX_REG_R00, 0); + arch_set_thread_reg(env, HEX_REG_R01, 0); + break; + } + + case HEX_SYS_READ_PCYCLES: + { + arch_set_thread_reg(env, HEX_REG_R00, + arch_get_system_reg(env, HEX_SREG_PCYCLELO)); + arch_set_thread_reg(env, HEX_REG_R01, + arch_get_system_reg(env, HEX_SREG_PCYCLEHI)); + break; + } + + case HEX_SYS_PROF_ON: + case HEX_SYS_PROF_OFF: + case HEX_SYS_PROF_STATSRESET: + case HEX_SYS_DUMP_PMU_STATS: + common_semi_cb(cs, -1, ENOSYS); + qemu_log_mask(LOG_UNIMP, "SWI call %x is unimplemented in QEMU\n", + what_swi); + break; + + default: + qemu_log_mask(LOG_GUEST_ERROR, "error: unknown swi call 0x%x\n", what_swi); + cpu_abort(cs, "Hexagon Unsupported swi call 0x%x\n", what_swi); + } +} + +static void set_addresses(CPUHexagonState *env, target_ulong pc_offset, + target_ulong exception_index) + +{ + arch_set_system_reg(env, HEX_SREG_ELR, + arch_get_thread_reg(env, HEX_REG_PC) + pc_offset); + arch_set_thread_reg(env, HEX_REG_PC, + arch_get_system_reg(env, HEX_SREG_EVB) | + (exception_index << 2)); +} + +static const char *event_name[] = { + [HEX_EVENT_RESET] = "HEX_EVENT_RESET", + [HEX_EVENT_IMPRECISE] = "HEX_EVENT_IMPRECISE", + [HEX_EVENT_TLB_MISS_X] = "HEX_EVENT_TLB_MISS_X", + [HEX_EVENT_TLB_MISS_RW] = "HEX_EVENT_TLB_MISS_RW", + [HEX_EVENT_TRAP0] = "HEX_EVENT_TRAP0", + [HEX_EVENT_TRAP1] = "HEX_EVENT_TRAP1", + [HEX_EVENT_FPTRAP] = "HEX_EVENT_FPTRAP", + [HEX_EVENT_DEBUG] = "HEX_EVENT_DEBUG", + [HEX_EVENT_INT0] = "HEX_EVENT_INT0", + [HEX_EVENT_INT1] = "HEX_EVENT_INT1", + [HEX_EVENT_INT2] = "HEX_EVENT_INT2", + [HEX_EVENT_INT3] = "HEX_EVENT_INT3", + [HEX_EVENT_INT4] = "HEX_EVENT_INT4", + [HEX_EVENT_INT5] = "HEX_EVENT_INT5", + [HEX_EVENT_INT6] = "HEX_EVENT_INT6", + [HEX_EVENT_INT7] = "HEX_EVENT_INT7", + [HEX_EVENT_INT8] = "HEX_EVENT_INT8", + [HEX_EVENT_INT9] = "HEX_EVENT_INT9", + [HEX_EVENT_INTA] = "HEX_EVENT_INTA", + [HEX_EVENT_INTB] = "HEX_EVENT_INTB", + [HEX_EVENT_INTC] = 
"HEX_EVENT_INTC", + [HEX_EVENT_INTD] = "HEX_EVENT_INTD", + [HEX_EVENT_INTE] = "HEX_EVENT_INTE", + [HEX_EVENT_INTF] = "HEX_EVENT_INTF" +}; + +void hexagon_cpu_do_interrupt(CPUState *cs) + +{ + CPUHexagonState *env = cpu_env(cs); + BQL_LOCK_GUARD(); + + qemu_log_mask(CPU_LOG_INT, "\t%s: event 0x%x:%s, cause 0x%x(%d)\n", + __func__, cs->exception_index, + event_name[cs->exception_index], env->cause_code, + env->cause_code); + + env->llsc_addr = ~0; + + uint32_t ssr = arch_get_system_reg(env, HEX_SREG_SSR); + if (GET_SSR_FIELD(SSR_EX, ssr) == 1) { + arch_set_system_reg(env, HEX_SREG_DIAG, env->cause_code); + env->cause_code = HEX_CAUSE_DOUBLE_EXCEPT; + cs->exception_index = HEX_EVENT_PRECISE; + } + + switch (cs->exception_index) { + case HEX_EVENT_TRAP0: + if (env->cause_code == 0) { + sim_handle_trap0(env); + } + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 4, cs->exception_index); + break; + + case HEX_EVENT_TRAP1: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 4, cs->exception_index); + break; + + case HEX_EVENT_TLB_MISS_X: + switch (env->cause_code) { + case HEX_CAUSE_TLBMISSX_CAUSE_NORMAL: + case HEX_CAUSE_TLBMISSX_CAUSE_NEXTPAGE: + qemu_log_mask(CPU_LOG_MMU, + "TLB miss EX exception (0x%x) caught: " + "Cause code (0x%x) " + "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32 + ", BADVA = 0x%" PRIx32 "\n", + cs->exception_index, env->cause_code, env->threadId, + arch_get_thread_reg(env, HEX_REG_PC), + arch_get_system_reg(env, HEX_SREG_BADVA)); + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + default: + cpu_abort(cs, + "1:Hexagon exception %d/0x%x: " + "Unknown cause code %d/0x%x\n", + cs->exception_index, cs->exception_index, env->cause_code, + env->cause_code); + break; + } + break; + + case HEX_EVENT_TLB_MISS_RW: + switch (env->cause_code) { + case HEX_CAUSE_TLBMISSRW_CAUSE_READ: + case HEX_CAUSE_TLBMISSRW_CAUSE_WRITE: + qemu_log_mask(CPU_LOG_MMU, + "TLB miss RW exception (0x%x) caught: " + "Cause code (0x%x) " + "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32 + ", BADVA = 0x%" PRIx32 "\n", + cs->exception_index, env->cause_code, env->threadId, + env->gpr[HEX_REG_PC], + arch_get_system_reg(env, HEX_SREG_BADVA)); + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + /* env->sreg[HEX_SREG_BADVA] is set when the exception is raised */ + break; + + default: + cpu_abort(cs, + "2:Hexagon exception %d/0x%x: " + "Unknown cause code %d/0x%x\n", + cs->exception_index, cs->exception_index, env->cause_code, + env->cause_code); + break; + } + break; + + case HEX_EVENT_FPTRAP: + hexagon_ssr_set_cause(env, env->cause_code); + arch_set_thread_reg(env, HEX_REG_PC, + arch_get_system_reg(env, HEX_SREG_EVB) | + (cs->exception_index << 2)); + break; + + case HEX_EVENT_DEBUG: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + qemu_log_mask(LOG_UNIMP, "single-step exception is not handled\n"); + break; + + case HEX_EVENT_PRECISE: + switch (env->cause_code) { + case HEX_CAUSE_FETCH_NO_XPAGE: + case HEX_CAUSE_FETCH_NO_UPAGE: + case HEX_CAUSE_PRIV_NO_READ: + case HEX_CAUSE_PRIV_NO_UREAD: + case HEX_CAUSE_PRIV_NO_WRITE: + case HEX_CAUSE_PRIV_NO_UWRITE: + case HEX_CAUSE_MISALIGNED_LOAD: + case HEX_CAUSE_MISALIGNED_STORE: + case HEX_CAUSE_PC_NOT_ALIGNED: + qemu_log_mask(CPU_LOG_MMU, + "MMU permission exception (0x%x) caught: " + "Cause code (0x%x) " + "TID = 0x%" PRIx32 ", PC = 0x%" PRIx32 + ", BADVA = 0x%" PRIx32 "\n", + cs->exception_index, 
env->cause_code, env->threadId, + env->gpr[HEX_REG_PC], + arch_get_system_reg(env, HEX_SREG_BADVA)); + + + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + /* env->sreg[HEX_SREG_BADVA] is set when the exception is raised */ + break; + + case HEX_CAUSE_DOUBLE_EXCEPT: + case HEX_CAUSE_PRIV_USER_NO_SINSN: + case HEX_CAUSE_PRIV_USER_NO_GINSN: + case HEX_CAUSE_INVALID_OPCODE: + case HEX_CAUSE_NO_COPROC_ENABLE: + case HEX_CAUSE_NO_COPROC2_ENABLE: + case HEX_CAUSE_UNSUPORTED_HVX_64B: + case HEX_CAUSE_REG_WRITE_CONFLICT: + case HEX_CAUSE_VWCTRL_WINDOW_MISS: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + case HEX_CAUSE_COPROC_LDST: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + case HEX_CAUSE_STACK_LIMIT: + hexagon_ssr_set_cause(env, env->cause_code); + set_addresses(env, 0, cs->exception_index); + break; + + default: + cpu_abort(cs, + "3:Hexagon exception %d/0x%x: " + "Unknown cause code %d/0x%x\n", + cs->exception_index, cs->exception_index, env->cause_code, + env->cause_code); + break; + } + break; + + case HEX_EVENT_IMPRECISE: + qemu_log_mask(LOG_UNIMP, + "Imprecise exception: this case is not yet handled"); + break; + + default: + qemu_log_mask(LOG_UNIMP, + "Hexagon Unsupported exception 0x%x/0x%x\n", + cs->exception_index, env->cause_code); + break; + } + + cs->exception_index = HEX_EVENT_NONE; +} + +void register_trap_exception(CPUHexagonState *env, int traptype, int imm, + target_ulong PC) +{ + CPUState *cs = env_cpu(env); + + cs->exception_index = (traptype == 0) ? HEX_EVENT_TRAP0 : HEX_EVENT_TRAP1; + ASSERT_DIRECT_TO_GUEST_UNSET(env, cs->exception_index); + + env->cause_code = imm; + env->gpr[HEX_REG_PC] = PC; + cpu_loop_exit(cs); +} +#endif diff --git a/target/hexagon/hexswi.h b/target/hexagon/hexswi.h new file mode 100644 index 000000000000..5d232cb06cb0 --- /dev/null +++ b/target/hexagon/hexswi.h @@ -0,0 +1,17 @@ +/* + * Copyright(c) 2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXSWI_H +#define HEXSWI_H + + +#include "cpu.h" + +void hexagon_cpu_do_interrupt(CPUState *cpu); +void register_trap_exception(CPUHexagonState *env, int type, int imm, + target_ulong PC); + +#endif /* HEXSWI_H */ diff --git a/target/hexagon/idef-parser/README.rst b/target/hexagon/idef-parser/README.rst index 7199177ee33e..235e3debee3c 100644 --- a/target/hexagon/idef-parser/README.rst +++ b/target/hexagon/idef-parser/README.rst @@ -637,7 +637,7 @@ tinycode for the Hexagon ``add`` instruction :: ---- 00021094 - mov_i32 pkt_has_store_s1,$0x0 + mov_i32 pkt_has_scalar_store_s1,$0x0 add_i32 tmp0,r2,r2 mov_i32 loc2,tmp0 mov_i32 new_r1,loc2 diff --git a/target/hexagon/idef-parser/parser-helpers.c b/target/hexagon/idef-parser/parser-helpers.c index a7dcd85fe43d..3316c230f8a5 100644 --- a/target/hexagon/idef-parser/parser-helpers.c +++ b/target/hexagon/idef-parser/parser-helpers.c @@ -1725,7 +1725,7 @@ void gen_cancel(Context *c, YYLTYPE *locp) void gen_load_cancel(Context *c, YYLTYPE *locp) { - OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n"); + OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_scalar_store_s1) {\n"); OUT(c, locp, "ctx->s1_store_processed = false;\n"); OUT(c, locp, "process_store(ctx, 1);\n"); OUT(c, locp, "}\n"); @@ -1750,7 +1750,7 @@ void gen_load(Context *c, YYLTYPE *locp, HexValue *width, /* Lookup the effective address EA */ find_variable(c, locp, ea, ea); - OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_store_s1) {\n"); + OUT(c, locp, "if (insn->slot == 0 && pkt->pkt_has_scalar_store_s1) {\n"); OUT(c, locp, "probe_noshuf_load(", ea, ", ", width, ", ctx->mem_idx);\n"); OUT(c, locp, "process_store(ctx, 1);\n"); OUT(c, locp, "}\n"); diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 0cd30a5e8575..2c45388ab629 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,7 @@ */ /* + * encode32.def * Encodings for 32 bit instructions * */ @@ -341,6 +342,8 @@ DEF_ENC32(L4_pload##TAG##tnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP110tti 1--ddd DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP111tti 1--ddddd") + + /* 0 000 misc: dealloc,loadw_locked,dcfetch */ STD_LD_ENC(bzw4,"0 101") STD_LD_ENC(bzw2,"0 011") @@ -375,6 +378,7 @@ DEF_ANTICLASS32(ICLASS_LD" 1110 000----- PP------ --------",LD_ADDR_POST_REG) DEF_ENC32(L2_deallocframe, ICLASS_LD" 000 0 000 sssss PP0----- ---ddddd") DEF_ENC32(L4_return, ICLASS_LD" 011 0 000 sssss PP0000-- ---ddddd") + DEF_ENC32(L4_return_t, ICLASS_LD" 011 0 000 sssss PP0100vv ---ddddd") DEF_ENC32(L4_return_f, ICLASS_LD" 011 0 000 sssss PP1100vv ---ddddd") DEF_ENC32(L4_return_tnew_pt, ICLASS_LD" 011 0 000 sssss PP0110vv ---ddddd") @@ -382,15 +386,19 @@ DEF_ENC32(L4_return_fnew_pt, ICLASS_LD" 011 0 000 sssss PP1110vv ---ddddd") DEF_ENC32(L4_return_tnew_pnt, ICLASS_LD" 011 0 000 sssss PP0010vv ---ddddd") DEF_ENC32(L4_return_fnew_pnt, ICLASS_LD" 011 0 000 sssss PP1010vv ---ddddd") -DEF_ENC32(L2_loadw_locked,ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") - +/** Load Acquire Store Release Encoding **/ +DEF_ENC32(L4_loadw_phys, ICLASS_LD" 001 0 000 sssss PP1ttttt -00ddddd") +DEF_ENC32(L2_loadw_locked, ICLASS_LD" 001 0 000 sssss PP000--- 000ddddd") +DEF_ENC32(L4_loadd_locked, ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") DEF_ENC32(L2_loadw_aq, ICLASS_LD" 001 0 000 sssss PP001--- 000ddddd") DEF_ENC32(L4_loadd_aq, ICLASS_LD" 001 0 000 sssss PP011--- 000ddddd") -DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") -DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") + +DEF_ENC32(S2_storew_locked, ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") +DEF_ENC32(S4_stored_locked, ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") + DEF_ENC32(S2_storew_rl_at_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --0010dd") DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") @@ -398,13 +406,11 @@ DEF_ENC32(S2_storew_rl_st_vi, ICLASS_ST" 000 01 01sssss PP-ttttt --1010dd") DEF_ENC32(S4_stored_rl_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0010dd") DEF_ENC32(S4_stored_rl_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1010dd") -DEF_ENC32(L4_loadd_locked,ICLASS_LD" 001 0 000 sssss PP010--- 000ddddd") -DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii -01iiiii") -DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") - - - +DEF_ENC32(R6_release_at_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --0011dd") +DEF_ENC32(R6_release_st_vi, ICLASS_ST" 000 01 11sssss PP0ttttt --1011dd") +DEF_EXT_SPACE(EXTRACTW, ICLASS_LD" 001 0 000 iiiii PP0iiiii 001iiiii") +DEF_ENC32(Y2_dcfetchbo, ICLASS_LD" 010 0 000 sssss PP0--iii iiiiiiii") @@ -488,13 +494,17 @@ STD_PST_ENC(rinew, "1 101","10ttt") /* x bus/cache */ /* x store/cache */ DEF_ENC32(S2_allocframe, ICLASS_ST" 000 01 00xxxxx PP000iii iiiiiiii") -DEF_ENC32(S2_storew_locked,ICLASS_ST" 000 01 01sssss PP-ttttt ----00dd") -DEF_ENC32(S4_stored_locked,ICLASS_ST" 000 01 11sssss PP0ttttt ----00dd") +DEF_ENC32(Y5_l2locka, ICLASS_ST" 000 01 11sssss PP1----- ------dd") DEF_ENC32(Y2_dczeroa, ICLASS_ST" 000 01 10sssss PP0----- --------") -DEF_ENC32(Y2_barrier, ICLASS_ST" 100 00 00----- PP------ 000-----") +DEF_ENC32(Y2_barrier, ICLASS_ST" 100 00 00----- PP------ 000-----") DEF_ENC32(Y2_syncht, ICLASS_ST" 100 00 
10----- PP------ --------") +DEF_ENC32(Y2_l2kill, ICLASS_ST" 100 00 01----- PP-000-- --------") +DEF_ENC32(Y5_l2gunlock, ICLASS_ST" 100 00 01----- PP-010-- --------") +DEF_ENC32(Y5_l2gclean, ICLASS_ST" 100 00 01----- PP-100-- --------") +DEF_ENC32(Y5_l2gcleaninv, ICLASS_ST" 100 00 01----- PP-110-- --------") +DEF_ENC32(Y2_l2cleaninvidx,ICLASS_ST" 100 00 11sssss PP------ --------") @@ -502,9 +512,34 @@ DEF_ENC32(Y2_dccleana, ICLASS_ST" 000 00 00sssss PP------ --------") DEF_ENC32(Y2_dcinva, ICLASS_ST" 000 00 01sssss PP------ --------") DEF_ENC32(Y2_dccleaninva, ICLASS_ST" 000 00 10sssss PP------ --------") -DEF_ENC32(Y4_l2fetch, ICLASS_ST" 011 00 00sssss PP-ttttt 000-----") +/* Super */ +DEF_ENC32(Y2_dckill, ICLASS_ST" 001 00 00----- PP------ --------") +DEF_ENC32(Y2_dccleanidx, ICLASS_ST" 001 00 01sssss PP------ --------") +DEF_ENC32(Y2_dcinvidx, ICLASS_ST" 001 00 10sssss PP------ --------") +DEF_ENC32(Y2_dccleaninvidx,ICLASS_ST" 001 00 11sssss PP------ --------") + +DEF_ENC32(Y2_dctagw ,ICLASS_ST" 010 00 00sssss PP-ttttt --------") +DEF_ENC32(Y2_dctagr ,ICLASS_ST" 010 00 01sssss PP------ ---ddddd") + +DEF_ENC32(Y4_l2tagw ,ICLASS_ST" 010 00 10sssss PP0ttttt --------") +DEF_ENC32(Y4_l2tagr ,ICLASS_ST" 010 00 11sssss PP------ ---ddddd") + +DEF_ENC32(Y4_l2fetch, ICLASS_ST" 011 00 00sssss PP-ttttt 000-----") +DEF_ENC32(Y5_l2cleanidx, ICLASS_ST" 011 00 01sssss PP------ --------") +DEF_ENC32(Y5_l2invidx, ICLASS_ST" 011 00 10sssss PP------ --------") +DEF_ENC32(Y5_l2unlocka, ICLASS_ST" 011 00 11sssss PP------ --------") DEF_ENC32(Y5_l2fetch, ICLASS_ST" 011 01 00sssss PP-ttttt --------") +DEF_ENC32(Y6_l2gcleanpa, ICLASS_ST" 011 01 01----- PP-ttttt --------") +DEF_ENC32(Y6_l2gcleaninvpa,ICLASS_ST" 011 01 10----- PP-ttttt --------") + + + + + + + + /*******************************/ /* */ /* */ @@ -547,13 +582,23 @@ DEF_ENC32(J2_jumprfnewpt, ICLASS_J" 0011 011sssss PP-11-uu --------") DEF_FIELDROW_DESC32(ICLASS_J" 0100 -------- PP------ --------","[#4] (#u8) ") DEF_ENC32(J2_trap0, ICLASS_J" 0100 00------ PP-iiiii ---iii--") -DEF_ENC32(J2_pause, ICLASS_J" 0100 01------ PP-iiiii ---iii--") +DEF_ENC32(J2_trap1, ICLASS_J" 0100 10-xxxxx PP-iiiii ---iii--") +DEF_ENC32(J2_pause, ICLASS_J" 0100 01----ii PP-iiiii ---iii--") + +DEF_FIELDROW_DESC32(ICLASS_J" 0101 -------- PP------ --------","[#5] Rd=(Rs) ") +DEF_ENC32(Y2_icdatar, ICLASS_J" 0101 101sssss PP------ ---ddddd") +DEF_ENC32(Y2_ictagr, ICLASS_J" 0101 111sssss PP------ ---ddddd") +DEF_ENC32(Y2_ictagw, ICLASS_J" 0101 110sssss PP0ttttt --------") +DEF_ENC32(Y2_icdataw, ICLASS_J" 0101 110sssss PP1ttttt --------") DEF_FIELDROW_DESC32(ICLASS_J" 0110 -------- PP------ --------","[#6] icop(Rs) ") DEF_ENC32(Y2_icinva, ICLASS_J" 0110 110sssss PP000--- --------") +DEF_ENC32(Y2_icinvidx, ICLASS_J" 0110 110sssss PP001--- --------") +DEF_ENC32(Y2_ickill, ICLASS_J" 0110 110----- PP010--- --------") DEF_FIELDROW_DESC32(ICLASS_J" 0111 -------- PP------ --------","[#7] () ") DEF_ENC32(Y2_isync, ICLASS_J" 0111 11000000 PP0---00 00000010") +DEF_ENC32(J2_rte, ICLASS_J" 0111 111----- PP00---- 000-----") /* JUMP */ DEF_FIELDROW_DESC32(ICLASS_J" 100- -------- PP------ --------","[#8,9] PC=(#r22)") @@ -591,7 +636,6 @@ DEF_ENC32(J2_callf, ICLASS_J" 1101 ii1iiiii PPi-0-uu iiiiiii-") /*******************************/ -/* EJP: this has to match what we have in htmldocs.py... 
so I will call it CJ, we can change it */ DEF_CLASS32(ICLASS_CJ" 0--- -------- PP------ --------",CJ) DEF_FIELDROW_DESC32(ICLASS_CJ" 00-- -------- -------- --------","[#0-3] pd=cmp.xx(R,#u5) ; if ([!]p0.new) jump:[h] #s9:2 ") @@ -738,12 +782,30 @@ DEF_ENC32(J2_jumprltezpt,ICLASS_CR" 0001 11isssss PPi1iiii iiiiiii-") DEF_FIELDROW_DESC32( ICLASS_CR" 0010 -------- PP------ --------","[#2] Cd=Rs ") DEF_ENC32(A2_tfrrcr, ICLASS_CR" 0010 001sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgrcr, ICLASS_CR" 0010 000sssss PP------ ---ddddd") +DEF_ENC32(Y4_trace, ICLASS_CR" 0010 010sssss PP------ 000-----") +DEF_ENC32(Y6_diag, ICLASS_CR" 0010 010sssss PP------ 001-----") +DEF_ENC32(Y6_diag0, ICLASS_CR" 0010 010sssss PP-ttttt 010-----") +DEF_ENC32(Y6_diag1, ICLASS_CR" 0010 010sssss PP-ttttt 011-----") + +DEF_ENC32(Y6_dmcfgrd,"10101000000sssssPP------101ddddd") +DEF_ENC32(Y6_dmcfgwr,"10101000000sssssPP-ttttt110-----") +DEF_ENC32(Y6_dmlink,"10100110000sssssPP-ttttt010-----") +DEF_ENC32(Y6_dmpause,"10101000000-----PP------011ddddd") +DEF_ENC32(Y6_dmpoll,"10101000000-----PP------010ddddd") +DEF_ENC32(Y6_dmresume,"10100110000sssssPP------100-----") +DEF_ENC32(Y6_dmstart,"10100110000sssssPP------001-----") +DEF_ENC32(Y6_dmsyncht,"10101000000-----PP-----0111ddddd") +DEF_ENC32(Y6_dmtlbsynch,"10101000000-----PP-----1111ddddd") +DEF_ENC32(Y6_dmwait,"10101000000-----PP------001ddddd") DEF_FIELDROW_DESC32( ICLASS_CR" 0011 -------- PP------ --------","[#3] Cdd=Rss ") DEF_ENC32(A4_tfrpcp, ICLASS_CR" 0011 001sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgpcp, ICLASS_CR" 0011 000sssss PP------ ---ddddd") DEF_FIELDROW_DESC32( ICLASS_CR" 1000 -------- PP------ --------","[#8] Rdd=Css ") DEF_ENC32(A4_tfrcpp, ICLASS_CR" 1000 000sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgcpp, ICLASS_CR" 1000 001sssss PP------ ---ddddd") DEF_FIELDROW_DESC32( ICLASS_CR" 1001 -------- PP------ --------","[#9] (#r8,#U10)") DEF_ENC32(J2_ploop1si, ICLASS_CR" 1001 101IIIII PP-iiiii IIIii-II") @@ -754,6 +816,7 @@ DEF_ENC32(J2_loop1i, ICLASS_CR" 1001 001IIIII PP-iiiii IIIii-II") DEF_FIELDROW_DESC32( ICLASS_CR" 1010 -------- PP------ --------","[#10] Rd=Cs ") DEF_ENC32(A2_tfrcrr, ICLASS_CR" 1010 000sssss PP------ ---ddddd") +DEF_ENC32(G4_tfrgcrr, ICLASS_CR" 1010 001sssss PP------ ---ddddd") DEF_ENC32(C4_addipc, ICLASS_CR" 1010 01001001 PP-iiiii i--ddddd") @@ -776,8 +839,66 @@ DEF_ENC32(C4_and_orn, ICLASS_CR" 1011 1011--ss PP0---tt uu----dd") DEF_ENC32(C4_or_andn, ICLASS_CR" 1011 1101--ss PP0---tt uu----dd") DEF_ENC32(C4_or_orn, ICLASS_CR" 1011 1111--ss PP0---tt uu----dd") -DEF_ENC32(C4_fastcorner9, ICLASS_CR"1011 0000--ss PP1---tt 1--1--dd") -DEF_ENC32(C4_fastcorner9_not, ICLASS_CR"1011 0001--ss PP1---tt 1--1--dd") +DEF_ENC32(C4_fastcorner9, ICLASS_CR"1011 0000--ss PP1---tt 1--1--dd") +DEF_ENC32(C4_fastcorner9_not, ICLASS_CR"1011 0001--ss PP1---tt 1--1--dd") + + + +/* Supervisor CR ops */ +/* Interrupts */ +DEF_FIELDROW_DESC32( ICLASS_CR" 0100 -------- PP------ --------","[#4] (Rs,Pt)") +DEF_ENC32(Y2_swi, ICLASS_CR" 0100 000sssss PP------ 000-----") +DEF_ENC32(Y2_cswi, ICLASS_CR" 0100 000sssss PP------ 001-----") +DEF_ENC32(Y2_iassignw, ICLASS_CR" 0100 000sssss PP------ 010-----") +DEF_ENC32(Y2_ciad, ICLASS_CR" 0100 000sssss PP------ 011-----") +DEF_ENC32(Y2_setimask, ICLASS_CR" 0100 100sssss PP----tt 000-----") +DEF_ENC32(Y2_setprio, ICLASS_CR" 0100 100sssss PP----tt 001-----") +DEF_ENC32(Y4_siad, ICLASS_CR" 0100 100sssss PP------ 011-----") + +DEF_ENC32(Y2_wait, ICLASS_CR" 0100 010sssss PP------ 000-----") +DEF_ENC32(Y2_resume, ICLASS_CR" 0100 010sssss 
PP------ 001-----") +DEF_ENC32(Y2_stop, ICLASS_CR" 0100 011sssss PP------ 000-----") +DEF_ENC32(Y2_start, ICLASS_CR" 0100 011sssss PP------ 001-----") +DEF_ENC32(Y4_nmi, ICLASS_CR" 0100 011sssss PP------ 010-----") + +DEF_FIELDROW_DESC32( ICLASS_CR" 0101 -------- PP------ --------","[#5] Rx ") +DEF_ENC32(Y2_crswap0, ICLASS_CR" 0101 000xxxxx PP------ --------") +DEF_ENC32(Y4_crswap1, ICLASS_CR" 0101 001xxxxx PP------ --------") + +DEF_FIELDROW_DESC32( ICLASS_CR" 0110 -------- PP------ --------","[#6] Rd=(Rs)") +DEF_ENC32(Y2_getimask, ICLASS_CR" 0110 000sssss PP------ ---ddddd") +DEF_ENC32(Y2_iassignr, ICLASS_CR" 0110 011sssss PP------ ---ddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 0111 -------- PP------ --------","[#7] cr=Rs ") +DEF_ENC32(Y2_tfrsrcr, ICLASS_CR" 0111 00-sssss PP------ -ddddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1100 -------- PP------ --------","[#12] ") +DEF_ENC32(Y2_break, ICLASS_CR" 1100 001----- PP------ 000-----") +DEF_ENC32(Y2_tlblock, ICLASS_CR" 1100 001----- PP------ 001-----") +DEF_ENC32(Y2_tlbunlock,ICLASS_CR" 1100 001----- PP------ 010-----") +DEF_ENC32(Y2_k0lock, ICLASS_CR" 1100 001----- PP------ 011-----") +DEF_ENC32(Y2_k0unlock, ICLASS_CR" 1100 001----- PP------ 100-----") +DEF_ENC32(Y2_tlbp, ICLASS_CR" 1100 100sssss PP------ ---ddddd") +DEF_ENC32(Y5_tlboc, ICLASS_CR" 1100 111sssss PP------ ---ddddd") +DEF_ENC32(Y5_tlbasidi, ICLASS_CR" 1100 101sssss PP------ --------") +DEF_ENC32(Y2_tlbr, ICLASS_CR" 1100 010sssss PP------ ---ddddd") +DEF_ENC32(Y2_tlbw, ICLASS_CR" 1100 000sssss PP0ttttt --------") +DEF_ENC32(Y5_ctlbw, ICLASS_CR" 1100 110sssss PP0ttttt ---ddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1101 -------- PP------ --------","[#13] Rxx ") +DEF_ENC32(Y4_crswap10, ICLASS_CR" 1101 10-xxxxx PP------ ---00000") +DEF_ENC32(Y4_tfrspcp, ICLASS_CR" 1101 00-sssss PP------ -ddddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1110 -------- PP------ --------","[#14] Rd=cr ") +DEF_ENC32(Y2_tfrscrr, ICLASS_CR" 1110 1sssssss PP------ ---ddddd") + +DEF_FIELDROW_DESC32( ICLASS_CR" 1111 -------- PP------ --------","[#15] Rdd=Sss ") +DEF_ENC32(Y4_tfrscpp, ICLASS_CR" 1111 0sssssss PP------ ---ddddd") + + + + + @@ -956,9 +1077,9 @@ MPY_ENC(F2_dfmin, "1000","ddddd","0","0","1","1","11") MPY_ENC(F2_dfmax, "1000","ddddd","0","1","0","0","11") MPY_ENC(F2_dfmpyll, "1000","ddddd","0","1","0","1","11") -MPY_ENC(M7_dcmpyrw, "1000","ddddd","0","0","0","1","10") +MPY_ENC(M7_dcmpyrw, "1000","ddddd","0","0","0","1","10") MPY_ENC(M7_dcmpyrwc, "1000","ddddd","0","0","1","1","10") -MPY_ENC(M7_dcmpyiw, "1000","ddddd","0","1","1","0","10") +MPY_ENC(M7_dcmpyiw, "1000","ddddd","0","1","1","0","10") MPY_ENC(M7_dcmpyiwc, "1000","ddddd","0","1","1","1","10") @@ -967,14 +1088,14 @@ DEF_FIELDROW_DESC32(ICLASS_M" 1001 -------- PP------ --------","[#9] Rd=(Rss,Rtt MPY_ENC(M2_vdmpyrs_s0, "1001","ddddd","0","0","0","0","00") MPY_ENC(M2_vdmpyrs_s1, "1001","ddddd","0","0","0","1","00") -MPY_ENC(M7_wcmpyrw, "1001","ddddd","0","0","1","0","00") +MPY_ENC(M7_wcmpyrw, "1001","ddddd","0","0","1","0","00") MPY_ENC(M7_wcmpyrw_rnd, "1001","ddddd","0","0","1","1","00") -MPY_ENC(M7_wcmpyiw, "1001","ddddd","0","1","0","0","00") +MPY_ENC(M7_wcmpyiw, "1001","ddddd","0","1","0","0","00") MPY_ENC(M7_wcmpyiw_rnd, "1001","ddddd","0","1","0","1","00") -MPY_ENC(M7_wcmpyrwc, "1001","ddddd","0","1","1","0","00") +MPY_ENC(M7_wcmpyrwc, "1001","ddddd","0","1","1","0","00") MPY_ENC(M7_wcmpyrwc_rnd, "1001","ddddd","0","1","1","1","00") -MPY_ENC(M7_wcmpyiwc, "1001","ddddd","1","0","0","0","00") +MPY_ENC(M7_wcmpyiwc, 
"1001","ddddd","1","0","0","0","00") MPY_ENC(M7_wcmpyiwc_rnd, "1001","ddddd","1","0","0","1","00") @@ -1030,10 +1151,10 @@ MPY_ENC(F2_dfmpylh, "1010","xxxxx","0","0","0","0","11") MPY_ENC(F2_dfmpyhh, "1010","xxxxx","0","0","0","1","11") -MPY_ENC(M7_dcmpyrw_acc, "1010","xxxxx","0","0","0","1","10") -MPY_ENC(M7_dcmpyrwc_acc, "1010","xxxxx","0","0","1","1","10") -MPY_ENC(M7_dcmpyiw_acc, "1010","xxxxx","0","1","1","0","10") -MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") +MPY_ENC(M7_dcmpyrw_acc, "1010","xxxxx","0","0","0","1","10") +MPY_ENC(M7_dcmpyrwc_acc, "1010","xxxxx","0","0","1","1","10") +MPY_ENC(M7_dcmpyiw_acc, "1010","xxxxx","0","1","1","0","10") +MPY_ENC(M7_dcmpyiwc_acc, "1010","xxxxx","1","0","1","0","10") @@ -1063,7 +1184,6 @@ SP_MPY(M2_mpy_sat_rnd, "1100","ddddd","1","1","0") SP_MPY(M2_mpyu, "1100","ddddd","0","0","1") DEF_FIELDROW_DESC32(ICLASS_M" 1101 -------- PP------ --------","[#13] Rd=(Rs,Rt)") -/* EJP: same as mpyi MPY_ENC(M2_mpyui, "1101","ddddd","0","0","1","0","00") */ MPY_ENC(M2_mpyi, "1101","ddddd","0","0","0","0","00") MPY_ENC(M2_mpy_up, "1101","ddddd","0","0","0","0","01") MPY_ENC(M2_mpyu_up, "1101","ddddd","0","0","1","0","01") @@ -1266,7 +1386,6 @@ DEF_ENC32(C2_cmovenewif,ICLASS_ALU2op" 1110 1uu0iiii PP1iiiii iiiddddd") DEF_ENC32(C2_cmoveit, ICLASS_ALU2op" 1110 0uu0iiii PP0iiiii iiiddddd") DEF_ENC32(C2_cmoveif, ICLASS_ALU2op" 1110 1uu0iiii PP0iiiii iiiddddd") - DEF_FIELDROW_DESC32( ICLASS_ALU2op" 1111 -------- PP------ --------","[#15] nop") DEF_ENC32(A2_nop, ICLASS_ALU2op" 1111 -------- PP------ --------") @@ -1408,9 +1527,6 @@ DEF_FIELDROW_DESC32(ICLASS_ALU3op" 1110 -------- PP------ --------","[#14] Rese - - - /*******************************/ /* */ /* */ @@ -1508,7 +1624,6 @@ SH_RRI6_ENC(S6_rol_i_##TAGEND,MAJ4,MIN3,SMOD1 "11",DSTCHARS) DEF_FIELDROW_DESC32(ICLASS_S2op" 0000 -------- PP------ --------","[#0] Rdd=(Rss,#u6)") -/* EJP: there is actually quite a bit of space here, look at the reserved bits */ I6SHIFTTYPES(p, "0000","000","0","ddddd") I5SHIFTTYPES_NOROL(vw, "0000","010","0","ddddd") I4SHIFTTYPES(vh, "0000","100","0","ddddd") @@ -1620,8 +1735,8 @@ SH2_RR_ENC(A2_roundsat, "1000","110","-","001","ddddd") SH_RRI5_ENC(S2_asr_i_svw_trun, "1000","110", "010","ddddd") SH_RRI5_ENC(A4_bitspliti, "1000","110", "100","ddddd") -SH_RRI5_ENC(A7_clip, "1000","110", "101","ddddd") -SH_RRI5_ENC(A7_vclip, "1000","110", "110","ddddd") +SH_RRI5_ENC(A7_clip, "1000","110", "101","ddddd") +SH_RRI5_ENC(A7_vclip, "1000","110", "110","ddddd") SH2_RR_ENC(S4_clbpnorm, "1000","011","-","000","ddddd") @@ -1743,10 +1858,11 @@ SH_RRR_ENC(S2_shuffob, "0001","00-","-","10-","ddddd") SH_RRR_ENC(S2_shuffeh, "0001","00-","-","11-","ddddd") SH_RRR_ENC(S2_shuffoh, "0001","10-","-","000","ddddd") +// 001 SH_RRR_ENC(S2_vtrunewh, "0001","10-","-","010","ddddd") -SH_RRR_ENC(S6_vtrunehb_ppp, "0001","10-","-","011","ddddd") +SH_RRR_ENC(S6_vtrunehb_ppp, "0001","10-","-","011","ddddd") SH_RRR_ENC(S2_vtrunowh, "0001","10-","-","100","ddddd") -SH_RRR_ENC(S6_vtrunohb_ppp, "0001","10-","-","101","ddddd") +SH_RRR_ENC(S6_vtrunohb_ppp, "0001","10-","-","101","ddddd") SH_RRR_ENC(S2_lfsp, "0001","10-","-","110","ddddd") SH_RRR_ENC(S4_vxaddsubw, "0001","01-","-","000","ddddd") @@ -1780,8 +1896,6 @@ SH_RRR_ENC(S4_vrcrotate, "0011","11-","i","11i","ddddd") DEF_FIELDROW_DESC32(ICLASS_S3op" 0100 -------- PP------ --------","[#4] Rd=(Rs,Rt,#u3)") DEF_ENC32(S2_addasl_rrri, ICLASS_S3op" 0100 000 sssss PP0ttttt iiiddddd") - - DEF_FIELDROW_DESC32(ICLASS_S3op" 0101 -------- PP------ --------","[#5] 
Rd=(Rss,Rt)") SH_RRR_ENC(S2_asr_r_svw_trun, "0101","---","-","010","ddddd") SH_RRR_ENC(M4_cmpyi_wh, "0101","---","-","100","ddddd") @@ -1841,6 +1955,7 @@ DEF_FIELDROW_DESC32(ICLASS_S3op" 1010 -------- PP------ --------","[#10] Rxx=(Rs SH_RRR_ENC(S2_insertp_rp, "1010","0--","0","---","xxxxx") SH_RRR_ENC(M4_xor_xacc, "1010","10-","0","000","xxxxx") + DEF_FIELDROW_DESC32(ICLASS_S3op" 1011 -------- PP------ --------","[#11] Rxx=(Rss,Rt)") RSHIFTTYPES(p_or, "1011","000","-","-","xxxxx") RSHIFTTYPES(p_and, "1011","010","-","-","xxxxx") @@ -1848,19 +1963,19 @@ RSHIFTTYPES(p_nac, "1011","100","-","-","xxxxx") RSHIFTTYPES(p_acc, "1011","110","-","-","xxxxx") RSHIFTTYPES(p_xor, "1011","011","-","-","xxxxx") -SH_RRR_ENCX(A4_vrmaxh, "1011","001","0","001","uuuuu") -SH_RRR_ENCX(A4_vrmaxuh, "1011","001","1","001","uuuuu") -SH_RRR_ENCX(A4_vrmaxw, "1011","001","0","010","uuuuu") -SH_RRR_ENCX(A4_vrmaxuw, "1011","001","1","010","uuuuu") +SH_RRR_ENCX(A4_vrmaxh, "1011","001","0","001","uuuuu") +SH_RRR_ENCX(A4_vrmaxuh, "1011","001","1","001","uuuuu") +SH_RRR_ENCX(A4_vrmaxw, "1011","001","0","010","uuuuu") +SH_RRR_ENCX(A4_vrmaxuw, "1011","001","1","010","uuuuu") -SH_RRR_ENCX(A4_vrminh, "1011","001","0","101","uuuuu") -SH_RRR_ENCX(A4_vrminuh, "1011","001","1","101","uuuuu") -SH_RRR_ENCX(A4_vrminw, "1011","001","0","110","uuuuu") -SH_RRR_ENCX(A4_vrminuw, "1011","001","1","110","uuuuu") +SH_RRR_ENCX(A4_vrminh, "1011","001","0","101","uuuuu") +SH_RRR_ENCX(A4_vrminuh, "1011","001","1","101","uuuuu") +SH_RRR_ENCX(A4_vrminw, "1011","001","0","110","uuuuu") +SH_RRR_ENCX(A4_vrminuw, "1011","001","1","110","uuuuu") -SH_RRR_ENC(S2_vrcnegh, "1011","001","1","111","xxxxx") +SH_RRR_ENC(S2_vrcnegh, "1011","001","1","111","xxxxx") -SH_RRR_ENC(S4_vrcrotate_acc, "1011","101","i","--i","xxxxx") +SH_RRR_ENC(S4_vrcrotate_acc, "1011","101","i","--i","xxxxx") DEF_FIELDROW_DESC32(ICLASS_S3op" 1100 -------- PP------ --------","[#12] Rx=(Rs,Rt)") @@ -1874,11 +1989,6 @@ DEF_FIELDROW_DESC32(ICLASS_S3op" 1101 -------- PP------ --------","[#13] Reserve DEF_FIELDROW_DESC32(ICLASS_S3op" 1110 -------- PP------ --------","[#14] Reserved") -DEF_FIELDROW_DESC32(ICLASS_S3op" 1111 -------- PP------ --------","[#14] User Instruction") - - - - @@ -2129,3 +2239,5 @@ OP_OPI_RI(lsr,"1") DEF_FIELDROW_DESC32(ICLASS_ALU64" 1111 -------- PP------ --------","[#15] Rd=(Rs,Ru,#u6:2)") DEF_ENC32(M4_mpyri_addr_u2, ICLASS_ALU64" 1111 0ii sssss PPiddddd iiiuuuuu") DEF_ENC32(M4_mpyri_addr, ICLASS_ALU64" 1111 1ii sssss PPiddddd iiiuuuuu") + + diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index 53198176a994..4e1e5d5326dd 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -203,6 +203,9 @@ Q6INSN(S2_storew_locked,"memw_locked(Rs32,Pd4)=Rt32", ATTRIBS(A_REGWRSIZE_4B,A_M Q6INSN(L4_loadd_locked,"Rdd32=memd_locked(Rs32)", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_LOAD,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK), "Load double with lock", { fEA_REG(RsV); fLOAD_LOCKED(1,8,u,EA,RddV) }) +Q6INSN(L4_loadw_phys,"Rd32=memw_phys(Rs32,Rt32)", ATTRIBS(A_REGWRSIZE_4B,A_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_PRIV,A_MEMSIZE_4B,A_LOAD,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Load word from physical address", +{ fLOAD_PHYS(1,4,u,RsV,RtV,RdV); }) + Q6INSN(S4_stored_locked,"memd_locked(Rs32,Pd4)=Rtt32", ATTRIBS(A_REGWRSIZE_8B,A_MEMSIZE_8B,A_STORE,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_LATEPRED,A_NOTE_LATEPRED), "Store word with lock", { fEA_REG(RsV); fSTORE_LOCKED(1,8,EA,RttV,PdV) }) 
diff --git a/target/hexagon/imported/macros.def b/target/hexagon/imported/macros.def
old mode 100755
new mode 100644
index 4bbcfdd5e194..f24f89f36126
--- a/target/hexagon/imported/macros.def
+++ b/target/hexagon/imported/macros.def
@@ -353,6 +353,12 @@ DEF_MACRO(
 ()
 )
 
+DEF_MACRO(
+    fREAD_SSR, /* read SSR register */
+    (READ_RREG(REG_SSR)), /* behavior */
+    ()
+)
+
 DEF_MACRO(
 fWRITE_LR, /* write lr */
 WRITE_RREG(REG_LR,A), /* behavior */
@@ -371,12 +377,36 @@ DEF_MACRO(
 (A_IMPLICIT_WRITES_SP)
 )
 
+DEF_MACRO(
+    fWRITE_GOSP, /* write gosp */
+    WRITE_RREG(REG_GOSP,A), /* behavior */
+    (A_IMPLICIT_WRITES_GOSP)
+)
+
 DEF_MACRO(
 fREAD_SP, /* read stack pointer */
 (READ_RREG(REG_SP)), /* behavior */
 ()
 )
 
+DEF_MACRO(
+    fREAD_GOSP, /* read guest other stack pointer */
+    (READ_RREG(REG_GOSP)), /* behavior */
+    ()
+)
+
+DEF_MACRO(
+    fREAD_GELR, /* read guest exception link register */
+    (READ_RREG(REG_GELR)), /* behavior */
+    ()
+)
+
+DEF_MACRO(
+    fREAD_GEVB, /* read guest event vector base */
+    (READ_RREG(REG_GEVB)), /* behavior */
+    ()
+)
+
 DEF_MACRO(
 fREAD_CSREG, /* read CS register */
 (READ_RREG(REG_CSA+N)), /* behavior */
@@ -570,6 +600,11 @@ DEF_MACRO(
 WRITE_PREG(3,VAL), /* behavior */
 (A_IMPLICIT_WRITES_P3)
 )
+DEF_MACRO(
+    fWRITE_P3_LATE, /* write Predicate 3, late */
+    {WRITE_PREG(3,VAL); fHIDE(MARK_LATE_PRED_WRITE(3))}, /* behavior */
+    (A_IMPLICIT_WRITES_P3,A_RESTRICT_LATEPRED)
+)
 
 DEF_MACRO(
 fPART1, /* write Predicate 0 */
@@ -660,6 +695,7 @@ DEF_MACRO(
 ((size8s_t)((size2s_t)(A))), /* optional attributes */
 )
 
+
 DEF_MACRO(
 fCAST2_8u, /* macro name */
 ((size8u_t)((size2u_t)(A))),
@@ -1532,18 +1568,209 @@ DEF_MACRO(fECHO,
 /* OS interface and stop/wait */
 /********************************************/
 
+DEF_MACRO(RUNNABLE_THREADS_MAX,
+    (thread->processor_ptr->runnable_threads_max),
+    ()
+)
+
+DEF_MACRO(THREAD_IS_ON,
+    ((PROC->arch_proc_options->thread_enable_mask>>TNUM) & 0x1),
+    ()
+)
+
+DEF_MACRO(THREAD_EN_MASK,
+    ((PROC->arch_proc_options->thread_enable_mask)),
+    ()
+)
+
+DEF_MACRO(READ_IMASK,
+    (((TH) >= (thread->processor_ptr->runnable_threads_max)) ? 0 : (thread->processor_ptr->thread[TH]->Regs[REG_IMASK])),
+    ()
+)
+DEF_MACRO(WRITE_IMASK,
+    if ((TH) < (thread->processor_ptr->runnable_threads_max)) { thread->processor_ptr->thread[TH]->Regs[REG_IMASK]=(VAL & reg_mutability[REG_IMASK] ); },
+    (A_IMPLICIT_WRITES_IMASK_ANYTHREAD)
+)
+
+DEF_MACRO(WRITE_PRIO,
+    {
+        if ((TH) < (thread->processor_ptr->runnable_threads_max)) {
+            size4u_t tid_reg = thread->processor_ptr->thread[TH]->Regs[REG_TID];
+            fINSERT_BITS(tid_reg, reg_field_info[STID_PRIO].width,
+                         reg_field_info[STID_PRIO].offset, VAL);
+            LOG_OTHER_THREAD_REG_WRITE(thread,REG_TID,tid_reg,TH);
+        }
+    },
+    (A_IMPLICIT_WRITES_STID_PRIO_ANYTHREAD)
+)
+
+DEF_MACRO(DO_IASSIGNW,
+    {
+        int i;
+        int intbitpos = ((REG>>16)&0xF);
+        for (i = 0; i < RUNNABLE_THREADS_MAX; i++) {
+            if (((thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                fINSERT_BITS(thread->processor_ptr->thread[i]->Regs[REG_IMASK],1, intbitpos, (REG>>i) & 1);
+            }
+        }
+    },
+    (A_IMPLICIT_WRITES_IMASK_ANYTHREAD)
+)
+
+DEF_MACRO(fDO_NMI,
+    {
+        int i;
+        for (i = 0; i < RUNNABLE_THREADS_MAX; i++) {
+            if (((thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                if (SREG & (1<<i)) {
+                    register_nmi_interrupt(thread->processor_ptr->thread[i]);
+                }
+            }
+        }
+    },
+)
+
+DEF_MACRO(fDO_TRACE,
+    {
+        fHIDE(HEX_CALLBACK(thread->processor_ptr->options->trace_callback,
+                           thread->system_ptr,thread->processor_ptr,
+                           thread->threadId,SREG);)
+    },
+)
+
+DEF_MACRO(DO_IASSIGNR,
+    {
+        int i;
+        int result=0;
+        int intbitpos = ((SREG>>16)&0xF);
+        for (i = 0; i < RUNNABLE_THREADS_MAX; i++) {
+            if (((thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                result |= (((thread->processor_ptr->thread[i]->Regs[REG_IMASK]>>intbitpos)&1)<<i);
+            }
+        }
+        DREG = result;
+    },
+    ()
+)
+
+DEF_MACRO(DO_SWI,
+    {
+        fHIDE(HEX_CALLBACK(thread->processor_ptr->options->swi_callback,
+                           thread->system_ptr,thread->processor_ptr,
+                           thread->threadId,REG));
+        LOG_GLOBAL_REG_WRITE(REG_IPEND,(GLOBAL_REG_READ(REG_IPEND) | (REG & GLOBAL_REG_READ(REG_IEL))));
+    },
+    (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(DO_CSWI,
+    LOG_GLOBAL_REG_WRITE(REG_IPEND,GLOBAL_REG_READ(REG_IPEND) & ~((REG) & GLOBAL_REG_READ(REG_IEL)));,
+    ()
+)
+
+DEF_MACRO(DO_CIAD,
+    sys_ciad(thread,VAL); LOG_GLOBAL_REG_WRITE(REG_IAD,GLOBAL_REG_READ(REG_IAD) & ~(VAL));,
+    (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(DO_SIAD,
+    sys_siad(thread,VAL); LOG_GLOBAL_REG_WRITE(REG_IAD,GLOBAL_REG_READ(REG_IAD) | (VAL));,
+    (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(fBREAK,
+    {isdb_brkpt_insn(thread->processor_ptr,thread->threadId);},
+    ()
+)
+
 DEF_MACRO(fPAUSE,
 {sys_pause(thread, insn->slot, IMM);},
 ()
 )
 
+
 DEF_MACRO(fTRAP,
 warn("Trap NPC=%x ",fREAD_NPC());
 warn("Trap exception, PCYCLE=%lld TYPE=%d NPC=%x IMM=0x%x",thread->processor_ptr->pstats[pcycles],TRAPTYPE,fREAD_NPC(),IMM);
 register_trap_exception(thread,fREAD_NPC(),TRAPTYPE,IMM);,
+ (A_EXCEPTION_SWI)
+)
+
+DEF_MACRO(fINTERNAL_CLEAR_SAMEPAGE,
+    /* force re-xlate at next fetch, refresh of in_user_mode, etc. */
+    /* Permissions change too... */
+    sys_utlb_invalidate(thread->processor_ptr,thread),
+    /* NOTHING */
+)
+
+DEF_MACRO(fCLEAR_RTE_EX,
+    {
+        fLOG_REG_FIELD(SSR,SSR_EX,0);
+        fINTERNAL_CLEAR_SAMEPAGE();
+    },
+    ()
+)
+
+DEF_MACRO(fTLB_LOCK_AVAILABLE,
+    (fREAD_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK) == 0),
+    ()
+)
+
+DEF_MACRO(fK0_LOCK_AVAILABLE,
+    (fREAD_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_K0LOCK) == 0),
+    ()
+)
+
+DEF_MACRO(fSET_TLB_LOCK,
+    {
+        if (fTLB_LOCK_AVAILABLE()) {
+            fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK,1);
+        } else {
+            sys_waiting_for_tlb_lock(thread);
+        }
+    },
+    ()
+)
+
+DEF_MACRO(fSET_K0_LOCK,
+    {
+        if (fK0_LOCK_AVAILABLE() && sys_k0lock_queue_ready(thread)) {
+            warn("k0lock: T%d: PC=0x%x: PCycle=%lld",thread->threadId,thread->Regs[REG_PC],thread->processor_ptr->pstats[pcycles]);
+            fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_K0LOCK,1);
+        } else {
+            warn("k0lock_waiting: T%d: PC=0x%x: PCycle=%lld",thread->threadId,thread->Regs[REG_PC],thread->processor_ptr->pstats[pcycles]);
+            sys_waiting_for_k0_lock(thread);
+        }
+    },
+    ()
+)
+
+DEF_MACRO(fCLEAR_TLB_LOCK,
+    {
+        int i;
+        fLOG_GLOBAL_REG_FIELD(SYSCONF,SYSCFG_TLBLOCK,0);
+        for (i = 0; i < RUNNABLE_THREADS_MAX; i++) {
+            if (((thread->processor_ptr->arch_proc_options->thread_enable_mask>>i) & 0x1)) {
+                thread->processor_ptr->thread[i]->cu_tlb_lock_waiting = 0;
+            }
+        }
+    },
+    ()
+)
+
+DEF_MACRO(fCLEAR_K0_LOCK,
+    do {
+        warn("k0unlock: T%d: PC=0x%x: Pcycle=%lld",thread->threadId,thread->Regs[REG_PC], thread->processor_ptr->pstats[pcycles]);
+        sys_initiate_clear_k0_lock(thread);
+    } while (0),
+    ()
+)
+
 DEF_MACRO(fALIGN_REG_FIELD_VALUE,
 ((VAL)<<reg_field_info[FIELD].offset),
 ()
 )
+
+DEF_MACRO(fWRITE_GLOBAL_REG_FIELD,
+    fINSERT_BITS(thread->processor_ptr->global_regs[REG_##REG],
+                 reg_field_info[FIELD].width,
+                 reg_field_info[FIELD].offset,VAL),
+)
+
+DEF_MACRO(fLOG_GLOBAL_REG_FIELD,
+    LOG_MASKED_GLOBAL_REG_WRITE(REG_##REG,
+                                fALIGN_REG_FIELD_VALUE(FIELD,VAL),
+                                fGET_REG_FIELD_MASK(FIELD)),
+    ()
+)
+
 DEF_MACRO(fREAD_REG_FIELD,
 fEXTRACTU_BITS(thread->Regs[REG_##REG],
 reg_field_info[FIELD].width,
@@ -1561,6 +1808,13 @@ DEF_MACRO(fREAD_REG_FIELD,
 /* ATTRIBS */
 )
 
+DEF_MACRO(fREAD_GLOBAL_REG_FIELD,
+    fEXTRACTU_BITS(thread->processor_ptr->global_regs[REG_##REG],
+                   reg_field_info[FIELD].width,
+                   reg_field_info[FIELD].offset),
+    /* ATTRIBS */
+)
+
 DEF_MACRO(fGET_FIELD,
 fEXTRACTU_BITS(VAL,
 reg_field_info[FIELD].width,
@@ -1576,6 +1830,185 @@ DEF_MACRO(fSET_FIELD,
 /* ATTRIBS */
 )
 
+DEF_MACRO(fSET_RUN_MODE_NOW,
+    {thread->processor_ptr->global_regs[REG_MODECTL] |= (1<<TNUM);
+     thread->last_commit_cycle = thread->processor_ptr->pcycle_counter;
+     sys_recalc_num_running_threads(thread->processor_ptr);},
+)
+
+DEF_MACRO(fIN_DEBUG_MODE,
+    (thread->debug_mode || (fREAD_GLOBAL_REG_FIELD(ISDBST,ISDBST_DEBUGMODE) & (1<<TNUM))),
+    ()
+)
+
+DEF_MACRO(fIN_DEBUG_MODE_NO_ISDB,
+    (thread->debug_mode),
+    ()
+)
+
+DEF_MACRO(fIN_DEBUG_MODE_WARN,
+    do {
+        if (fREAD_GLOBAL_REG_FIELD(ISDBST,ISDBST_DEBUGMODE) & (1<<TNUM)) {
+            warn("thread %d: ISDB debug mode set", TNUM);
+        }
+        sys_recalc_num_running_threads(thread->processor_ptr);
+    } while (0),
+    /* NOTHING */
+)
+
+DEF_MACRO(fGET_RUN_MODE,
+    ((thread->processor_ptr->global_regs[REG_MODECTL]>>TNUM)&0x1),
+)
+
+DEF_MACRO(fSET_WAIT_MODE,
+    {fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_W,
+     fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_W) | 1<<(TNUM))},
+    /* NOTHING */
+)
+
+DEF_MACRO(fCLEAR_WAIT_MODE,
+    {thread->processor_ptr->global_regs[REG_MODECTL] &= ~(1<<(TNUM+16));
+     thread->last_commit_cycle = thread->processor_ptr->pcycle_counter;
+     sys_recalc_num_running_threads(thread->processor_ptr);},
+)
+
+DEF_MACRO(fGET_WAIT_MODE,
+    ((thread->processor_ptr->global_regs[REG_MODECTL]>>(TNUM+16))&0x1),
+)
+
+DEF_MACRO(fRESET_THREAD,
+    register_reset_interrupt(T,NUM),
+)
+
+DEF_MACRO(fREAD_CURRENT_EVB,
+    (GLOBAL_REG_READ(REG_EVB)),
+    /* nothing */
+)
+
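+/*
+ * The TLB index helpers a few macros below cope with JTLB sizes that
+ * are not a power of two: fPOW2_ROUNDUP smears the high bit down and
+ * adds one, fTLB_IDXMASK masks with that rounded size minus one, and
+ * fTLB_NONPOW2WRAP folds out-of-range indices back into the table.
+ * Worked example for a (hypothetical) 192-entry JTLB:
+ *
+ *     fPOW2_ROUNDUP(192)    = fPOW2_HELP_ROUNDUP(191) + 1 = 256
+ *     fTLB_IDXMASK(200)     = 200 & (256 - 1)             = 200
+ *     fTLB_NONPOW2WRAP(200) = 200 - 192                   = 8
+ */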
+DEF_MACRO(fREAD_ELR, + READ_RREG(REG_ELR), + () +) + +DEF_MACRO(fPOW2_HELP_ROUNDUP, + ((VAL) | ((VAL) >> 1) | ((VAL) >> 2) | ((VAL) >> 4) | ((VAL) >> 8) | ((VAL) >> 16)), + () +) + +DEF_MACRO(fPOW2_ROUNDUP, + fPOW2_HELP_ROUNDUP((VAL)-1)+1, + () +) + +DEF_MACRO(fTLB_IDXMASK, + ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(thread->processor_ptr->arch_proc_options->jtlb_size)) - 1)), + () +) + +DEF_MACRO(fTLB_NONPOW2WRAP, + (((INDEX) >= thread->processor_ptr->arch_proc_options->jtlb_size) ? ((INDEX) - thread->processor_ptr->arch_proc_options->jtlb_size) : (INDEX)), + /* ATTRIBS */ +) + +DEF_MACRO(fTLBW, + do {size4u_t __myidx = fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX)); + TLB_REG_WRITE(__myidx,VALUE); + fHIDE(HEX_CALLBACK(thread->processor_ptr->options->tlbw_callback,thread->system_ptr,thread->processor_ptr,thread->threadId,__myidx);) + fHIDE(sys_tlb_write(thread,__myidx,VALUE);)} while (0), + /* ATTRIBS */ +) + +DEF_MACRO(fTLB_ENTRY_OVERLAP, + fHIDE( (sys_check_overlap(thread,VALUE)!=-2) ), + /* ATTRIBS */ +) + +DEF_MACRO(fTLB_ENTRY_OVERLAP_IDX, + fHIDE(sys_check_overlap(thread,VALUE)), + /* ATTRIBS */ +) + + +DEF_MACRO(fTLBR, + TLB_REG_READ(fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))), + /* ATTRIBS */ +) + +DEF_MACRO(fTLBP, + tlb_lookup(thread,((TLBHI)>>12),((TLBHI)<<12),1), + /* attribs */ +) + + + +DEF_MACRO(READ_SGP0, + READ_RREG(REG_SGP), + () +) + +DEF_MACRO(READ_SGP1, + READ_RREG(REG_SGP+1), + () +) + +DEF_MACRO(READ_SGP10, + READ_RREG_PAIR(REG_SGP), + () +) + +DEF_MACRO(READ_UGP, + READ_RREG(REG_UGP), +) + +DEF_MACRO(WRITE_SGP0, + WRITE_RREG(REG_SGP,VAL), + (A_IMPLICIT_WRITES_SGP0) +) + +DEF_MACRO(WRITE_SGP1, + WRITE_RREG(REG_SGP+1,VAL), + (A_IMPLICIT_WRITES_SGP1) +) + +DEF_MACRO(WRITE_SGP10, + WRITE_RREG_PAIR(REG_SGP,VAL), + (A_IMPLICIT_WRITES_SGP0,A_IMPLICIT_WRITES_SGP1) +) + +DEF_MACRO(WRITE_UGP, + WRITE_RREG(REG_UGP,VAL), +) + +DEF_MACRO(fSTART, + fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_E, fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_E) | (((REG & ((1<processor_ptr))), + () +) + +DEF_MACRO(fRESUME, + fLOG_GLOBAL_REG_FIELD(MODECTL,MODECTL_W, + fREAD_GLOBAL_REG_FIELD(MODECTL,MODECTL_W) & (~(REG))), + () +) + +DEF_MACRO(fGET_TNUM, + thread->threadId, + () +) + /********************************************/ /* Cache Management */ /********************************************/ @@ -1602,6 +2035,11 @@ DEF_MACRO(fISYNC, ) +DEF_MACRO(fICFETCH, + , + () +) + DEF_MACRO(fDCFETCH, sys_dcfetch(thread, (REG), insn->slot), (A_MEMLIKE) @@ -1615,6 +2053,34 @@ DEF_MACRO(fICINVA, (A_ICINVA) ) +DEF_MACRO(fDCTAGR, + ({DST=sys_dctagr(thread, INDEX, insn->slot,DSTREGNO);})/* FIXME */, + () +) + +DEF_MACRO(fDCTAGW, + (sys_dctagw(thread, INDEX, PART2, insn->slot)), + () +) +DEF_MACRO(fICTAGR, + ({DST=sys_ictagr(thread, INDEX, insn->slot,REGNO);}), + () +) + +DEF_MACRO(fICDATAR, + ({DST=sys_icdatar(thread, INDEX, insn->slot);}), + () +) + +DEF_MACRO(fICTAGW, + (sys_ictagw(thread, INDEX, PART2, insn->slot)), + () +) +DEF_MACRO(fICDATAW, + ({ fHIDE(); }), + () +) + DEF_MACRO(fL2FETCH, sys_l2fetch(thread, ADDR,HEIGHT,WIDTH,STRIDE,FLAGS, insn->slot), (A_MEMLIKE,A_L2FETCH) @@ -1635,6 +2101,12 @@ DEF_MACRO(fDCZEROA, (A_MEMLIKE) ) +DEF_MACRO(fDCINVA, + sys_dcinva(thread, (REG)), + (A_MEMLIKE) +) + + DEF_MACRO(fCHECKFORPRIV, {sys_check_privs(thread); if (EXCEPTION_DETECTED) return; }, () @@ -1645,6 +2117,16 @@ DEF_MACRO(fCHECKFORGUEST, () ) +DEF_MACRO(fTAKEN_INTERRUPT_EDGECLEAR, + { proc->global_regs[REG_IPEND] &= ~(INT_NUMTOMASK(intnum) & proc->global_regs[REG_IEL]); }, + () +) + +DEF_MACRO(fSET_IAD, + { 
sys_siad(thread,INT_NUMTOMASK(intnum)); thread->processor_ptr->global_regs[REG_IAD] |= INT_NUMTOMASK(intnum); }, + () +) + DEF_MACRO(fBRANCH_SPECULATE_STALL, { sys_speculate_branch_stall(thread, insn->slot, JUMP_COND(JUMP_PRED_SET), @@ -1664,3 +2146,79 @@ DEF_MACRO(IV1DEAD, , () ) + +DEF_MACRO(fIN_MONITOR_MODE, + sys_in_monitor_mode(thread), + () +) + +DEF_MACRO(fIN_USER_MODE, + sys_in_user_mode(thread), + () +) + +DEF_MACRO(fIN_GUEST_MODE, + sys_in_guest_mode(thread), + () +) + +DEF_MACRO(fGRE_ENABLED, + fREAD_REG_FIELD(CCR,CCR_GRE), + () +) + +DEF_MACRO(fGTE_ENABLED, + fREAD_REG_FIELD(CCR,CCR_GRE), + () +) + +DEF_MACRO(fTRAP1_VIRTINSN, + ((fIN_GUEST_MODE()) + && (fGRE_ENABLED()) + && ( ((IMM) == 1) + || ((IMM) == 3) + || ((IMM) == 4) + || ((IMM) == 6))), + () +) + +DEF_MACRO(fVIRTINSN_RTE, + do { + thread->trap1_info = TRAP1_VIRTINSN_RTE; + fLOG_REG_FIELD(SSR,SSR_SS,fREAD_REG_FIELD(GSR,GSR_SS)); + fLOG_REG_FIELD(CCR,CCR_GIE,fREAD_REG_FIELD(GSR,GSR_IE)); + fLOG_REG_FIELD(SSR,SSR_GM,!fREAD_REG_FIELD(GSR,GSR_UM)); + fBRANCH((fREAD_GELR() & -4),COF_TYPE_RTE); + fINTERNAL_CLEAR_SAMEPAGE(); + } while (0), + (A_IMPLICIT_WRITES_CCR,A_IMPLICIT_WRITES_SSR) +) + +DEF_MACRO(fVIRTINSN_SETIE, + do { + fLOG_REG_FIELD(CCR,CCR_GIE,(REG) & 1); + REG = fREAD_REG_FIELD(CCR,CCR_GIE); + thread->trap1_info = TRAP1_VIRTINSN_SETIE; + } while (0), + (A_IMPLICIT_WRITES_CCR) +) + +DEF_MACRO(fVIRTINSN_GETIE, + { + thread->trap1_info = TRAP1_VIRTINSN_GETIE; + REG = fREAD_REG_FIELD(CCR,CCR_GIE); + }, + () +) + +DEF_MACRO(fVIRTINSN_SPSWAP, + do { + if (fREAD_REG_FIELD(GSR,GSR_UM)) { + size4u_t TEMP = REG; + REG = fREAD_GOSP(); + fWRITE_GOSP(TEMP); + thread->trap1_info = TRAP1_VIRTINSN_SPSWAP; + } + } while (0), + (A_IMPLICIT_WRITES_GOSP) +) diff --git a/target/hexagon/imported/mmvec/encode_ext.def b/target/hexagon/imported/mmvec/encode_ext.def index 402438f566c1..9df920476441 100644 --- a/target/hexagon/imported/mmvec/encode_ext.def +++ b/target/hexagon/imported/mmvec/encode_ext.def @@ -647,7 +647,7 @@ DEF_ENC(V6_vsubububb_sat, ICLASS_CJ" 1 110 101 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyewuh_64, ICLASS_CJ" 1 110 101 vvvvv PP 0 uuuuu 110 ddddd") DEF_FIELDROW_DESC32( ICLASS_CJ" 1 110 --0 ----- PP 1 ----- ----- ---","Vx32=Vu32") -DEF_ENC(V6_vunpackob, ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 000 xxxxx") // +DEF_ENC(V6_vunpackob, ICLASS_CJ" 1 110 --0 --000 PP 1 uuuuu 000 xxxxx") // DEF_ENC(V6_vunpackoh, ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 001 xxxxx") // //DEF_ENC(V6_vunpackow, ICLASS_CJ" 1 110 --0 ---00 PP 1 uuuuu 010 xxxxx") // @@ -804,5 +804,31 @@ DEF_ENC(V6_vmpyewuh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 101 ddddd") DEF_ENC(V6_vmpyowh, ICLASS_CJ" 1 111 111 vvvvv PP 0 uuuuu 111 ddddd") DEF_ENC(V6_vmpyuhvs,"00011111110vvvvvPP1uuuuu111ddddd") +DEF_ENC(V6_vadd_hf,"00011111011vvvvvPP1uuuuu011ddddd") +DEF_ENC(V6_vadd_sf,"00011111101vvvvvPP1uuuuu001ddddd") +DEF_ENC(V6_vadd_qf16,"00011111011vvvvvPP1uuuuu010ddddd") +DEF_ENC(V6_vadd_qf16_mix,"00011111011vvvvvPP1uuuuu100ddddd") +DEF_ENC(V6_vadd_qf32,"00011111101vvvvvPP1uuuuu000ddddd") +DEF_ENC(V6_vadd_qf32_mix,"00011111101vvvvvPP1uuuuu010ddddd") + +DEF_ENC(V6_vconv_hf_qf16,"00011110--0--100PP1uuuuu011ddddd") +DEF_ENC(V6_vconv_hf_qf32,"00011110--0--100PP1uuuuu110ddddd") +DEF_ENC(V6_vconv_sf_qf32,"00011110--0--100PP1uuuuu000ddddd") + +DEF_ENC(V6_vmpy_qf16,"00011111111vvvvvPP1uuuuu011ddddd") +DEF_ENC(V6_vmpy_qf16_hf,"00011111111vvvvvPP1uuuuu100ddddd") +DEF_ENC(V6_vmpy_qf16_mix_hf,"00011111111vvvvvPP1uuuuu101ddddd") +DEF_ENC(V6_vmpy_qf32,"00011111111vvvvvPP1uuuuu000ddddd") 
+DEF_ENC(V6_vmpy_qf32_hf,"00011111111vvvvvPP1uuuuu111ddddd")
+DEF_ENC(V6_vmpy_qf32_mix_hf,"00011111100vvvvvPP1uuuuu000ddddd")
+DEF_ENC(V6_vmpy_qf32_qf16,"00011111111vvvvvPP1uuuuu110ddddd")
+DEF_ENC(V6_vmpy_qf32_sf,"00011111111vvvvvPP1uuuuu001ddddd")
+
+DEF_ENC(V6_vsub_hf,"00011111011vvvvvPP1uuuuu110ddddd")
+DEF_ENC(V6_vsub_sf,"00011111101vvvvvPP1uuuuu100ddddd")
+DEF_ENC(V6_vsub_qf32,"00011111101vvvvvPP1uuuuu011ddddd")
+DEF_ENC(V6_vsub_qf32_mix,"00011111101vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vsub_qf16,"00011111011vvvvvPP1uuuuu101ddddd")
+DEF_ENC(V6_vsub_qf16_mix,"00011111011vvvvvPP1uuuuu111ddddd")
 #endif /* NO MMVEC */
diff --git a/target/hexagon/imported/mmvec/ext.idef b/target/hexagon/imported/mmvec/ext.idef
index 03d31f6181d7..1b7c5afb42f7 100644
--- a/target/hexagon/imported/mmvec/ext.idef
+++ b/target/hexagon/imported/mmvec/ext.idef
@@ -1400,6 +1400,376 @@ ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(32,vmpyhus_acc, "Vxx32+=vmpyhus(Vu32,Vv32)","
 VxxV.v[1].w[i] += fMPY16SU(fGETHALF(1, VuV.w[i]), fGETUHALF(1, VvV.uw[i])))
+
+/* VMPY_QF32 */
+/* multiply qf32 input, produce qf32 output*/
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32,"Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32)","Vector multiply: qf32 output from qf32 input",
+    fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]);
+    fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]);
+    VdV.qf32[i] = fRNDSATQF32(u.exp+v.exp, u.sig*v.sig, 0))
+
+/* VMPY_QF32_SF */
+/* multiply ieee sf input, produce qf32 output*/
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_sf,"Vd32.qf32=vmpy(Vu32.sf,Vv32.sf)","Vector multiply: qf32 output from IEEE sf input",
+    fHIDE(unfloat )u = fPARSESF(VuV.sf[i]);
+    fHIDE(unfloat )v = fPARSESF(VvV.sf[i]);
+    VdV.qf32[i] = fRNDSATQF32(u.exp+v.exp, u.sig*v.sig, 0);
+    if(u.sign^v.sign) VdV.qf32[i] = fNEGQF32(VdV.qf32[i]))
+
+
+/* VMPY_QF16 */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)","Vector multiply: qf16 output from qf16 input",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]);
+    VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0))
+
+/* VMPY_QF16_HF */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16_hf,"Vd32.qf16=vmpy(Vu32.hf,Vv32.hf)","Vector multiply: qf16 output from ieee hf input",
+    fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0);
+    if(u.sign^v.sign) VdV.qf16[i] = fNEGQF16(VdV.qf16[i]))
+
+/* VMPY_QF16_with_QF16_HF */
+/* get the magnitude of qf16 before multiply */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(16,vmpy_qf16_mix_hf,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf)","Vector multiply: qf16 output from mixed input of qf16 and ieee hf",
+    fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]);
+    fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]);
+    VdV.qf16[i] = fRNDSATQF16(u.exp+v.exp, u.sig*v.sig, 0);
+    if(v.sign) VdV.qf16[i] = fNEGQF16(VdV.qf16[i]))
+
+/* VMPY_QF32_QF16 */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_qf16,"Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16)","Vector multiply: double qf32 output from qf16 input",
+    fHIDE(unfloat )u0 = fPARSEQF16(VuV.w[i] & 0xFFFF);
+    fHIDE(unfloat )u1 = fPARSEQF16((VuV.w[i]>>16) & 0xFFFF);
+    fHIDE(unfloat )v0 = fPARSEQF16(VvV.w[i] & 0xFFFF);
+    fHIDE(unfloat )v1 = fPARSEQF16((VvV.w[i]>>16) & 0xFFFF);
+    VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0);
+    VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0))
+
+/* VMPY_QF32_HF */
+ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_hf,"Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf)","Vector multiply: double qf32
output from ieee hf input", + fHIDE(unfloat )u0 = fPARSEHF(VuV.w[i] & 0xFFFF); + fHIDE(unfloat )u1 = fPARSEHF((VuV.w[i]>>16) & 0xFFFF); + fHIDE(unfloat )v0 = fPARSEHF(VvV.w[i] & 0xFFFF); + fHIDE(unfloat )v1 = fPARSEHF((VvV.w[i]>>16) & 0xFFFF); + VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0); + VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0); + if(u0.sign^v0.sign) VddV.v[0].qf32[i] = fNEGQF32(VddV.v[0].qf32[i]); + if(u1.sign^v1.sign) VddV.v[1].qf32[i] = fNEGQF32(VddV.v[1].qf32[i])) + +/* VMPY_QF32_with_QF16_HF */ +ITERATOR_INSN_MPY_SLOT_DOUBLE_VEC(32,vmpy_qf32_mix_hf,"Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf)","Vector multiply: double qf32 output from mixed input of qf16 and ieee hf", + fHIDE(unfloat )u0 = fPARSEQF16(VuV.w[i] & 0xFFFF); + fHIDE(unfloat )u1 = fPARSEQF16((VuV.w[i]>>16) & 0xFFFF); + fHIDE(unfloat )v0 = fPARSEHF(VvV.w[i] & 0xFFFF); + fHIDE(unfloat )v1 = fPARSEHF((VvV.w[i]>>16) & 0xFFFF); + VddV.v[0].qf32[i] = fRNDSATQF32(u0.exp+v0.exp, u0.sig*v0.sig, 0); + VddV.v[1].qf32[i] = fRNDSATQF32(u1.exp+v1.exp, u1.sig*v1.sig, 0); + if(v0.sign) VddV.v[0].qf32[i] = fNEGQF32(VddV.v[0].qf32[i]); + if(v1.sign) VddV.v[1].qf32[i] = fNEGQF32(VddV.v[1].qf32[i])) + +/* VADD_QF32 */ +ITERATOR_INSN_SHIFT_SLOT(32,vadd_qf32,"Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32)","Vector addition of qf32 input", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VADD_SF */ +ITERATOR_INSN_SHIFT_SLOT(32,vadd_sf,"Vd32.qf32=vadd(Vu32.sf,Vv32.sf)","Vector addition of sf input", + fHIDE(unfloat )u = fPARSESF(VuV.sf[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else if((u.sign==0) && (v.sign==1)) + { + sig = sig_u - sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : sig_u-(sig_v+sig); + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low); + //printf("ARCHSIM: output:%x\\n", VdV.qf32[i]); + if(u.sign && v.sign) VdV.qf32[i] = fNEGQF32(VdV.qf32[i])) + +/* VADD_QF32_MIX */ +ITERATOR_INSN_SHIFT_SLOT(32,vadd_qf32_mix,"Vd32.qf32=vadd(Vu32.qf32,Vv32.sf)","Vector addition of mixed qf32 and sf", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VSUB_QF32 */ +ITERATOR_INSN_SHIFT_SLOT(32,vsub_qf32,"Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32)","Vector subtraction of qf32 input", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSEQF32(VvV.qf32[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (expv.exp) ? 
(sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VSUB_SF */ +ITERATOR_INSN_SHIFT_SLOT(32,vsub_sf,"Vd32.qf32=vsub(Vu32.sf,Vv32.sf)","Vector subtraction of ieee sf input", + fHIDE(unfloat )u = fPARSESF(VuV.sf[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + } + else if(u.sign ^ v.sign){ + sig = sig_u + sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low); + if((u.sign==1) && (v.sign==0)) VdV.qf32[i] = fNEGQF32(VdV.qf32[i])) + +/* VSUB_QF32_MIX */ +ITERATOR_INSN_SHIFT_SLOT(32,vsub_qf32_mix,"Vd32.qf32=vsub(Vu32.qf32,Vv32.sf)","Vector subtraction of mixed qf32 input and sf", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + fHIDE(unfloat )v = fPARSESF(VvV.sf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_SF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf32[i] = fRNDSATQF32(exp, sig, sig_low)) + +/* VADD_QF16 */ +ITERATOR_INSN_SHIFT_SLOT(16,vadd_qf16,"Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16)","Vector addition of qf16 input", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +/* VADD_HF */ +ITERATOR_INSN_SHIFT_SLOT(16,vadd_hf,"Vd32.qf16=vadd(Vu32.hf,Vv32.hf)","Vector addition of hf input", + fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else if((u.sign==0) && (v.sign==1)) + { + sig = sig_u - sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)-sig_v : sig_u-(sig_v+sig); + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low); + if(u.sign && v.sign) + VdV.qf16[i] = fNEGQF16(VdV.qf16[i])) + +/* VADD_QF16_MIX */ +ITERATOR_INSN_SHIFT_SLOT(16,vadd_qf16_mix,"Vd32.qf16=vadd(Vu32.qf16,Vv32.hf)","Vector addition of mixed qf16 and hf", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +/* VSUB_QF16 */ +ITERATOR_INSN_SHIFT_SLOT(16,vsub_qf16,"Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16)","Vector subtraction of qf16 input", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEQF16(VvV.qf16[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (expv.exp) ? 
(sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +/* VSUB_HF */ +ITERATOR_INSN_SHIFT_SLOT(16,vsub_hf,"Vd32.qf16=vsub(Vu32.hf,Vv32.hf)","Vector subtraction of hf input", + fHIDE(unfloat )u = fPARSEHF(VuV.hf[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + } + else if(u.sign ^ v.sign){ + sig = sig_u + sig_v; + sig_low = (u.exp>v.exp) ? (sig_u-sig)+sig_v : (sig_v-sig)+sig_u; + } + else{ + sig = sig_v - sig_u; + sig_low = (v.exp>u.exp) ? (sig_v-sig)-sig_u : sig_v-(sig_u+sig); + } + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low); + if((u.sign==1) && (v.sign==0)) + VdV.qf16[i] = fNEGQF16(VdV.qf16[i])) + + +/* VSUB_QF16_MIXED */ +ITERATOR_INSN_SHIFT_SLOT(16,vsub_qf16_mix,"Vd32.qf16=vsub(Vu32.qf16,Vv32.hf)","Vector subtraction of mixed qf16 and hf", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + fHIDE(unfloat )v = fPARSEHF(VvV.hf[i]); + if(v.sign) v.sig = (-1.0)*v.sig; + fHIDE(size2s_t exp=0;) + if (u.exp>v.exp) { + exp = u.exp+((u.sig==0.0)? (-(FRAC_HF+1)):ilogb(u.sig)); + if (expv.exp) ? (sig_u-sig)-sig_v : (sig_u-(sig_v+sig)); + VdV.qf16[i] = fRNDSATQF16(exp, sig, sig_low)) + +// FP Convert QF32/W/UW to ieee SF +ITERATOR_INSN_SHIFT_SLOT(32,vconv_sf_qf32,"Vd32.sf=Vu32.qf32","Vector conversion of qf32 format to ieee SF", + fHIDE(unfloat )u = fPARSEQF32(VuV.qf32[i]); + VdV.sf[i] = fRNDSATSF(u.exp, u.sig)) + +// FP Convert QF16/H/UH to ieee HF +ITERATOR_INSN_SHIFT_SLOT(16,vconv_hf_qf16,"Vd32.hf=Vu32.qf16","Vector conversion of qf16 format to ieee HF", + fHIDE(unfloat )u = fPARSEQF16(VuV.qf16[i]); + VdV.hf[i] = fRNDSATHF(u.exp, u.sig)) + +// FP Convert double QF32 to two packed ieee HF in one vector +ITERATOR_INSN_SHIFT_SLOT(32,vconv_hf_qf32,"Vd32.hf=Vuu32.qf32","Vector conversion of double qf32 to ieee HF", + fHIDE(unfloat )u0 = fPARSEQF32(VuuV.v[0].qf32[i]); + fHIDE(unfloat )u1 = fPARSEQF32(VuuV.v[1].qf32[i]); + VdV.hf[2*i] = fRNDSATHF(u0.exp, u0.sig); + VdV.hf[2*i+1] = fRNDSATHF(u1.exp, u1.sig)) ITERATOR_INSN2_MPY_SLOT_DOUBLE_VEC(16,vmpyih,"Vd32=vmpyih(Vu32,Vv32)","Vd32.h=vmpyi(Vu32.h,Vv32.h)", diff --git a/target/hexagon/imported/mmvec/macros.def b/target/hexagon/imported/mmvec/macros.def index 7e5438a99802..e9524aa56d1e 100755 --- a/target/hexagon/imported/mmvec/macros.def +++ b/target/hexagon/imported/mmvec/macros.def @@ -15,46 +15,76 @@ * along with this program; if not, see . */ -DEF_MACRO(fDUMPQ, +DEF_MACRO(fDUMPQ,(STR,REG), + "dump REG", + "dump REG", do { printf(STR ":" #REG ": 0x%016llx\n",REG.ud[0]); } while (0), () ) -DEF_MACRO(fUSE_LOOKUP_ADDRESS_BY_REV, - PROC->arch_proc_options->mmvec_use_full_va_for_lookup, +DEF_MACRO(fUSE_LOOKUP_ADDRESS_BY_REV,(PROC), + "", + "Use full VA address for lookup and exception based on REV ", + PROC->arch_proc_options->HVX_USE_FULL_VA, () ) -DEF_MACRO(fUSE_LOOKUP_ADDRESS, +DEF_MACRO(fUSE_LOOKUP_ADDRESS,(), + "", + "Use full VA address for lookup and exception", 1, () ) -DEF_MACRO(fNOTQ, +DEF_MACRO(fRT8NOTE, (), + "", + "", + , + (A_NOTE_RT8) +) + +DEF_MACRO(fCVI_VX_NO_TMP_LD, (), + "", + "", + , + (A_CVI_VX_NO_TMP_LD) +) +DEF_MACRO(fNOTQ,(VAL), + "~VAL", + "~VAL", + /* Will break Visual Studio? 
*/ ({mmqreg_t _ret = {0}; int _i_; for (_i_ = 0; _i_ < fVECSIZE()/64; _i_++) _ret.ud[_i_] = ~VAL.ud[_i_]; _ret;}), () ) -DEF_MACRO(fGETQBITS, +DEF_MACRO(fGETQBITS,(REG,WIDTH,MASK,BITNO), + "REG[BITNO+WIDTH-1:BITNO]", + "Get MASK bits at BITNO from REG", ((MASK) & (REG.w[(BITNO)>>5] >> ((BITNO) & 0x1f))), () ) -DEF_MACRO(fGETQBIT, +DEF_MACRO(fGETQBIT,(REG,BITNO), + "REG[BITNO]", + "Get bit BITNO from REG", fGETQBITS(REG,1,1,BITNO), () ) -DEF_MACRO(fGENMASKW, +DEF_MACRO(fGENMASKW,(QREG,IDX), + "maskw(QREG,IDX)", + "Generate mask from QREG for word IDX", (((fGETQBIT(QREG,(IDX*4+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*4+1)) ? 0xFF : 0x0) << 8) |((fGETQBIT(QREG,(IDX*4+2)) ? 0xFF : 0x0) << 16) |((fGETQBIT(QREG,(IDX*4+3)) ? 0xFF : 0x0) << 24)), () ) -DEF_MACRO(fGET10BIT, +DEF_MACRO(fGET10BIT,(COE,VAL,POS), + "COE=(((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6) >> 6;", + "Get 10-bit coefficient from current word value and byte position", { COE = (((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6); COE >>= 6; @@ -62,62 +92,160 @@ DEF_MACRO(fGET10BIT, () ) -DEF_MACRO(fVMAX, +DEF_MACRO(fVMAX,(X,Y), + "max(X,Y)", + "", (X>Y) ? X : Y, () ) -DEF_MACRO(fGETNIBBLE, +DEF_MACRO(fREAD_VEC, + (DST,IDX), + "DST=VREG[IDX]", /* short desc */ + "Read Vector IDX", /* long desc */ + (DST = READ_VREG(fMODCIRCU((IDX),5))), + () +) +DEF_MACRO(fREAD_ZVEC, + (DST,IDX), + "DST=ZREG[IDX]", /* short desc */ + "Read Vector IDX", /* long desc */ + (DST = READ_ZREG(fMODCIRCU((IDX),5))), + () +) + +DEF_MACRO(fREAD_ZVEC_WORD, + (DST,IDX), + "DST=ZReg.uw[IDX]", /* short desc */ + "Read Z Vector IDX", /* long desc */ + { + mmvector_t ZReg = READ_ZREG(0); + DST = ZReg.uw[IDX]; + + }, + () +) +DEF_MACRO(fREAD_ZVEC_ALL, + (DST,N,NZ), + "", /* short desc */ + "Read Z Vector IDX", /* long desc */ + { + int __idx = 0; + for (__idx = 0; __idx < NZ/N; __idx++) { + memcpy(&DST[N*__idx], &THREAD2STRUCT->ZRegs[__idx], N); + } + }, + () +) +DEF_MACRO(fZREGB, + (Z,IDX), + "ZREG.b[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size1s_t)Z[IDX]), + () +) +DEF_MACRO(fZREGUB, + (Z,IDX), + "ZREG.ub[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size1u_t)Z[IDX]), + () +) +DEF_MACRO(fZREGH, + (Z,IDX), + "ZREG.h[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size2s_t)Z[IDX]), + () +) +DEF_MACRO(fZREGUB, + (Z,IDX), + "ZREG.ub[IDX]", /* short desc */ + "Read Z IDX", /* long desc */ + ((size1u_t)Z[IDX]), + () +) + +DEF_MACRO(fGETNIBBLE,(IDX,SRC), + "SRC.s4[IDX]", + "Get nibble", ( fSXTN(4,8,(SRC >> (4*IDX)) & 0xF) ), () ) -DEF_MACRO(fGETCRUMB, +DEF_MACRO(fGETCRUMB,(IDX,SRC), + "SRC.s2[IDX]", + "Get 2bits", ( fSXTN(2,8,(SRC >> (2*IDX)) & 0x3) ), () ) -DEF_MACRO(fGETCRUMB_SYMMETRIC, +DEF_MACRO(fGETCRUMB_SYMMETRIC,(IDX,SRC), + "SRC.s2[IDX] >= 0 ? (2-SRC.s2[IDX]) : SRC.s2[IDX]", + "Get 2bits", ( (fGETCRUMB(IDX,SRC)>=0 ? (2-fGETCRUMB(IDX,SRC)) : fGETCRUMB(IDX,SRC) ) ), () ) +//#define ZERO_OFFSET_2B +(fGETCRUMB(z,VuV.uw[i])>=0) #define ZERO_OFFSET_2B + -DEF_MACRO(fGENMASKH, +DEF_MACRO(fWRITE_VEC, + (IDX,VAR), + "VREG[IDX]=VAR", /* short desc */ + "Write Vector IDX", /* long desc */ + (WRITE_VREG(fMODCIRCU((IDX),5),VAR)), + () +) + +DEF_MACRO(fGENMASKH,(QREG,IDX), + "maskh(QREG,IDX)", + "generate mask from QREG for halfword IDX", (((fGETQBIT(QREG,(IDX*2+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*2+1)) ? 
0xFF : 0x0) << 8)), () ) -DEF_MACRO(fGETMASKW, +DEF_MACRO(fGETMASKW,(VREG,QREG,IDX), + "VREG.w[IDX] & fGENMASKW(QREG,IDX)", + "Mask word IDX from VREG using QREG", (VREG.w[IDX] & fGENMASKW((QREG),IDX)), () ) -DEF_MACRO(fGETMASKH, +DEF_MACRO(fGETMASKH,(VREG,QREG,IDX), + "VREG.h[IDX] & fGENMASKH(QREG,IDX)", + "Mask word IDX from VREG using QREG", (VREG.h[IDX] & fGENMASKH((QREG),IDX)), () ) -DEF_MACRO(fCONDMASK8, +DEF_MACRO(fCONDMASK8,(QREG,IDX,YESVAL,NOVAL), + "QREG.IDX ? YESVAL : NOVAL", + "QREG.IDX ? YESVAL : NOVAL", (fGETQBIT(QREG,IDX) ? (YESVAL) : (NOVAL)), () ) -DEF_MACRO(fCONDMASK16, +DEF_MACRO(fCONDMASK16,(QREG,IDX,YESVAL,NOVAL), + "select_bytes(QREG,IDX,YESVAL,NOVAL)", + "select_bytes(QREG,IDX,YESVAL,NOVAL)", ((fGENMASKH(QREG,IDX) & (YESVAL)) | (fGENMASKH(fNOTQ(QREG),IDX) & (NOVAL))), () ) -DEF_MACRO(fCONDMASK32, +DEF_MACRO(fCONDMASK32,(QREG,IDX,YESVAL,NOVAL), + "select_bytes(QREG,IDX,YESVAL,NOVAL)", + "select_bytes(QREG,IDX,YESVAL,NOVAL)", ((fGENMASKW(QREG,IDX) & (YESVAL)) | (fGENMASKW(fNOTQ(QREG),IDX) & (NOVAL))), () ) -DEF_MACRO(fSETQBITS, +DEF_MACRO(fSETQBITS,(REG,WIDTH,MASK,BITNO,VAL), + "REG[BITNO+WIDTH-1:BITNO] = VAL", + "Put bits into REG", do { size4u_t __TMP = (VAL); REG.w[(BITNO)>>5] &= ~((MASK) << ((BITNO) & 0x1f)); @@ -126,58 +254,101 @@ DEF_MACRO(fSETQBITS, () ) -DEF_MACRO(fSETQBIT, +DEF_MACRO(fSETQBIT,(REG,BITNO,VAL), + "REG[BITNO]=VAL", + "Put bit into REG", fSETQBITS(REG,1,1,BITNO,VAL), () ) -DEF_MACRO(fVBYTES, +DEF_MACRO(fVBYTES,(), + "VWIDTH", + "Number of bytes in a vector", (fVECSIZE()), () ) -DEF_MACRO(fVHALVES, +DEF_MACRO(fVHALVES,(), + "VWIDTH/2", + "Number of halves in a vector", (fVECSIZE()/2), () ) -DEF_MACRO(fVWORDS, +DEF_MACRO(fVWORDS,(), + "VWIDTH/2", + "Number of words in a vector", (fVECSIZE()/4), () ) -DEF_MACRO(fVDWORDS, +DEF_MACRO(fVDWORDS,(), + "VWIDTH/8", + "Number of double words in a vector", (fVECSIZE()/8), () ) -DEF_MACRO(fVALIGN, +DEF_MACRO(fVALIGN, (ADDR, LOG2_ALIGNMENT), + "ADDR = ADDR & ~(LOG2_ALIGNMENT-1)", + "Align to Element Size", ( ADDR = ADDR & ~(LOG2_ALIGNMENT-1)), () ) -DEF_MACRO(fVLASTBYTE, +DEF_MACRO(fVLASTBYTE, (ADDR, LOG2_ALIGNMENT), + "ADDR = ADDR | (LOG2_ALIGNMENT-1)", + "Set LSB of length to last byte", ( ADDR = ADDR | (LOG2_ALIGNMENT-1)), () ) -DEF_MACRO(fVELEM, +DEF_MACRO(fVELEM, (WIDTH), + "VBITS/WIDTH", + "Number of WIDTH-bit elements in a vector", ((fVECSIZE()*8)/WIDTH), () ) -DEF_MACRO(fVECLOGSIZE, +DEF_MACRO(fVECLOGSIZE,(), + "log2(VECTOR_SIZE)", + "Log base 2 of the number of bytes in a vector", (mmvec_current_veclogsize(thread)), () ) -DEF_MACRO(fVECSIZE, +DEF_MACRO(fVBUF_IDX,(EA), + "(EA >> log2(VECTOR_SIZE)) & 0xFF", + "(EA >> log2(VECTOR_SIZE)) & 0xFF", + (((EA) >> fVECLOGSIZE()) & 0xFF), + (A_FAKEINSN) +) + +DEF_MACRO(fREAD_VBUF,(IDX,WIDX), + "vbuf[IDX].w[WIDX]", + "vbuf[IDX].w[WIDX]", + READ_VBUF(IDX,WIDX), + (A_FAKEINSN) +) + +DEF_MACRO(fLOG_VBUF,(IDX,VAL,WIDX), + "vbuf[IDX].w[WIDX] = VAL", + "vbuf[IDX].w[WIDX] = VAL", + LOG_VBUF(IDX,VAL,WIDX), + (A_FAKEINSN) +) + +DEF_MACRO(fVECSIZE,(), + "VBYTES", + "Number of bytes in a vector currently", (1<VRegs_updated & (((VRegMask)1)<future_VRegs[VNUM] : mmvec_zero_vector()), (A_DOTNEWVALUE,A_RESTRICT_SLOT0ONLY) ) DEF_MACRO( fV_AL_CHECK, + (EA,MASK), + "", + "", if ((EA) & (MASK)) { warn("aligning misaligned vector. 
PC=%08x EA=%08x",thread->Regs[REG_PC],(EA)); }, () ) -DEF_MACRO(fSCATTER_INIT, +DEF_MACRO(fSCATTER_INIT, ( REGION_START, LENGTH, ELEMENT_SIZE), + "", + "", { mem_vector_scatter_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; }, - (A_STORE,A_MEMLIKE,A_RESTRICT_SLOT0ONLY) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_SLOT0ONLY) ) -DEF_MACRO(fGATHER_INIT, +DEF_MACRO(fGATHER_INIT, ( REGION_START, LENGTH, ELEMENT_SIZE), + "", + "", { mem_vector_gather_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; }, - (A_LOAD,A_MEMLIKE,A_RESTRICT_SLOT1ONLY) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSCATTER_FINISH, +DEF_MACRO(fSCATTER_FINISH, (OP), + "", + "", { if (EXCEPTION_DETECTED) return; mem_vector_scatter_finish(thread, insn, OP); @@ -229,7 +413,9 @@ DEF_MACRO(fSCATTER_FINISH, () ) -DEF_MACRO(fGATHER_FINISH, +DEF_MACRO(fGATHER_FINISH, (), + "", + "", { if (EXCEPTION_DETECTED) return; mem_vector_gather_finish(thread, insn); @@ -238,7 +424,9 @@ DEF_MACRO(fGATHER_FINISH, ) -DEF_MACRO(CHECK_VTCM_PAGE, +DEF_MACRO(CHECK_VTCM_PAGE, (FLAG, BASE, LENGTH, OFFSET, ALIGNMENT), + "FLAG=((BASE+OFFSET) < (BASE+LENGTH))", + "FLAG=((BASE+OFFSET) < (BASE+LENGTH))", { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; @@ -247,7 +435,9 @@ DEF_MACRO(CHECK_VTCM_PAGE, }, () ) -DEF_MACRO(COUNT_OUT_OF_BOUNDS, +DEF_MACRO(COUNT_OUT_OF_BOUNDS, (FLAG, SIZE), + " ", + "", { if (!FLAG) { @@ -258,7 +448,9 @@ DEF_MACRO(COUNT_OUT_OF_BOUNDS, () ) -DEF_MACRO(fLOG_SCATTER_OP, +DEF_MACRO(fLOG_SCATTER_OP, (SIZE), + " ", + " ", { // Log the size and indicate that the extension ext.c file needs to increment right before memory write THREAD2STRUCT->vtcm_log.op = 1; @@ -269,7 +461,9 @@ DEF_MACRO(fLOG_SCATTER_OP, -DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, +DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, (EA,OFFSET,INC,IDX,ALIGNMENT,LEN), + "if (RtV <= EA <= RtV + LEN) *EA += INC.uw[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA += INC.uw[IDX] ", { int slot = insn->slot; int log_bank = 0; @@ -287,7 +481,9 @@ DEF_MACRO(fVLOG_VTCM_WORD_INCREMENT, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, +DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, (EA,OFFSET,INC,IDX,ALIGNMENT,LEN), + "if (RtV <= EA <= RtV + LEN) *EA += INC.uh[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA += INC.uh[IDX] ", { int slot = insn->slot; int log_bank = 0; @@ -304,7 +500,9 @@ DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, +DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, (EA,OFFSET,INC,IDX,IDX2,IDX_H,ALIGNMENT,LEN), + "if (RtV <= EA <= RtV + LEN) *EA += INC.w[IDX2].uh[IDX_H] ", + "if (RtV <= EA <= RtV + LEN) *EA += INC.w[IDX2].uh[IDX_H] ", { int slot = insn->slot; int log_bank = 0; @@ -323,7 +521,9 @@ DEF_MACRO(fVLOG_VTCM_HALFWORD_INCREMENT_DV, -DEF_MACRO(GATHER_FUNCTION, +DEF_MACRO(GATHER_FUNCTION, (EA,OFFSET,IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL), +"", +"", { int slot = insn->slot; int i0; @@ -336,6 +536,9 @@ DEF_MACRO(GATHER_FUNCTION, log_byte = ((OFFSET>=0)&&((pa+i0)<=pa_high)) && QVAL; log_bank |= (log_byte<system_ptr, thread->threadId, thread->mem_access[slot].paddr+OFFSET+i0); +#ifdef VERIFICATION + warn("Gather[%d] sim_mem_read1 pa:%llx val: %x", ELEMENT_SIZE*IDX+i0, thread->mem_access[slot].paddr+OFFSET+i0, B); +#endif THREAD2STRUCT->tmp_VRegs[0].ub[ELEMENT_SIZE*IDX+i0] = B; LOG_VTCM_BYTE(pa+i0,log_byte,B,ELEMENT_SIZE*IDX+i0); } @@ -346,38 +549,50 @@ DEF_MACRO(GATHER_FUNCTION, -DEF_MACRO(fVLOG_VTCM_GATHER_WORD, 
+DEF_MACRO(fVLOG_VTCM_GATHER_WORD, (EA,OFFSET,IDX, LEN), + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX] = *EA ", + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, 1); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD, (EA,OFFSET,IDX, LEN), + " if (RtV <= EA <= RtV + LEN) TEMP.uh[IDX] = *EA ", + " if (RtV <= EA <= RtV + LEN) TEMP.uh[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, 1); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD_DV, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORD_DV, (EA,OFFSET,IDX,IDX2,IDX_H, LEN), + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX2].uh[IDX_H] = *EA ", + "if (RtV <= EA <= RtV + LEN) TEMP.uw[IDX2].uh[IDX_H] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_WORDQ, +DEF_MACRO(fVLOG_VTCM_GATHER_WORDQ, (EA,OFFSET,IDX, Q, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX] = *EA ", + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0)); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ, (EA,OFFSET,IDX, Q, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uh[IDX] = *EA ", + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uh[IDX] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0)); }, () ) -DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, +DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, (EA,OFFSET,IDX,IDX2,IDX_H, Q, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX2].uh[IDX_H] = *EA ", + " if ( (RtV <= EA <= RtV + LEN) & Q) TEMP.uw[IDX2].uh[IDX_H] = *EA ", { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0)); }, @@ -385,7 +600,9 @@ DEF_MACRO(fVLOG_VTCM_GATHER_HALFWORDQ_DV, ) -DEF_MACRO(DEBUG_LOG_ADDR, +DEF_MACRO(DEBUG_LOG_ADDR, (OFFSET), + " ", + " ", { if (thread->processor_ptr->arch_proc_options->mmvec_network_addr_log2) @@ -393,6 +610,7 @@ DEF_MACRO(DEBUG_LOG_ADDR, int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; + // pa = pa & ~(ALIGNMENT-1); } }, () @@ -404,7 +622,9 @@ DEF_MACRO(DEBUG_LOG_ADDR, -DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, +DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, (TYPE), + " Read, accumulate, and write to VTCM", + " ", { for (int i = 0; i < mmvecx->vtcm_log.size; i+=sizeof(TYPE)) { @@ -430,7 +650,9 @@ DEF_MACRO(SCATTER_OP_WRITE_TO_MEM, () ) -DEF_MACRO(SCATTER_FUNCTION, +DEF_MACRO(SCATTER_FUNCTION, (EA,OFFSET,IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL, IN), +"", +"", { int slot = insn->slot; int i0; @@ -449,26 +671,34 @@ DEF_MACRO(SCATTER_FUNCTION, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORD, +DEF_MACRO(fVLOG_VTCM_HALFWORD, (EA,OFFSET,IN,IDX, LEN), + "if (RtV <= EA <= RtV + LEN) *EA = IN.uh[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA = IN.uh[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, 1, IN); }, () ) -DEF_MACRO(fVLOG_VTCM_WORD, +DEF_MACRO(fVLOG_VTCM_WORD, (EA,OFFSET,IN,IDX,LEN), + "if (RtV <= EA <= RtV + LEN) *EA = IN.uw[IDX] ", + "if (RtV <= EA <= RtV + LEN) *EA = IN.uw[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, 1, IN); }, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORDQ, +DEF_MACRO(fVLOG_VTCM_HALFWORDQ, (EA,OFFSET,IN,IDX,Q,LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uh[IDX] ", + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uh[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0), IN); }, () ) -DEF_MACRO(fVLOG_VTCM_WORDQ, +DEF_MACRO(fVLOG_VTCM_WORDQ, (EA,OFFSET,IN,IDX,Q,LEN), + " if ( (RtV <= EA <= RtV + 
LEN) & Q) *EA = IN.uw[IDX] ", + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.uw[IDX] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0), IN); }, @@ -479,14 +709,18 @@ DEF_MACRO(fVLOG_VTCM_WORDQ, -DEF_MACRO(fVLOG_VTCM_HALFWORD_DV, +DEF_MACRO(fVLOG_VTCM_HALFWORD_DV, (EA,OFFSET,IN,IDX,IDX2,IDX_H, LEN), + "if (RtV <= EA <= RtV + LEN) *EA = IN.w[IDX2].uh[IDX_H] ", + "if (RtV <= EA <= RtV + LEN) *EA = IN.w[IDX2].uh[IDX_H] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1, IN); }, () ) -DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, +DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, (EA,OFFSET,IN,IDX,Q,IDX2,IDX_H, LEN), + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.w[IDX2].uh[IDX_H] ", + " if ( (RtV <= EA <= RtV + LEN) & Q) *EA = IN.w[IDX2].uh[IDX_H] ", { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0), IN); }, @@ -498,39 +732,161 @@ DEF_MACRO(fVLOG_VTCM_HALFWORDQ_DV, -DEF_MACRO(fSTORERELEASE, +DEF_MACRO(fSTORERELEASE, (EA,TYPE), + "char* addr = EA&~(ALIGNMENT-1); Zero Byte Store Release (Non-blocking Sync)", + "Zero Byte Store Release (Sync)", { fV_AL_CHECK(EA,fVECSIZE()-1); mem_store_release(thread, insn, fVECSIZE(), EA&~(fVECSIZE()-1), EA, TYPE, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fVFETCH_AL, +DEF_MACRO(fVFETCH_AL, (EA), + "Prefetch vector into L2 cache at EA", + "Prefetch vector into L2 cache at EA", { fV_AL_CHECK(EA,fVECSIZE()-1); mem_fetch_vector(thread, insn, EA&~(fVECSIZE()-1), insn->slot, fVECSIZE()); }, - (A_LOAD,A_MEMLIKE) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) ) -DEF_MACRO(fLOADMMV_AL, +DEF_MACRO(fLOADMMV_AL, (EA, ALIGNMENT, LEN, DST), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; ilast_pkt->double_access_vec = 0; mem_load_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, &DST.ub[0], LEN, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_LOAD,A_MEMLIKE) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) ) -DEF_MACRO(fLOADMMV, +DEF_MACRO(fLOADMMV, (EA, DST), + "DST = *(EA&~(ALIGNMENT-1))", + "Load vector from memory at EA (forced alignment) to DST.", fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST), () ) -DEF_MACRO(fLOADMMVQ, +DEF_MACRO(fLOADMMZ, (EA,DST), + "DST[EA[7]] = *(EA)", + "Load splatter register from memory at EA (forced alignment) to DST.", + { + mmvector_t load_vec; + fV_AL_CHECK(EA,fVECSIZE()-1); + mem_load_vector_oddva(thread, insn, EA&~(fVECSIZE()-1), EA, insn->slot, fVECSIZE(), &load_vec.ub[0], fVECSIZE(), fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + int idx = (EA & 0x80)>0; + DST.v[idx] = load_vec; + + }, + () +) +DEF_MACRO(fLOADZ_LOAD, (EA,EAU,WIDTH,DST), + "", + "", + { + thread->last_pkt->ext_slot_cancelled = 0; + thread->last_pkt->double_access_vec = 0; + int etm_size = ((EA % width) ==0) ? fVECSIZE() : 0; + if (thread->processor_ptr->options->testgen_mode) + etm_size = ((EA % width) ==0) ? 
WIDTH : 0; + + mem_load_vector_oddva(thread, insn, EA, EAU, insn->slot, WIDTH, &DST.ub[0], etm_size, fUSE_LOOKUP_ADDRESS()); + }, + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) +) + +DEF_MACRO(fELSE_CANCELZ, (), + "", + "", + else { + if (thread->last_pkt) { + thread->mem_access[insn->slot].dropped_z = 1; + thread->last_pkt->ext_slot_cancelled |= (1<slot); + } + }, + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) +) + + + + +DEF_MACRO(fPOST_INC4, (R), + "R+=4", + "", + R+=4; + , + (A_CVI_Z_INC_4) +) +DEF_MACRO(fPOST_INC8, (R), + "R+=8", + "", + R+=8; + , + (A_CVI_Z_INC_8) +) +DEF_MACRO(fPOST_INC16, (R), + "R+=16", + "", + R+=16; + , + (A_CVI_Z_INC_16) +) + +DEF_MACRO(fEXTRACTZ, + (DST,IDX), + "DST=ZREG[IDX]", /* short desc */ + "Read Vector IDX", /* long desc */ + (DST = READ_ZREG(fMODCIRCU((IDX),5))), + () +) + +DEF_MACRO(fLOADZ_UPDATE, (EA,WIDTH,ZN,N,SRC), + "for(i = 0; i < width; i++) ZREG.b[(EA+i)%ZN] = *(EA+i)", + "Load splatter register from memory at EA (forced alignment) to DST.", + { + mmvector_t Z[2]; + Z[0] = READ_ZREG(0); + Z[1] = READ_ZREG(1); + for(int k = 0; k < WIDTH; k++) { + int element_idx = (EA+k)%N; + int z_idx = ((EA+k)%ZN)/N; + Z[z_idx].ub[element_idx] = SRC.ub[k]; + } + + WRITE_EXT_ZREG(0,Z[0],0); + WRITE_EXT_ZREG(1,Z[1],0); + }, + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) +) +DEF_MACRO(fSTOREZ, (EA,WIDTH,ZN,N), + "for(i = 0; i < width; i++) *(EA+i) = ZREG.b[(EA+i)%ZN]", + "Store splatter register from memory at EA (forced alignment) to DST.", + { + mmvector_t store_vec; + mmvector_t maskvec = {0}; + mmvector_t Z[2]; + Z[0] = READ_ZREG(0); + Z[1] = READ_ZREG(1); + + for(int k = 0; k < WIDTH; k++) { + int element_idx = (EA+k)%N; + int z_idx = ((EA+k)%ZN)/N; + store_vec.ub[k] = Z[z_idx].ub[element_idx]; + maskvec.ub[k] = 1; + } + mem_store_vector_oddva(thread, insn, EA, EA, insn->slot, WIDTH, &store_vec.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + }, + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) +) + +DEF_MACRO(fLOADMMVQ, (EA,DST,QVAL), + "DST = vmux(QVAL,*(EA&~(ALIGNMENT-1)),0)", + "Load vector from memory at EA (forced alignment) to DST.", do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); @@ -539,7 +895,9 @@ DEF_MACRO(fLOADMMVQ, () ) -DEF_MACRO(fLOADMMVNQ, +DEF_MACRO(fLOADMMVNQ, (EA,DST,QVAL), + "DST = vmux(QVAL,0,*(EA&~(ALIGNMENT-1)))", + "Load vector from memory at EA (forced alignment) to DST.", do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); @@ -548,7 +906,9 @@ DEF_MACRO(fLOADMMVNQ, () ) -DEF_MACRO(fLOADMMVU_AL, +DEF_MACRO(fLOADMMVU_AL, (EA, ALIGNMENT, LEN, DST), + "char* addr = EA; for (i=0; iprocessor_ptr)); }, - (A_LOAD,A_MEMLIKE) + (A_LOAD,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST,A_RESTRICT_NOSLOT1_STORE) ) -DEF_MACRO(fLOADMMVU, +DEF_MACRO(fLOADMMVU, (EA, DST), + "DST = *EA", + "Load vector from memory at EA (unaligned) to DST.", { /* if address happens to be aligned, only do aligned load */ thread->last_pkt->pkt_has_vtcm_access = 0; @@ -579,63 +941,79 @@ DEF_MACRO(fLOADMMVU, () ) -DEF_MACRO(fSTOREMMV_AL, +DEF_MACRO(fSTOREMMV_AL, (EA, ALIGNMENT, LEN, SRC), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; islot, LEN, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMV, 
+DEF_MACRO(fSTOREMMV, (EA, SRC), + "*(EA&~(ALIGNMENT-1)) = SRC", + "Store vector SRC to memory at EA (unaligned).", fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC), () ) -DEF_MACRO(fSTOREMMVQ_AL, +DEF_MACRO(fSTOREMMVQ_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; islot, LEN, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } while (0), - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVQ, +DEF_MACRO(fSTOREMMVQ, (EA, SRC, MASK), + "*(EA&~(ALIGNMENT-1)) = SRC", + "Masked store vector SRC to memory at EA (forced alignment).", fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK), () ) -DEF_MACRO(fSTOREMMVNQ_AL, +DEF_MACRO(fSTOREMMVNQ_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA&~(ALIGNMENT-1); for (i=0; islot, LEN, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA&~(ALIGNMENT-1), EA, insn->slot, LEN, SRC.ub, &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVNQ, +DEF_MACRO(fSTOREMMVNQ, (EA, SRC, MASK), + "*(EA&~(ALIGNMENT-1)) = SRC", + "Masked negated store vector SRC to memory at EA (forced alignment).", fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK), () ) -DEF_MACRO(fSTOREMMVU_AL, +DEF_MACRO(fSTOREMMVU_AL, (EA, ALIGNMENT, LEN, SRC), + "char* addr = EA; for (i=0; iLEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(), /* slot */ 1, size2, &SRC.ub[size1], 0, 0, fUSE_LOOKUP_ADDRESS()); - mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, SRC.ub, 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVU, +DEF_MACRO(fSTOREMMVU, (EA, SRC), + "*EA = SRC", + "Store vector SRC to memory at EA (unaligned).", { thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0; @@ -651,7 +1029,9 @@ DEF_MACRO(fSTOREMMVU, () ) -DEF_MACRO(fSTOREMMVQU_AL, +DEF_MACRO(fSTOREMMVQU_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA; for (i=0; iLEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, insn, EA+size1, EA+fVECSIZE(),/* slot */ 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 0, fUSE_LOOKUP_ADDRESS()); - mem_store_vector_oddva(thread, insn, EA, /* slot */ 0, size1, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA, /* slot */ 0, size1, SRC.ub, &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVQU, +DEF_MACRO(fSTOREMMVQU, (EA, SRC, MASK), + "*EA = SRC", + "Store vector SRC to memory at EA (unaligned).", { thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0; @@ -682,7 +1064,9 @@ DEF_MACRO(fSTOREMMVQU, () ) -DEF_MACRO(fSTOREMMVNQU_AL, +DEF_MACRO(fSTOREMMVNQU_AL, (EA, ALIGNMENT, LEN, SRC, MASK), + "char* addr = EA; for (i=0; iLEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, insn, EA+size1, 
EA+fVECSIZE(), /* slot */ 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 1, fUSE_LOOKUP_ADDRESS()); - mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); + mem_store_vector_oddva(thread, insn, EA, EA, /* slot */ 0, size1, SRC.ub, &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); }, - (A_STORE,A_MEMLIKE) + (A_STORE,A_MEMLIKE,A_RESTRICT_SINGLE_MEM_FIRST) ) -DEF_MACRO(fSTOREMMVNQU, +DEF_MACRO(fSTOREMMVNQU, (EA, SRC, MASK), + "*EA = SRC", + "Store vector SRC to memory at EA (unaligned).", { thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0; @@ -716,127 +1102,446 @@ DEF_MACRO(fSTOREMMVNQU, -DEF_MACRO(fVFOREACH, +DEF_MACRO(fVFOREACH,(WIDTH, VAR), + "for (VAR = 0; VAR < VELEM(WIDTH); VAR++)", + "For VAR in each WIDTH-bit vector index", for (VAR = 0; VAR < fVELEM(WIDTH); VAR++), /* NOTHING */ ) -DEF_MACRO(fVARRAY_ELEMENT_ACCESS, +DEF_MACRO(fVARRAY_ELEMENT_ACCESS, (ARRAY, TYPE, INDEX), + "ARRAY.TYPE[INDEX]", + "Access element of type TYPE at position INDEX of flattened ARRAY", ARRAY.v[(INDEX) / (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))], () ) -DEF_MACRO(fVNEWCANCEL, +DEF_MACRO(fVNEWCANCEL,(REGNUM), + "Ignore current value for register REGNUM", + "Ignore current value for register REGNUM", do { THREAD2STRUCT->VRegs_select &= ~(1<<(REGNUM)); } while (0), () ) -DEF_MACRO(fTMPVDATA, +DEF_MACRO(fTMPVDATA,(), + "Data from .tmp load", + "Data from .tmp load and clear tmp status", mmvec_vtmp_data(thread), - (A_CVI) + (A_CVI,A_CVI_REQUIRES_TMPLOAD) ) -DEF_MACRO(fVSATDW, +DEF_MACRO(fVSATDW, (U,V), + "usat_32(U:V)", + "Use 32-bits of U as MSW and 32-bits of V as LSW and saturate the resultant 64-bits to 32 bits", fVSATW( ( ( ((long long)U)<<32 ) | fZXTN(32,64,V) ) ), /* attribs */ ) -DEF_MACRO(fVASL_SATHI, +DEF_MACRO(fVASL_SATHI, (U,V), + "uasl_sathi(U:V)", + "Use 32-bits of U as MSW and 32-bits of V as LSW, left shift by 1 and saturate the result and take high word", fVSATW(((U)<<1) | ((V)>>31)), /* attribs */ ) -DEF_MACRO(fVUADDSAT, +DEF_MACRO(fVUADDSAT,(WIDTH,U,V), + "usat_##WIDTH(U+V)", + "Add WIDTH-bit values U and V with saturation", fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)), /* attribs */ ) -DEF_MACRO(fVSADDSAT, - fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)), +DEF_MACRO(fVSADDSAT,(WIDTH,U,V), + "sat_##WIDTH(U+V)", + "Add WIDTH-bit values U and V with saturation", + ({size8s_t tmp5 = fSXTN(WIDTH, 2*WIDTH, U); + size8s_t tmp6 = fSXTN(WIDTH, 2*WIDTH, V); + size8s_t tmp7 = tmp5 + tmp6; + fVSATN( WIDTH, tmp7); + }), /* attribs */ ) -DEF_MACRO(fVUSUBSAT, +DEF_MACRO(fVUSUBSAT,(WIDTH,U,V), + "usat_##WIDTH(U-V)", + "sub WIDTH-bit values U and V with saturation", fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)), /* attribs */ ) -DEF_MACRO(fVSSUBSAT, +DEF_MACRO(fVSSUBSAT,(WIDTH,U,V), + "sat_##WIDTH(U-V)", + "sub WIDTH-bit values U and V with saturation", fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)), /* attribs */ ) -DEF_MACRO(fVAVGU, +DEF_MACRO(fVAVGU,(WIDTH,U,V), + "(U+V)/2", + "average WIDTH-bit values U and V with saturation", ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVAVGURND, +DEF_MACRO(fVAVGURND,(WIDTH,U,V), + "(U+V+1)/2", + "average WIDTH-bit values U and V with saturation", ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)+1)>>1), /* attribs */ ) -DEF_MACRO(fVNAVGU, 
+DEF_MACRO(fVNAVGU,(WIDTH,U,V), + "(U-V)/2", + "average WIDTH-bit values U and V with saturation", ((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVNAVGURNDSAT, +DEF_MACRO(fVNAVGURNDSAT,(WIDTH,U,V), + "(U-V+1)/2", + "average WIDTH-bit values U and V with saturation", fVSATUN(WIDTH,((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)+1)>>1)), /* attribs */ ) -DEF_MACRO(fVAVGS, +DEF_MACRO(fVAVGS,(WIDTH,U,V), + "(U+V)/2", + "average WIDTH-bit values U and V with saturation", ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVAVGSRND, +DEF_MACRO(fVAVGSRND,(WIDTH,U,V), + "(U+V+1)/2", + "average WIDTH-bit values U and V with saturation", ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)+1)>>1), /* attribs */ ) -DEF_MACRO(fVNAVGS, +DEF_MACRO(fVNAVGS,(WIDTH,U,V), + "(U-V)/2", + "average WIDTH-bit values U and V with saturation", ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))>>1), /* attribs */ ) -DEF_MACRO(fVNAVGSRND, +DEF_MACRO(fVNAVGSRND,(WIDTH,U,V), + "(U-V+1)/2", + "average WIDTH-bit values U and negative V followed by rounding", ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1), /* attribs */ ) -DEF_MACRO(fVNAVGSRNDSAT, +DEF_MACRO(fVNAVGSRNDSAT,(WIDTH,U,V), + "(U-V+1)/2", + "average WIDTH-bit values U and V with saturation", fVSATN(WIDTH,((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)), /* attribs */ ) -DEF_MACRO(fVNOROUND, +DEF_MACRO(fVNOROUND,(VAL,SHAMT), + "VAL", + "VAL", VAL, /* NOTHING */ ) -DEF_MACRO(fVNOSAT, +DEF_MACRO(fVNOSAT,(VAL), + "VAL", + "VAL", VAL, /* NOTHING */ ) -DEF_MACRO(fVROUND, +DEF_MACRO(fVROUND,(VAL,SHAMT), + "VAL + (1<<(SHAMT-1))", + "VAL + RNDBIT", ((VAL) + (((SHAMT)>0)?(1LL<<((SHAMT)-1)):0)), /* NOTHING */ ) -DEF_MACRO(fCARRY_FROM_ADD32, +DEF_MACRO(fCARRY_FROM_ADD32,(A,B,C), + "carry_from(A,B,C)", + "carry_from(A,B,C)", (((fZXTN(32,64,A)+fZXTN(32,64,B)+C) >> 32) & 1), /* NOTHING */ ) -DEF_MACRO(fUARCH_NOTE_PUMP_4X, +DEF_MACRO(fUARCH_NOTE_PUMP_4X,(), + "", + "", , - () + (A_CVI_PUMP_4X) ) -DEF_MACRO(fUARCH_NOTE_PUMP_2X, +DEF_MACRO(fUARCH_NOTE_PUMP_2X,(), + "", + "", , + (A_CVI_PUMP_2X) +) + +DEF_MACRO(fVDOCHKPAGECROSS,(BASE,SUM), + "", + "", + if (UNLIKELY(thread->timing_on)) { + thread->mem_access[slot].check_page_crosses = 1; + thread->mem_access[slot].page_cross_base = BASE; + thread->mem_access[slot].page_cross_sum = SUM; + }, + (A_EA_PAGECROSS) +) + +/* FP instructions */ +/*Qfloat Macros for muls*/ +DEF_MACRO(fPARSEQF32,(A), + "A", + "Parsing QF32 to extract exp/sig", + parse_qf32(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATQF32,(A,B,C), + "rnd_sat(A,B,C)", + "Rnd/Sat/Norm of Vector Multiply of two QF32 inputs", + rnd_sat_qf32(A,B,C), + (A_HVX_FLT) +) + +DEF_MACRO(fPARSEQF16,(A), + "A", + "Parsing QF16 to extract exp/sig", + parse_qf16(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATQF16,(A,B,C), + "rnd_sat(A,B,C)", + "Rnd/Sat/Norm of Vector Multiply of two QF16 inputs", + rnd_sat_qf16(A,B,C), () ) +/*Qfloat Macros for others*/ +DEF_MACRO(fPARSESF,(A), + "A", + "Parsing IEEE SF to extract sign/exp/sig", + parse_sf(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATSF,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector Multiply of two IEEE SF inputs", + rnd_sat_sf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fPARSEHF,(A), + "A", + "Parsing IEEE HF to extract sign/exp/sig", + parse_hf(A), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATHF,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector Multiply of two IEEE HF inputs", + rnd_sat_hf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATW,(A,B), + 
"rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of W inputs", + rnd_sat_w(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATUW,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of UW inputs", + rnd_sat_uw(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATH,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of H inputs", + rnd_sat_h(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATUH,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of UW inputs", + rnd_sat_uh(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATB,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of B inputs", + rnd_sat_b(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fRNDSATUB,(A,B), + "rnd_sat(A,B)", + "Rnd/Sat/Norm of Vector convert of UB inputs", + rnd_sat_ub(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fNEGQF32,(A), + "-(A)", + "Take Ones complement", + negate32(A), + (A_HVX_FLT) +) + +DEF_MACRO(fNEGQF16,(A), + "-(A)", + "Take Ones complement", + negate16(A), + (A_HVX_FLT) +) + +DEF_MACRO(fNEGSF,(A), + "-(A)", + "Change sign", + negate_sf(A), + (A_HVX_FLT) +) +DEF_MACRO(fNEGHF,(A), + "-(A)", + "Change sign", + negate_hf(A), + (A_HVX_FLT) +) + +//FP vector compare +DEF_MACRO(fCMPGT_QF32,(A,B), + "(A > B)", + "Vector compare of QF32 format", + cmpgt_qf32(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_QF16,(A,B), + "(A > B)", + "Vector compare of QF16 format", + cmpgt_qf16(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_SF,(A,B), + "(A > B)", + "Vector compare of SF format", + cmpgt_sf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_HF,(A,B), + "(A > B)", + "Vector compare of HF format", + cmpgt_hf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_BF,(A,B), + "(A > B)", + "Vector compare of BF format", + cmpgt_sf(((int)A) << 16,((int)B) << 16), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_QF32_SF,(A,B), + "(A > B)", + "Vector compare of QF32/SF format", + cmpgt_qf32_sf(A,B), + (A_HVX_FLT) +) + +DEF_MACRO(fCMPGT_QF16_HF,(A,B), + "(A > B)", + "Vector compare of QF16/HF format", + cmpgt_qf16_hf(A,B), + (A_HVX_FLT) +) + +//VMAX/VMIN_QF32/QF16 +DEF_MACRO(fMAX_QF32,(X,Y), + "max(X,Y)", + "", + max_qf32(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF32,(X,Y), + "min(X,Y)", + "", + min_qf32(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_QF32_SF,(X,Y), + "max(X,Y)", + "", + max_qf32_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF32_SF,(X,Y), + "min(X,Y)", + "", + min_qf32_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_QF16,(X,Y), + "max(X,Y)", + "", + max_qf16(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF16,(X,Y), + "min(X,Y)", + "", + min_qf16(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_QF16_HF,(X,Y), + "max(X,Y)", + "", + max_qf16_hf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_QF16_HF,(X,Y), + "min(X,Y)", + "", + min_qf16_hf(X,Y), + (A_HVX_FLT) +) + +//MAX/MIN_SF/HF +DEF_MACRO(fMAX_SF,(X,Y), + "max(X,Y)", + "", + max_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_SF,(X,Y), + "min(X,Y)", + "", + min_sf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMAX_HF,(X,Y), + "max(X,Y)", + "", + max_hf(X,Y), + (A_HVX_FLT) +) +DEF_MACRO(fMIN_HF,(X,Y), + "min(X,Y)", + "", + min_hf(X,Y), + (A_HVX_FLT) +) + diff --git a/target/hexagon/imported/system.idef b/target/hexagon/imported/system.idef index 7c6568e75e42..aa57149a1ceb 100644 --- a/target/hexagon/imported/system.idef +++ b/target/hexagon/imported/system.idef @@ -25,44 +25,301 @@ /* User->OS interface */ /********************************************/ -Q6INSN(J2_trap0,"trap0(#u8)",ATTRIBS(A_COF), +Q6INSN(J2_trap0,"trap0(#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Trap to Operating System", - fTRAP(0,uiV); + fTRAP(0,uiV); ) -Q6INSN(J2_pause,"pause(#u8)",ATTRIBS(A_COF), 
+Q6INSN(J2_trap1,"trap1(Rx32,#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"Trap to Operating System", + /* + * Note: if RxV is not written, we get the same as the input. + * Since trap1 is SOLO, this means the register will effectively not be updated + */ + if (!fTRAP1_VIRTINSN(uiV)) { + fTRAP(1,uiV); + } else if (uiV == 1) { + fVIRTINSN_RTE(uiV,RxV); + } else if (uiV == 3) { + fVIRTINSN_SETIE(uiV,RxV); + } else if (uiV == 4) { + fVIRTINSN_GETIE(uiV,RxV); + } else if (uiV == 6) { + fVIRTINSN_SPSWAP(uiV,RxV); + }) + +Q6INSN(J2_pause,"pause(#u8)",ATTRIBS(A_COF,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Enter low-power state for #u8 cycles",{fPAUSE(uiV);}) -Q6INSN(Y2_icinva,"icinva(Rs32)",ATTRIBS(A_ICOP,A_ICFLUSHOP),"Instruction Cache Invalidate Address",{fEA_REG(RsV); fICINVA(EA);}) +Q6INSN(J2_rte, "rte", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NO_TIMING_LOG), +"Return from Exception", +{ +fHIDE(if((thread->timing_on) && (thread->status & EXEC_STATUS_REPLAY)) { return; }) +fHIDE(CALLBACK(thread->processor_ptr->options->rte_callback, + thread->system_ptr,thread->processor_ptr, + thread->threadId,0);) +fCLEAR_RTE_EX(); +fBRANCH(fREAD_ELR(),COF_TYPE_RTE);}) + + +/********************************************/ +/* Interrupt Management */ +/********************************************/ + +Q6INSN(Y2_swi,"swi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Software Interrupt",{DO_SWI(RsV);}) +Q6INSN(Y2_cswi,"cswi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Cancel Software Interrupt",{DO_CSWI(RsV);}) +Q6INSN(Y2_ciad,"ciad(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Re-enable interrupt in IAD",{DO_CIAD(RsV);}) +Q6INSN(Y4_siad,"siad(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Disable interrupt in IAD",{DO_SIAD(RsV);}) +Q6INSN(Y2_iassignr,"Rd32=iassignr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Read interrupt to thread assignments",{DO_IASSIGNR(RsV,RdV);}) +Q6INSN(Y2_iassignw,"iassignw(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Write interrupt to thread assignments",{DO_IASSIGNW(RsV);}) + + +Q6INSN(Y2_getimask,"Rd32=getimask(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Read imask register of another thread", +{RdV = READ_IMASK(RsV & thread->processor_ptr->thread_system_mask); }) + +Q6INSN(Y2_setimask,"setimask(Pt4,Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Change imask register of another thread", +{fPREDUSE_TIMING();WRITE_IMASK(PtV & thread->processor_ptr->thread_system_mask,RsV); }) + + + +/********************************************/ +/* TLB management */ +/********************************************/ + +Q6INSN(Y2_tlbw,"tlbw(Rss32,Rt32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"Write TLB entry", {fTLBW(RtV,RssV);}) + +Q6INSN(Y5_ctlbw,"Rd32=ctlbw(Rss32,Rt32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"Conditional Write TLB entry", +{ + if (fTLB_ENTRY_OVERLAP( (1LL<<63) | RssV )) { + RdV=fTLB_ENTRY_OVERLAP_IDX( (1LL<<63) | RssV); + } else { + fTLBW(RtV,RssV); + RdV=0x80000000; + } +}) + +Q6INSN(Y5_tlboc,"Rd32=tlboc(Rss32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), +"TLB overlap check", +{ + if (fTLB_ENTRY_OVERLAP( (1LL<<63) | RssV )) { + RdV=fTLB_ENTRY_OVERLAP_IDX( (1LL<<63) | RssV); + } else { + RdV=0x80000000; + } +}) + +Q6INSN(Y2_tlbr,"Rdd32=tlbr(Rs32)", 
ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Read TLB entry", +{RddV = fTLBR(RsV);}) + +Q6INSN(Y2_tlbp,"Rd32=tlbp(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Probe TLB", {RdV=fTLBP(RsV);}) + +Q6INSN(Y5_tlbasidi,"tlbinvasid(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Invalidate ASID", +{ + fHIDE(int i;) + fHIDE(unsigned int NUM_TLB_ENTRIES = NUM_TLB_REGS(thread->processor_ptr);) + for (i = 0; i < NUM_TLB_ENTRIES; i++) { + if ((fGET_FIELD(fTLBR(i),PTE_G) == 0) && + (fGET_FIELD(fTLBR(i),PTE_ASID) == fEXTRACTU_RANGE(RsV,26,20))) { + fTLBW(i,fTLBR(i) & ~(1ULL << 63)); + } + } +}) + +Q6INSN(Y2_tlblock,"tlblock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG), "Lock TLB", +{fSET_TLB_LOCK();}) + +Q6INSN(Y2_tlbunlock,"tlbunlock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Unlock TLB", +{fCLEAR_TLB_LOCK();}) + +Q6INSN(Y2_k0lock,"k0lock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG), "Lock K0", +{fSET_K0_LOCK();}) + +Q6INSN(Y2_k0unlock,"k0unlock", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET), "Unlock K0", +{fCLEAR_K0_LOCK();}) + +/********************************************/ +/* Supervisor Reg Management */ +/********************************************/ + +Q6INSN(Y2_crswap0,"crswap(Rx32,sgp0)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general pointer 0 with GPR", +{fHIDE(size4s_t tmp;) tmp = RxV; RxV = READ_SGP0(); WRITE_SGP0(tmp);}) +Q6INSN(Y4_crswap1,"crswap(Rx32,sgp1)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general pointer 1 with GPR", +{fHIDE(size4s_t tmp;) tmp = RxV; RxV = READ_SGP1(); WRITE_SGP1(tmp);}) + +Q6INSN(Y4_crswap10,"crswap(Rxx32,sgp1:0)",ATTRIBS(A_PRIV,A_NOTE_PRIV), "Swap system general purpose 0/1 with GPR Pair", +{fHIDE(size8s_t tmp;) tmp = RxxV; RxxV=READ_SGP10(); WRITE_SGP10(tmp);}) + +Q6INSN(Y2_tfrscrr,"Rd32=Ss128",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer Supervisor Reg to GPR", {RdV=SsV;}) +Q6INSN(Y2_tfrsrcr,"Sd128=Rs32",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer GPR to Supervisor Reg", {SdV=RsV;}) +Q6INSN(Y4_tfrscpp,"Rdd32=Sss128",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer Supervisor Reg to GPR", {RddV=SssV;}) +Q6INSN(Y4_tfrspcp,"Sdd128=Rss32",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Transfer GPR to Supervisor Reg", {SddV=RssV;}) + +Q6INSN(G4_tfrgcrr,"Rd32=Gs32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer Guest Reg to GPR", {RdV=GsV;}) +Q6INSN(G4_tfrgrcr,"Gd32=Rs32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer GPR to Guest Reg", {GdV=RsV;}) +Q6INSN(G4_tfrgcpp,"Rdd32=Gss32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer Guest Reg to GPR", {RddV=GssV;}) +Q6INSN(G4_tfrgpcp,"Gdd32=Rss32",ATTRIBS(A_GUEST,A_NOTE_GUEST),"Transfer GPR to Guest Reg", {GddV=RssV;}) + + + +Q6INSN(Y2_setprio,"setprio(Pt4,Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV),"Change TID Prio of another thread", +{fPREDUSE_TIMING();WRITE_PRIO(PtV & thread->processor_ptr->thread_system_mask,RsV); }) + + + + +/********************************************/ +/* Power Management / Thread on/off */ +/********************************************/ +Q6INSN(Y6_diag,"diag(Rs32)",ATTRIBS(),"Send value to Diag trace module",{ +}) +Q6INSN(Y6_diag0,"diag0(Rss32,Rtt32)",ATTRIBS(),"Send values of two register to DIAG Trace. Set X=0",{ +}) +Q6INSN(Y6_diag1,"diag1(Rss32,Rtt32)",ATTRIBS(),"Send values of two register to DIAG Trace. 
Set X=1",{ +}) + -Q6INSN(Y2_isync,"isync",ATTRIBS(),"Memory Synchronization",{fISYNC();}) -Q6INSN(Y2_barrier,"barrier",ATTRIBS(A_RESTRICT_SLOT0ONLY),"Memory Barrier",{fBARRIER();}) -Q6INSN(Y2_syncht,"syncht",ATTRIBS(A_RESTRICT_SLOT0ONLY),"Memory Synchronization",{fSYNCH();}) +Q6INSN(Y4_trace,"trace(Rs32)",ATTRIBS(A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK),"Send value to ETM trace",{ + fDO_TRACE(RsV); +}) + +Q6INSN(Y2_stop,"stop(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Stop thread(s)",{ + fHIDE(RsV=RsV;) + if (!fIN_DEBUG_MODE_NO_ISDB(fGET_TNUM())) fCLEAR_RUN_MODE(fGET_TNUM()); +}) + +Q6INSN(Y4_nmi,"nmi(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG),"Raise NMI on thread(s)",{ + fDO_NMI(RsV); +}) + +Q6INSN(Y2_start,"start(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Start thread(s)",fSTART(RsV);) + +Q6INSN(Y2_wait,"wait(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_NO_TIMING_LOG),"Make thread(s) wait",{ + fHIDE(RsV=RsV;) + if (!fIN_DEBUG_MODE(fGET_TNUM())) fSET_WAIT_MODE(fGET_TNUM()); + fIN_DEBUG_MODE_WARN(fGET_TNUM()); +}) + +Q6INSN(Y2_resume,"resume(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Make thread(s) stop waiting",fRESUME(RsV);) + +Q6INSN(Y2_break,"brkpt",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Breakpoint",{fBREAK();}) + + +/********************************************/ +/* Cache Management */ +/********************************************/ + +Q6INSN(Y2_ictagr,"Rd32=ictagr(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Tag Read",{fICTAGR(RsV,RdV,RdN);}) +Q6INSN(Y2_ictagw,"ictagw(Rs32,Rt32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Tag Write",{fICTAGW(RsV,RtV);}) +Q6INSN(Y2_icdataw,"icdataw(Rs32,Rt32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Data Write",{fICDATAW(RsV,RtV);}) +Q6INSN(Y2_icdatar,"Rd32=icdatar(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICTAGOP),"Instruction Cache Data Read",{fICDATAR(RsV, RdV);}) +Q6INSN(Y2_icinva,"icinva(Rs32)",ATTRIBS(A_ICOP,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYADDRESS,A_ICFLUSHOP),"Instruction Cache Invalidate Address",{fEA_REG(RsV); fICINVA(EA);}) +Q6INSN(Y2_icinvidx,"icinvidx(Rs32)",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_ICFLUSHOP),"Instruction Cache Invalidate Index",{fICINVIDX(RsV);}) +Q6INSN(Y2_ickill,"ickill",ATTRIBS(A_ICOP,A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_ICFLUSHOP),"Instruction Cache Invalidate",{fICKILL();}) + +Q6INSN(Y2_isync,"isync",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET),"Memory Synchronization",{fISYNC();}) +Q6INSN(Y2_barrier,"barrier",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK),"Memory Barrier",{fBARRIER();}) +Q6INSN(Y2_syncht,"syncht",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET),"Memory Synchronization",{fSYNCH();}) + + +Q6INSN(Y2_dcfetchbo,"dcfetch(Rs32+#u11:3)",ATTRIBS(A_RESTRICT_PREFERSLOT0,A_DCFETCH,A_RESTRICT_NOSLOT1_STORE),"Data Cache Prefetch",{fEA_RI(RsV,uiV); fDCFETCH(EA);}) +Q6INSN(Y2_dckill,"dckill",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_DCFLUSHOP),"Data Cache 
Invalidate",{fDCKILL();}) -Q6INSN(Y2_dcfetchbo,"dcfetch(Rs32+#u11:3)",ATTRIBS(A_RESTRICT_PREFERSLOT0,A_DCFETCH),"Data Cache Prefetch",{fEA_RI(RsV,uiV); fDCFETCH(EA);}) +Q6INSN(Y2_dczeroa,"dczeroa(Rs32)",ATTRIBS(A_STORE,A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCZEROA),"Zero an aligned 32-byte cacheline",{fEA_REG(RsV); fDCZEROA(EA);}) +Q6INSN(Y2_dccleana,"dccleana(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Clean Address",{fEA_REG(RsV); fDCCLEANA(EA);}) +Q6INSN(Y2_dccleanidx,"dccleanidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Clean Index",{fDCCLEANIDX(RsV);}) +Q6INSN(Y2_dccleaninva,"dccleaninva(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Clean and Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) +Q6INSN(Y2_dccleaninvidx,"dccleaninvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Clean and Invalidate Index",{fDCCLEANINVIDX(RsV);}) +Q6INSN(Y2_dcinva,"dcinva(Rs32)",ATTRIBS(A_RESTRICT_SLOT1_AOK,A_NOTE_SLOT1_AOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYADDRESS,A_DCFLUSHOP),"Data Cache Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) +Q6INSN(Y2_dcinvidx,"dcinvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCFLUSHOP),"Data Cache Invalidate Index",{fDCINVIDX(RsV);}) +Q6INSN(Y2_dctagr,"Rd32=dctagr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_DCTAGOP),"Data Cache Tag Read",{fDCTAGR(RsV,RdV,RdN);}) +Q6INSN(Y2_dctagw,"dctagw(Rs32,Rt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_DCTAGOP),"Data Cache Tag Write",{fDCTAGW(RsV,RtV);}) -Q6INSN(Y2_dczeroa,"dczeroa(Rs32)",ATTRIBS(A_STORE,A_RESTRICT_SLOT0ONLY,A_DCZEROA),"Zero an aligned 32-byte cacheline",{fEA_REG(RsV); fDCZEROA(EA);}) -Q6INSN(Y2_dccleana,"dccleana(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Clean Address",{fEA_REG(RsV); fDCCLEANA(EA);}) -Q6INSN(Y2_dccleaninva,"dccleaninva(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Clean and Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) -Q6INSN(Y2_dcinva,"dcinva(Rs32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_DCFLUSHOP),"Data Cache Invalidate Address",{fEA_REG(RsV); fDCCLEANINVA(EA);}) +Q6INSN(Y2_l2kill,"l2kill",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Cache Invalidate",{fL2KILL();}) +Q6INSN(Y4_l2tagw,"l2tagw(Rs32,Rt32)",ATTRIBS(A_PRIV,A_NOTE_BADTAG_UNDEF,A_NOTE_PRIV,A_RESTRICT_SLOT0ONLY,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_CACHEOP,A_COPBYIDX,A_L2TAGOP),"L2 Cache Tag Write",{fL2TAGW(RsV,RtV);}) +Q6INSN(Y4_l2tagr,"Rd32=l2tagr(Rs32)",ATTRIBS(A_PRIV,A_NOTE_BADTAG_UNDEF,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2TAGOP),"L2 Cache Tag Read",{fL2TAGR(RsV,RdV,RdN);}) +Q6INSN(Y2_l2cleaninvidx,"l2cleaninvidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Clean and Invalidate Index",{fL2CLEANINVIDX(RsV); }) 
+Q6INSN(Y5_l2cleanidx,"l2cleanidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Clean by Index",{fL2CLEANIDX(RsV); }) +Q6INSN(Y5_l2invidx,"l2invidx(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_AXOK,A_RESTRICT_PACKET_AXOK,A_RESTRICT_SLOT0ONLY,A_CACHEOP,A_COPBYIDX,A_L2FLUSHOP),"L2 Cache Invalidate by Index",{fL2INVIDX(RsV); }) -Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY),"L2 Cache Prefetch", + + +Q6INSN(Y4_l2fetch,"l2fetch(Rs32,Rt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch", { fL2FETCH(RsV, - (RtV&0xff), /*height*/ - ((RtV>>8)&0xff), /*width*/ - ((RtV>>16)&0xffff), /*stride*/ - 0); /*extra attrib flags*/ + (RtV&0xff), /*height*/ + ((RtV>>8)&0xff), /*width*/ + ((RtV>>16)&0xffff), /*stride*/ + 0); /*extra attrib flags*/ +}) + +Q6INSN(Y6_dmstart,"dmstart(Rs32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Start", { + fUNIMP(); +}) + +Q6INSN(Y6_dmlink,"dmlink(Rs32,Rt32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Link", { + fUNIMP(); }) +Q6INSN(Y6_dmpoll,"Rd32=dmpoll",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Poll", { + fUNIMP(); +}) + +Q6INSN(Y6_dmwait,"Rd32=dmwait",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Wait", { + fUNIMP(); +}) + +Q6INSN(Y6_dmsyncht,"Rd32=dmsyncht",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA SynchT",{ + fUNIMP(); +}) +Q6INSN(Y6_dmtlbsynch,"Rd32=dmtlbsynch",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA TLB Synch",{ + fUNIMP(); +}) + +Q6INSN(Y6_dmcfgrd,"Rd32=dmcfgrd(Rs32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG), + "DMA Config Read", { + fUNIMP(); +}) + +Q6INSN(Y6_dmcfgwr,"dmcfgwr(Rs32,Rt32)",ATTRIBS(A_NOTE_PRIV,A_PRIV,A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG), + "DMA Config Write", { + fUNIMP(); +}) + +Q6INSN(Y6_dmpause,"Rd32=dmpause",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_NO_TIMING_LOG),"DMA Pause",{ + fUNIMP(); +}) + +Q6INSN(Y6_dmresume,"dmresume(Rs32)",ATTRIBS(A_NOTE_NOPACKET,A_RESTRICT_NOPACKET,A_DMA,A_RESTRICT_SLOT0ONLY,A_SYNC_MARKER,A_NO_TIMING_LOG),"DMA Resume",{ + fUNIMP(); +}) -Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY),"L2 Cache Prefetch", +Q6INSN(Y5_l2fetch,"l2fetch(Rs32,Rtt32)",ATTRIBS(A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK),"L2 Cache Prefetch", { fL2FETCH(RsV, - fGETUHALF(0,RttV), /*height*/ - fGETUHALF(1,RttV), /*width*/ - fGETUHALF(2,RttV), /*stride*/ - fGETUHALF(3,RttV)); /*flags*/ + fGETUHALF(0,RttV), /*height*/ + fGETUHALF(1,RttV), /*width*/ + fGETUHALF(2,RttV), /*stride*/ + fGETUHALF(3,RttV)); /*flags*/ }) + +Q6INSN(Y5_l2locka,"Pd4=l2locka(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_CACHEOP,A_COPBYADDRESS,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK,A_RESTRICT_LATEPRED,A_NOTE_LATEPRED), +"Lock L2 cache line by address", { fEA_REG(RsV); fL2LOCKA(EA,PdV,PdN); fHIDE(MARK_LATE_PRED_WRITE(PdN)) }) + + +Q6INSN(Y5_l2unlocka,"l2unlocka(Rs32)", ATTRIBS(A_PRIV,A_NOTE_PRIV,A_CACHEOP,A_COPBYADDRESS,A_RESTRICT_SLOT0ONLY,A_RESTRICT_PACKET_AXOK,A_NOTE_AXOK), "UnLock L2 cache line 
by address", { fEA_REG(RsV); fL2UNLOCKA(EA); }) + + + +Q6INSN(Y5_l2gunlock,"l2gunlock",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Unlock",{fL2UNLOCK();}) + +Q6INSN(Y5_l2gclean,"l2gclean",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean",{fL2CLEAN();}) + +Q6INSN(Y5_l2gcleaninv,"l2gcleaninv",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate",{fL2CLEANINV();}) + +Q6INSN(Y6_l2gcleanpa,"l2gclean(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean by PA Range",{fL2CLEANPA(RttV);}) + +Q6INSN(Y6_l2gcleaninvpa,"l2gcleaninv(Rtt32)",ATTRIBS(A_PRIV,A_NOTE_PRIV,A_NOTE_NOPACKET,A_RESTRICT_SLOT0ONLY,A_RESTRICT_NOPACKET,A_CACHEOP,A_L2FLUSHOP),"L2 Global Clean and Invalidate by PA Range",{fL2CLEANINVPA(RttV);}) diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h index 24dcf7fe9f38..5d59430da9e1 100644 --- a/target/hexagon/insn.h +++ b/target/hexagon/insn.h @@ -66,8 +66,8 @@ struct Packet { bool pkt_has_dczeroa; - bool pkt_has_store_s0; - bool pkt_has_store_s1; + bool pkt_has_scalar_store_s0; + bool pkt_has_scalar_store_s1; bool pkt_has_hvx; Insn *vhist_insn; diff --git a/target/hexagon/internal.h b/target/hexagon/internal.h index 32e96f00d97a..ff89c9cda43f 100644 --- a/target/hexagon/internal.h +++ b/target/hexagon/internal.h @@ -22,13 +22,32 @@ int hexagon_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int hexagon_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); +#ifndef CONFIG_USER_ONLY +int hexagon_sys_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n); +int hexagon_sys_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n); +#endif int hexagon_hvx_gdb_read_register(CPUState *env, GByteArray *mem_buf, int n); int hexagon_hvx_gdb_write_register(CPUState *env, uint8_t *mem_buf, int n); void hexagon_debug_vreg(CPUHexagonState *env, int regnum); void hexagon_debug_qreg(CPUHexagonState *env, int regnum); void hexagon_debug(CPUHexagonState *env); +void hexagon_dump(CPUHexagonState *env, FILE *f, int flags); extern const char * const hexagon_regnames[TOTAL_PER_THREAD_REGS]; +extern const char * const hexagon_sregnames[]; +extern const char * const hexagon_gregnames[]; + +void G_NORETURN do_raise_exception(CPUHexagonState *env, + uint32_t exception, + target_ulong PC, + uintptr_t retaddr); + +#define hexagon_cpu_mmu_enabled(env) \ + GET_SYSCFG_FIELD(SYSCFG_MMUEN, arch_get_system_reg(env, HEX_SREG_SYSCFG)) + +#ifndef CONFIG_USER_ONLY +extern const VMStateDescription vmstate_hexagon_cpu; +#endif #endif diff --git a/target/hexagon/machine.c b/target/hexagon/machine.c new file mode 100644 index 000000000000..79e9b7effa5e --- /dev/null +++ b/target/hexagon/machine.c @@ -0,0 +1,85 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "migration/cpu.h" +#include "cpu.h" +#include "hex_mmu.h" + +static int get_u64_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + uint64_t *p = pv; + *p = qemu_get_be64(f); + return 0; +} + +static int put_u64_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + qemu_put_be64(f, *((uint64_t *)pv)); + return 0; +} + +const VMStateInfo vmstate_info_uint64_ptr = { + .name = "uint64_t_pointer", + .get = get_u64_ptr, + .put = put_u64_ptr, +}; + +static int get_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + CPUHexagonTLBContext *tlb = pv; + for (int i = 0; i < ARRAY_SIZE(tlb->entries); i++) { + tlb->entries[i] = qemu_get_be64(f); + } + return 0; +} + +static int put_hex_tlb_ptr(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + CPUHexagonTLBContext *tlb = pv; + for (int i = 0; i < ARRAY_SIZE(tlb->entries); i++) { + qemu_put_be64(f, tlb->entries[i]); + } + return 0; +} + +const VMStateInfo vmstate_info_hex_tlb_ptr = { + .name = "hex_tlb_pointer", + .get = get_hex_tlb_ptr, + .put = put_hex_tlb_ptr, +}; + +const VMStateDescription vmstate_hexagon_cpu = { + .name = "cpu", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_CPU(), + VMSTATE_UINTTL_ARRAY(env.gpr, HexagonCPU, TOTAL_PER_THREAD_REGS), + VMSTATE_UINTTL_ARRAY(env.pred, HexagonCPU, NUM_PREGS), + VMSTATE_UINTTL_ARRAY(env.t_sreg, HexagonCPU, NUM_SREGS), + VMSTATE_UINTTL_ARRAY(env.greg, HexagonCPU, NUM_GREGS), + VMSTATE_UINTTL(env.next_PC, HexagonCPU), + VMSTATE_UINTTL(env.tlb_lock_state, HexagonCPU), + VMSTATE_UINTTL(env.k0_lock_state, HexagonCPU), + VMSTATE_UINTTL(env.tlb_lock_count, HexagonCPU), + VMSTATE_UINTTL(env.k0_lock_count, HexagonCPU), + VMSTATE_UINTTL(env.threadId, HexagonCPU), + VMSTATE_UINTTL(env.cause_code, HexagonCPU), + VMSTATE_UINTTL(env.wait_next_pc, HexagonCPU), + VMSTATE_POINTER(env.hex_tlb, HexagonCPU, 0, + vmstate_info_hex_tlb_ptr, CPUHexagonTLBContext *), + VMSTATE_UINT64(env.t_cycle_count, HexagonCPU), + VMSTATE_POINTER(env.g_pcycle_base, HexagonCPU, 0, + vmstate_info_uint64_ptr, uint64_t *), + VMSTATE_END_OF_LIST() + }, +}; + diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index ee3d4c88e7bd..01469a28a0cc 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -82,7 +82,7 @@ */ #define CHECK_NOSHUF(VA, SIZE) \ do { \ - if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \ + if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \ probe_noshuf_load(VA, SIZE, ctx->mem_idx); \ process_store(ctx, 1); \ } \ @@ -93,11 +93,11 @@ TCGLabel *noshuf_label = gen_new_label(); \ tcg_gen_brcondi_tl(TCG_COND_EQ, PRED, 0, noshuf_label); \ GET_EA; \ - if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \ + if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \ probe_noshuf_load(EA, SIZE, ctx->mem_idx); \ } \ gen_set_label(noshuf_label); \ - if (insn->slot == 0 && ctx->pkt->pkt_has_store_s1) { \ + if (insn->slot == 0 && ctx->pkt->pkt_has_scalar_store_s1) { \ process_store(ctx, 1); \ } \ } while (0) @@ -524,7 +524,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fLOAD(NUM, SIZE, SIGN, EA, DST) \ do { \ - check_noshuf(env, pkt_has_store_s1, slot, EA, SIZE, GETPC()); \ + check_noshuf(env, pkt_has_scalar_store_s1, slot, EA, SIZE, GETPC()); \ DST = (size##SIZE##SIGN##_t)MEM_LOAD##SIZE(env, EA, GETPC()); \ } 
while (0) #endif @@ -537,9 +537,6 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #ifdef CONFIG_USER_ONLY #define fFRAMECHECK(ADDR, EA) do { } while (0) /* Not modelled in linux-user */ -#else -/* System mode not implemented yet */ -#define fFRAMECHECK(ADDR, EA) g_assert_not_reached(); #endif #ifdef QEMU_GENERATE @@ -630,8 +627,18 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fCONSTLL(A) A##LL #define fECHO(A) (A) -#define fTRAP(TRAPTYPE, IMM) helper_raise_exception(env, HEX_EXCP_TRAP0) +#ifdef CONFIG_USER_ONLY +#define fTRAP(TRAPTYPE, IMM) \ + do { \ + hexagon_raise_exception_err(env, HEX_EVENT_TRAP0, PC); \ + } while (0) +#endif + +#define fDO_TRACE(SREG) +#define fBREAK() +#define fUNPAUSE() #define fPAUSE(IMM) +#define fDCFETCH(REG) #define fALIGN_REG_FIELD_VALUE(FIELD, VAL) \ ((VAL) << reg_field_info[FIELD].offset) @@ -642,16 +649,43 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) reg_field_info[FIELD].width, \ reg_field_info[FIELD].offset) +#define fGET_FIELD(VAL, FIELD) \ + fEXTRACTU_BITS(VAL, \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset) +#define fSET_FIELD(VAL, FIELD, NEWVAL) \ + fINSERT_BITS(VAL, \ + reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, \ + (NEWVAL)) + #ifdef QEMU_GENERATE #define fDCZEROA(REG) \ do { \ ctx->dczero_addr = tcg_temp_new(); \ tcg_gen_mov_tl(ctx->dczero_addr, (REG)); \ } while (0) +#else +#define fDCZEROA(REG) ((void) REG) #endif #define fBRANCH_SPECULATE_STALL(DOTNEWVAL, JUMP_COND, SPEC_DIR, HINTBITNUM, \ STRBITNUM) /* Nothing */ +#ifdef CONFIG_USER_ONLY +/* + * This macro can only be true in guest mode. + * In user mode, the 4 VIRTINSN's can't be reached + */ +#define fTRAP1_VIRTINSN(IMM) (false) +#define fVIRTINSN_SPSWAP(IMM, REG) g_assert_not_reached() +#define fVIRTINSN_GETIE(IMM, REG) g_assert_not_reached() +#define fVIRTINSN_SETIE(IMM, REG) g_assert_not_reached() +#define fVIRTINSN_RTE(IMM, REG) g_assert_not_reached() +#endif #endif + +#define fPREDUSE_TIMING() + +#define fUNIMP() qemu_log_mask(LOG_UNIMP, "Unimplemented instruction\n") diff --git a/target/hexagon/max.h b/target/hexagon/max.h new file mode 100644 index 000000000000..0f595bcb736d --- /dev/null +++ b/target/hexagon/max.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_MAX_H +#define HEXAGON_MAX_H + +#define MAX_EXT_CONTEXTS 8 +#define MAX_L2_INTERLEAVES 2 +#define MAX_VFIFO_COUNT 4 + +#define SLOTS_MAX 4 + +#define REG_WRITES_MAX 32 +#define PRED_WRITES_MAX 5 +#define STORES_MAX 2 +#define LOADS_MAX 2 +#define MAX_PRED 4 + +#define PACKET_BYTES_MAX 16 +#define MAX_TLB_ENTRIES 1024 +#define DTLB_ENTRIES 16 +#define ITLB_ENTRIES 16 + +#endif /* HEXAGON_MAX_H */ diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index bb4ebaae816e..280b5dc58ac5 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -20,6 +20,7 @@ hexagon_ss = ss.source_set() hex_common_py = 'hex_common.py' gen_tcg_h = meson.current_source_dir() / 'gen_tcg.h' gen_tcg_hvx_h = meson.current_source_dir() / 'gen_tcg_hvx.h' +gen_tcg_sys_h = meson.current_source_dir() / 'gen_tcg_sys.h' idef_parser_dir = meson.current_source_dir() / 'idef-parser' # @@ -244,11 +245,13 @@ decodetree_trans_funcs_generated = custom_target( command: [python, files('gen_trans_funcs.py'), semantics_generated, '@OUTPUT@'], ) hexagon_ss.add(decodetree_trans_funcs_generated) +hexagon_softmmu_ss = ss.source_set() hexagon_ss.add(files( 'cpu.c', 'translate.c', 'op_helper.c', + 'cpu_helper.c', 'gdbstub.c', 'genptr.c', 'reg_fields.c', @@ -260,6 +263,16 @@ hexagon_ss.add(files( 'fma_emu.c', 'mmvec/decode_ext_mmvec.c', 'mmvec/system_ext_mmvec.c', + 'mmvec/mmvec_qfloat.c', + 'mmvec/kvx_ieee.c', +)) + +hexagon_softmmu_ss.add(files( + 'hex_mmu.c', + 'hex_interrupts.c', + 'hexswi.c', + 'machine.c', + 'monitor.c', )) # @@ -271,7 +284,8 @@ hexagon_ss.add(files( # idef-generated-enabled-instructions # idef_parser_enabled = get_option('hexagon_idef_parser') -if idef_parser_enabled and 'hexagon-linux-user' in target_dirs +if idef_parser_enabled and ('hexagon-linux-user' in target_dirs or + 'hexagon-softmmu' in target_dirs) idef_parser_input_generated = custom_target( 'idef_parser_input.h.inc', output: 'idef_parser_input.h.inc', @@ -346,12 +360,12 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs # Setup input and dependencies for the next step, this depends on whether or # not idef-parser is enabled helper_dep = [semantics_generated, idef_generated_tcg_c, idef_generated_tcg] - helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, '--idef-parser', idef_generated_list] + helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h, '--idef-parser', idef_generated_list] else # Setup input and dependencies for the next step, this depends on whether or # not idef-parser is enabled helper_dep = [semantics_generated] - helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h] + helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h] endif # @@ -365,7 +379,7 @@ helper_protos_generated = custom_target( 'helper_protos_generated.h.inc', output: 'helper_protos_generated.h.inc', depends: helper_dep, - depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h], command: [python, files('gen_helper_protos.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(helper_protos_generated) @@ -374,7 +388,7 @@ helper_funcs_generated = custom_target( 'helper_funcs_generated.c.inc', output: 'helper_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h], command: [python, files('gen_helper_funcs.py'), helper_in, 
'@OUTPUT@'],
 )
 hexagon_ss.add(helper_funcs_generated)
 
@@ -383,7 +397,7 @@ tcg_funcs_generated = custom_target(
     'tcg_funcs_generated.c.inc',
     output: 'tcg_funcs_generated.c.inc',
     depends: helper_dep,
-    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
+    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h],
     command: [python, files('gen_tcg_funcs.py'), helper_in, '@OUTPUT@'],
 )
 hexagon_ss.add(tcg_funcs_generated)
 
@@ -392,9 +406,10 @@ analyze_funcs_generated = custom_target(
     'analyze_funcs_generated.c.inc',
     output: 'analyze_funcs_generated.c.inc',
     depends: helper_dep,
-    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h],
+    depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h, gen_tcg_sys_h],
     command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'],
 )
 hexagon_ss.add(analyze_funcs_generated)
 
 target_arch += {'hexagon': hexagon_ss}
+target_system_arch += {'hexagon': hexagon_softmmu_ss}
diff --git a/target/hexagon/mmvec/kvx_ieee.c b/target/hexagon/mmvec/kvx_ieee.c
new file mode 100644
index 000000000000..3e67230f62e4
--- /dev/null
+++ b/target/hexagon/mmvec/kvx_ieee.c
@@ -0,0 +1,1460 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "kvx_ieee.h"
+#include "kvx_mac_reduce.c"
+#include "qemu/host-utils.h"
+
+uint32_t shiftRightJam32( uint32_t a, uint_fast16_t dist )
+{
+    return
+        (dist < 31) ? a>>dist | ((uint32_t) (a<<(-dist & 31)) != 0) : (a != 0);
+}
+
+uint_fast8_t countLeadingZeros16( uint16_t a )
+{
+    return clz16(a);
+}
+
+struct exp8_sig16 normSubnormalF16Sig( uint_fast16_t sig )
+{
+    int_fast8_t shiftDist;
+    struct exp8_sig16 z;
+
+    shiftDist = countLeadingZeros16( sig ) - 5;
+    z.exp = 1 - shiftDist;
+    z.sig = sig<<shiftDist;
+    return z;
+}
+
+/*
+ * NOTE: the span from the z.sig shift above through the first half of this
+ * function was lost when the patch text was reflowed; the lines here are an
+ * assumed reconstruction following the Berkeley SoftFloat round-to-nearest-
+ * even sequence that this file derives from, spliced to the surviving tail.
+ */
+uint16_t roundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
+{
+    bool roundNearEven = true;
+    uint_fast8_t roundIncrement = 0x8;
+    uint_fast8_t roundBits = sig & 0xF;
+
+    if ( 0x1D <= (unsigned int) exp ) {
+        if ( exp < 0 ) {
+            sig = shiftRightJam32( sig, -exp );
+            exp = 0;
+            roundBits = sig & 0xF;
+        } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) {
+            return packToF16UI( sign, 0x1F, 0 );
+        }
+    }
+
+    sig = (sig + roundIncrement)>>4;
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven);
+    if ( ! sig ) exp = 0;
+
+    return packToF16UI( sign, exp, sig );
+
+}
+
+
+uint32_t fp_mult_sf_sf (uint32_t op1, uint32_t op2)
+{
+
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("fp_mult_sf_sf");
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+        return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a*b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+
+    result = isNaNF32UI(result) ?
FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_add_sf_sf (uint32_t op1, uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("fp_add_sf_sf");
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+        return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a+b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint32_t fp_sub_sf_sf (uint32_t op1, uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+        return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = a-b;
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+//--------------------------------------------------------------
+//Function to convert FP32 to FP16
+//--------------------------------------------------------------
+
+uint16_t f32_to_f16 ( uint32_t a)
+{
+    bool sign;
+    int_fast16_t exp;
+    uint_fast32_t frac;
+    uint_fast16_t frac16;
+
+    sign = signF32UI( a );
+    exp  = expF32UI ( a );
+    frac = fracF32UI( a );
+
+    // Inf and NaN case
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            return FP16_DEF_NAN;
+        } else {
+            return packToF16UI( sign, 0x1F, 0 );
+        }
+    }
+
+    /*------------------------------------------------------------------------
+    frac>>9 : keeping 14 bits of precision out of 23 bits in FP32
+    (frac & 0x1FF) != 0) : setting the sticky bit required for rounding
+    *------------------------------------------------------------------------*/
+    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
+
+    //If input was a Zero
+    if ( ! (exp | frac16) ) {
+        return packToF16UI( sign, 0, 0 );
+    }
+
+    return roundPackToF16( sign, exp - 0x71, frac16 | 0x4000 );
+
+}
+
+//--------------------------------------------------------------
+//Function to convert FP16 to FP32
+//--------------------------------------------------------------
+
+uint32_t f16_to_f32( uint16_t a )
+{
+    bool sign;
+    int_fast8_t exp;
+    uint_fast16_t frac;
+    struct exp8_sig16 normExpSig;
+
+    sign = signF16UI( a );
+    exp  = expF16UI ( a );
+    frac = fracF16UI( a );
+
+
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            return FP32_DEF_NAN;
+        } else {
+            return packToF32UI( sign, 0xFF, 0 );
+        }
+    }
+
+
+    if ( !
frac ) { + return packToF32UI( sign, 0, 0 ); + } + normExpSig = normSubnormalF16Sig( frac ); + exp = normExpSig.exp - 1; + frac = normExpSig.sig; + } + + + return packToF32UI( sign, exp + 0x70, (uint_fast32_t) frac<<13 ); + +} + +uint16_t fp_mult_hf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a*b; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint16_t fp_add_hf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a+b; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint16_t fp_sub_hf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a-b; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint32_t fp_mult_sf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a*b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint32_t fp_add_sf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a+b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint32_t fp_sub_sf_hf (uint16_t op1, uint16_t op2) +{ + + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + printf("Debug : op2 =0x%08x\n",op2); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + a = u_op1.f; + b = u_op2.f; + rslt = a-b; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint32_t fp_mult_sf_bf_acc (uint16_t op1, uint16_t op2, uint32_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + double a,b,facc,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%08x\n",acc); + #endif + + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + + if(isNaNF32UI(op1_f32) || isNaNF32UI(op2_f32) || isNaNF32UI(acc)) + return FP32_DEF_NAN; + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + //rslt = fma(a,b,facc); + rslt = (a * b) + facc; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : facc = %f\n",facc); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint32_t fp_mult_sf_bf (uint16_t op1, uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + return fp_mult_sf_sf(op1_f32, op2_f32); +} + +uint32_t fp_add_sf_bf (uint16_t op1, uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + return fp_add_sf_sf(op1_f32, op2_f32); +} + +uint32_t fp_sub_sf_bf (uint16_t op1, uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + return fp_sub_sf_sf(op1_f32, op2_f32); +} + +uint16_t f16_to_uh( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + uint16_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = UHW_MAX; + goto end; + } + //converting a negative floating-point value to + //unsigned integer U(h|b) ----> (Vx4Rslt is 0) + if(signF16UI(op1)) + { + result = 0x0; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = UHW_MAX; + goto end; + } + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + + //The default float-to-integer conversion in C does not + //round to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + frac = a - (float)((uint16_t) a); + //round to the nearest + result = (uint16_t) (a + 0.5); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) result--; + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%x\n",result); + #endif + return result; +} + +int16_t f16_to_h( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + int16_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = HW_MAX; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = signF16UI(op1) ? HW_MIN : HW_MAX; + goto end; + } + + //The default float-to-integer conversion in C does not round + //to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + if(a > (float)(HW_MAX)) + { + result = HW_MAX; + goto end; + } + if(a < (float)(HW_MIN)) + { + result = HW_MIN; + goto end; + } + + frac = fabs(a - (float)((int16_t) a)); + //round to the nearest + result = (a > 0) ? 
((int16_t) (a + 0.5)) : ((int16_t) (a - 0.5)); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) + { + if(a > 0) result--; + if(a < 0) result++; + } + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%04x\n",result); + #endif + return result; +} + +uint8_t f16_to_ub( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + uint8_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = UBYTE_MAX; + goto end; + } + //converting a negative floating-point value to + //unsigned integer U(h|b) ----> (Vx4Rslt is 0) + if(signF16UI(op1)) + { + result = 0x0; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = UBYTE_MAX; + goto end; + } + + //The default float-to-integer conversion in C does + //not round to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + if( a > (float)(UBYTE_MAX)) + { + result = UBYTE_MAX; + goto end; + } + + frac = a - (float)((uint16_t) a); + //round to the nearest + result = (uint8_t) (a + 0.5); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) + { + if(a > 0) result--; + if(a < 0) result++; + } + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%x\n",result); + #endif + return result; +} + +int8_t f16_to_b( uint16_t op1) +{ + union ui32_f32 u_op1; + + float a,frac; + uint32_t op1_f32; + int16_t result; + + //converting a NaN to an integral ----> Vx4Rslt is +MAX_INT + if(isNaNF16UI(op1)) + { + result = BYTE_MAX; + goto end; + } + //converting ±Inf to an integral ----> Vx4Rslt is ±MAX_INT + if(isInfF16UI(op1)) + { + result = signF16UI(op1) ? BYTE_MIN : BYTE_MAX; + goto end; + } + + //The default float-to-integer conversion in C does not + //round to the nearest integer, but instead truncates toward zero. + op1_f32 = f16_to_f32(op1); + u_op1.ui = op1_f32; + a = u_op1.f; + + //out of range FP to integer ------> Vx4Rslt is ±MAX_INT + if(a > (float)(BYTE_MAX)) + { + result = BYTE_MAX; + goto end; + } + if(a < (float)(BYTE_MIN)) + { + result = BYTE_MIN; + goto end; + } + + frac = fabs(a - (float)((int16_t) a)); + //round to the nearest + result = (a > 0) ? 
((int16_t) (a + 0.5)) : ((int16_t) (a - 0.5)); + //Ties to Even + if(frac == 0.5) + { + if((result % 2)) + { + if(a > 0) result--; + if(a < 0) result++; + } + } + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : a frac = %f\n",frac); + #endif + + end: + #ifdef DEBUG + printf("Debug : result =0x%04x\n",result); + #endif + return result; +} + +uint16_t uh_to_f16(uint16_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint16_t h_to_f16 (int16_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint16_t ub_to_f16(uint8_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint16_t b_to_f16 (int8_t op1) +{ + union ui32_f32 u_op1; + + float a; + uint32_t rslt; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%08x\n",op1); + #endif + + a = (float) op1; + u_op1.f = a; + rslt = u_op1.ui; + result = f32_to_f16(rslt); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : rslt = 0x%08x\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint16_t sf_to_bf (int32_t op1) +{ + uint32_t rslt = op1; + if((rslt & 0x1FFFF) == 0x08000){ + //break; // do not round up if exactly .5 and even already + } + else if ((rslt & 0x8000) == 0x8000){ + rslt += 0x8000; //rounding to nearest number + } + rslt = isNaNF32UI(op1) ? FP32_DEF_NAN : rslt; + uint16_t result = (rslt >> 16); + return result; +} + +uint32_t fp_vdmpy (uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l) +{ + union ui32_f32 u_op; + union ui32_f32 u_rslt; + + uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32; + float f_op1_u, f_op1_l, f_op2_u, f_op2_l; + double f_prod_l, f_prod_u, rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1_u =0x%04x\n",op1_u); + printf("Debug : op1_l =0x%04x\n",op1_l); + printf("Debug : op2_u =0x%04x\n",op2_u); + printf("Debug : op2_l =0x%04x\n",op2_l); + #endif + + if(isNaNF16UI(op1_u) || isNaNF16UI(op1_l) || isNaNF16UI(op2_u) || + isNaNF16UI(op2_l)) + { result = FP32_DEF_NAN; + goto end; + } + + op1_u_f32 = f16_to_f32(op1_u); + op1_l_f32 = f16_to_f32(op1_l); + op2_u_f32 = f16_to_f32(op2_u); + op2_l_f32 = f16_to_f32(op2_l); + + u_op.ui = op1_u_f32; + f_op1_u = u_op.f; + + u_op.ui = op1_l_f32; + f_op1_l = u_op.f; + + u_op.ui = op2_l_f32; + f_op2_l = u_op.f; + + u_op.ui = op2_u_f32; + f_op2_u = u_op.f; + + f_prod_l = f_op1_l * f_op2_l; + f_prod_u = f_op1_u * f_op2_u; + rslt = f_prod_u + f_prod_l; + + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : f_op1_u = %f\n",f_op1_u);
+    printf("Debug : f_op1_l = %f\n",f_op1_l);
+    printf("Debug : f_op2_u = %f\n",f_op2_u);
+    printf("Debug : f_op2_l = %f\n",f_op2_l);
+    printf("Debug : f_prod_l = %f\n",f_prod_l);
+    printf("Debug : f_prod_u = %f\n",f_prod_u);
+    printf("Debug : rslt = %f\n",rslt);
+    #endif
+
+end:
+    #ifdef DEBUG
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+    return result;
+}
+
+uint32_t fp_vdmpy_acc_dumb (uint32_t acc,uint16_t op1_u,uint16_t op1_l,
+                            uint16_t op2_u,uint16_t op2_l)
+{
+    union ui32_f32 u_op;
+    union ui32_f32 u_acc;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32;
+    float f_op1_u, f_op1_l, f_op2_u, f_op2_l, f_acc;
+    long double f_prod_l, f_prod_u, rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1_u =0x%04x\n",op1_u);
+    printf("Debug : op1_l =0x%04x\n",op1_l);
+    printf("Debug : op2_u =0x%04x\n",op2_u);
+    printf("Debug : op2_l =0x%04x\n",op2_l);
+    printf("Debug : acc =0x%08x\n",acc);
+    #endif
+
+    op1_u_f32 = f16_to_f32(op1_u);
+    op1_l_f32 = f16_to_f32(op1_l);
+    op2_u_f32 = f16_to_f32(op2_u);
+    op2_l_f32 = f16_to_f32(op2_l);
+
+    u_op.ui = op1_u_f32;
+    f_op1_u = u_op.f;
+
+    u_op.ui = op1_l_f32;
+    f_op1_l = u_op.f;
+
+    u_op.ui = op2_l_f32;
+    f_op2_l = u_op.f;
+
+    u_op.ui = op2_u_f32;
+    f_op2_u = u_op.f;
+
+    u_acc.ui = acc;
+    f_acc = u_acc.f;
+
+    f_prod_l = (long double)(f_op1_l * f_op2_l);
+    f_prod_u = (long double)(f_op1_u * f_op2_u);
+    rslt = (long double)((long double)f_acc + f_prod_u + f_prod_l);
+
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+    result = isNaNF32UI(result) ? FP32_DEF_NAN : result;
+
+    #ifdef DEBUG
+    printf("Debug : f_op1_u = %f\n",f_op1_u);
+    printf("Debug : f_op1_l = %f\n",f_op1_l);
+    printf("Debug : f_op2_u = %f\n",f_op2_u);
+    printf("Debug : f_op2_l = %f\n",f_op2_l);
+    printf("Debug : f_acc = %f\n",f_acc);
+    printf("Debug : f_prod_l = %Lf\n",f_prod_l);
+    printf("Debug : f_prod_u = %Lf\n",f_prod_u);
+    printf("Debug : rslt = %Lf\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_min_hf(uint16_t op1,uint16_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+        return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+
+    rslt = (a>b) ? b : a;
+    // +0 is evaluated equal to -0 in C. Handling that case separately
+    if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) && (signF16UI(op1) !=
+        signF16UI(op2)) )
+    {
+        rslt = signF16UI(op1) ? a : b;
+    }
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+
+}
+
+uint32_t fp_min_sf(uint32_t op1,uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+        return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = (a>b) ?
b : a;
+    // +0 is evaluated equal to -0 in C. Handling that case separately
+    if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) &&
+        (signF32UI(op1) != signF32UI(op2)) )
+    {
+        rslt = signF32UI(op1) ? a : b;
+    }
+    u_rslt.f = rslt;
+    result = u_rslt.ui;
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+}
+
+uint16_t fp_min_bf(uint16_t op1,uint16_t op2)
+{
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    uint32_t result_f32;
+    uint16_t result;
+
+    op1_f32 = ((uint32_t)op1) << 16;
+    op2_f32 = ((uint32_t)op2) << 16;
+
+    result_f32 = fp_min_sf(op1_f32, op2_f32);
+    result_f32 = result_f32 >> 16;
+    result = result_f32 & 0xFFFF;
+    return result;
+}
+
+
+uint16_t fp_max_hf(uint16_t op1,uint16_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    uint32_t op1_f32;
+    uint32_t op2_f32;
+
+    float a,b,rslt;
+    uint32_t result_f32;
+    uint16_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF16UI(op1) || isNaNF16UI(op2))
+        return FP16_DEF_NAN;
+
+    op1_f32 = f16_to_f32(op1);
+    op2_f32 = f16_to_f32(op2);
+
+    u_op1.ui = op1_f32;
+    u_op2.ui = op2_f32;
+    a = u_op1.f;
+    b = u_op2.f;
+
+    rslt = (a>b) ? a : b;
+    // +0 is evaluated equal to -0 in C. Handling that case separately
+    if( (fabs(a) == 0.0f) &&
+        (fabs(b) == 0.0f) && (signF16UI(op1) != signF16UI(op2)) )
+    {
+        rslt = signF16UI(op1) ? b : a;
+    }
+    u_rslt.f = rslt;
+    result_f32 = u_rslt.ui;
+
+    result = f32_to_f16(result_f32);
+
+    #ifdef DEBUG
+    printf("Debug : a = %f\n",a);
+    printf("Debug : b = %f\n",b);
+    printf("Debug : rslt = %f\n",rslt);
+    printf("Debug : result =0x%08x\n",result);
+    #endif
+
+    return result;
+
+}
+
+uint32_t fp_max_sf(uint32_t op1,uint32_t op2)
+{
+    union ui32_f32 u_op1;
+    union ui32_f32 u_op2;
+    union ui32_f32 u_rslt;
+
+    float a,b,rslt;
+    uint32_t result;
+
+    #ifdef DEBUG
+    printf("Debug : op1 =0x%08x\n",op1);
+    printf("Debug : op2 =0x%08x\n",op2);
+    #endif
+
+    if(isNaNF32UI(op1) || isNaNF32UI(op2))
+        return FP32_DEF_NAN;
+
+    u_op1.ui = op1;
+    u_op2.ui = op2;
+    a = u_op1.f;
+    b = u_op2.f;
+    rslt = (a>b) ? a : b;
+    // +0 is evaluated equal to -0 in C. Handling that case separately
+    if( (fabs(a) == 0.0f) && (fabs(b) == 0.0f) &&
+        (signF32UI(op1) != signF32UI(op2)) )
+    {
+        rslt = signF32UI(op1) ?
b : a; + } + u_rslt.f = rslt; + result = u_rslt.ui; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); + #endif + + return result; +} + +uint16_t fp_max_bf(uint16_t op1,uint16_t op2) +{ + uint32_t op1_f32; + uint32_t op2_f32; + + uint32_t result_f32; + uint16_t result; + + op1_f32 = ((uint32_t)op1) << 16; + op2_f32 = ((uint32_t)op2) << 16; + + result_f32 = fp_max_sf(op1_f32, op2_f32); + result_f32 = result_f32 >> 16; + result = result_f32 & 0xFFFF; + return result; +} + +uint16_t fp_abs_bf(uint16_t op1) +{ + union ui32_f32 u_op1; + + float result_f; + uint32_t result_f32; + uint16_t result; + + u_op1.ui = ((uint32_t)op1) << 16; + + result_f = fabs(u_op1.f); + u_op1.f = result_f; + result_f32 = u_op1.ui >> 16; + result = result_f32 & 0xFFFF; + return result; +} + +uint16_t fp_neg_bf(uint16_t op1) +{ + union ui32_f32 u_op1; + + float result_f; + uint32_t result_f32; + uint16_t result; + + u_op1.ui = ((uint32_t)op1) << 16; + + result_f = -(u_op1.f); + u_op1.f = result_f; + result_f32 = u_op1.ui >> 16; + result = result_f32 & 0xFFFF; + return result; +} + +//float fmaf( float x, float y, float z ); +uint16_t fp_mult_hf_hf_acc_dumb (uint16_t op1, uint16_t op2, uint16_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + uint32_t acc_f32; + + float a,b,facc,rslt; + uint32_t result_f32; + uint16_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%04x\n",acc); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF16UI(acc)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + acc_f32 = f16_to_f32(acc); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc_f32; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + //rslt = fma(a,b,facc); + rslt = (a * b) + facc; + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + + result = f32_to_f16(result_f32); + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : facc = %f\n",facc); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} + +uint32_t fp_mult_sf_hf_acc (uint16_t op1, uint16_t op2, uint32_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + + float a,b,facc,rslt; + uint32_t result; + + #ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%08x\n",acc); + #endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF32UI(acc)) + return FP32_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + //rslt = fma(a,b,facc); + rslt = (a * b) + facc; + u_rslt.f = rslt; + result = u_rslt.ui; + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + + #ifdef DEBUG + printf("Debug : a = %f\n",a); + printf("Debug : b = %f\n",b); + printf("Debug : facc = %f\n",facc); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%04x\n",result); + #endif + + return result; +} diff --git a/target/hexagon/mmvec/kvx_ieee.h b/target/hexagon/mmvec/kvx_ieee.h new file mode 100644 index 000000000000..ad80b7023925 --- /dev/null +++ b/target/hexagon/mmvec/kvx_ieee.h @@ -0,0 +1,141 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef KVX_COMPACT_H +#define KVX_COMPACT_H 1 + +#include +#include "hex_arch_types.h" + +//Double precision +#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63)) +#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF) +#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )) +#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig))) +#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))) + +//SF defines +#define FP32_DEF_NAN 0x7FFFFFFF +#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF)) +#define isInfF32UI( a ) (((~(a) & 0x7F800000) == 0) && (((a) & 0x007FFFFF) == 0)) +#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31)) +#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF) +#define fracF32UI( a ) ((a) & 0x007FFFFF) +#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig)) + +//HF defines +#define FP16_DEF_NAN 0x7FFF +#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) +#define isInfF16UI( a ) (((~(a) & 0x7C00) == 0) && (((a) & 0x03FF) == 0)) +#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15)) +#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F) +#define fracF16UI( a ) ((a) & 0x03FF) +#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig)) + +#define UHW_MIN 0 +#define UHW_MAX 65535 +#define HW_MIN -32768 +#define HW_MAX 32767 + +#define UBYTE_MIN 0 +#define UBYTE_MAX 255 +#define BYTE_MIN -128 +#define BYTE_MAX 127 + +//union ui16_f16 { uint16_t ui; float16_t f; }; +union ui32_f32 { uint32_t ui; float f; }; +union ui64_f64 { uint64_t ui; double f; }; +struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; }; + +uint32_t shiftRightJam32( uint32_t a, uint_fast16_t dist ); +uint_fast8_t countLeadingZeros16( uint16_t a ); +struct exp8_sig16 normSubnormalF16Sig( uint_fast16_t sig ); +uint16_t roundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig ); + +//-------------------------------------------------------------------------- +// IEEE - FP Convert instructions +//-------------------------------------------------------------------------- +uint16_t f32_to_f16 ( uint32_t a); +uint32_t f16_to_f32( uint16_t a ); + +uint16_t f16_to_uh( uint16_t op1); +int16_t f16_to_h ( 
uint16_t op1); +uint8_t f16_to_ub( uint16_t op1); +int8_t f16_to_b ( uint16_t op1); + +uint16_t uh_to_f16(uint16_t op1); +uint16_t h_to_f16 (int16_t op1); +uint16_t ub_to_f16(uint8_t op1); +uint16_t b_to_f16 (int8_t op1); + +uint16_t sf_to_bf (int32_t op1); + +//-------------------------------------------------------------------------- +// IEEE - FP ADD/SUB/MPY instructions +//-------------------------------------------------------------------------- + +//size4s_t fp_mult(size4s_t input_1, size4s_t input_2); +uint32_t fp_mult_sf_sf (uint32_t op1, uint32_t op2); +uint32_t fp_add_sf_sf (uint32_t op1, uint32_t op2); +uint32_t fp_sub_sf_sf (uint32_t op1, uint32_t op2); + +uint16_t fp_mult_hf_hf (uint16_t op1, uint16_t op2); +uint16_t fp_add_hf_hf (uint16_t op1, uint16_t op2); +uint16_t fp_sub_hf_hf (uint16_t op1, uint16_t op2); + +uint32_t fp_mult_sf_hf (uint16_t op1, uint16_t op2); +uint32_t fp_add_sf_hf (uint16_t op1, uint16_t op2); +uint32_t fp_sub_sf_hf (uint16_t op1, uint16_t op2); + +uint32_t fp_mult_sf_bf (uint16_t op1, uint16_t op2); +uint32_t fp_add_sf_bf (uint16_t op1, uint16_t op2); +uint32_t fp_sub_sf_bf (uint16_t op1, uint16_t op2); + +//-------------------------------------------------------------------------- +// IEEE - FP Accumulate instructions +//-------------------------------------------------------------------------- + +uint16_t fp_mult_hf_hf_acc (uint16_t op1, uint16_t op2, uint16_t acc); +uint32_t fp_mult_sf_bf_acc (uint16_t op1, uint16_t op2, uint32_t acc); +uint32_t fp_mult_sf_hf_acc (uint16_t op1, uint16_t op2, uint32_t acc); + +//-------------------------------------------------------------------------- +// IEEE - FP Reduce instructions +//-------------------------------------------------------------------------- + +uint32_t fp_vdmpy (uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l); +uint32_t fp_vdmpy_acc (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l); + +//-------------------------------------------------------------------------- +// IEEE - FP Select instructions +//-------------------------------------------------------------------------- + +uint16_t fp_min_hf(uint16_t op1,uint16_t op2); +uint16_t fp_max_hf(uint16_t op1,uint16_t op2); +uint32_t fp_min_sf(uint32_t op1,uint32_t op2); +uint32_t fp_max_sf(uint32_t op1,uint32_t op2); +uint16_t fp_min_bf(uint16_t op1,uint16_t op2); +uint16_t fp_max_bf(uint16_t op1,uint16_t op2); +uint16_t fp_abs_bf(uint16_t op1); +uint16_t fp_neg_bf(uint16_t op1); + +//-------------------------------------------------------------------------- +// IEEE - FP Experiment Implementations +//-------------------------------------------------------------------------- +uint16_t fp_mult_hf_hf_acc_dumb (uint16_t op1, uint16_t op2, uint16_t acc); +uint32_t fp_vdmpy_acc_dumb (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l); +#endif diff --git a/target/hexagon/mmvec/kvx_mac_reduce.c b/target/hexagon/mmvec/kvx_mac_reduce.c new file mode 100644 index 000000000000..e11e41ae5891 --- /dev/null +++ b/target/hexagon/mmvec/kvx_mac_reduce.c @@ -0,0 +1,1156 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "kvx_ieee.h" + +#define DF_MANTBITS() 52 +#define SF_MANTBITS() 23 +#define HF_MANTBITS() 10 + +#define DF_INF_EXP 0x7ff +#define DF_BIAS 1023 + +#define SF_INF_EXP 0xff +#define SF_BIAS 127 + +#define HF_INF_EXP 0x1f +#define HF_BIAS 15 + +#define WAY_BIG_EXP 4096 + +#define isz(X) (fabs(X) == 0.0f) + + +typedef union { + double f; + size8u_t i; +#ifndef SLOWLARIS + struct { + size8u_t mant:52; + size8u_t exp:11; + size8u_t sign:1; + } x; +#else + struct { + size8u_t sign:1; + size8u_t exp:11; + size8u_t mant:52; + } x; +#endif +} df_t; + +typedef union { + float f; + size4u_t i; +#ifndef SLOWLARIS + struct { + size4u_t mant:23; + size4u_t exp:8; + size4u_t sign:1; + } x; +#else + struct { + size4u_t sign:1; + size4u_t exp:8; + size4u_t mant:23; + } x; +#endif +} sf_t; + +typedef struct { + union { + size8u_t low; + struct { +#ifndef SLOWLARIS + size4u_t w0; + size4u_t w1; +#else + size4u_t w1; + size4u_t w0; +#endif + }; + }; + union { + size8u_t high; + struct { +#ifndef SLOWLARIS + size4u_t w2; + size4u_t w3; +#else + size4u_t w3; + size4u_t w2; +#endif + }; + }; +} int128_t; + +typedef struct { + int128_t mant; + size4s_t exp; + size1u_t sign; + size1u_t guard; + size1u_t round; + size1u_t sticky; +} xf_t; + +static inline void xf_init(xf_t * p) +{ + p->mant.low = 0; + p->mant.high = 0; + p->exp = 0; + p->sign = 0; + p->guard = 0; + p->round = 0; + p->sticky = 0; +} + +size8u_t df_getmant_kvx(df_t a); +size8u_t df_getmant_kvx(df_t a) +{ + //int class = fpclassify(a.f); + //switch (class) { + //case FP_NORMAL: + return (a.x.mant | 1ULL << 52); + //case FP_ZERO: + // return 0; + //case FP_SUBNORMAL: + // return a.x.mant; + //default: + // return -1; + //}; +} + +size4s_t df_getexp_kvx(df_t a); +size4s_t df_getexp_kvx(df_t a) +{ + //int class = fpclassify(a.f); + //switch (class) { + //case FP_NORMAL: + return a.x.exp; + //case FP_SUBNORMAL: + // return a.x.exp + 1; + //default: + // return -1; + //}; +} + +size8u_t sf_getmant_kvx(sf_t a); +size8u_t sf_getmant_kvx(sf_t a) +{ + //case FP_ZERO: + if((a.x.mant == 0) && (a.x.exp == 0)) + return 0; + //case FP_SUBNORMAL: + else if((a.x.mant != 0) && (a.x.exp == 0)) + return a.x.mant; + //case FP_NORMAL: + else if((a.x.exp != 0xFF) && (a.x.exp != 0)) + return (a.x.mant | 1ULL << 23); + //default: + else + return -1; +} + +size4s_t sf_getexp_kvx(sf_t a); +size4s_t sf_getexp_kvx(sf_t a) +{ + //case FP_SUBNORMAL: + if((a.x.mant != 0) && (a.x.exp == 0)) + return a.x.exp + 1; + //case FP_NORMAL: + else if((a.x.exp != 0xFF) && (a.x.exp != 0)) + return a.x.exp; + //default: + else + return -1; +} + +static inline void xf_debug(const char *msg, xf_t a) +{ +#ifdef DEBUG + printf("%s %c0x%016llx_%016llx /%d/%d/%d p%d\n", msg, + a.sign ? 
'-' : '+', a.mant.high, a.mant.low, a.guard, + a.round, a.sticky, a.exp); +#endif +} + +static inline int128_t int128_shl(int128_t a, size4u_t amt) +{ + int128_t ret; + if (amt == 0) + return a; + if (amt > 128) { + ret.high = 0; + ret.low = 0; + return ret; + } + if (amt >= 64) { + amt -= 64; + a.high = a.low; + a.low = 0; + } + ret.high = a.high << amt; + ret.high |= (a.low >> (64 - amt)); + ret.low = a.low << amt; + return ret; +} + +static inline int128_t int128_shr(int128_t a, size4u_t amt) +{ + int128_t ret; + if (amt == 0) + return a; + if (amt > 128) { + ret.high = 0; + ret.low = 0; + return ret; + } + if (amt >= 64) { + amt -= 64; + a.low = a.high; + a.high = 0; + } + ret.low = a.low >> amt; + ret.low |= (a.high << (64 - amt)); + ret.high = a.high >> amt; + return ret; +} + + +#define int128_gt kvx_int128_gt +static inline int kvx_int128_gt(int128_t a, int128_t b) +{ + if (a.high == b.high) + return (a.low > b.low); + return (a.high > b.high); +} + +static inline xf_t xf_norm_left(xf_t a) +{ + a.exp--; + a.mant = int128_shl(a.mant, 1); + a.mant.low |= a.guard; + a.guard = a.round; + a.round = a.sticky; + return a; +} + +static inline xf_t xf_norm_right(xf_t a, int amt) +{ + if (amt > 130) { + a.sticky |= + a.round | a.guard | (a.mant.low != 0) | (a.mant.high != 0); + a.guard = a.round = a.mant.high = a.mant.low = 0; + a.exp += amt; + return a; + + } + while (amt >= 64) { + a.sticky |= a.round | a.guard | (a.mant.low != 0); + a.guard = (a.mant.low >> 63) & 1; + a.round = (a.mant.low >> 62) & 1; + a.mant.low = a.mant.high; + a.mant.high = 0; + a.exp += 64; + amt -= 64; + } + while (amt > 0) { + a.exp++; + a.sticky |= a.round; + a.round = a.guard; + a.guard = a.mant.low & 1; + a.mant = int128_shr(a.mant, 1); + amt--; + } + return a; +} + +#define int128_add kvx_int128_add +static inline int128_t kvx_int128_add(int128_t a, int128_t b) +{ + int128_t ret; + ret.low = a.low + b.low; + if ((ret.low < a.low) || (ret.low < b.low)) { + /* carry into high part */ + a.high += 1; + } + ret.high = a.high + b.high; + return ret; +} + +#define int128_sub kvx_int128_sub +static inline int128_t kvx_int128_sub(int128_t a, int128_t b, int borrow) +{ + int128_t ret; + ret.low = a.low - b.low; + if (ret.low > a.low) { + /* borrow into high part */ + a.high -= 1; + } + ret.high = a.high - b.high; + if (borrow == 0) { + return ret; + } else { + a.high = 0; + a.low = 1; + return int128_sub(ret, a, 0); + } +} + +/* Return an infinity with the same sign as a */ +static inline df_t infinite_df_t(xf_t a) +{ + df_t ret; + ret.x.sign = a.sign; + ret.x.exp = DF_INF_EXP; + ret.x.mant = 0ULL; + return ret; +} + +/* Return a maximum finite value with the same sign as a */ +static inline df_t maxfinite_df_t(xf_t a) +{ + df_t ret; + ret.x.sign = a.sign; + ret.x.exp = DF_INF_EXP - 1; + ret.x.mant = 0x000fffffffffffffULL; + return ret; +} + +static inline df_t f2df_t(double in) +{ + df_t ret; + ret.f = in; + return ret; +} + +/* Return an infinity with the same sign as a */ +static inline sf_t infinite_sf_t(xf_t a) +{ + sf_t ret; + ret.x.sign = a.sign; + ret.x.exp = SF_INF_EXP; + ret.x.mant = 0ULL; + return ret; +} + +/* Return a maximum finite value with the same sign as a */ +static inline sf_t maxfinite_sf_t(xf_t a) +{ + sf_t ret; + ret.x.sign = a.sign; + ret.x.exp = SF_INF_EXP - 1; + ret.x.mant = 0x007fffffUL; + return ret; +} + +static inline sf_t f2sf_t(float in) +{ + sf_t ret; + ret.f = in; + return ret; +} + +#define GEN_XF_ROUND(TYPE,MANTBITS,INF_EXP) \ +TYPE xf_round_kvx_##TYPE(xf_t a); \ +TYPE 
xf_round_kvx_##TYPE(xf_t a) \ +{ \ + TYPE ret; \ + ret.i = 0; \ + ret.x.sign = a.sign; \ + if ((a.mant.high == 0) && (a.mant.low == 0) \ + && ((a.guard | a.round | a.sticky) == 0)) { \ + /* result zero */ \ + /*switch (fegetround()) { */\ + /*case FE_DOWNWARD: */\ + /* return f2##TYPE(-0.0); */\ + /*default: */\ + if(a.sign) return f2##TYPE(-0.0); \ + else return f2##TYPE(0.0); \ + /*} */\ + } \ + /* Normalize right */ \ + /* We want MANTBITS bits of mantissa plus the leading one. */ \ + /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \ + /* So we need to normalize right while the high word is non-zero and \ + * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \ + xf_debug("input: ", a); \ + while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS+1)) != 0)) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("norm_right: ", a); \ + /* OK, now normalize left */ \ + /* We want to normalize left until we have a leading one in bit 24 */ \ + /* Theoretically, we only need to shift a maximum of one to the left if we \ + * shifted out lots of bits from B, or if we had no shift / 1 shift sticky shoudl be 0 \ + */ \ + while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \ + a = xf_norm_left(a); \ + } \ + xf_debug("norm_left: ", a); \ + /* OK, now we might need to denormalize because of potential underflow. We need \ + * to do this before rounding, and rounding might make us normal again */ \ + while (a.exp <= 0) { \ + a = xf_norm_right(a, 1 - a.exp); \ + /* Do we have underflow? That's when we get an inexact answer because we \ + * ran out of bits in a denormal. */ \ + if (a.guard || a.round || a.sticky) { \ + /*feraiseexcept(FE_UNDERFLOW);*/ \ + } \ + } \ + xf_debug("norm_denorm: ", a); \ + /* OK, we're relatively canonical... now we need to round */ \ + if (a.guard || a.round || a.sticky) { \ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* Chop and we're done */ \ + /* break; */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) a.mant.low += 1; */\ + /* break; */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) a.mant.low += 1; */\ + /* break; */\ + /*default: */\ + if (a.round || a.sticky) { \ + /* round up if guard is 1, down if guard is zero */ \ + a.mant.low += a.guard; \ + } else if (a.guard) { \ + /* exactly .5, round up if odd */ \ + a.mant.low += (a.mant.low & 1); \ + } \ + /*break; */\ + /*}*/ \ + } \ + xf_debug("post_round: ", a); \ + /* OK, now we might have carried all the way up. So we might need to shr once */ \ + /* at least we know that the lsb should be zero if we rounded and got a carry out... */ \ + if ((a.mant.low >> (MANTBITS+1)) != 0) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("once_norm_right: ", a); \ + /* Overflow? */ \ + if (a.exp >= INF_EXP) { \ + /* Yep, inf result */ \ + xf_debug("inf: ", a); \ + /*feraiseexcept(FE_OVERFLOW);*/ \ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*default: */\ + return infinite_##TYPE(a); \ + /*} */\ + } \ + /* Underflow? */ \ + if (a.mant.low & (1ULL << MANTBITS)) { \ + /* Leading one means: No, we're normal. So, we should be done... 
*/ \ + xf_debug("norm: ", a); \ + ret.x.exp = a.exp; \ + ret.x.mant = a.mant.low; \ + return ret; \ + } \ + xf_debug("denorm: ", a); \ + if (a.exp != 1) \ + /*printf("a.exp == %d\n", a.exp);*/ \ + assert(a.exp == 1); \ + ret.x.exp = 0; \ + ret.x.mant = a.mant.low; \ + return ret; \ +} + +#define GEN_HF_ROUND(TYPE,MANTBITS,INF_EXP) \ +TYPE hf_round_##TYPE(xf_t a); \ +TYPE hf_round_##TYPE(xf_t a) \ +{ \ + TYPE ret; \ + ret.i = 0; \ + ret.x.sign = a.sign; \ + if ((a.mant.high == 0) && (a.mant.low == 0) \ + && ((a.guard | a.round | a.sticky) == 0)) { \ + /* result zero */ \ + /*switch (fegetround()) { */\ + /*case FE_DOWNWARD: */\ + /* return f2##TYPE(-0.0); */\ + /*default: */\ + if(a.sign) return f2##TYPE(-0.0); \ + else return f2##TYPE(0.0); \ + /*} */\ + } \ + /* Normalize right */ \ + /* We want MANTBITS bits of mantissa plus the leading one. */ \ + /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \ + /* So we need to normalize right while the high word is non-zero and \ + * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \ + xf_debug("input: ", a); \ + while ((a.mant.high != 0) || ((a.mant.low >> (MANTBITS+1)) != 0)) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("norm_right: ", a); \ + /* OK, now normalize left */ \ + /* We want to normalize left until we have a leading one in bit 24 */ \ + /* Theoretically, we only need to shift a maximum of one to the left if we \ + * shifted out lots of bits from B, or if we had no shift / 1 shift sticky shoudl be 0 \ + */ \ + while ((a.mant.low & (1ULL << MANTBITS)) == 0) { \ + a = xf_norm_left(a); \ + } \ + xf_debug("norm_left: ", a); \ + /* OK, now we might need to denormalize because of potential underflow. We need \ + * to do this before rounding, and rounding might make us normal again */ \ + while (a.exp <= 0) { \ + a = xf_norm_right(a, 1 - a.exp); \ + /* Do we have underflow? That's when we get an inexact answer because we \ + * ran out of bits in a denormal. */ \ + if (a.guard || a.round || a.sticky) { \ + /*feraiseexcept(FE_UNDERFLOW);*/ \ + } \ + } \ + xf_debug("norm_denorm: ", a); \ + /* OK, we're relatively canonical... now we need to round */ \ + /*if (a.guard || a.round || a.sticky) { */\ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* Chop and we're done */ \ + /* break; */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) a.mant.low += 1; */\ + /* break; */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) a.mant.low += 1; */\ + /* break; */\ + /*default: */\ + if (a.round || a.sticky || a.guard) { \ + /* round up if guard is 1, down if guard is zero */ \ + if ((a.mant.low & 0xFFF) == 0) a.mant.low += 1; \ + /* } else if (a.guard) {*/ \ + /* exactly .5, round up if odd */ \ + /* a.mant.low += (a.mant.low & 1); */\ + } \ + /*break; */\ + /*}*/ \ + /*} */\ + xf_debug("post_round: ", a); \ + /* OK, now we might have carried all the way up. So we might need to shr once */ \ + /* at least we know that the lsb should be zero if we rounded and got a carry out... */ \ + if ((a.mant.low >> (MANTBITS+1)) != 0) { \ + a = xf_norm_right(a, 1); \ + } \ + xf_debug("once_norm_right: ", a); \ + /* Overflow? 
*/ \ + if (a.exp >= INF_EXP) { \ + /* Yep, inf result */ \ + xf_debug("inf: ", a); \ + /*feraiseexcept(FE_OVERFLOW);*/ \ + /*feraiseexcept(FE_INEXACT);*/ \ + /*switch (fegetround()) { */\ + /*case FE_TOWARDZERO: */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_UPWARD: */\ + /* if (a.sign == 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*case FE_DOWNWARD: */\ + /* if (a.sign != 0) */\ + /* return infinite_##TYPE(a); */\ + /* else */\ + /* return maxfinite_##TYPE(a); */\ + /*default: */\ + return infinite_##TYPE(a); \ + /*} */\ + } \ + /* Underflow? */ \ + if (a.mant.low & (1ULL << MANTBITS)) { \ + /* Leading one means: No, we're normal. So, we should be done... */ \ + xf_debug("norm: ", a); \ + ret.x.exp = a.exp; \ + ret.x.mant = a.mant.low; \ + return ret; \ + } \ + xf_debug("denorm: ", a); \ + if (a.exp != 1) \ + /*printf("a.exp == %d\n", a.exp);*/ \ + assert(a.exp == 1); \ + ret.x.exp = 0; \ + ret.x.mant = a.mant.low; \ + return ret; \ +} + + +GEN_XF_ROUND(df_t,DF_MANTBITS(),DF_INF_EXP) +GEN_XF_ROUND(sf_t,SF_MANTBITS(),SF_INF_EXP) +GEN_HF_ROUND(sf_t,SF_MANTBITS(),SF_INF_EXP) + +#define int128_mult_6464 kvx_int128_mult_6464 +static inline int128_t kvx_int128_mult_6464(size8u_t ai, size8u_t bi) +{ + int128_t ret; + int128_t a, b; + size8u_t pp0, pp1a, pp1b, pp1s, pp2; + +#ifdef DEBUG + printf("ai/bi: 0x%016llx/0x%016llx\n", ai, bi); +#endif + a.high = b.high = 0; + a.low = ai; + b.low = bi; + pp0 = (size8u_t) a.w0 * (size8u_t) b.w0; + pp1a = (size8u_t) a.w1 * (size8u_t) b.w0; + pp1b = (size8u_t) b.w1 * (size8u_t) a.w0; + pp2 = (size8u_t) a.w1 * (size8u_t) b.w1; +#ifdef DEBUG + printf("pp2/1b/1a/0: 0x%016llx/0x%016llx/0x%016llx/0x%016llx\n", + pp2, pp1b, pp1a, pp0); +#endif + pp1s = pp1a + pp1b; + if ((pp1s < pp1a) || (pp1s < pp1b)) { + pp2 += (1ULL << 32); + } + ret.low = pp0 + (pp1s << 32); + if ((ret.low < pp0) || (ret.low < (pp1s << 32))) + pp2 += 1; + ret.high = pp2 + (pp1s >> 32); +#ifdef DEBUG + printf("pp1s/rethi/retlo: 0x%016llx/0x%016llx/0x%016llx\n", + pp1s, ret.high, ret.low); +#endif + return ret; +} + +xf_t xf_add_kvx(xf_t a, xf_t b); + +xf_t xf_sub_kvx(xf_t a, xf_t b, int negate); +xf_t xf_sub_kvx(xf_t a, xf_t b, int negate) +{ + xf_t ret; + xf_init(&ret); + int borrow; + xf_debug("-->Sub/a: ", a); + xf_debug("-->Sub/b: ", b); + if (a.sign != b.sign) { + b.sign = !b.sign; + return xf_add_kvx(a, b); + } + if (b.exp > a.exp) { + /* small - big == - (big - small) */ + return xf_sub_kvx(b, a, !negate); + } + if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) { + /* small - big == - (big - small) */ + return xf_sub_kvx(b, a, !negate); + } + xf_debug("OK: Sub/a: ", a); + xf_debug("OK: Sub/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: shrink a exponent and grow mantissa */ + if (a.mant.high & (1ULL << 62)) { + /* Can't grow a any more */ + break; + } else { + a = xf_norm_left(a); + } + } + xf_debug("norm_l: Sub/a: ", a); + xf_debug("norm_l: Sub/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: grow b exponent and shrink mantissa */ + /* Keep around shifted out bits... we might need those later */ + b = xf_norm_right(b, a.exp - b.exp); + } + xf_debug("norm_r: Sub/a: ", a); + xf_debug("norm_r: Sub/b: ", b); + if ((int128_gt(b.mant, a.mant))) { + xf_debug("retry: Sub/a: ", a); + xf_debug("retry: Sub/b: ", b); + return xf_sub_kvx(b, a, !negate); + } + /* OK, now things should be normalized! 
*/ + ret.sign = a.sign; + ret.exp = a.exp; + assert(!int128_gt(b.mant, a.mant)); + borrow = (b.round << 2) | (b.guard << 1) | b.sticky; + ret.mant = int128_sub(a.mant, b.mant, (borrow != 0)); + borrow = 0 - borrow; + ret.guard = (borrow >> 2) & 1; + ret.round = (borrow >> 1) & 1; + ret.sticky = (borrow >> 0) & 1; + if (negate) + ret.sign = !ret.sign; + //According to the IEEE standard, Zero result in a subtraction should always be positive + if ((ret.sign) && ((ret.mant.high == 0) && (ret.mant.low == 0) && ((ret.guard | ret.round | ret.sticky) == 0))) + ret.sign = !ret.sign; + xf_debug("ret: Sub ", ret); + return ret; +} + + +xf_t xf_add_kvx(xf_t a, xf_t b) +{ + xf_t ret; + xf_init(&ret); + xf_debug("-->Add/a: ", a); + xf_debug("-->Add/b: ", b); + if (a.sign != b.sign) { + b.sign = !b.sign; + return xf_sub_kvx(a, b, 0); + } + if (b.exp > a.exp) { + /* small + big == (big + small) */ + return xf_add_kvx(b, a); + } + if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) { + /* small + big == (big + small) */ + return xf_add_kvx(b, a); + } + xf_debug("OK? Add/a: ", a); + xf_debug("OK? Add/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: shrink a exponent and grow mantissa */ + if (a.mant.high & (1ULL << 62)) { + /* Can't grow a any more */ + break; + } else { + a = xf_norm_left(a); + } + } + xf_debug("norm_l: Add/a: ", a); + xf_debug("norm_l: Add/b: ", b); + while (a.exp > b.exp) { + /* Try to normalize exponents: grow b exponent and shrink mantissa */ + /* Keep around shifted out bits... we might need those later */ + b = xf_norm_right(b, a.exp - b.exp); + } + xf_debug("norm_r: Add/a: ", a); + xf_debug("norm_r: Add/b: ", b); + /* OK, now things should be normalized! */ + if (int128_gt(b.mant, a.mant)) { + xf_debug("retry: Add/a: ", a); + xf_debug("retry: Add/b: ", b); + return xf_add_kvx(b, a); + }; + ret.sign = a.sign; + ret.exp = a.exp; + assert(!int128_gt(b.mant, a.mant)); + ret.mant = int128_add(a.mant, b.mant); + ret.guard = b.guard; + ret.round = b.round; + ret.sticky = b.sticky; + xf_debug("ret: Add ", ret); + return ret; +} + + +float internal_fma_kvx(float a_in, float b_in, float c_in, int scale); +float internal_fma_kvx(float a_in, float b_in, float c_in, int scale) +{ + sf_t a, b, c; + xf_t prod; + xf_t acc; + xf_t result; +#if 0 + df_t t; + fexcept_t flags_tmp; +#endif + xf_init(&prod); + xf_init(&acc); + xf_init(&result); + a.f = a_in; + b.f = b_in; + c.f = c_in; +// printf("internal_fma_kvxx: 0x%016x * 0x%016x + 0x%016x sc: %d\n", +// fUNFLOAT(a_in), fUNFLOAT(b_in), fUNFLOAT(c_in), scale); +// if (isinf(a.f) || isinf(b.f) || isinf(c.f)) +// return special_fmaf(a, b, c); +// if (isnan(a.f) || isnan(b.f) || isnan(c.f)) +// return special_fmaf(a, b, c); + if ((scale == 0) && (isz(a.f) || isz(b.f))) + return (a.f * b.f + c.f); + /* Is a*b exact? If so, we don't have to go the slow way */ + /* EJP: axe this for simplicity? 
*/ +#if 0 + fegetexceptflag(&flags_tmp, FE_ALL_EXCEPT); + feclearexcept(FE_ALL_EXCEPT); + t.f = a.f * b.f; + if (0 && (scale == 0) && isfinite(t.f) + && fetestexcept(FE_ALL_EXCEPT) == 0) { + /* It's exactly correct, we can just do the add and return */ + fesetexceptflag(&flags_tmp, FE_ALL_EXCEPT); + asm volatile (""); + t.f = (t.f + c.f); + return t.f; + } + fesetexceptflag(&flags_tmp, FE_ALL_EXCEPT); +#endif + /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */ + prod.mant = int128_mult_6464(sf_getmant_kvx(a), sf_getmant_kvx(b)); + /* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */ + prod.exp = sf_getexp_kvx(a) + sf_getexp_kvx(b) - SF_BIAS - 23; + prod.sign = a.x.sign ^ b.x.sign; + if (isz(a.f) || isz(b.f)) prod.exp = -2*WAY_BIG_EXP; + xf_debug("prod: ", prod); + if ((scale > 0) /*&& (fpclassify(c.f) == FP_SUBNORMAL)*/) { + acc.mant = int128_mult_6464(0,0); + acc.exp = -WAY_BIG_EXP; + acc.sign = c.x.sign; + acc.sticky = 1; + xf_debug("special denorm acc: ",acc); + result = xf_add_kvx(prod,acc); + } else if (!isz(c.f)) { + acc.mant = int128_mult_6464(sf_getmant_kvx(c), 1); + acc.exp = sf_getexp_kvx(c); + acc.sign = c.x.sign; + xf_debug("acc: ", acc); + result = xf_add_kvx(prod, acc); + } else { + result = prod; + } + xf_debug("sum: ", result); +#ifdef DEBUG + printf("Scaling: %d\n", scale); +#endif + result.exp += scale; + xf_debug("post-scale: ", result); + return hf_round_sf_t(result).f; +} + +// result = (a*c) + (b*d) + acc +float internal_vdmpy_acc(float a_in, float b_in, float c_in, float d_in, float acc_in, int scale); +float internal_vdmpy_acc(float a_in, float b_in, float c_in, float d_in, float acc_in, int scale) +{ + sf_t a, b, c, d, accm; + xf_t prod1; //a*c + xf_t prod2; //b*d + xf_t acc; + xf_t result_temp; + xf_t result; + + xf_init(&prod1); + xf_init(&prod2); + xf_init(&acc); + xf_init(&result_temp); + xf_init(&result); + + a.f = a_in; + b.f = b_in; + c.f = c_in; + d.f = d_in; + accm.f = acc_in; + + /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */ + prod1.mant = int128_mult_6464(sf_getmant_kvx(a), sf_getmant_kvx(c)); + /* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */ + prod1.exp = sf_getexp_kvx(a) + sf_getexp_kvx(c) - SF_BIAS - 23; + prod1.sign = a.x.sign ^ c.x.sign; + + /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */ + prod2.mant = int128_mult_6464(sf_getmant_kvx(b), sf_getmant_kvx(d)); + /* Note: extracting the mantissa into an int is multiplying by 2**23, so adjust here: */ + prod2.exp = sf_getexp_kvx(b) + sf_getexp_kvx(d) - SF_BIAS - 23; + prod2.sign = b.x.sign ^ d.x.sign; + + + if (isz(a.f) || isz(c.f)) prod1.exp = -2*WAY_BIG_EXP; + if (isz(b.f) || isz(d.f)) prod2.exp = -2*WAY_BIG_EXP; + + xf_debug("prod1: ", prod1); + xf_debug("prod2: ", prod2); + + if ((scale > 0) /*&& (fpclassify(c.f) == FP_SUBNORMAL)*/) { + acc.mant = int128_mult_6464(0,0); + acc.exp = -WAY_BIG_EXP; + acc.sign = c.x.sign; + acc.sticky = 1; + xf_debug("special denorm acc: ",acc); + //result = xf_add_kvx(prod,acc); + } else /*if (!isz(accm.f)) */{ + acc.mant = int128_mult_6464(sf_getmant_kvx(accm), 1); + acc.exp = sf_getexp_kvx(accm); + acc.sign = accm.x.sign; + xf_debug("acc: ", acc); + //result = xf_add_kvx(prod, acc); + } /*else { + result = xf_add_kvx(prod1, prod2); + }*/ + + //Add the 3 numbers: prod1 prod2 acc + //result_temp = xf_add_kvx(prod1,prod2); + //result = xf_add_kvx(result_temp,acc); + result_temp = xf_add_kvx(prod1,prod2); + result = xf_add_kvx(result_temp,acc); + + xf_debug("sum: ", result); +#ifdef DEBUG + 
printf("Scaling: %d\n", scale); +#endif + result.exp += scale; + xf_debug("post-scale: ", result); + return xf_round_kvx_sf_t(result).f; +} + + +uint32_t fp_vdmpy_acc (uint32_t acc,uint16_t op1_u,uint16_t op1_l,uint16_t op2_u,uint16_t op2_l) +{ + union ui32_f32 u_op; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_u_f32, op1_l_f32, op2_u_f32, op2_l_f32; + float f_op1_u, f_op1_l, f_op2_u, f_op2_l, f_acc; + float f_prod_l = 0, f_prod_u = 0, rslt; + uint32_t result; + +#ifdef DEBUG + printf("Debug : op1_u =0x%04x\n",op1_u); + printf("Debug : op1_l =0x%04x\n",op1_l); + printf("Debug : op2_u =0x%04x\n",op2_u); + printf("Debug : op2_l =0x%04x\n",op2_l); + printf("Debug : acc =0x%08x\n",acc); +#endif + + if(isNaNF16UI(op1_u) || isNaNF16UI(op1_l) || isNaNF16UI(op2_u) || isNaNF16UI(op2_l) || isNaNF32UI(acc)) + return FP32_DEF_NAN; + + op1_u_f32 = f16_to_f32(op1_u); + op1_l_f32 = f16_to_f32(op1_l); + op2_u_f32 = f16_to_f32(op2_u); + op2_l_f32 = f16_to_f32(op2_l); + +#ifdef DEBUG + printf("Debug : op1_u_f32 =0x%08x\n",op1_u_f32); + printf("Debug : op1_l_f32 =0x%08x\n",op1_l_f32); + printf("Debug : op2_u_f32 =0x%08x\n",op2_u_f32); + printf("Debug : op2_l_f32 =0x%08x\n",op2_l_f32); +#endif + + u_op.ui = op1_u_f32; + f_op1_u = u_op.f; + + u_op.ui = op1_l_f32; + f_op1_l = u_op.f; + + u_op.ui = op2_l_f32; + f_op2_l = u_op.f; + + u_op.ui = op2_u_f32; + f_op2_u = u_op.f; + + u_acc.ui = acc; + f_acc = u_acc.f; + +#ifdef DEBUG + printf("Debug_0 : f_op1_u = %f\n",f_op1_u); + printf("Debug_0 : f_op1_l = %f\n",f_op1_l); + printf("Debug_0 : f_op2_u = %f\n",f_op2_u); + printf("Debug_0 : f_op2_l = %f\n",f_op2_l); + printf("Debug_0 : f_acc = %f\n",f_acc); +#endif + + f_prod_l = (f_op1_l * f_op2_l); + f_prod_u = (f_op1_u * f_op2_u); + + if(isInfF16UI(op1_u) || isInfF16UI(op1_l) || isInfF16UI(op2_u) || isInfF16UI(op2_l) || isInfF32UI(acc)) + { + rslt = (f_prod_u + f_prod_l + f_acc); +#ifdef DEBUG + printf("Debug_inf : rslt = %f\n",rslt); +#endif + u_rslt.f = rslt; + result = u_rslt.ui; +#ifdef DEBUG + printf("Debug_inf : result =0x%08x\n",result); +#endif + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; +#ifdef DEBUG + printf("Debug_inf : result final =0x%08x\n",result); +#endif + return result; + } + + //If any of the below is a zero, we can use easy approach + if(isz(f_prod_l) || isz(f_prod_u) || isz(f_acc)) + { + rslt = (f_prod_u + f_prod_l + f_acc); +#ifdef DEBUG + printf("Debug_inf : rslt = %f\n",rslt); +#endif + u_rslt.f = rslt; + result = u_rslt.ui; +#ifdef DEBUG + printf("Debug_inf : result =0x%08x\n",result); +#endif + result = isNaNF32UI(result) ? FP32_DEF_NAN : result; +#ifdef DEBUG + printf("Debug_inf : result final =0x%08x\n",result); +#endif + return result; + } + + +////---------------------------------------------------------------------------------------------------- +// f_prod_l = (f_op1_l * f_op2_l); +// f_prod_u = (f_op1_u * f_op2_u); +// +// printf("Debug_1 : f_prod_l = %f\n",f_prod_l); +// printf("Debug_1 : f_prod_u = %f\n",f_prod_u); +// +// rslt = (f_prod_u + f_prod_l + f_acc); +// printf("Debug_1 : rslt = %f\n",rslt); +// u_rslt.f = rslt; +// result = u_rslt.ui; +// printf("Debug_1 : result =0x%08x\n",result); +////---------------------------------------------------------------------------------------------------- + + rslt = internal_vdmpy_acc(f_op1_u, f_op1_l,f_op2_u,f_op2_l,f_acc,0); + u_rslt.f = rslt; + result = u_rslt.ui; +#ifdef DEBUG + printf("Debug_2 : rslt = %f\n",rslt); + printf("Debug_2 : result =0x%08x\n",result); +#endif + + result = isNaNF32UI(result) ? 
FP32_DEF_NAN : result; + +#ifdef DEBUG + printf("Debug : f_op1_u = %f\n",f_op1_u); + printf("Debug : f_op1_l = %f\n",f_op1_l); + printf("Debug : f_op2_u = %f\n",f_op2_u); + printf("Debug : f_op2_l = %f\n",f_op2_l); + printf("Debug : f_acc = %f\n",f_acc); + printf("Debug : f_prod_l = %f\n",f_prod_l); + printf("Debug : f_prod_u = %f\n",f_prod_u); + printf("Debug : rslt = %f\n",rslt); + printf("Debug : result =0x%08x\n",result); +#endif + + return result; +} + + +uint16_t fp_mult_hf_hf_acc (uint16_t op1, uint16_t op2, uint16_t acc) +{ + union ui32_f32 u_op1; + union ui32_f32 u_op2; + union ui32_f32 u_acc; + union ui32_f32 u_rslt; + + uint32_t op1_f32; + uint32_t op2_f32; + uint32_t acc_f32; + + float a,b,facc,rslt; + uint32_t result_f32; + uint16_t result; + +#ifdef DEBUG + printf("Debug : op1 =0x%04x\n",op1); + printf("Debug : op2 =0x%04x\n",op2); + printf("Debug : acc =0x%04x\n",acc); +#endif + + if(isNaNF16UI(op1) || isNaNF16UI(op2) || isNaNF16UI(acc)) + return FP16_DEF_NAN; + + op1_f32 = f16_to_f32(op1); + op2_f32 = f16_to_f32(op2); + acc_f32 = f16_to_f32(acc); + +#ifdef DEBUG + printf("Debug : op1_f32 = 0x%08x\n",op1_f32); + printf("Debug : op2_f32 = 0x%08x\n",op2_f32); + printf("Debug : acc_f32 = 0x%08x\n",acc_f32); +#endif + + u_op1.ui = op1_f32; + u_op2.ui = op2_f32; + u_acc.ui = acc_f32; + a = u_op1.f; + b = u_op2.f; + facc = u_acc.f; + +#ifdef DEBUG + printf("Debug_1 : a = %f\n",a); + printf("Debug_1 : b = %f\n",b); + printf("Debug_1 : facc = %f\n",facc); +#endif + + if(isInfF16UI(op1) || isInfF16UI(op2) || isInfF16UI(acc)) + { + rslt = (a * b) + facc; +#ifdef DEBUG + printf("Debug_inf : rslt = %f\n",rslt); +#endif + u_rslt.f = rslt; + result_f32 = u_rslt.ui; + result = f32_to_f16(result_f32); +#ifdef DEBUG + printf("Debug_inf : result_f32 =0x%08x\n",result_f32); + printf("Debug_inf : result =0x%04x\n",result); +#endif + result = isNaNF16UI(result) ? FP16_DEF_NAN : result; +#ifdef DEBUG + printf("Debug_inf : result final =0x%04x\n",result); +#endif + return result; + } + +// //---------------------------------------------------------------------------------------------------- +// rslt = (a * b) + facc; +// u_rslt.f = rslt; +// result_f32 = u_rslt.ui; +// printf("Debug_3 : result_f32 =0x%08x\n",result_f32); +// result = f32_to_f16(result_f32); +// printf("Debug_3 : result =0x%04x\n",result); +// //---------------------------------------------------------------------------------------------------- + + //rslt = fma(a,b,facc); + rslt = internal_fma_kvx(a, b, facc, 0); + u_rslt.f = rslt; + result_f32 = u_rslt.ui; +#ifdef DEBUG + printf("Debug_2 : rslt = %f\n",rslt); + printf("Debug_2 : result_f32 =0x%08x\n",result_f32); +#endif + + result = f32_to_f16(result_f32); + +#ifdef DEBUG + printf("Debug_2 : result =0x%04x\n",result); +#endif + + result = isNaNF16UI(result) ? 
FP16_DEF_NAN : result;
+
+#ifdef DEBUG
+    printf("Debug_2 : result final =0x%04x\n",result);
+#endif
+
+    return result;
+}
+
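[Reviewer note, not part of the patch: kvx_int128_mult_6464 above assembles the 128-bit product from four 32x32 partial products and propagates the carries by hand. On compilers that provide unsigned __int128 (a GCC/Clang extension), that carry logic can be cross-checked in a few lines; test_mult_6464 below is a hypothetical test sketch, not patch code.]

    #include <assert.h>
    #include <stdint.h>

    /* Cross-check the hand-carried 64x64->128 multiply against the
     * compiler's unsigned __int128 (GCC/Clang extension). */
    static void test_mult_6464(uint64_t a, uint64_t b)
    {
        uint64_t a0 = (uint32_t)a, a1 = a >> 32;
        uint64_t b0 = (uint32_t)b, b1 = b >> 32;
        uint64_t pp0 = a0 * b0, pp1a = a1 * b0, pp1b = b1 * a0, pp2 = a1 * b1;
        uint64_t pp1s = pp1a + pp1b;
        if (pp1s < pp1a) {              /* carry out of the middle sum */
            pp2 += 1ULL << 32;
        }
        uint64_t low = pp0 + (pp1s << 32);
        if (low < pp0) {                /* carry into the high half */
            pp2 += 1;
        }
        uint64_t high = pp2 + (pp1s >> 32);

        unsigned __int128 ref = (unsigned __int128)a * b;
        assert(low == (uint64_t)ref);
        assert(high == (uint64_t)(ref >> 64));
    }

diff --git a/target/hexagon/mmvec/macros.h b/target/hexagon/mmvec/macros.h
index bcd4a1e8973c..645ec9280972 100644
--- a/target/hexagon/mmvec/macros.h
+++ b/target/hexagon/mmvec/macros.h
@@ -354,3 +354,16 @@
 } while (0);
 #endif
+
+#define fPARSEHF(A) parse_hf(A)
+#define fPARSESF(A) parse_sf(A)
+#define fPARSEQF16(A) parse_qf16(A)
+#define fPARSEQF32(A) parse_qf32(A)
+
+#define fRNDSATHF(A,B) rnd_sat_hf(A,B)
+#define fRNDSATSF(A,B) rnd_sat_sf(A,B)
+#define fRNDSATQF16(A,B,C) rnd_sat_qf16(A,B,C)
+#define fRNDSATQF32(A,B,C) rnd_sat_qf32(A,B,C)
+
+#define fNEGQF16(A) negate16(A)
+#define fNEGQF32(A) negate32(A)
diff --git a/target/hexagon/mmvec/macros_auto.h b/target/hexagon/mmvec/macros_auto.h
new file mode 100644
index 000000000000..479cb225c70c
--- /dev/null
+++ b/target/hexagon/mmvec/macros_auto.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HEXAGON_MMVEC_MACROS_AUTO_H
+#define HEXAGON_MMVEC_MACROS_AUTO_H
+
+
+#include "mmvec/macros.h"
+
+#include "q6v_defines.h"
+#pragma GCC diagnostic ignored "-Wtype-limits"
+#define fDUMPQ(STR,REG) do { printf(STR ":" #REG ": 0x%016llx\n",REG.ud[0]); } while (0)
+#define fRT8NOTE()
+#define fEXPERIMENTAL()
+#define fBFLOAT()
+#define fCVI_VX_NO_TMP_LD()
+#define fNOTQ(VAL) ({mmqreg_t _ret ={0}; int _i_; for (_i_ = 0; _i_ < fVECSIZE()/64; _i_++) _ret.ud[_i_] = ~VAL.ud[_i_]; _ret;})
+#define fGETQBITS(REG,WIDTH,MASK,BITNO) ((MASK) & (REG.w[(BITNO)>>5] >> ((BITNO) & 0x1f)))
+#define fGETQBIT(REG,BITNO) fGETQBITS(REG,1,1,BITNO)
+#define fGENMASKW(QREG,IDX) (((fGETQBIT(QREG,(IDX*4+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*4+1)) ? 0xFF : 0x0) << 8) |((fGETQBIT(QREG,(IDX*4+2)) ? 0xFF : 0x0) << 16) |((fGETQBIT(QREG,(IDX*4+3)) ? 0xFF : 0x0) << 24))
+#define fGET10BIT(COE,VAL,POS) { COE = (((((fGETUBYTE(3,VAL) >> (2 * POS)) & 3) << 8) | fGETUBYTE(POS,VAL)) << 6); COE >>= 6; }
+#define fVMAX(X,Y) (X>Y) ? X : Y
+#define fREAD_VEC(DST,IDX) (DST = READ_VREG(fMODCIRCU((IDX),5)))
+#define fREAD_ZVEC(DST,IDX) (DST = READ_ZREG(fMODCIRCU((IDX),5)))
+#define fREAD_ZVEC_WORD(DST,IDX) { mmvector_t ZReg = READ_ZREG(0); DST = ZReg.uw[IDX]; }
+#define fREAD_ZVEC_ALL(DST,N,NZ) { int __idx = 0; for (__idx = 0; __idx < NZ/N; __idx++) { memcpy(&DST[N*__idx], &THREAD2STRUCT->ZRegs[__idx], N); } }
+#define fZREGB(Z,IDX) ((size1s_t)Z[IDX])
+#define fZREGUB(Z,IDX) ((size1u_t)Z[IDX])
+#define fZREGH(Z,IDX) ((size2s_t)Z[IDX])
+#define fZREGUB(Z,IDX) ((size1u_t)Z[IDX])
+#define fGETNIBBLE(IDX,SRC) ( fSXTN(4,8,(SRC >> (4*IDX)) & 0xF) )
+#define fGETCRUMB(IDX,SRC) ( fSXTN(2,8,(SRC >> (2*IDX)) & 0x3) )
+#define fGETCRUMB_SYMMETRIC(IDX,SRC) ( (fGETCRUMB(IDX,SRC)>=0 ?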
(2-fGETCRUMB(IDX,SRC)) : fGETCRUMB(IDX,SRC) ) ) +#define fWRITE_VEC(IDX,VAR) (WRITE_VREG(fMODCIRCU((IDX),5),VAR)) +#define fGENMASKH(QREG,IDX) (((fGETQBIT(QREG,(IDX*2+0)) ? 0xFF : 0x0) << 0) |((fGETQBIT(QREG,(IDX*2+1)) ? 0xFF : 0x0) << 8)) +#define fGETMASKW(VREG,QREG,IDX) (VREG.w[IDX] & fGENMASKW((QREG),IDX)) +#define fGETMASKH(VREG,QREG,IDX) (VREG.h[IDX] & fGENMASKH((QREG),IDX)) +#define fCONDMASK8(QREG,IDX,YESVAL,NOVAL) (fGETQBIT(QREG,IDX) ? (YESVAL) : (NOVAL)) +#define fCONDMASK16(QREG,IDX,YESVAL,NOVAL) ((fGENMASKH(QREG,IDX) & (YESVAL)) | (fGENMASKH(fNOTQ(QREG),IDX) & (NOVAL))) +#define fCONDMASK32(QREG,IDX,YESVAL,NOVAL) ((fGENMASKW(QREG,IDX) & (YESVAL)) | (fGENMASKW(fNOTQ(QREG),IDX) & (NOVAL))) +#define fSETQBITS(REG,WIDTH,MASK,BITNO,VAL) do { size4u_t __TMP = (VAL); REG.w[(BITNO)>>5] &= ~((MASK) << ((BITNO) & 0x1f)); REG.w[(BITNO)>>5] |= (((__TMP) & (MASK)) << ((BITNO) & 0x1f)); } while (0) +#define fSETQBIT(REG,BITNO,VAL) fSETQBITS(REG,1,1,BITNO,VAL) +#define fVBYTES() (fVECSIZE()) +#define fVHALVES() (fVECSIZE()/2) +#define fVWORDS() (fVECSIZE()/4) +#define fVDWORDS() (fVECSIZE()/8) +#define fVALIGN(ADDR, LOG2_ALIGNMENT) ( ADDR = ADDR & ~(LOG2_ALIGNMENT-1)) +#define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) ( ADDR = ADDR | (LOG2_ALIGNMENT-1)) +#define fVELEM(WIDTH) ((fVECSIZE()*8)/WIDTH) +#define fVECLOGSIZE() (MAX_VEC_SIZE_LOGBYTES) +#define fVBUF_IDX(EA) (((EA) >> fVECLOGSIZE()) & 0xFF) +#define fREAD_VBUF(IDX,WIDX) READ_VBUF(IDX,WIDX) +#define fLOG_VBUF(IDX,VAL,WIDX) LOG_VBUF(IDX,VAL,WIDX) +#define fVECSIZE() (1<VRegs_updated & (((VRegMask)1)<future_VRegs[VNUM] : mmvec_zero_vector()) +#define fV_AL_CHECK(EA,MASK) if ((EA) & (MASK)) { warn("aligning misaligned vector. PC=%08x EA=%08x",thread->Regs[REG_PC],(EA)); } +#define fSCATTER_INIT( REGION_START, LENGTH, ELEMENT_SIZE) { mem_vector_scatter_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; } +#define fGATHER_INIT( REGION_START, LENGTH, ELEMENT_SIZE) { mem_vector_gather_init(thread, insn, REGION_START, LENGTH, ELEMENT_SIZE); if (EXCEPTION_DETECTED) return; } +#ifdef CONFIG_USER_ONLY +#define fSCATTER_FINISH(OP) +#define fGATHER_FINISH() +#else +#define fSCATTER_FINISH(OP) { if (EXCEPTION_DETECTED) return; mem_vector_scatter_finish(thread, insn, OP); } +#define fGATHER_FINISH() { if (EXCEPTION_DETECTED) return; mem_vector_gather_finish(thread, insn); } +#endif +#define CHECK_VTCM_PAGE(FLAG, BASE, LENGTH, OFFSET, ALIGNMENT) { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; pa = pa & ~(ALIGNMENT-1); FLAG = (pa < (thread->mem_access[slot].paddr+LENGTH)); } +#define COUNT_OUT_OF_BOUNDS(FLAG, SIZE) { if (!FLAG) { THREAD2STRUCT->vtcm_log.oob_access += SIZE; warn("Scatter/Gather out of bounds of region"); } } +#define fLOG_SCATTER_OP(SIZE) { thread->vtcm_log.op = 1; thread->vtcm_log.op_size = SIZE; } +#define fVLOG_VTCM_GATHER_WORD(EA,OFFSET,IDX, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, 1); } +#define fVLOG_VTCM_GATHER_HALFWORD(EA,OFFSET,IDX, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, 1); } +#define fVLOG_VTCM_GATHER_HALFWORD_DV(EA,OFFSET,IDX,IDX2,IDX_H, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1); } +#define fVLOG_VTCM_GATHER_WORDQ(EA,OFFSET,IDX, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0)); } +#define fVLOG_VTCM_GATHER_HALFWORDQ(EA,OFFSET,IDX, Q, LEN) { GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0)); } +#define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA,OFFSET,IDX,IDX2,IDX_H, Q, LEN) { 
GATHER_FUNCTION(EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0)); } +#define DEBUG_LOG_ADDR(OFFSET) { if (thread->processor_ptr->arch_proc_options->mmvec_network_addr_log2) { int slot = insn->slot; paddr_t pa = thread->mem_access[slot].paddr+OFFSET; } } +//#define SCATTER_OP_WRITE_TO_MEM(TYPE) { for (int i = 0; i < mmvecx->vtcm_log.size; i+=sizeof(TYPE)) { if ( mmvecx->vtcm_log.mask.ub[i] != 0) { TYPE dst = 0; TYPE inc = 0; for(int j = 0; j < sizeof(TYPE); j++) { dst |= (sim_mem_read1(thread->system_ptr, thread->threadId, mmvecx->vtcm_log.pa[i+j]) << (8*j)); inc |= mmvecx->vtcm_log.data.ub[j+i] << (8*j); mmvecx->vtcm_log.mask.ub[j+i] = 0; mmvecx->vtcm_log.data.ub[j+i] = 0; mmvecx->vtcm_log.offsets.ub[j+i] = 0; } dst += inc; for(int j = 0; j < sizeof(TYPE); j++) { sim_mem_write1(thread->system_ptr,thread->threadId, mmvecx->vtcm_log.pa[i+j], (dst >> (8*j))& 0xFF ); } } } } +#define fVLOG_VTCM_HALFWORD(EA,OFFSET,IN,IDX, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, 1, IN); } +#define fVLOG_VTCM_WORD(EA,OFFSET,IN,IDX,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, 1, IN); } +#define fVLOG_VTCM_HALFWORDQ(EA,OFFSET,IN,IDX,Q,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, IDX, fGETQBIT(QsV,2*IDX+i0), IN); } +#define fVLOG_VTCM_WORDQ(EA,OFFSET,IN,IDX,Q,LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 4, IDX, fGETQBIT(QsV,4*IDX+i0), IN); } +#define fVLOG_VTCM_HALFWORD_DV(EA,OFFSET,IN,IDX,IDX2,IDX_H, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), 1, IN); } +#define fVLOG_VTCM_HALFWORDQ_DV(EA,OFFSET,IN,IDX,Q,IDX2,IDX_H, LEN) { SCATTER_FUNCTION (EA,OFFSET,IDX, LEN, 2, (2*IDX2+IDX_H), fGETQBIT(QsV,2*IDX+i0), IN); } +#define fSTORERELEASE(EA,TYPE) { fV_AL_CHECK(EA,fVECSIZE()-1); mem_store_release(thread, insn, fVECSIZE(), EA&~(fVECSIZE()-1), EA, TYPE, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#define fVFETCH_AL(EA) { fV_AL_CHECK(EA,fVECSIZE()-1); mem_fetch_vector(thread, insn, EA&~(fVECSIZE()-1), slot, fVECSIZE()); } +#define fLOADMMV_AL(EA, ALIGNMENT, LEN, DST) { fV_AL_CHECK(EA,ALIGNMENT-1); /*thread->last_pkt->double_access_vec = 0;*/ mem_load_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &DST.ub[0], LEN, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true) +#else +#define fLOADMMV(EA, DST) fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST) +#endif +#define fLOADMMZ(EA,DST) { mmvector_t load_vec; fV_AL_CHECK(EA,fVECSIZE()-1); mem_load_vector_oddva(thread, 0, EA&~(fVECSIZE()-1), EA, slot, fVECSIZE(), &load_vec.ub[0], fVECSIZE(), fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); int idx = (EA & 0x80)>0; DST.v[idx] = load_vec; } +#define fLOADZ_LOAD(EA,EAU,WIDTH,DST) {/* thread->last_pkt->ext_slot_cancelled = 0; thread->last_pkt->double_access_vec = 0;*/ int etm_size = ((EA % width) ==0) ? fVECSIZE() : 0; if (thread->processor_ptr->options->testgen_mode) etm_size = ((EA % width) ==0) ? 
WIDTH : 0; mem_load_vector_oddva(thread, 0, EA, EAU, slot, WIDTH, &DST.ub[0], etm_size, fUSE_LOOKUP_ADDRESS()); } +#define fELSE_CANCELZ() else { /*if (thread->last_pkt) { thread->mem_access[slot].dropped_z = 1; thread->last_pkt->ext_slot_cancelled |= (1<processor_ptr)); } +#define fLOADMMVQ(EA,DST,QVAL) do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); fVFOREACH(8,__i) if (!fGETQBIT(QVAL,__i)) DST.b[__i] = 0; } while (0) +#define fLOADMMVNQ(EA,DST,QVAL) do { int __i; fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); fVFOREACH(8,__i) if (fGETQBIT(QVAL,__i)) DST.b[__i] = 0; } while (0) +#define fLOADMMVU_AL(EA, ALIGNMENT, LEN, DST) { size4u_t size2 = (EA)&(ALIGNMENT-1); size4u_t size1 = LEN-size2; /*thread->last_pkt->double_access_vec = 1;*/ mem_load_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &DST.ub[size1], size2, fUSE_LOOKUP_ADDRESS()); mem_load_vector_oddva(thread, 0, EA, EA, 0, size1, &DST.ub[0], size1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false) +#else +#define fLOADMMVU(EA, DST) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->pkt_has_vmemu_access = 0; thread->last_pkt->double_access = 0;*/ fLOADMMV_AL(EA,fVECSIZE(),fVECSIZE(),DST); } else { /*thread->last_pkt->pkt_has_vmemu_access = 1; thread->last_pkt->double_access = 1;*/ fLOADMMVU_AL(EA,fVECSIZE(),fVECSIZE(),DST); } } +#endif +#define fSTOREMMV_AL(EA, ALIGNMENT, LEN, SRC) { fV_AL_CHECK(EA,ALIGNMENT-1); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fSTOREMMV(EA, SRC) gen_vreg_store(ctx, EA, SRC##_off, insn->slot, true) +#else +#define fSTOREMMV(EA, SRC) fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC) +#endif +#define fSTOREMMVQ_AL(EA, ALIGNMENT, LEN, SRC, MASK) do { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } while (0) +#ifdef QEMU_GENERATE +#define fSTOREMMVQ(EA, SRC, MASK) \ + gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false) +#else +#define fSTOREMMVQ(EA, SRC, MASK) fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK) +#endif +#define fSTOREMMVNQ_AL(EA, ALIGNMENT, LEN, SRC, MASK) { mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); fV_AL_CHECK(EA,ALIGNMENT-1); mem_store_vector_oddva(thread, 0, EA&~(ALIGNMENT-1), EA, slot, LEN, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fSTOREMMVNQ(EA, SRC, MASK) \ + gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true) +#else +#define fSTOREMMVNQ(EA, SRC, MASK) fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK) +#endif +#define fSTOREMMVU_AL(EA, ALIGNMENT, LEN, SRC) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], 0, 0, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], 0, 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#ifdef QEMU_GENERATE +#define fSTOREMMVU(EA, SRC) \ + gen_vreg_store(ctx, EA, SRC##_off, insn->slot, false) 
+#else +#define fSTOREMMVU(EA, SRC) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMV_AL(EA,fVECSIZE(),fVECSIZE(),SRC); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVU_AL(EA,fVECSIZE(),fVECSIZE(),SRC); } } +#endif +#define fSTOREMMVQU_AL(EA, ALIGNMENT, LEN, SRC, MASK) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 0, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, 0, size1, &SRC.ub[0], &maskvec.ub[0], 0, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#define fSTOREMMVQU(EA, SRC, MASK) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMVQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVQU_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } } +#define fSTOREMMVNQU_AL(EA, ALIGNMENT, LEN, SRC, MASK) { size4u_t size1 = ALIGNMENT-((EA)&(ALIGNMENT-1)); size4u_t size2; mmvector_t maskvec; int i; for (i = 0; i < fVECSIZE(); i++) maskvec.ub[i] = fGETQBIT(MASK,i); if (size1>LEN) size1 = LEN; size2 = LEN-size1; mem_store_vector_oddva(thread, 0, EA+size1, EA+fVECSIZE(), 1, size2, &SRC.ub[size1], &maskvec.ub[size1], 1, fUSE_LOOKUP_ADDRESS()); mem_store_vector_oddva(thread, 0, EA, EA, 0, size1, &SRC.ub[0], &maskvec.ub[0], 1, fUSE_LOOKUP_ADDRESS_BY_REV(thread->processor_ptr)); } +#define fSTOREMMVNQU(EA, SRC, MASK) { /*thread->last_pkt->pkt_has_vtcm_access = 0; thread->last_pkt->pkt_access_count = 0;*/ if ( (EA & (fVECSIZE()-1)) == 0) { /*thread->last_pkt->double_access = 0;*/ fSTOREMMVNQ_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } else { /*thread->last_pkt->double_access = 1; thread->last_pkt->pkt_has_vmemu_access = 1;*/ fSTOREMMVNQU_AL(EA,fVECSIZE(),fVECSIZE(),SRC,MASK); } } +#define fVFOREACH(WIDTH, VAR) for (VAR = 0; VAR < fVELEM(WIDTH); VAR++) +#define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) ARRAY.v[(INDEX) / (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))].TYPE[(INDEX) % (fVECSIZE()/(sizeof(ARRAY.TYPE[0])))] +#define fVNEWCANCEL(REGNUM) do { THREAD2STRUCT->VRegs_select &= ~(1<<(REGNUM)); } while (0) +#define fTMPVDATA() mmvec_vtmp_data(thread) +#define fVSATDW(U,V) fVSATW( ( ( ((long long)U)<<32 ) | fZXTN(32,64,V) ) ) +#define fVASL_SATHI(U,V) fVSATW(((U)<<1) | ((V)>>31)) +#define fVUADDSAT(WIDTH,U,V) fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)) +#define fVSADDSAT(WIDTH,U,V) ({size8s_t tmp5 = fSXTN(WIDTH, 2*WIDTH, U); size8s_t tmp6 = fSXTN(WIDTH, 2*WIDTH, V); size8s_t tmp7 = tmp5 + tmp6; fVSATN( WIDTH, tmp7); }) +#define fVUSUBSAT(WIDTH,U,V) fVSATUN( WIDTH, fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)) +#define fVSSUBSAT(WIDTH,U,V) fVSATN( WIDTH, fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)) +#define fVAVGU(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVAVGURND(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) + fZXTN(WIDTH, 2*WIDTH, V)+1)>>1) +#define fVNAVGU(WIDTH,U,V) ((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVNAVGURNDSAT(WIDTH,U,V) 
fVSATUN(WIDTH,((fZXTN(WIDTH, 2*WIDTH, U) - fZXTN(WIDTH, 2*WIDTH, V)+1)>>1)) +#define fVAVGS(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVAVGSRND(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) + fSXTN(WIDTH, 2*WIDTH, V)+1)>>1) +#define fVNAVGS(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V))>>1) +#define fVNAVGSRND(WIDTH,U,V) ((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1) +#define fVNAVGSRNDSAT(WIDTH,U,V) fVSATN(WIDTH,((fSXTN(WIDTH, 2*WIDTH, U) - fSXTN(WIDTH, 2*WIDTH, V)+1)>>1)) +#define fVNOROUND(VAL,SHAMT) VAL +#define fVNOSAT(VAL) VAL +#define fVROUND(VAL,SHAMT) ((VAL) + (((SHAMT)>0)?(1LL<<((SHAMT)-1)):0)) +#define fCARRY_FROM_ADD32(A,B,C) (((fZXTN(32,64,A)+fZXTN(32,64,B)+C) >> 32) & 1) +#define fUARCH_NOTE_PUMP_4X() +#define fUARCH_NOTE_PUMP_2X() +#define UNLIKELY(X) __builtin_expect((X), 0) +#define fVDOCHKPAGECROSS(BASE,SUM) if (UNLIKELY(thread->timing_on)) { thread->mem_access[slot].check_page_crosses = 1; thread->mem_access[slot].page_cross_base = BASE; thread->mem_access[slot].page_cross_sum = SUM; } +#define fPARSEQF32(A) parse_qf32(A) +#define fRNDSATQF32(A,B,C) rnd_sat_qf32(A,B,C) +#define fPARSEQF16(A) parse_qf16(A) +#define fRNDSATQF16(A,B,C) rnd_sat_qf16(A,B,C) +#define fPARSESF(A) parse_sf(A) +#define fRNDSATSF(A,B) rnd_sat_sf(A,B) +#define fPARSEHF(A) parse_hf(A) +#define fRNDSATHF(A,B) rnd_sat_hf(A,B) +#define fRNDSATW(A,B) rnd_sat_w(A,B) +#define fRNDSATUW(A,B) rnd_sat_uw(A,B) +#define fRNDSATH(A,B) rnd_sat_h(A,B) +#define fRNDSATUH(A,B) rnd_sat_uh(A,B) +#define fRNDSATB(A,B) rnd_sat_b(A,B) +#define fRNDSATUB(A,B) rnd_sat_ub(A,B) +#define fNEGQF32(A) negate32(A) +#define fNEGQF16(A) negate16(A) +#define fNEGSF(A) negate_sf(A) +#define fNEGHF(A) negate_hf(A) +#define fCMPGT_QF32(A,B) cmpgt_qf32(A,B) +#define fCMPGT_QF16(A,B) cmpgt_qf16(A,B) +#define fCMPGT_SF(A,B) cmpgt_sf(A,B) +#define fCMPGT_HF(A,B) cmpgt_hf(A,B) +#define fCMPGT_BF(A,B) cmpgt_sf(((int)A) << 16,((int)B) << 16) +#define fCMPGT_QF32_SF(A,B) cmpgt_qf32_sf(A,B) +#define fCMPGT_QF16_HF(A,B) cmpgt_qf16_hf(A,B) +#define fMAX_QF32(X,Y) max_qf32(X,Y) +#define fMIN_QF32(X,Y) min_qf32(X,Y) +#define fMAX_QF32_SF(X,Y) max_qf32_sf(X,Y) +#define fMIN_QF32_SF(X,Y) min_qf32_sf(X,Y) +#define fMAX_QF16(X,Y) max_qf16(X,Y) +#define fMIN_QF16(X,Y) min_qf16(X,Y) +#define fMAX_QF16_HF(X,Y) max_qf16_hf(X,Y) +#define fMIN_QF16_HF(X,Y) min_qf16_hf(X,Y) +#define fMAX_SF(X,Y) max_sf(X,Y) +#define fMIN_SF(X,Y) min_sf(X,Y) +#define fMAX_HF(X,Y) max_hf(X,Y) +#define fMIN_HF(X,Y) min_hf(X,Y) + +#define fSTOREDOUBLEMMV(EA, SRC) fSTOREMMV_AL(EA,fVECSIZE(),2*fVECSIZE(),SRC) +#endif diff --git a/target/hexagon/mmvec/mmvec.h b/target/hexagon/mmvec/mmvec.h index 52d470709c02..906bf16d8258 100644 --- a/target/hexagon/mmvec/mmvec.h +++ b/target/hexagon/mmvec/mmvec.h @@ -38,6 +38,11 @@ typedef union { int16_t h[MAX_VEC_SIZE_BYTES / 2]; uint8_t ub[MAX_VEC_SIZE_BYTES / 1]; int8_t b[MAX_VEC_SIZE_BYTES / 1]; + int32_t qf32[MAX_VEC_SIZE_BYTES / 4]; + int16_t qf16[MAX_VEC_SIZE_BYTES / 2]; + int32_t sf[MAX_VEC_SIZE_BYTES / 4]; + int16_t hf[MAX_VEC_SIZE_BYTES / 2]; + int16_t bf[MAX_VEC_SIZE_BYTES / 2]; } MMVector; typedef union { diff --git a/target/hexagon/mmvec/mmvec_qfloat.c b/target/hexagon/mmvec/mmvec_qfloat.c new file mode 100644 index 000000000000..060ac4b14d8f --- /dev/null +++ b/target/hexagon/mmvec/mmvec_qfloat.c @@ -0,0 +1,2563 @@ +/* + * Copyright(c) 2019-2020 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif + +#include "qemu/osdep.h" +#include "mmvec_qfloat.h" +#include + +#define UNUSED(var) do { (void)var; } while (0) + +//Take one's complement of the mantissa for QF32 +size4s_t negate32(size4s_t in) +{ + size4s_t out; + out = in>>8; + out = ~out; + out = (out<<8) | (in & 0xFF); + return out; +} +//Take one's complement of the mantissa for QF16 +size2s_t negate16(size2s_t in) +{ + size2s_t out; + out = in>>5; + out = ~out; + out = (out<<5) | (in & 0x1F); + return out; +} +//Change sign for SF +size4s_t negate_sf(size4s_t in) +{ + size4s_t out; + int sign; + sign = (in>>31) & 1; + sign = ~sign; + out = (sign<<31) | (in & 0x7FFFFFFF); + return out; +} +//Change sign for SF +size2s_t negate_hf(size2s_t in) +{ + size2s_t out; + int sign; + sign = (in>>15) & 1; + sign = ~sign; + out = (sign<<15) | (in & 0x7FFF); + return out; +} +unfloat parse_qf16(size2s_t in) +{ + unfloat out; + + out.sign = (in>>15) & 0x1; + + out.exp = (size1s_t)(0x00 | (in & 0x1F)); + out.exp = out.exp - BIAS_QF16; + + /*implied LSB=1*/ + size2s_t signif; + /*take signif and sign extend, add LSB=1*/ + signif= ((size4s_t)in >> 4) | 1; + + out.sig = (double)signif * epsilon_hf; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF16_parse]in=%x, exp=%d, sig=%10.20f\n", in,out.exp,out.sig); + printf("[ARCH_QF16_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig))); +#endif + return out; +} +//Take signed int and generate sign, exp and ***signed sig +unfloat parse_qf32(size4s_t in) +{ + unfloat out; + + out.sign = (in>>31) & 0x1; + + out.exp = (size2s_t)(0x0000 | (in & 0xFF)); + out.exp = out.exp - BIAS_QF32; + + /*implied LSB=1*/ + size4s_t signif; + /*take signif and sign extend, add LSB=1*/ + signif= ((size8s_t)in >> 7) | 1; + + out.sig = (double)signif * epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_parse]in=%x, exp=%d, sig=%10.20f\n", in,out.exp,out.sig); + printf("[ARCH_QF32_parse]exp_d=%d, sig_d=%10.20f\n", ilogb(out.sig),ldexp(out.sig, -ilogb(out.sig))); +#endif + return out; +} + +unfloat parse_hf(size2s_t in) +{ + unfloat out; + + out.sign = (in>>15) & 0x1; + out.exp = (size1s_t)( (0x00 | (in>>10)) & 0x1F); + + size2u_t sig; + //take signif and sign extend + sig = (size2u_t)(in & 0x3FF); + + /*implied MSB=1*/ + if(out.exp>0) + sig = (1<<10) | sig; + + out.exp = out.exp - BIAS_HF; + if(out.exp>31) & 0x1; + out.exp = (size2s_t)( (0x0000 | (in>>23)) & 0xFF); + + size4u_t sig; + //take signif and sign extend + sig = (size4u_t)(in & 0x7FFFFF); + + /*implied MSB=1*/ + if(out.exp>0) + sig = (1<<23) | sig; + + out.exp = out.exp - BIAS_SF; + + if(out.exp=0.0)? 
0:1; + +#ifndef DEBUG_MMVEC_QF + UNUSED(R_low); +#endif + + int prod_ovf=0; + if(fabs(sig)>=2.0L && sig != -2.0L) + prod_ovf = 1; + + int E_MIN=E_MIN_QF32; + int E_MAX=E_MAX_QF32; + int BIAS=BIAS_QF32; + double _epsilon=epsilon; + double _units=units; + if(ft==QF32) + { + E_MIN = E_MIN_QF32; + E_MAX = E_MAX_QF32; + BIAS = BIAS_QF32; + _epsilon = epsilon; + _units= units; + } + else if(ft==QF16) + { + E_MIN = E_MIN_QF16; + E_MAX = E_MAX_QF16; + BIAS = BIAS_QF16; + _epsilon = epsilon_hf; + _units= units_hf; + } + else if(ft==SF) + { + E_MIN = E_MIN_SF; + E_MAX = E_MAX_SF; + BIAS = BIAS_SF; + _epsilon = epsilon; + _units= units; + } + else if(ft==HF) + { + E_MIN = E_MIN_HF; + E_MAX = E_MAX_HF; + BIAS = BIAS_HF; + _epsilon = epsilon_hf; + _units= units_hf; + } + + //Set scale factor + if((exp == (E_MIN-1)) || (prod_ovf && (exp0.0) + R_low = 0.25; + else if(sig_low<0.0) + R_low = -0.25; + else + R_low = 0; + + //R2 = floor((R1+R_low)/4.0)*4.0; + //R3 = (R1+R_low) - R2; + R2 = floor(R1/4.0)*4.0; + R3 = R1 - R2; + + //Check for exp overflow/underflow + if(exp>=(E_MAX+1) || (prod_ovf && exp==E_MAX)) + { + exp_ovf=1; + } + else if(exp<=(E_MIN-2)) + { + exp_undf=1; + } + else if(exp == E_MAX)//exp=E_MAX + { + //if(R3-2.0)+sig_low<=0.0 + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<2.0) || (R3==2.0 && sig_low<=0.0)) + //if(R3<=2.0) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + else if(exp == (E_MIN-1)) + { + exp_adj = 1; + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<2.0) || (R3==2.0 && sig_low<=0.0)) + //if(R3<=2.0) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + else if(prod_ovf && (exp < E_MAX)) + { + exp_adj = 1; + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<2.0) || (R3==2.0 && sig_low<=0.0)) + //if(R3<=2.0) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + else if(!prod_ovf) + { + if((R3==0.0) && (sig_low<0.0)) + { + sig_f = sig_s + (3.0-R3-4.0)*_epsilon; + } + else if((R3<1.5) || (R3==1.5 && sig_low<=0.0)) + //if(R3<=1.5) + { + sig_f = sig_s + (1.0-R3)*_epsilon; + } + //else if(R3<=2.5) + else if((R3<2.5) || (R3==2.5 && sig_low<=0.0)) + { + sig_f = (sig + (2.0-R3)*_epsilon)*0.5; + exp_adj=1; + } + else + { + sig_f = sig_s + (3.0-R3)*_epsilon; + } + } + //get the binary bits from the double-precision significand + //Either sig is positive or negative, IEEE double sig has magnitude + //Check for sign at the last stage and take 2's complement if negative + uint64_t sig_64_org, sig_64; + sig_64_org = *(uint64_t *)&sig_f; + sig_64 = sig_64_org; + uint32_t sig_32=0; + int32_t sig_32_out=0; + + int exp_df; + + exp_df = (sig_64_org >> 52) & 0x7FF; + exp_df = exp_df - BIAS_DF; + + if(exp_ovf) + { + exp=E_MAX+BIAS; + if(ft==QF32 || ft==SF) + sig_32 = (sign-1) & 0x7FFFFF; + else if(ft==QF16 || ft==HF) + sig_32 = (sign-1) & 0x3FF; + } + else if(exp_undf) + { + exp=E_MIN+BIAS; + if(ft==QF32 || ft==SF) + sig_32 = ((-1)*sign) & 0x7FFFFF; + else if(ft==QF16 || ft==HF) + sig_32 = ((-1)*sign) & 0x3FF; + } + else + { + exp += BIAS+exp_adj; + //Add MSB, generates 53bits (52+1) + sig_64 = (sig_64_org & 0xFFFFFFFFFFFFF) | 0x10000000000000; + //Shift out exponent 11 bits + sig_64 = sig_64<<11; + sig_64 = (exp_df>=0)? 
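+/*
+ * (What the shift below does, in our words: sig_64 holds the double's
+ * 53-bit significand with the implied integer bit restored and
+ * left-aligned; shifting by exp_df scales it so the 23 (QF32) or 10 (QF16)
+ * stored mantissa bits can be cut from the top. Note the
+ * *(uint64_t *)&sig_f pun above technically breaks strict aliasing; a
+ * well-defined equivalent sketch would be:
+ *
+ *     uint64_t bits;
+ *     memcpy(&bits, &sig_f, sizeof(bits));          // defined type pun
+ *     int e = (int)((bits >> 52) & 0x7FF) - 1023;   // unbiased exponent
+ *     uint64_t m = (bits & 0xFFFFFFFFFFFFFull) | (1ull << 52);
+ * )
+ */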
(sig_64 << exp_df):(sig_64>>abs(exp_df)); + if(ft==QF32) + { + sig_64 = sig_64 >> 41; + sig_32 = sig_64 & 0x7FFFFF; + } + else if(ft==QF16) + { + sig_64 = sig_64 >> 54; + sig_32 = sig_64 & 0x3FF; + } + + if(sign) + sig_32 = ~sig_32; + } + + sig_32_out = (sign<<23) | sig_32; + + if(ft==QF16 ||ft==HF) + sig_32_out = (sign<<10) | sig_32; + + + if( (ft ==QF16) || (ft==QF32)) { + if ((sig == 0.0) && (sig_low == 0.0)) { + exp = 0; + //printf("Squash to zero!\n"); + } + + } + + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF_rnd_sat]sign=%d exp_in=%d sig=%10.30f sig_low=%10.30f\n",sign, *exp_in, sig, sig_low); + printf("[ARCH_QF_rnd_sat]sig_s=%10.30f, sig_f=%10.30f\n",sig_s, sig_f); + printf("[ARCH_QF_rnd_sat]prod_ovf=%d exp_adj=%d exp_ovf=%d exp_undf=%d\n",prod_ovf,exp_adj, exp_ovf, exp_undf); + printf("[ARCH_QF_rnd_sat]sig_64_org=%lx sig_64=%lx sig_32=%x exp_df=%d exp=%d\n",sig_64_org, sig_64, sig_32, exp_df, exp); + printf("[ARCH_QF_rnd_sat]R1=%10.30f R_low=%1.128f R2=%10.30f R3=%10.30f eps=%10.30f\n",R1,R_low,R2,R3,_epsilon); + + double final = ldexp(sig_f, (exp-BIAS)); + printf("[ARCH_QF_norm] sig_f:%10.30f, exp-BIAS:%d, ldexp:%10.128f \n",sig_f, exp-BIAS, final); + printf("[ARCH_QF_norm] sig_32_out:%x, exp:%x \n",sig_32_out, exp); +#endif + + *exp_in = exp; + return sig_32_out; +} + +//size4s_t rnd_sat_qf32(int sign, int exp, double sig, double sig_low) +size4s_t rnd_sat_qf32(int exp, double sig, double sig_low) +{ + + //size4u_t sig_32=rnd_sat_qf_sig(sign, &exp, sig, sig_low, QF32); + //size4u_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF32); + size4s_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF32); + + size4s_t result; + //result = (sign<<31) | (sig_32 <<8) | (exp & 0xFF); + result = (sig_32 <<8) | (exp & 0xFF); + + return result; +} + + +size4u_t get_ieee_sig(int *exp, double sig, f_type ft); +size4u_t get_ieee_sig(int *exp, double sig, f_type ft) +{ + //Extract bits from double precision significand + uint64_t sig_64_org=0, sig_52=0, sig_53=0; + double value = 0.0; + int exp_d=0, exp_org=*exp; + int E_MIN; + E_MIN = (ft==SF)? E_MIN_SF: E_MIN_HF; + double _epsilon; + _epsilon = (ft==SF)? epsilon: epsilon_hf; + uint32_t sig_32=0; + size4s_t signif=0; + //int sign = (sig>=0.0)? 0:1; + + value = ldexp(sig, exp_org); + + sig_64_org = *(uint64_t *)&value; + exp_d = (sig_64_org >> 52) & 0x7FF; + exp_d = exp_d - BIAS_DF; + sig_52 = (sig_64_org & 0xFFFFFFFFFFFFF); + sig_53 = sig_52 | 0x10000000000000; + + //Check if exp is one less than the MIN + //shifting right the excess amount of bits from E_MIN + int shift = E_MIN - exp_d; + + int lsb =0; + int rem =0; + int sticky =0; + int sig_f =0; +#ifndef DEBUG_MMVEC_QF + UNUSED(lsb); + UNUSED(rem); + UNUSED(sticky); + UNUSED(sig_f); + UNUSED(_epsilon); +#endif + + if(exp_d <= (E_MIN-1)) + { + sig_53 = sig_53 >> shift; + } + + if(shift >=53) + sig_53=0; + + double R1, R2, R3; + if(ft==SF) + { + signif = sig_53 >> 29; + sig_32 = signif & 0x7FFFFF; + + lsb = signif & 1; + rem = (sig_53 >>28) & 1; + sticky = (sig_53 & 0xFFFFFFF)? 1:0; + + R1 = sig_53/pow(2,29); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + + if(fabs(value) >= SF_MAX) + { + //sig_32 = (1-sign)*0x7FFFFF; + sig_32 = 0x7FFFFF; + } + else if((R3>0.5 && R3<1.0) || (R3>=1.5)) + { + if(sig_32 == 0x7FFFFF) + { + sig_32 = 0; + exp_d = exp_d +1; + } + else + sig_32 = sig_32 +1; + } + sig_f = 0x800000 | (sig_32 & 0x7FFFFF); + } + else + { + signif = sig_53 >> 42; + sig_32 = signif & 0x3FF; + + lsb = signif & 1; + rem = (sig_53 >> 41) & 1; + sticky = (sig_53 & 0x1FFFFFFFFFF)? 
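+/*
+ * (lsb/rem/sticky here and the R3 arithmetic below are two spellings of
+ * IEEE round-to-nearest-even. With frac = the discarded fraction of an ulp:
+ *
+ *     frac <  0.5            -> truncate
+ *     frac >  0.5            -> increment
+ *     frac == 0.5, lsb == 0  -> truncate  (tie: already even)
+ *     frac == 0.5, lsb == 1  -> increment (tie: round to even)
+ *
+ * R3 = R1 mod 2 carries lsb in its integer part and frac in its fraction,
+ * so "(R3>0.5 && R3<1.0) || (R3>=1.5)" selects exactly the increment rows.)
+ */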
1:0; + + R1 = sig_53/pow(2,42); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + + //if((rem==1 && sticky==1) || (lsb==1 && rem==1)) + if(fabs(value) >= HF_MAX) + { + //sig_32 = (1-sign)*0x3FF; + sig_32 = 0x3FF; + } + else if((R3>0.5 && R3<1.0) || (R3>=1.5)) + { + if(sig_32 == 0x3FF) + { + sig_32 = 0; + exp_d = exp_d +1; + } + else + sig_32 = sig_32 +1; + } + sig_f = 0x400 | (sig_32 & 0x3FF); + + } + + if(sig ==0.0 && exp_org == (E_MIN-1)) + { + sig_64_org = 0; + exp_d = 0; + sig_32=0; + sig_f =0; + } + *exp = exp_d; + + + +#ifdef DEBUG_MMVEC_QF + int sign = (sig>=0.0)? 0: 1; + double param = (double)sig_f*_epsilon; + if(sign) param = (-1.0)*param; + int exp_f = (exp_d<=E_MIN-1)? E_MIN: exp_d; + double final = ldexp(param, exp_f); + int exp_1 = (value != 0.0)? ilogb(value): 0; + int exp_2 = (exp_1 > E_MIN)? exp_1: E_MIN; + double sig_1 = ldexp(value, exp_1-exp_2); + + printf("[IEEE_sig]exp_1=%d, exp_2=%d, sig_1=%10.20f\n",exp_1,exp_2,sig_1); + printf("[IEEE_sig]exp_org=%d, sig=%10.20f, value=%10.20f, shift=%d\n",exp_org, sig, value, shift); + printf("[IEEE_sig]sign=%d exp_d=%d sig_64_org=%lx sig_52=%lx sig_53=%lx sig_32=%x signif=%x sig_f=%x\n",sign, exp_d, sig_64_org, sig_52, sig_53, sig_32, signif, sig_f); + printf("[IEEE_sig]lsb=%d, rem=%d, sticky=%d\n",lsb, rem, sticky); + printf("[IEEE_sig] param:%10.20f, exp_d:%d, exp_f:%d, ldexp:%10.20f \n",param, exp_d, exp_f, final); + printf("[IEEE_sig]R1=%lf, R2=%lf, R3=%lf\n",R1, R2, R3); +#endif + + return sig_32; +} + +size2s_t rnd_sat_hf_rint(int exp_in, double sig_in); +size2s_t rnd_sat_hf_rint(int exp_in, double sig_in) +{ + // normalize and decompose again limiting to EMIN of target + double val=0.0; + double den=0.0; + double sig=0.0; + double mant=0.0; + int exp=0, exp_d=0, exp_ub=0; + size2s_t result=0; + + val = ldexp(sig_in, exp); // normalize - convert to simple float (double) + exp_d = (val != 0.0)? ilogb(val): 0; + exp_ub = (exp_d> E_MIN_HF)? exp_d: E_MIN_HF; // EMIN=-14 for fp16 + den = ldexp(val, -exp_ub); // denormalized if we hit EMIN + int sign = (sig<0)? 1:0; + sig = fabs(den); + // round to final mantissa + mant = rint(ldexp(sig, FRAC_HF)); // FRAC=10 for fp16; RNE + // post-round exponent adjust + exp = exp_ub + BIAS_HF; // BIAS=15 for fp16 + // -1 for -1.0 (denorm) or +1 for >=2.0 (round up to next exponent) + int exp_mant = (mant != 0.0)? ilogb(mant): 0; + int exp_adj = (exp_mant-FRAC_HF > -1)? (exp_mant - FRAC_HF): -1; + exp = exp - exp_adj; + // overflow + if (exp>E_MAX_HF) { // +16 for fp16 w/o inf/nan + exp = E_MAX_HF; + mant = -1; + } + // final result// better to use a struct for fp16 instead +// result = (mant&((1<=0.0)? 0:1; + //size4u_t sig_32=0;//rnd_sat_ieee_sig(&exp, sig, sig_low, SF); + size4u_t sig_32 = get_ieee_sig(&exp, sig, HF); + + //exp is unbiased + size2s_t result; + if(exp==(E_MIN_HF-1) && sig==0.0) + { + result = 0; + } + else if(exp > E_MAX_HF) + { + result = (sign<<15) | (0x1F << 10) | 0x3FF; + } + //else if((exp < E_MIN_HF-11) ||((exp == E_MIN_HF-11) && (sig_32 ==0))) + //{ + // result = (sign<<15); + //} + else + { + exp = exp + BIAS_HF; + if(exp < 0) + exp = 0; + else if(exp > 31) + exp = 31; + result = (sign<<15) | ((exp & 0x1F) << 10) | sig_32; + } + + + return result; +} + + +//Take signed sig, produce normalized ieee sf output +size4s_t rnd_sat_sf(int exp, double sig) +{ + + int sign = (sig>=0.0)? 
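+/*
+ * (Assembly sketch for the packing below, with an example value of ours:
+ * for exp = 0, sig = 1.0, get_ieee_sig() leaves exp at 0 and returns a
+ * zero fraction, so the packed word is
+ *
+ *     (0 << 31) | ((0 + BIAS_SF) << 23) | 0  ==  0x3F800000  ==  1.0f
+ *
+ * The clamp below only keeps the biased exponent inside its 8-bit field.)
+ */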
0: 1; + size4u_t sig_32 = get_ieee_sig(&exp, sig, SF); + + size4s_t result; + + if(exp==0 && sig==0.0) + { + result = 0; + } + else + { + exp = exp + BIAS_SF; + if(exp < 0) + exp = 0; + else if(exp > 255) + exp = 255; + result = (sign<<31) | ((exp & 0xFF)<< 23) | (sig_32 & 0x7FFFFF); + } + + return result; +} + +//size2s_t rnd_sat_qf16(int sign, int exp_ab, double sig, double sig_low) +size2s_t rnd_sat_qf16(int exp_ab, double sig, double sig_low) +{ + int exp=exp_ab; + + + //size4u_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF16); + //printf("sig low=%f sig=%f\n", sig, sig_low); + size4s_t sig_32=rnd_sat_qf_sig(&exp, sig, sig_low, QF16); + + size2s_t result; + result = (sig_32<<5) | (exp & 0x1F); + //result = (sign_ab<<15) | (sig_16<<5) | (exp_ab & 0x1F); + + return result; +} + +size4s_t mpy_qf32(size4s_t in_a, size4s_t in_b ) { + size2s_t exp; + double sig; + + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf32(in_a); + b = parse_qf32(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_pre_rnd] a.sig:%10.20f, b.sig:%10.20f, sig:%10.20f, ilogb(sig):%d, exp:%d\n", a.sig, b.sig, sig, ilogb(sig), exp); +#endif + + size4s_t result; + //result = rnd_sat_qf32(sign, exp_ab, sig_ab, 0.0); + result = rnd_sat_qf32(exp, sig, 0.0); + + return result; +} + +size4s_t mpy_qf32_sf(size4s_t in_a, size4s_t in_b ) { + int sign; + size2s_t exp; + double sig; + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_sf(in_a); + b = parse_sf(in_b); + + //Unbiased: after removing bias + sign = a.sign ^ b.sign; + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + size4s_t result; + result = rnd_sat_qf32(exp, sig, 0.0); + if(sign) result = negate32(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_SF_parse]sign:%d, a.sig:%10.20f, b.sig:%10.20f, sig:%10.20f exp:%d\n",sign, a.sig, b.sig, sig, exp); +#endif + return result; +} + +size4s_t mpy_qf32_mix_sf(size4s_t in_a, size4s_t in_b ) { + size2s_t exp; + double sig; + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf32(in_a); + b = parse_sf(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + size4s_t result; + result = rnd_sat_qf32(exp, sig, 0.0); + if(b.sign) result = negate32(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_SF_parse]a.sign:%d, a.sig:%10.20f, b.sign:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.sign, a.sig, b.sign, b.sig, sig, exp); +#endif + return result; +} + +//QF32 output out of two QF16 muls +size8s_t mpy_qf32_qf16(size4s_t in_a, size4s_t in_b ) { + + double sig_0, sig_1; + int exp_0, exp_1; + + unfloat u0,u1,v0,v1; + + u0 = parse_qf16((in_a & 0xFFFF)); + u1 = parse_qf16(((in_a>>16) & 0xFFFF)); + v0 = parse_qf16((in_b & 0xFFFF)); + v1 = parse_qf16(((in_b>>16) & 0xFFFF)); + + //Unbiased: after removing bias + exp_0 = u0.exp + v0.exp; + exp_1 = u1.exp + v1.exp; + sig_0 = u0.sig * v0.sig; + sig_1 = u1.sig * v1.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_QF16_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0); + printf("[ARCH_QF32_QF16_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1); +#endif + + size4s_t result_0, result_1; + size8s_t result; + result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0); + result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0); + + result = 
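+/*
+ * (Lane layout of this widening multiply, as packed just below: each
+ * 32-bit input carries two qf16 lanes and the result carries two qf32
+ * lanes,
+ *
+ *     in_a = [ a1 | a0 ],  in_b = [ b1 | b0 ]      (qf16 halves)
+ *     out  = [ qf32(a1*b1) | qf32(a0*b0) ]         (hi | lo 32 bits)
+ *
+ * so consumers split the pair back apart with >>32 and & 0xFFFFFFFF, as
+ * the conv_*_qf32 helpers later in this file do.)
+ */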
((size8s_t)result_1 <<32) | (result_0 &0xFFFFFFFF); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_QF16_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result); +#endif + + return result; +} + +//QF32 output out of two HF muls +size8s_t mpy_qf32_hf(size4s_t in_a, size4s_t in_b ) { + + double sig_0, sig_1; + int exp_0, exp_1; + + unfloat u0,u1,v0,v1; + + u0 = parse_hf((in_a & 0xFFFF)); + u1 = parse_hf(((in_a>>16) & 0xFFFF)); + v0 = parse_hf((in_b & 0xFFFF)); + v1 = parse_hf(((in_b>>16) & 0xFFFF)); + + //Unbiased: after removing bias + exp_0 = u0.exp + v0.exp; + exp_1 = u1.exp + v1.exp; + sig_0 = u0.sig * v0.sig; + sig_1 = u1.sig * v1.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_HF_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0); + printf("[ARCH_QF32_HF_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1); +#endif + size4s_t result_0, result_1; + size8s_t result; + result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0); + result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0); + + if(u0.sign ^ v0.sign) + result_0 = negate32(result_0); + + if(u1.sign ^ v1.sign) + result_1 = negate32(result_1); + + result = ((size8s_t)result_1 <<32) | (result_0 & 0xFFFFFFFF); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_HF_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result); +#endif + + return result; +} + +//QF32 output out of mix of QF16 and HF muls +size8s_t mpy_qf32_mix_hf(size4s_t in_a, size4s_t in_b ) { + + double sig_0, sig_1; + int exp_0, exp_1; + + unfloat u0,u1,v0,v1; + + u0 = parse_qf16((in_a & 0xFFFF)); + u1 = parse_qf16(((in_a>>16) & 0xFFFF)); + v0 = parse_hf((in_b & 0xFFFF)); + v1 = parse_hf(((in_b>>16) & 0xFFFF)); + + //Unbiased: after removing bias + exp_0 = u0.exp + v0.exp; + exp_1 = u1.exp + v1.exp; + sig_0 = u0.sig * v0.sig; + sig_1 = u1.sig * v1.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_mix_hf_parse]u0.exp:%d, u0.sig:%10.20f, v0.exp:%d, v0.sig:%10.20f, sig_0:%10.20f exp_0:%d\n", u0.exp, u0.sig, v0.exp, v0.sig, sig_0, exp_0); + printf("[ARCH_QF32_mix_hf_parse]u1.exp:%d, u1.sig:%10.20f, v1.exp:%d, v1.sig:%10.20f, sig_1:%10.20f exp_1:%d\n", u1.exp, u1.sig, v1.exp, v1.sig, sig_1, exp_1); +#endif + + size4s_t result_0, result_1; + size8s_t result; + result_0 = rnd_sat_qf32(exp_0, sig_0, 0.0); + result_1 = rnd_sat_qf32(exp_1, sig_1, 0.0); + + if(v0.sign) + result_0 = negate32(result_0); + if(v1.sign) + result_1 = negate32(result_1); + + result = ((size8s_t)result_1 <<32) | (result_0 & 0xFFFFFFFF); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF32_mix_hf_norm]result_1:%x, result_0:%x, result:%llx\n",result_1, result_0, result); +#endif + + return result; +} + +/* VMPY_QF16 */ +//ITERATOR_INSN_MPY_SLOT(16,vmpy_qf16,"Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)", +//"Vector multiply of qf16 format", +size2s_t mpy_qf16(size2s_t in_a, size2s_t in_b ) { + size1s_t exp; + double sig; + + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf16(in_a); + b = parse_qf16(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_QF16_parse] a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n", a.exp, a.sig, b.exp, b.sig, sig, exp); +#endif + + size2s_t result; + result = rnd_sat_qf16(exp, sig, 0.0); + + return result; +} + +size2s_t mpy_qf16_hf(size2s_t in_a, size2s_t in_b ) { + int sign; + size2s_t exp; + double 
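+/*
+ * (Sign handling differs between the two parse families: parse_hf() and
+ * parse_sf() return a magnitude-only .sig with the sign kept in .sign,
+ * while parse_qf16()/parse_qf32() fold the sign into the signed
+ * significand. Hence the IEEE-input multiplies, like this one, compute
+ * sign = a.sign ^ b.sign and apply it after rounding; conceptually:
+ *
+ *     r = rnd_sat_qf16(a.exp + b.exp, a.sig * b.sig, 0.0);
+ *     if (a.sign ^ b.sign) r = negate16(r);
+ * )
+ */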
sig; + + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_hf(in_a); + b = parse_hf(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + sign = a.sign^b.sign; + + size2s_t result; + result = rnd_sat_qf16(exp, sig, 0.0); + if(sign) result = negate16(result); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_HF_parse]a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.exp, a.sig, b.exp, b.sig, sig, exp); +#endif + + return result; +} + +size2s_t mpy_qf16_mix_hf(size2s_t in_a, size2s_t in_b ) { + size2s_t exp; + double sig; + unfloat a, b; + + //Get double precision significands and unbiased exp + a = parse_qf16(in_a); + b = parse_hf(in_b); + + //Unbiased: after removing bias + exp = a.exp + b.exp; + sig = a.sig * b.sig; + + size2s_t result; + result = rnd_sat_qf16(exp, sig, 0.0); + if(b.sign) result = negate16(result); +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_HF_parse]a.exp:%d, a.sig:%10.20f, b.exp:%d, b.sig:%10.20f, sig:%10.20f exp:%d\n",a.exp, a.sig, b.exp, b.sig, sig, exp); +#endif + + return result; +} + +size4s_t add_qf32(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_qf32(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf32] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf32] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} + + +size4s_t add_sf(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_sf(in_a); + b = parse_sf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else if(a.sign==0 && b.sign==1) + { + sig_ab = sig_a - sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else// if(a.sign==1 && b.sign==0) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + + size4s_t result; + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + if((a.sign==1) && (b.sign== 1)) + result = negate32(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_sf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_sf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low); + printf("[ARCH_add_sf] result:%x \n\n", result); +#endif + + + return result; +} + +size4s_t add_qf32_mix(size4s_t in_a, size4s_t in_b ) { + int exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_sf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? 
(-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf32_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf32_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} + +size4s_t sub_qf32(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_qf32(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf32] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf32] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); + printf("[ARCH_sub_qf32] a:%10.30f, a_adj:%10.30f, fabs(sig_b):%f\n", ldexp(a.sig, a.exp), ldexp(sig_a, exp_ab), fabs(sig_b)); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} + +size4s_t sub_sf(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_sf(in_a); + b = parse_sf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else if(a.sign ^ b.sign) + { + sig_ab = sig_a + sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else// if(a.sign && b.sign) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_sf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_sf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + if((a.sign==1) && (b.sign==0)) + result = negate32(result); + + return result; +} + +size4s_t sub_qf32_mix(size4s_t in_a, size4s_t in_b ) { + size2s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf32(in_a); + b = parse_sf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_SF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_ab-sig_a)-sig_b) : ((sig_ab-sig_b)-sig_a); + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab)); + sig_low = (a.exp>b.exp) ? 
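+/*
+ * (This sig_low term is the classic compensated, Dekker/Knuth two-sum,
+ * error word: given the rounded result sig_ab, re-subtracting recovers
+ * exactly the low-order bits the addition or subtraction discarded, and
+ * rnd_sat_qf32() folds them into its final rounding decision. A
+ * standalone sketch of the idea, assuming |a| >= |b| as the exponent
+ * test here arranges:
+ *
+ *     double s   = a + b;        // rounded sum
+ *     double err = (a - s) + b;  // exactly what rounding discarded
+ * )
+ */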
((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf32_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf32_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size4s_t result; + + result = rnd_sat_qf32(exp_ab, sig_ab, sig_low); + + return result; +} +//add_qf16 +size2s_t add_qf16(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_qf16(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? (-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf16] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf16] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + +size2s_t add_hf(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_hf(in_a); + b = parse_hf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else if(a.sign==0 && b.sign==1) + { + sig_ab = sig_a - sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else// if(a.sign==1 && b.sign==0) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + if((a.sign==1) && (b.sign== 1)) + result = negate16(result); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_hf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_hf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_b-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_b-sig_ab,sig_low); + printf("[ARCH_add_sf] result:%x \n\n", result); +#endif + + + return result; +} + +size2s_t add_qf16_mix(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_hf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + //sig_low = (b.sign)? 
(-1.0*epsilon): epsilon; + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_add_qf16_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_add_qf16_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + +size2s_t sub_qf16(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_qf16(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab)); + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf16] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf16] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); + printf("[ARCH_sub_qf32] a:%10.30f, a_adj:%10.30f, fabs(sig_b):%f\n", ldexp(a.sig, a.exp), ldexp(sig_a, exp_ab), fabs(sig_b)); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + + +size2s_t sub_hf(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_hf(in_a); + b = parse_hf(in_b); + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + } + else if(a.sign ^ b.sign) + { + sig_ab = sig_a + sig_b; + sig_low = (a.exp>b.exp) ? ((sig_a-sig_ab)+sig_b) : ((sig_b-sig_ab)+sig_a); + } + else// if(a.sign && b.sign) + { + sig_ab = sig_b - sig_a; + sig_low = (b.exp>a.exp) ? ((sig_b-sig_ab)-sig_a) : (sig_b -(sig_a+sig_ab)); + } + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_hf] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_hf] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.30f, sig_ab:%1.30f, sig_ab-sig_a:%1.30f, sig_low:%1.30f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_ab-sig_a,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + if((a.sign==1) && (b.sign==0)) + result = negate16(result); + + return result; +} + +size2s_t sub_qf16_mix(size2s_t in_a, size2s_t in_b ) { + size1s_t exp_ab; + + unfloat a, b; + + //Get double precision significands + a = parse_qf16(in_a); + b = parse_hf(in_b); + + if(b.sign) b.sig = (-1.0)*b.sig; + + if(a.exp>b.exp){ + exp_ab = a.exp+((a.sig==0.0)? (-(FRAC_HF+1)):ilogb(a.sig)); + if(exp_abb.exp) ? ((sig_ab-sig_a)-sig_b) : ((sig_ab-sig_b)-sig_a); + //sig_low = (a.exp>b.exp) ? ((sig_ab-sig_a)+sig_b) : (sig_a-(sig_b+sig_ab)); + sig_low = (a.exp>b.exp) ? 
((sig_a-sig_ab)-sig_b) : (sig_a -(sig_b+sig_ab)); + +#ifdef DEBUG_MMVEC_QF + printf("[ARCH_sub_qf16_mix] a.exp:%d, b.exp:%d, exp_ab:%d, ilogb(a.sig):%d, ilogb(b.sig):%d\n", a.exp,b.exp,exp_ab, ilogb(a.sig), ilogb(b.sig)); + printf("[ARCH_sub_qf16_mix] a.sig:%10.30f, b.sig:%10.30f, sig_a:%10.30f, sig_b:%1.128f, sig_ab:%1.128f, sig_a-sig_ab:%1.128f, sig_low:%1.128f\n", a.sig, b.sig, sig_a, sig_b, sig_ab, sig_a-sig_ab,sig_low); +#endif + + size2s_t result; + + result = rnd_sat_qf16(exp_ab, sig_ab, sig_low); + + return result; +} + +//FP conversion QF32 to IEEE SF +size4s_t conv_sf_qf32(size4s_t a) +{ + + size4s_t result; + unfloat u = parse_qf32(a); + + result = rnd_sat_sf(u.exp, u.sig); + +#ifdef DEBUG_MMVEC_QF + double final = ldexp(u.sig, u.exp); + printf("[SF_parse_conv_sf_qf32] u.sig:%lf, u.exp:%d, ldexp:%10.20f \n",u.sig, u.exp, final); +#endif + + return result; +} + +//FP conversion W to IEEE SF +size4s_t conv_sf_w(size4s_t a) +{ + + size4s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)a/scalbn(1.0, exp); + } + result = rnd_sat_sf(exp, sig); + +#ifdef DEBUG_MMVEC_QF + double final = ldexp(sig, exp); + printf("[SF_parse_conv_sf_w] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final); +#endif + + return result; +} + +//FP conversion UW to IEEE SF +size4s_t conv_sf_uw(size4u_t a) +{ + + size4s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)(unsigned)a/scalbn(1.0, exp); + } + result = rnd_sat_sf(exp, sig); + +//#ifdef DEBUG_MMVEC_QF +// double final = ldexp(sig, exp); +// printf("[SF_parse_conv_sf_uw] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final); +//#endif + + return result; +} + +//FP conversion QF16 to IEEE HF +size2s_t conv_hf_qf16(size2s_t a) +{ + + size2s_t result; + unfloat u = parse_qf16(a); + + result = rnd_sat_hf(u.exp, u.sig); + +//#ifdef DEBUG_MMVEC_QF +// double final = ldexp(u.sig, u.exp); +// printf("[HF_parse_conv_hf_qf16] u.sig:%lf, u.exp:%d, ldexp:%10.20f \n",u.sig, u.exp, final); +//#endif + + return result; +} + +//FP conversion H to IEEE HF +size2s_t conv_hf_h(size2s_t a) +{ + size2s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)a/scalbn(1.0, exp); + } + result = rnd_sat_hf(exp, sig); + +#ifdef DEBUG_MMVEC_QF + double final = ldexp(sig, exp); + double f_rint = rint(final); + printf("[HF_parse_conv_hf_h] sig:%lf, exp:%d, ldexp:%10.20f, rint:%lf \n",sig, exp, final, f_rint); +#endif + return result; +} + +//FP conversion UH to IEEE HF +size2s_t conv_hf_uh(size2u_t a) +{ + + size2s_t result; + int exp=0; + double sig=0.0; + if(a !=0) + { + exp = ilogb(a); + sig = (double)(unsigned)a/scalbn(1.0, exp); + } + result = rnd_sat_hf(exp, sig); + +//#ifdef DEBUG_MMVEC_QF +// double final = ldexp(sig, exp); +// printf("[SF_parse_conv_hf_uh] sig:%lf, exp:%d, ldexp:%10.20f \n",sig, exp, final); +//#endif + + return result; +} + +//FP conversion two QF32 to two QF16 +size4s_t conv_hf_qf32(size8s_t a) +{ + + size2s_t result0, result1; + size4s_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + unfloat u0 = parse_qf32(a0); + unfloat u1 = parse_qf32(a1); + + result0 = rnd_sat_hf(u0.exp, u0.sig); + result1 = rnd_sat_hf(u1.exp, u1.sig); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); + +/* +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[HF_parse_conv_hf_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + 
printf("[HF_parse_conv_hf_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif +*/ + + return result; +} + +//FP conversion two W to two IEEE HF +size4s_t conv_hf_w(size8s_t a) +{ + size2s_t result0, result1; + size4s_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + int exp0=0, exp1=0; + double sig0=0.0, sig1=0.0; + if(a0 !=0) + { + exp0 = ilogb(a0); + sig0 = (double)a0/scalbn(1.0, exp0); + } + if(a1 !=0) + { + exp1 = ilogb(a1); + sig1 = (double)a1/scalbn(1.0, exp1); + } + result0 = rnd_sat_hf(exp0, sig0); + result1 = rnd_sat_hf(exp1, sig1); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); + +/* +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(sig0, exp0); + double final1 = ldexp(sig1, exp1); + + printf("[HF_parse_conv_hf_w] sig0:%lf, exp0:%d, ldexp0:%10.20f \n",sig0, exp0, final0); + printf("[HF_parse_conv_hf_w] sig1:%lf, exp1:%d, ldexp1:%10.20f \n",sig1, exp1, final1); +#endif +*/ + return result; +} + +//FP conversion two UW to two IEEE HF +size4s_t conv_hf_uw(size8u_t a) +{ + size2s_t result0, result1; + size4s_t result; + size4u_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + int exp0=0, exp1=0; + double sig0=0.0, sig1=0.0; + if(a0 !=0) + { + exp0 = ilogb(a0); + sig0 = (double)(unsigned)a0/scalbn(1.0, exp0); + } + if(a1 !=0) + { + exp1 = ilogb(a1); + sig1 = (double)(unsigned)a1/scalbn(1.0, exp1); + } + result0 = rnd_sat_hf(exp0, sig0); + result1 = rnd_sat_hf(exp1, sig1); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); +/* +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(sig0, exp0); + double final1 = ldexp(sig1, exp1); + + printf("[HF_parse_conv_hf_uw] sig0:%lf, exp0:%d, ldexp0:%10.20f \n",sig0, exp0, final0); + printf("[HF_parse_conv_hf_uw] sig1:%lf, exp1:%d, ldexp1:%10.20f \n",sig1, exp1, final1); +#endif +*/ + return result; +} + +size4s_t rnd_sat_w(int exp, double sig) +{ + size4s_t result=0; + size4s_t W_MAX = 0x7fffffff; + size4s_t W_MIN = 0x80000000; + + int sign = (sig>=0.0)? 0: 1; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(exp > 30) + { + result = (sign)? W_MIN:W_MAX; + result = (sign <<31) | result; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(sign==0) + { + if(R3<=0.5) + result = (size4s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size4s_t) round(R1); + else if(R3>=1.5) + result = (size4s_t) R1+1; + } + else + result = (size4s_t)round(R1); + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_w_qf32] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size4u_t rnd_sat_uw(int exp, double sig) +{ + size4u_t result=0; + size4u_t W_MAX = 0xffffffff; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(sig<0.0) + result = 0; + else if(exp > 31) + { + result = W_MAX; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(R3<=0.5) + result = (size4s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size4s_t) round(R1); + else if(R3>=1.5) + result = (size4s_t) R1+1; + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_uw_qf32] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size2s_t rnd_sat_h(int exp, double sig) +{ + size2s_t result=0; + size2s_t W_MAX = 0x7fff; + size2s_t W_MIN = 0x8000; + + int sign = (sig>=0.0)? 0: 1; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(exp > 14) + { + result = (sign)? 
W_MIN:W_MAX; + result = (sign <<15) | result; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(sign==0) + { + if(R3<=0.5) + result = (size2s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size2s_t) round(R1); + else if(R3>=1.5) + result = (size2s_t) R1+1; + } + else + { + if(R3<=0.5 && R3 !=0.0) + result = (size2s_t)R1 -1; + else if(R3>0.5 && R3<1.5) + result = (size2s_t)round(R1); + else// if(R3>=1.5) + result = (size2s_t)R1; + } + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_h_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size2u_t rnd_sat_uh(int exp, double sig) +{ + size2u_t result=0; + size2u_t W_MAX = 0xffff; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(sig<0.0) + result = 0; + else if(exp > 15) + { + result = W_MAX; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(R3<=0.5) + result = (size2s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size2s_t) round(R1); + else if(R3>=1.5) + result = (size2s_t) R1+1; + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_uh_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size1s_t rnd_sat_b(int exp, double sig) +{ + size1s_t result=0; + size1s_t W_MAX = 0x7f; + size1s_t W_MIN = 0x80; + + int sign = (sig>=0.0)? 0: 1; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(exp > 6) + { + result = (sign)? W_MIN:W_MAX; + result = (sign <<7) | result; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + if(sign==0) + { + if(R3<=0.5) + result = (size1s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size1s_t) round(R1); + else if(R3>=1.5) + result = (size1s_t) R1+1; + } + else + { + if(R3<=0.5 && R3 !=0.0) + result = (size1s_t)R1 -1; + else if(R3>0.5 && R3<1.5) + result = (size1s_t)round(R1); + else// if(R3>=1.5) + result = (size1s_t)R1; + } + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_b_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +size1u_t rnd_sat_ub(int exp, double sig) +{ + size1u_t result=0; + size1u_t W_MAX = 0xff; + + double R1=0.0; + double R2=0.0; + double R3=0.0; + if(sig<0.0) + result = 0; + else if(exp > 7) + { + result = W_MAX; + } + else + { + R1 = ldexp(sig, exp); + R2 = floor(R1/2.0)*2; + R3 = R1 - R2; + + if(R3<=0.5) + result = (size1s_t) R1; + else if(R3>0.5 && R3<1.5) + result = (size1s_t) round(R1); + else if(R3>=1.5) + result = (size1s_t) R1+1; + } + +#ifdef DEBUG_MMVEC_QF + printf("[RND_conv_ub_qf16] sig:%lf, exp:%d, R1:%10.20f, R2:%10.20f, R3:%10.20f, result:%x(%d)\n",sig, exp, R1, R2, R3, result, result); +#endif + + return result; +} + +//FP conversion QF32 to 32bit W +size4s_t conv_w_qf32(size4s_t a) +{ + + size4s_t result; + unfloat u = parse_qf32(a); + + result = rnd_sat_w(u.exp, u.sig); + + return result; +} + +size4s_t conv_w_sf(size4s_t op1) +{ + sf_union input; + size4s_t W_MAX = 0x7fffffff; + size4s_t W_MIN = 0x80000000; + input.i = op1; + size4s_t result; + + if(isNaNF32(op1) || isInfF32(op1) || (input.f >= (float)W_MAX) || (input.f <= (float)W_MIN)) + { + if(input.x.sign == 1){ + result = W_MIN; + } + else{ + result = W_MAX; + } + } + else{ + //convert and round to the zero + result = (int)input.f; + } + +#ifdef DEBUG_MMVEC_QF + printf("Debug : result =0x%08x\n",result); +#endif + return result; +} + +size2s_t 
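+/*
+ * (Behaviour of conv_w_sf above, with illustrative inputs of ours:
+ *
+ *     conv_w_sf(0x3FC00000)   // 1.5f  -> 1           (truncates toward zero)
+ *     conv_w_sf(0x4F000000)   // 2^31  -> 0x7fffffff  (saturates)
+ *     conv_w_sf(0xFF800000)   // -inf  -> 0x80000000  (saturates by sign)
+ *
+ * conv_h_hf below first widens fp16 to fp32 bit-wise, where the +0x1c000
+ * term is the exponent re-bias (127 - 15) << 10, then applies the same
+ * truncate-and-saturate pattern against 16-bit bounds.)
+ */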
conv_h_hf(size2s_t op1) +{ + sf_union input; + size4s_t op1_ext = op1; + size2s_t HW_MAX = 0x7fff; + size2s_t HW_MIN = 0x8000; + input.i = ((op1_ext & 0x8000) << 16) + (((op1_ext & 0x7c00) + 0x1c000) << 13) + ((op1_ext & 0x03ff) << 13); //grabbing sign, exp, and significand and ocnverting to sf32 format + size2s_t result; + + if(isNaNF16(op1) || isInfF16(op1) || (input.f >= (float)HW_MAX) || (input.f <= (float)HW_MIN)) + { + if(input.x.sign == 1){ + result = HW_MIN; + } + else{ + result = HW_MAX; + } + } + else{ + //convert and round to the zero + result = (short)input.f; + } + +#ifdef DEBUG_MMVEC_QF + printf("Debug : result =0x%08x\n",result); +#endif + return result; +} + +//FP conversion QF32 to 32bit UW +size4u_t conv_uw_qf32(size4s_t a) +{ + + size4u_t result; + unfloat u = parse_qf32(a); + + result = rnd_sat_uw(u.exp, u.sig); + + return result; +} + +//FP conversion QF16 to 16bit H +size2s_t conv_h_qf16(size2s_t a) +{ + + size2s_t result; + unfloat u = parse_qf16(a); + + result = rnd_sat_h(u.exp, u.sig); + + return result; +} + +//FP conversion QF32 to 32bit UW +size2u_t conv_uh_qf16(size2s_t a) +{ + + size2u_t result; + unfloat u = parse_qf16(a); + + result = rnd_sat_uh(u.exp, u.sig); + + return result; +} + +//FP conversion double QF32 to double H +size4s_t conv_h_qf32(size8s_t a) +{ + size2s_t result0, result1; + size4s_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + unfloat u0 = parse_qf32(a0); + unfloat u1 = parse_qf32(a1); + + result0 = rnd_sat_h(u0.exp, u0.sig); + result1 = rnd_sat_h(u1.exp, u1.sig); + + result = ((size4s_t)result1 << 16) | (result0 & 0xFFFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[H_parse_conv_h_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[H_parse_conv_h_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//FP conversion QF32 to 32bit UW +size4u_t conv_uh_qf32(size8s_t a) +{ + size2u_t result0, result1; + size4u_t result; + size4s_t a0, a1; + a0 = a & 0xFFFFFFFF; + a1 = (a>>32) & 0xFFFFFFFF; + + unfloat u0 = parse_qf32(a0); + unfloat u1 = parse_qf32(a1); + + result0 = rnd_sat_uh(u0.exp, u0.sig); + result1 = rnd_sat_uh(u1.exp, u1.sig); + + result = ((size4u_t)result1 << 16) | (result0 & 0xFFFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[UH_parse_conv_uh_qf32] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[UH_parse_conv_uh_qf32] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//FP conversion double QF16 to double B +size2s_t conv_b_qf16(size4s_t a) +{ + size1s_t result0, result1; + size2s_t result; + size2s_t a0, a1; + a0 = a & 0xFFFF; + a1 = (a>>16) & 0xFFFF; + + unfloat u0 = parse_qf16(a0); + unfloat u1 = parse_qf16(a1); + + result0 = rnd_sat_b(u0.exp, u0.sig); + result1 = rnd_sat_b(u1.exp, u1.sig); + + result = ((size2s_t)result1 << 8) | (result0 & 0xFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[B_parse_conv_b_qf16] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[B_parse_conv_b_qf16] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//FP conversion QF32 to 32bit UW +size2u_t conv_ub_qf16(size4s_t a) +{ + size1u_t result0, result1; + 
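+/*
+ * The paired converters in this file share one lane recipe: split the
+ * wide input into halves (a & 0xFFFF and a >> 16 here, or the two 32-bit
+ * halves of a 64-bit pair), convert each lane independently, then repack
+ * with shifts and masks. conv_ub_qf16 maps two qf16 lanes to two
+ * saturated unsigned bytes:
+ *
+ *     out = (ub(hi lane) << 8) | ub(lo lane)
+ */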
size2u_t result; + size2s_t a0, a1; + a0 = a & 0xFFFF; + a1 = (a>>16) & 0xFFFF; + + unfloat u0 = parse_qf16(a0); + unfloat u1 = parse_qf16(a1); + + result0 = rnd_sat_ub(u0.exp, u0.sig); + result1 = rnd_sat_ub(u1.exp, u1.sig); + + result = ((size2u_t)result1 << 8) | (result0 & 0xFF); + +#ifdef DEBUG_MMVEC_QF + double final0 = ldexp(u0.sig, u0.exp); + double final1 = ldexp(u1.sig, u1.exp); + + printf("[UB_parse_conv_ub_qf16] u0.sig:%lf, u0.exp:%d, ldexp0:%10.20f \n",u0.sig, u0.exp, final0); + printf("[UB_parse_conv_ub_qf16] u1.sig:%lf, u1.exp:%d, ldexp1:%10.20f \n",u1.sig, u1.exp, final1); +#endif + + return result; +} + +//Neg/Abs +size4s_t neg_qf32(size4s_t a) +{ + size4s_t result; + result = negate32(a); + return result; +} +size4s_t abs_qf32(size4s_t a) +{ + size4s_t result; + if((a>>31) & 1) + result = negate32(a); + else + result = a; + return result; +} +size2s_t neg_qf16(size2s_t a) +{ + size2s_t result; + result = negate16(a); + return result; +} +size2s_t abs_qf16(size2s_t a) +{ + size2s_t result; + if((a>>15) & 1) + result = negate16(a); + else + result = a; + return result; +} +size4s_t neg_sf(size4s_t a) +{ + size4s_t result; + result = negate_sf(a); + return result; +} +size4s_t abs_sf(size4s_t a) +{ + size4s_t result; + if((a>>31) & 1) + result = negate_sf(a); + else + result = a; + return result; +} +size2s_t neg_hf(size2s_t a) +{ + size2s_t result; + result = negate_hf(a); + return result; +} +size2s_t abs_hf(size2s_t a) +{ + size2s_t result; + if((a>>15) & 1) + result = negate_hf(a); + else + result = a; + return result; +} + +//FP Compare +int cmpgt_fp(unfloat a, unfloat b) +{ + int result=0; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + + //Filter out +0/-0 by checking the sign + if(a_d > b_d) + result=1; + +#ifdef DEBUG_MMVEC_QF + printf("[CMPGT]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + + return result; +} + +int cmpgt_qf32(size4s_t in_a, size4s_t in_b) +{ + unfloat a, b; + a= parse_qf32(in_a); + b= parse_qf32(in_b); + + int result=0; + + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_qf16(size2s_t in_a, size2s_t in_b) +{ + + unfloat a, b; + a= parse_qf16(in_a); + b= parse_qf16(in_b); + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_sf(size4s_t in_a, size4s_t in_b) +{ + + unfloat a, b; + a= parse_sf(in_a); + b= parse_sf(in_b); + + if(a.sign) + a.sig = (-1.0)*a.sig; + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_hf(size2s_t in_a, size2s_t in_b) +{ + + unfloat a, b; + a= parse_hf(in_a); + b= parse_hf(in_b); + + if(a.sign) + a.sig = (-1.0)*a.sig; + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_qf32_sf(size4s_t in_a, size4s_t in_b) +{ + unfloat a = parse_qf32(in_a); + unfloat b = parse_sf(in_b); + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + + return result; +} + +int cmpgt_qf16_hf(size2s_t in_a, size2s_t in_b) +{ + unfloat a = parse_qf16(in_a); + unfloat b = parse_hf(in_b); + if(b.sign) + b.sig = (-1.0)*b.sig; + + int result=0; + result = cmpgt_fp(a,b); + return result; +} +//max/min + //if a==b, a is returned +size4s_t max_qf32( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32( in_b, in_a) ? in_b : in_a; } +size2s_t max_qf16( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16( in_b, in_a) ? 
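+/*
+ * (Tie semantics: cmpgt_*() is a strict greater-than, so equal operands
+ * make max/min return in_a, matching the "if a==b, a is returned" note
+ * above. For the IEEE variants that rule alone would let
+ * max_sf(-0.0f, +0.0f) return -0.0f, because the ldexp-based compare
+ * treats the two zeros as equal; the is_check_zero_* special cases below
+ * exist so max prefers +0 and min prefers -0, e.g.
+ * max_sf(0x80000000, 0x00000000) == 0x00000000.)
+ */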
in_b : in_a; } + + + +size4s_t is_check_zero_sf(size4s_t in_a); +size4s_t is_check_zero_sf(size4s_t in_a) { + return (in_a == 0) || ((in_a & 0xFFFFFFFF) == 0x80000000); +} +size2s_t is_check_zero_hf(size2s_t in_a); +size2s_t is_check_zero_hf(size2s_t in_a) { + return (in_a == 0) || ((in_a & 0xFFFF) == 0x8000); +} + +size4s_t max_sf( size4s_t in_a, size4s_t in_b) { + if (is_check_zero_sf(in_a) && is_check_zero_sf(in_b) ) { + return (in_a == 0) ? in_a : in_b; // Return in_a if it's positive 0, otherwise return the other one + } + return cmpgt_sf( in_b, in_a) ? in_b : in_a; + +} +size2s_t max_hf( size2s_t in_a, size2s_t in_b) +{ + if (is_check_zero_hf(in_a) && is_check_zero_hf(in_b) ) { + return (in_a == 0) ? in_a : in_b; + } + return cmpgt_hf( in_b, in_a) ? in_b : in_a; +} + + +//size2s_t max_qf16_hf( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16_hf( in_b, in_a) ? in_b : in_a; } +//size4s_t max_qf32_sf( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32_sf( in_b, in_a) ? in_b : in_a; } + +size4s_t min_qf32( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32( in_a, in_b) ? in_b : in_a; } +size2s_t min_qf16( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16( in_a, in_b) ? in_b : in_a; } + +size4s_t min_sf( size4s_t in_a, size4s_t in_b) { + if (is_check_zero_sf(in_a) && is_check_zero_sf(in_b) ) { + return (in_a == 0) ? in_b : in_a; + } + return cmpgt_sf( in_a, in_b) ? in_b : in_a; +} +size2s_t min_hf( size2s_t in_a, size2s_t in_b) { + if (is_check_zero_hf(in_a) && is_check_zero_hf(in_b) ) { + return (in_a == 0) ? in_b : in_a; + } + return cmpgt_hf( in_a, in_b) ? in_b : in_a; +} +//size2s_t min_qf16_hf( size2s_t in_a, size2s_t in_b) { return cmpgt_qf16_hf( in_a, in_b) ? in_b : in_a; } +//size4s_t min_qf32_sf( size4s_t in_a, size4s_t in_b) { return cmpgt_qf32_sf( in_a, in_b) ? 
in_b : in_a; } + + +size4s_t max_qf32_sf(size4s_t in_a, size4s_t in_b) +{ + size4s_t result=0; + unfloat a,b; + a= parse_qf32(in_a); + b= parse_sf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + + if(a_d >= b_d) + result = in_a; + else + result = in_b; + +#ifdef DEBUG_MMVEC_QF + printf("[max_qf32_sf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + + return result; +} +size4s_t min_qf32_sf(size4s_t in_a, size4s_t in_b) +{ + size4s_t result=0; + unfloat a,b; + a= parse_qf32(in_a); + b= parse_sf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + if(a_d <= b_d) + result = in_a; + else + result = in_b; +#ifdef DEBUG_MMVEC_QF + printf("[min_qf32_sf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + return result; +} + +size2s_t max_qf16_hf(size2s_t in_a, size2s_t in_b) +{ + size2s_t result=0; + unfloat a,b; + a= parse_qf16(in_a); + b= parse_hf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + if(a_d >= b_d) + result = in_a; + else + result = in_b; +#ifdef DEBUG_MMVEC_QF + printf("[max_qf16_hf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + return result; +} +size2s_t min_qf16_hf(size2s_t in_a, size2s_t in_b) +{ + size2s_t result=0; + unfloat a,b; + a= parse_qf16(in_a); + b= parse_hf(in_b); + if(b.sign) + b.sig = (-1)*b.sig; + double a_d, b_d; + a_d = ldexp(a.sig, a.exp); + b_d = ldexp(b.sig, b.exp); + if(a_d <= b_d) + result = in_a; + else + result = in_b; +#ifdef DEBUG_MMVEC_QF + printf("[min_qf16_hf]a:%10.30f, b:%10.30f\n",a_d, b_d); +#endif + return result; +} diff --git a/target/hexagon/mmvec/mmvec_qfloat.h b/target/hexagon/mmvec/mmvec_qfloat.h new file mode 100644 index 000000000000..dc15cd17408b --- /dev/null +++ b/target/hexagon/mmvec/mmvec_qfloat.h @@ -0,0 +1,199 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef MMVEC_QFLOAT_H +#define MMVEC_QFLOAT_H 1 + +#define HF_MAX 131008 //pow(2,17)-pow(2,6) =(2-1.0/pow(2,10))*pow(2,16) +#define HF_MIN 1.0/pow(2,24) +#define SF_MAX pow(2,129)-pow(2,105) //(2-1.0/pow(2,23))*pow(2,128) +#define SF_MIN 1.0/pow(2,149) + +#define E_MAX_QF32 128 +#define E_MIN_QF32 -127 +#define E_MAX_QF16 16 +#define E_MIN_QF16 -15 +#define E_MAX_SF 128 +#define E_MIN_SF -126 +#define E_MAX_HF 16 +#define E_MIN_HF -14 +#define BIAS_QF32 127 +#define BIAS_QF16 15 +#define BIAS_DF 1023 +#define BIAS_SF 127 +#define BIAS_HF 15 +#define FRAC_HF 10 +#define FRAC_SF 23 +#define isNaNF32( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF)) +#define isInfF32( a ) (((~(a) & 0x7F800000) == 0) && (((a) & 0x007FFFFF) == 0)) +#define isNaNF16( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) +#define isInfF16( a ) (((~(a) & 0x7C00) == 0) && (((a) & 0x03FF) == 0)) + +//#define MIN(X, Y) (((X) < (Y)) ? 
(X) : (Y)) +//#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) + +#include "cpu.h" +#include "hex_arch_types.h" + +#define epsilon 1.0/pow(2,23) +#define units 1.0*pow(2,23) +#define epsilon_hf 1.0/pow(2,10) +#define units_hf 1.0*pow(2,10) + +typedef struct{ + int sign; + int exp; + double sig; +} unfloat; //Un-Normalized Float + +typedef struct{ + int sign; + int sig; + int exp; +} qf_t; + +typedef struct{ + int32_t sig : 24; + uint32_t exp : 8; +} qf32_t; + +typedef struct{ + int32_t sig : 11; + uint32_t exp : 5; +} qf16_t; + +typedef enum float_type{ + QF32, + QF16, + SF, + HF +} f_type; + +typedef union { + float f; + size4u_t i; + struct { + size4u_t mant:23; + size4u_t exp:8; + size4u_t sign:1; + } x; +} sf_union; + +//MPY +size4s_t mpy_qf32(size4s_t a, size4s_t b); +size4s_t mpy_qf32_sf(size4s_t a, size4s_t b); +size4s_t mpy_qf32_mix_sf(size4s_t a, size4s_t b); +size2s_t mpy_qf16(size2s_t a, size2s_t b); +size2s_t mpy_qf16_hf(size2s_t a, size2s_t b); +size2s_t mpy_qf16_mix_hf(size2s_t a, size2s_t b); +size8s_t mpy_qf32_qf16(size4s_t a, size4s_t b); +size8s_t mpy_qf32_hf(size4s_t a, size4s_t b); +size8s_t mpy_qf32_mix_hf(size4s_t a, size4s_t b); + +unfloat parse_qf32(size4s_t a); +unfloat parse_qf16(size2s_t a); +unfloat parse_sf(size4s_t a); +unfloat parse_hf(size2s_t a); +size4s_t rnd_sat_qf32(int exp, double sig, double sig_low); +size2s_t rnd_sat_qf16(int exp, double sig, double sig_low); +size4s_t rnd_sat_sf(int exp, double sig); +size2s_t rnd_sat_hf(int exp, double sig); +size4s_t rnd_sat_w(int exp, double sig); +size4u_t rnd_sat_uw(int exp, double sig); +size2s_t rnd_sat_h(int exp, double sig); +size2u_t rnd_sat_uh(int exp, double sig); +size1s_t rnd_sat_b(int exp, double sig); +size1u_t rnd_sat_ub(int exp, double sig); +size4s_t negate32(size4s_t); +size2s_t negate16(size2s_t); +size4s_t negate_sf(size4s_t); +size2s_t negate_hf(size2s_t); + +//ADD +size4s_t add_qf32(size4s_t a, size4s_t b); +size4s_t add_sf(size4s_t a, size4s_t b); +size4s_t add_qf32_mix(size4s_t a, size4s_t b); +size2s_t add_qf16(size2s_t a, size2s_t b); +size2s_t add_hf(size2s_t a, size2s_t b); +size2s_t add_qf16_mix(size2s_t a, size2s_t b); + +//SUB +size4s_t sub_qf32(size4s_t a, size4s_t b); +size4s_t sub_sf(size4s_t a, size4s_t b); +size4s_t sub_qf32_mix(size4s_t a, size4s_t b); +size2s_t sub_qf16(size2s_t a, size2s_t b); +size2s_t sub_hf(size2s_t a, size2s_t b); +size2s_t sub_qf16_mix(size2s_t a, size2s_t b); + +//Convert +size4s_t conv_sf_qf32(size4s_t a); +size4s_t conv_sf_w(size4s_t a); +size4s_t conv_sf_uw(size4u_t a); +size2s_t conv_hf_qf16(size2s_t a); +size2s_t conv_hf_h(size2s_t a); +size2s_t conv_hf_uh(size2u_t a); +size4s_t conv_hf_qf32(size8s_t a); +size4s_t conv_hf_w(size8s_t a); +size4s_t conv_hf_uw(size8u_t a); + +size4s_t conv_w_qf32(size4s_t a); +size4u_t conv_uw_qf32(size4s_t a); +size2s_t conv_h_qf16(size2s_t a); +size2u_t conv_uh_qf16(size2s_t a); +size4s_t conv_h_qf32(size8s_t a); +size4u_t conv_uh_qf32(size8s_t a); +size2s_t conv_b_qf16(size4s_t a); +size2u_t conv_ub_qf16(size4s_t a); + +size4s_t conv_w_sf(size4s_t a); +// size4u_t conv_uw_sf(size4s_t a); +size2s_t conv_h_hf(size2s_t a); +// size2u_t conv_uh_sf(size2s_t a); + +//Neg/Abs +size4s_t neg_qf32(size4s_t a); +size4s_t abs_qf32(size4s_t a); +size2s_t neg_qf16(size2s_t a); +size2s_t abs_qf16(size2s_t a); +size4s_t neg_sf(size4s_t a); +size4s_t abs_sf(size4s_t a); +size2s_t neg_hf(size2s_t a); +size2s_t abs_hf(size2s_t a); + +//Compare +int cmpgt_fp(unfloat a, unfloat b); +int cmpgt_qf32(size4s_t a, size4s_t b); +int 
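+/*
+ * (Usage caution, ours: epsilon, units, HF_MIN and SF_MAX above are
+ * defined without enclosing parentheses, so they do not survive every
+ * expression context. With #define epsilon 1.0/pow(2,23), for example,
+ *
+ *     1.0/epsilon   expands to   1.0/1.0/pow(2,23)   ==  2^-23, not 2^23
+ *
+ * The existing uses in mmvec_qfloat.c only multiply by these macros,
+ * which parses correctly; new call sites should add parentheses.)
+ */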
cmpgt_qf16(size2s_t a, size2s_t b); +int cmpgt_sf(size4s_t a, size4s_t b); +int cmpgt_hf(size2s_t a, size2s_t b); +int cmpgt_qf32_sf(size4s_t a, size4s_t b); +int cmpgt_qf16_hf(size2s_t a, size2s_t b); + +//max/min +size4s_t max_qf32(size4s_t a, size4s_t b); +size4s_t min_qf32(size4s_t a, size4s_t b); +size4s_t max_qf32_sf(size4s_t a, size4s_t b); +size4s_t min_qf32_sf(size4s_t a, size4s_t b); +size4s_t max_sf(size4s_t a, size4s_t b); +size4s_t min_sf(size4s_t a, size4s_t b); +size2s_t max_qf16(size2s_t a, size2s_t b); +size2s_t min_qf16(size2s_t a, size2s_t b); +size2s_t max_qf16_hf(size2s_t a, size2s_t b); +size2s_t min_qf16_hf(size2s_t a, size2s_t b); +size2s_t max_hf(size2s_t a, size2s_t b); +size2s_t min_hf(size2s_t a, size2s_t b); +#endif diff --git a/target/hexagon/monitor.c b/target/hexagon/monitor.c new file mode 100644 index 000000000000..534ca2abe63a --- /dev/null +++ b/target/hexagon/monitor.c @@ -0,0 +1,36 @@ +/* + * Copyright(c) 2022-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + +#include "qemu/osdep.h" +#include "cpu.h" +#include "cpu_bits.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "monitor/hmp.h" +#include "hex_mmu.h" + +const MonitorDef monitor_defs[] = { + { NULL }, +}; + +const MonitorDef *target_monitor_defs(void) +{ + return monitor_defs; +} + +void hmp_info_tlb(Monitor *mon, const QDict *qdict) +{ +#if !defined(CONFIG_USER_ONLY) + CPUArchState *env = mon_get_cpu_env(mon); + if (!env) { + monitor_printf(mon, "No CPU available\n"); + return; + } + + dump_mmu(env); +#endif +} diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c index 6da8db8ea5c5..e6f11fd5f990 100644 --- a/target/hexagon/op_helper.c +++ b/target/hexagon/op_helper.c @@ -17,6 +17,8 @@ #include "qemu/osdep.h" #include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/timer.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" @@ -24,31 +26,59 @@ #include "cpu.h" #include "internal.h" #include "macros.h" +#include "sys_macros.h" #include "arch.h" #include "hex_arch_types.h" #include "fma_emu.h" #include "mmvec/mmvec.h" #include "mmvec/macros.h" +#include "mmvec/mmvec_qfloat.h" #include "op_helper.h" +#include "cpu_helper.h" #include "translate.h" +#ifndef CONFIG_USER_ONLY +#include "hex_mmu.h" +#include "hw/intc/l2vic.h" +#include "hw/timer/qct-qtimer.h" +#include "hex_interrupts.h" +#include "hexswi.h" +#endif #define SF_BIAS 127 #define SF_MANTBITS 23 /* Exceptions processing helpers */ +G_NORETURN +void do_raise_exception(CPUHexagonState *env, uint32_t exception, + target_ulong PC, uintptr_t retaddr) +{ + CPUState *cs = env_cpu(env); +#ifdef CONFIG_USER_ONLY + qemu_log_mask(CPU_LOG_INT, "%s: 0x%08x\n", __func__, exception); +#else + qemu_log_mask(CPU_LOG_INT, "%s: 0x%08x, @ %08" PRIx32 "\n", + __func__, exception, PC); + + ASSERT_DIRECT_TO_GUEST_UNSET(env, exception); +#endif + + env->gpr[HEX_REG_PC] = PC; + cs->exception_index = exception; + cpu_loop_exit_restore(cs, retaddr); + cs->halted = false; +} + G_NORETURN void hexagon_raise_exception_err(CPUHexagonState *env, uint32_t exception, uintptr_t pc) { - CPUState *cs = env_cpu(env); - qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception); - cs->exception_index = exception; - cpu_loop_exit_restore(cs, pc); + do_raise_exception(env, exception, pc, 0); } -G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp) +G_NORETURN void HELPER(raise_exception)(CPUHexagonState 
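+/*
+ * (Control-flow note: do_raise_exception() above records the faulting PC
+ * in HEX_REG_PC, latches the event number in cs->exception_index, and
+ * unwinds through cpu_loop_exit_restore(), which never returns. A
+ * hypothetical call site, with an illustrative event name that is not
+ * taken from this patch:
+ *
+ *     if (!translation_valid) {
+ *         do_raise_exception(env, HEX_EVENT_PRECISE,
+ *                            env->gpr[HEX_REG_PC], GETPC());
+ *     }
+ * )
+ */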
+G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp,
+                                        target_ulong PC)
 {
-    hexagon_raise_exception_err(env, excp, 0);
+    hexagon_raise_exception_err(env, excp, PC);
 }
 
 void log_store32(CPUHexagonState *env, target_ulong addr,
@@ -463,11 +493,11 @@ void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
  * If the load is in slot 0 and there is a store in slot1 (that
  * wasn't cancelled), we have to do the store first.
  */
-static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
+static void check_noshuf(CPUHexagonState *env, bool pkt_has_scalar_store_s1,
                          uint32_t slot, target_ulong vaddr, int size,
                          uintptr_t ra)
 {
-    if (slot == 0 && pkt_has_store_s1 &&
+    if (slot == 0 && pkt_has_scalar_store_s1 &&
         ((env->slot_cancelled & (1 << 1)) == 0)) {
         probe_read(env, vaddr, size, MMU_USER_IDX, ra);
         commit_store(env, 1, ra);
@@ -1149,6 +1179,119 @@ float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
     return RxxV;
 }
 
+#ifndef CONFIG_USER_ONLY
+void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old)
+{
+    BQL_LOCK_GUARD();
+    hexagon_modify_ssr(env, new, old);
+}
+
+static void hex_k0_lock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
+
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) {
+        if (env->k0_lock_state == HEX_LOCK_QUEUED) {
+            env->next_PC += 4;
+            env->k0_lock_count++;
+            env->k0_lock_state = HEX_LOCK_OWNER;
+            SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
+            return;
+        }
+        if (env->k0_lock_state == HEX_LOCK_OWNER) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Double k0lock at PC: 0x%x, thread may hang\n",
+                          env->next_PC);
+            env->next_PC += 4;
+            CPUState *cs = env_cpu(env);
+            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+            return;
+        }
+        env->k0_lock_state = HEX_LOCK_WAITING;
+        CPUState *cs = env_cpu(env);
+        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    } else {
+        env->next_PC += 4;
+        env->k0_lock_count++;
+        env->k0_lock_state = HEX_LOCK_OWNER;
+        SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
+    }
+}
+
+static void hex_k0_unlock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
+
+    /* Nothing to do if the k0 isn't locked by this thread */
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) ||
+        (env->k0_lock_state != HEX_LOCK_OWNER)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "thread %d attempted to unlock k0 without having the "
+                      "lock, k0_lock state = %d, syscfg:k0 = %d\n",
+                      env->threadId, env->k0_lock_state,
+                      GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg));
+        g_assert(env->k0_lock_state != HEX_LOCK_WAITING);
+        return;
+    }
+
+    env->k0_lock_count--;
+    env->k0_lock_state = HEX_LOCK_UNLOCKED;
+    SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0);
+
+    /* Look for a thread to unlock */
+    unsigned int this_threadId = env->threadId;
+    CPUHexagonState *unlock_thread = NULL;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *thread = cpu_env(cs);
+
+        /*
+         * The hardware implements round-robin fairness, so we look for threads
+         * starting at env->threadId + 1 and incrementing modulo the number of
+         * threads.
+         *
+         * To implement this, we check if thread is earlier in the modulo
+         * sequence than unlock_thread.
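+         * (Illustrative example: with four threads and this_threadId == 2,
+         * waiting threads are preferred in the order 3, 0, 1.)
+         *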
+         *   if unlock_thread is higher than this thread
+         *     thread must be between this thread and unlock_thread
+         *   else
+         *     a thread higher than this thread is ahead of unlock_thread,
+         *     so thread must be lower than unlock_thread
+         */
+        if (thread->k0_lock_state == HEX_LOCK_WAITING) {
+            if (!unlock_thread) {
+                unlock_thread = thread;
+            } else if (unlock_thread->threadId > this_threadId) {
+                if (this_threadId < thread->threadId &&
+                    thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            } else {
+                if (thread->threadId > this_threadId) {
+                    unlock_thread = thread;
+                }
+                if (thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            }
+        }
+    }
+    if (unlock_thread) {
+        cs = env_cpu(unlock_thread);
+        unlock_thread->k0_lock_state = HEX_LOCK_QUEUED;
+        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1);
+        cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
+    }
+}
+#endif
+
 /* Histogram instructions */
 
 void HELPER(vhist)(CPUHexagonState *env)
@@ -1314,6 +1457,555 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
     }
 }
 
+#ifndef CONFIG_USER_ONLY
+static void hexagon_set_vid(CPUHexagonState *env, uint32_t offset, int val)
+{
+    g_assert((offset == L2VIC_VID_0) || (offset == L2VIC_VID_1));
+    CPUState *cs = env_cpu(env);
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+    const hwaddr pend_mem = cpu->l2vic_base_addr + offset;
+    cpu_physical_memory_write(pend_mem, &val, sizeof(val));
+}
+
+static void hexagon_clear_last_irq(CPUHexagonState *env, uint32_t offset)
+{
+    /*
+     * The l2vic is currently the only interrupt controller attached, and
+     * it uses VID0; remove the assert in hexagon_set_vid() if another one
+     * is added.
+     */
+    hexagon_set_vid(env, offset, L2VIC_CIAD_INSTRUCTION);
+}
+
+void HELPER(ciad)(CPUHexagonState *env, uint32_t mask)
+{
+    uint32_t ipendad;
+    uint32_t iad;
+
+    BQL_LOCK_GUARD();
+    ipendad = READ_SREG(HEX_SREG_IPENDAD);
+    iad = fGET_FIELD(ipendad, IPENDAD_IAD);
+    fSET_FIELD(ipendad, IPENDAD_IAD, iad & ~(mask));
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+    hexagon_clear_last_irq(env, L2VIC_VID_0);
+    hex_interrupt_update(env);
+}
+
+void HELPER(siad)(CPUHexagonState *env, uint32_t mask)
+{
+    uint32_t ipendad;
+    uint32_t iad;
+
+    BQL_LOCK_GUARD();
+    ipendad = READ_SREG(HEX_SREG_IPENDAD);
+    iad = fGET_FIELD(ipendad, IPENDAD_IAD);
+    fSET_FIELD(ipendad, IPENDAD_IAD, iad | mask);
+    arch_set_system_reg(env, HEX_SREG_IPENDAD, ipendad);
+    hex_interrupt_update(env);
+}
+
+void HELPER(swi)(CPUHexagonState *env, uint32_t mask)
+{
+    BQL_LOCK_GUARD();
+    hex_raise_interrupts(env, mask, CPU_INTERRUPT_SWI);
+}
+
+void HELPER(cswi)(CPUHexagonState *env, uint32_t mask)
+{
+    BQL_LOCK_GUARD();
+    hex_clear_interrupts(env, mask, CPU_INTERRUPT_SWI);
+}
+
+void HELPER(iassignw)(CPUHexagonState *env, uint32_t src)
+{
+    uint32_t modectl;
+    uint32_t thread_enabled_mask;
+    CPUState *cpu;
+
+    BQL_LOCK_GUARD();
+    modectl = arch_get_system_reg(env, HEX_SREG_MODECTL);
+    thread_enabled_mask = GET_FIELD(MODECTL_E, modectl);
+
+    CPU_FOREACH(cpu) {
+        CPUHexagonState *thread_env = &(HEXAGON_CPU(cpu)->env);
+        uint32_t thread_id_mask = 0x1 << thread_env->threadId;
+        if (thread_enabled_mask & thread_id_mask) {
+            uint32_t imask = arch_get_system_reg(thread_env, HEX_SREG_IMASK);
+            uint32_t intbitpos = (src >> 16) & 0xF;
+            uint32_t val = (src >> thread_env->threadId) & 0x1;
+            imask = deposit32(imask, intbitpos, 1, val);
+            arch_set_system_reg(thread_env, HEX_SREG_IMASK, imask);
+
+            qemu_log_mask(CPU_LOG_INT, "%s: thread " TARGET_FMT_ld
+                          ", new imask 0x%" PRIx32 "\n", __func__,
+                          thread_env->threadId, imask);
+        }
+    }
+    
hex_interrupt_update(env); +} + +uint32_t HELPER(iassignr)(CPUHexagonState *env, uint32_t src) +{ + uint32_t modectl; + uint32_t thread_enabled_mask; + uint32_t intbitpos; + uint32_t dest_reg; + CPUState *cpu; + + BQL_LOCK_GUARD(); + modectl = arch_get_system_reg(env, HEX_SREG_MODECTL); + thread_enabled_mask = GET_FIELD(MODECTL_E, modectl); + /* src fields are in same position as modectl, but mean different things */ + intbitpos = GET_FIELD(MODECTL_W, src); + dest_reg = 0; + CPU_FOREACH(cpu) { + CPUHexagonState *thread_env = &(HEXAGON_CPU(cpu)->env); + uint32_t thread_id_mask = 0x1 << thread_env->threadId; + if (thread_enabled_mask & thread_id_mask) { + uint32_t imask = arch_get_system_reg(thread_env, HEX_SREG_IMASK); + dest_reg |= ((imask >> intbitpos) & 0x1) << thread_env->threadId; + } + } + + return dest_reg; +} + +void HELPER(start)(CPUHexagonState *env, uint32_t imask) +{ + hexagon_start_threads(env, imask); +} + +void HELPER(stop)(CPUHexagonState *env) +{ + hexagon_stop_thread(env); +} + +static inline QEMU_ALWAYS_INLINE void resched(CPUHexagonState *env) +{ + uint32_t schedcfg; + uint32_t schedcfg_en; + int int_number; + CPUState *cs; + uint32_t lowest_th_prio = 0; /* 0 is highest prio */ + uint32_t bestwait_reg; + uint32_t best_prio; + + BQL_LOCK_GUARD(); + qemu_log_mask(CPU_LOG_INT, "%s: check resched\n", __func__); + schedcfg = arch_get_system_reg(env, HEX_SREG_SCHEDCFG); + schedcfg_en = GET_FIELD(SCHEDCFG_EN, schedcfg); + int_number = GET_FIELD(SCHEDCFG_INTNO, schedcfg); + + if (!schedcfg_en) { + return; + } + + CPU_FOREACH(cs) { + HexagonCPU *thread = HEXAGON_CPU(cs); + CPUHexagonState *thread_env = &(thread->env); + uint32_t th_prio = GET_FIELD( + STID_PRIO, arch_get_system_reg(thread_env, HEX_SREG_STID)); + if (!hexagon_thread_is_enabled(thread_env)) { + continue; + } + + lowest_th_prio = (lowest_th_prio > th_prio) + ? lowest_th_prio + : th_prio; + } + + bestwait_reg = arch_get_system_reg(env, HEX_SREG_BESTWAIT); + best_prio = GET_FIELD(BESTWAIT_PRIO, bestwait_reg); + + /* + * If the lowest priority thread is lower priority than the + * value in the BESTWAIT register, we must raise the reschedule + * interrupt on the lowest priority thread. 
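+     * (Illustrative example: thread priorities { 0x20, 0x40 } give a
+     * lowest_th_prio of 0x40; with BESTWAIT.PRIO at 0x30 the resched
+     * interrupt fires and BESTWAIT is reset to 0x1ff below.)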
+     */
+    if (lowest_th_prio > best_prio) {
+        qemu_log_mask(CPU_LOG_INT,
+                      "%s: raising resched int %d, cur PC 0x" TARGET_FMT_lx "\n",
+                      __func__, int_number, arch_get_thread_reg(env, HEX_REG_PC));
+        SET_SYSTEM_FIELD(env, HEX_SREG_BESTWAIT, BESTWAIT_PRIO, 0x1ff);
+        hex_raise_interrupts(env, 1 << int_number, CPU_INTERRUPT_SWI);
+    }
+}
+
+void HELPER(resched)(CPUHexagonState *env)
+{
+    resched(env);
+}
+
+void HELPER(wait)(CPUHexagonState *env, target_ulong PC)
+{
+    BQL_LOCK_GUARD();
+
+    if (!fIN_DEBUG_MODE(env->threadId)) {
+        hexagon_wait_thread(env, PC);
+    }
+}
+
+void HELPER(resume)(CPUHexagonState *env, uint32_t mask)
+{
+    BQL_LOCK_GUARD();
+    hexagon_resume_threads(env, mask);
+}
+
+uint32_t HELPER(getimask)(CPUHexagonState *env, uint32_t tid)
+{
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        HexagonCPU *found_cpu = HEXAGON_CPU(cs);
+        CPUHexagonState *found_env = &found_cpu->env;
+        if (found_env->threadId == tid) {
+            target_ulong imask = arch_get_system_reg(found_env, HEX_SREG_IMASK);
+            qemu_log_mask(CPU_LOG_INT, "%s: tid %d imask = 0x%x\n",
+                          __func__, found_env->threadId,
+                          (unsigned)GET_FIELD(IMASK_MASK, imask));
+            return GET_FIELD(IMASK_MASK, imask);
+        }
+    }
+    return 0;
+}
+
+void HELPER(setimask)(CPUHexagonState *env, uint32_t pred, uint32_t imask)
+{
+    CPUState *cs;
+
+    BQL_LOCK_GUARD();
+    CPU_FOREACH(cs) {
+        HexagonCPU *found_cpu = HEXAGON_CPU(cs);
+        CPUHexagonState *found_env = &found_cpu->env;
+
+        if (pred == found_env->threadId) {
+            SET_SYSTEM_FIELD(found_env, HEX_SREG_IMASK, IMASK_MASK, imask);
+            qemu_log_mask(CPU_LOG_INT, "%s: tid %d imask 0x%x\n",
+                          __func__, found_env->threadId, imask);
+            hex_interrupt_update(env);
+            return;
+        }
+    }
+    hex_interrupt_update(env);
+}
+
+static bool handle_pmu_sreg_write(CPUHexagonState *env, uint32_t reg,
+                                  uint32_t val)
+{
+    if (reg == HEX_SREG_PMUSTID0 || reg == HEX_SREG_PMUSTID1
+        || reg == HEX_SREG_PMUCFG || reg == HEX_SREG_PMUEVTCFG
+        || reg == HEX_SREG_PMUEVTCFG1
+        || (reg >= HEX_SREG_PMUCNT4 && reg <= HEX_SREG_PMUCNT3)) {
+        qemu_log_mask(LOG_UNIMP, "PMU registers not yet implemented\n");
+        return true;
+    }
+    return false;
+}
+
+static void modify_syscfg(CPUHexagonState *env, uint32_t val)
+{
+    g_assert(bql_locked());
+
+    uint32_t old;
+    uint32_t syscfg_read_only_mask = 0x80001c00;
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+
+    /* Clear read-only bits if they are set in the new value. */
+    val &= ~syscfg_read_only_mask;
+    /* If read-only bits are currently set in syscfg, keep them set.
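+     * (e.g. writing 0xffffffff over syscfg == 0 yields 0x7fffe3ff: bit 31
+     * and bits 12:10 -- mask 0x80001c00 -- always keep their prior value.)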
*/ + val |= (syscfg & syscfg_read_only_mask); + + uint32_t tmp = val; + old = arch_get_system_reg(env, HEX_SREG_SYSCFG); + arch_set_system_reg(env, HEX_SREG_SYSCFG, tmp); + + /* Check for change in MMU enable */ + target_ulong old_mmu_enable = GET_SYSCFG_FIELD(SYSCFG_MMUEN, old); + uint8_t old_en = GET_SYSCFG_FIELD(SYSCFG_PCYCLEEN, old); + uint8_t old_gie = GET_SYSCFG_FIELD(SYSCFG_GIE, old); + target_ulong new_mmu_enable = + GET_SYSCFG_FIELD(SYSCFG_MMUEN, val); + if (new_mmu_enable && !old_mmu_enable) { + hex_mmu_on(env); + } else if (!new_mmu_enable && old_mmu_enable) { + hex_mmu_off(env); + } + + /* Changing pcycle enable from 0 to 1 resets the counters */ + uint8_t new_en = GET_SYSCFG_FIELD(SYSCFG_PCYCLEEN, val); + CPUState *cs; + if (old_en == 0 && new_en == 1) { + CPU_FOREACH(cs) { + CPUHexagonState *_env = cpu_env(cs); + _env->t_cycle_count = 0; + } + } + + /* See if global interrupts are turned on */ + uint8_t new_gie = GET_SYSCFG_FIELD(SYSCFG_GIE, val); + if (!old_gie && new_gie) { + qemu_log_mask(CPU_LOG_INT, "%s: global interrupts enabled\n", __func__); + hex_interrupt_update(env); + } + + if (qemu_loglevel_mask(LOG_UNIMP)) { + int new_v2x = GET_SYSCFG_FIELD(SYSCFG_V2X, val); + if (!new_v2x) { + qemu_log("HVX: 64 byte vector length is unsupported\n"); + } + } +} + +static uint32_t hexagon_find_last_irq(CPUHexagonState *env, uint32_t vid) +{ + int offset = (vid == HEX_SREG_VID) ? L2VIC_VID_0 : L2VIC_VID_1; + CPUState *cs = env_cpu(env); + HexagonCPU *cpu = HEXAGON_CPU(cs); + const hwaddr pend_mem = cpu->l2vic_base_addr + offset; + uint32_t irq; + cpu_physical_memory_read(pend_mem, &irq, sizeof(irq)); + return irq; +} + +static void hexagon_read_timer(CPUHexagonState *env, uint32_t *low, + uint32_t *high) +{ + CPUState *cs = env_cpu(env); + HexagonCPU *cpu = HEXAGON_CPU(cs); + const hwaddr low_addr = cpu->qtimer_base_addr + QCT_QTIMER_CNTPCT_LO; + const hwaddr high_addr = cpu->qtimer_base_addr + QCT_QTIMER_CNTPCT_HI; + + cpu_physical_memory_read(low_addr, low, sizeof(*low)); + cpu_physical_memory_read(high_addr, high, sizeof(*high)); +} + +static inline bool ssr_ce_enabled(CPUHexagonState *env) +{ + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + return GET_SSR_FIELD(SSR_CE, ssr); +} + +static uint32_t creg_read(CPUHexagonState *env, uint32_t reg) +{ + uint32_t low, high; + switch (reg) { + case HEX_REG_UPCYCLELO: + return ssr_ce_enabled(env) ? hexagon_get_sys_pcycle_count_low(env) : 0; + case HEX_REG_UPCYCLEHI: + return ssr_ce_enabled(env) ? hexagon_get_sys_pcycle_count_high(env) : 0; + case HEX_REG_UTIMERLO: + hexagon_read_timer(env, &low, &high); + return low; + case HEX_REG_UTIMERHI: + hexagon_read_timer(env, &low, &high); + return high; + default: + return env->gpr[reg]; + } +} + +uint32_t HELPER(creg_read)(CPUHexagonState *env, uint32_t reg) +{ + return creg_read(env, reg); +} + +uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg) +{ + return (uint64_t)creg_read(env, reg) | + (((uint64_t)creg_read(env, reg + 1)) << 32); +} + + +static inline QEMU_ALWAYS_INLINE void sreg_write(CPUHexagonState *env, + uint32_t reg, uint32_t val) + +{ + g_assert(bql_locked()); + if ((reg == HEX_SREG_VID) || (reg == HEX_SREG_VID1)) { + hexagon_set_vid(env, (reg == HEX_SREG_VID) ? 
L2VIC_VID_0 : L2VIC_VID_1, + val); + arch_set_system_reg(env, reg, val); + } else if (reg == HEX_SREG_SYSCFG) { + modify_syscfg(env, val); + } else if (reg == HEX_SREG_IMASK) { + val = GET_FIELD(IMASK_MASK, val); + arch_set_system_reg(env, reg, val); + } else if (reg == HEX_SREG_PCYCLELO) { + hexagon_set_sys_pcycle_count_low(env, val); + } else if (reg == HEX_SREG_PCYCLEHI) { + hexagon_set_sys_pcycle_count_high(env, val); + } else if (!handle_pmu_sreg_write(env, reg, val)) { + if (reg >= HEX_SREG_GLB_START) { + arch_set_system_reg(env, reg, val); + } else { + arch_set_system_reg(env, reg, val); + } + } +} + +void HELPER(sreg_write)(CPUHexagonState *env, uint32_t reg, uint32_t val) +{ + BQL_LOCK_GUARD(); + sreg_write(env, reg, val); +} + +void hexagon_gdb_sreg_write(CPUHexagonState *env, uint32_t reg, uint32_t val) +{ + BQL_LOCK_GUARD(); + sreg_write(env, reg, val); + /* + * The above is needed to run special logic for regs like syscfg, but it + * won't set read-only bits. This will: + */ + arch_set_system_reg(env, reg, val); +} + +void HELPER(sreg_write_pair)(CPUHexagonState *env, uint32_t reg, uint64_t val) +{ + BQL_LOCK_GUARD(); + sreg_write(env, reg, val & 0xFFFFFFFF); + sreg_write(env, reg + 1, val >> 32); +} + +static inline QEMU_ALWAYS_INLINE uint32_t sreg_read(CPUHexagonState *env, + uint32_t reg) +{ + g_assert(bql_locked()); + if (reg == HEX_SREG_PMUSTID0 || reg == HEX_SREG_PMUSTID1 + || reg == HEX_SREG_PMUCFG || reg == HEX_SREG_PMUEVTCFG + || reg == HEX_SREG_PMUEVTCFG1 + || (reg >= HEX_SREG_PMUCNT4 && reg <= HEX_SREG_PMUCNT3)) { + qemu_log_mask(LOG_UNIMP, "PMU registers not yet implemented"); + return 0; + } + if ((reg == HEX_SREG_VID) || (reg == HEX_SREG_VID1)) { + const uint32_t vid = hexagon_find_last_irq(env, reg); + arch_set_system_reg(env, reg, vid); + } else if ((reg == HEX_SREG_TIMERLO) || (reg == HEX_SREG_TIMERHI)) { + uint32_t low = 0; + uint32_t high = 0; + hexagon_read_timer(env, &low, &high); + arch_set_system_reg(env, HEX_SREG_TIMERLO, low); + arch_set_system_reg(env, HEX_SREG_TIMERHI, high); + } else if (reg == HEX_SREG_BADVA) { + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + if (GET_SSR_FIELD(SSR_BVS, ssr)) { + return arch_get_system_reg(env, HEX_SREG_BADVA1); + } + return arch_get_system_reg(env, HEX_SREG_BADVA0); + } + return arch_get_system_reg(env, reg); +} + +uint32_t HELPER(sreg_read)(CPUHexagonState *env, uint32_t reg) +{ + BQL_LOCK_GUARD(); + return sreg_read(env, reg); +} + +uint32_t hexagon_sreg_read(CPUHexagonState *env, uint32_t reg) +{ + return sreg_read(env, reg); +} + +uint64_t HELPER(sreg_read_pair)(CPUHexagonState *env, uint32_t reg) +{ + BQL_LOCK_GUARD(); + if (reg == HEX_SREG_TIMERLO) { + uint32_t low = 0; + uint32_t high = 0; + hexagon_read_timer(env, &low, &high); + arch_set_system_reg(env, HEX_SREG_TIMERLO, low); + arch_set_system_reg(env, HEX_SREG_TIMERHI, high); + } else if (reg == HEX_SREG_PCYCLELO) { + return hexagon_get_sys_pcycle_count(env); + } + return (uint64_t)sreg_read(env, reg) | + (((uint64_t)sreg_read(env, reg + 1)) << 32); +} + +uint32_t HELPER(greg_read)(CPUHexagonState *env, uint32_t reg) + +{ + return hexagon_greg_read(env, reg); +} + +uint64_t HELPER(greg_read_pair)(CPUHexagonState *env, uint32_t reg) + +{ + if (reg == HEX_GREG_G0 || reg == HEX_GREG_G2) { + return (uint64_t)(env->greg[reg]) | + (((uint64_t)(env->greg[reg + 1])) << 32); + } + switch (reg) { + case HEX_GREG_GPCYCLELO: { + target_ulong ssr = arch_get_system_reg(env, HEX_SREG_SSR); + int ssr_ce = GET_SSR_FIELD(SSR_CE, ssr); + return ssr_ce ? 
hexagon_get_sys_pcycle_count(env) : 0; + } + default: + return (uint64_t)hexagon_greg_read(env, reg) | + ((uint64_t)(hexagon_greg_read(env, reg + 1)) << 32); + } +} + +void HELPER(setprio)(CPUHexagonState *env, uint32_t thread, uint32_t prio) +{ + CPUState *cs; + + BQL_LOCK_GUARD(); + CPU_FOREACH(cs) { + HexagonCPU *found_cpu = HEXAGON_CPU(cs); + CPUHexagonState *found_env = &found_cpu->env; + if (thread == found_env->threadId) { + SET_SYSTEM_FIELD(found_env, HEX_SREG_STID, STID_PRIO, prio); + qemu_log_mask(CPU_LOG_INT, "%s: tid %d prio = 0x%x\n", + __func__, found_env->threadId, prio); + resched(env); + return; + } + } + g_assert_not_reached(); +} + +void HELPER(nmi)(CPUHexagonState *env, uint32_t thread_mask) +{ + g_assert_not_reached(); +} + +void HELPER(pending_interrupt)(CPUHexagonState *env) +{ + BQL_LOCK_GUARD(); + hex_interrupt_update(env); +} +#endif + +#ifdef CONFIG_USER_ONLY +uint32_t HELPER(creg_read)(CPUHexagonState *env, uint32_t reg) +{ + /* These are handled directly by gen_read_ctrl_reg(). */ + g_assert(reg != HEX_REG_UPCYCLELO && reg != HEX_REG_UPCYCLEHI); + + if (reg == HEX_REG_UTIMERHI) { + return cpu_get_host_ticks() >> 32; + } else if (reg == HEX_REG_UTIMERLO) { + return extract32(cpu_get_host_ticks(), 0, 32); + } + return 0; +} + +uint64_t HELPER(creg_read_pair)(CPUHexagonState *env, uint32_t reg) +{ + if (reg == HEX_REG_UPCYCLELO) { + /* Pretend SSR[CE] is always set. */ + return hexagon_get_sys_pcycle_count(env); + } + if (reg == HEX_REG_UTIMERLO) { + return cpu_get_host_ticks(); + } + return 0; +} +#endif + + /* These macros can be referenced in the generated helper functions */ #define warn(...) /* Nothing */ #define fatal(...) g_assert_not_reached(); @@ -1321,4 +2013,5 @@ void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV) #define BOGUS_HELPER(tag) \ printf("ERROR: bogus helper: " #tag "\n") +#include "mmvec/kvx_ieee.h" #include "helper_funcs_generated.c.inc" diff --git a/target/hexagon/printinsn.c b/target/hexagon/printinsn.c index 4865cdd133b5..f780092586cf 100644 --- a/target/hexagon/printinsn.c +++ b/target/hexagon/printinsn.c @@ -24,16 +24,17 @@ static const char *sreg2str(unsigned int reg) { - if (reg < TOTAL_PER_THREAD_REGS) { - return hexagon_regnames[reg]; - } else { - return "???"; +#ifndef CONFIG_USER_ONLY + if (reg < NUM_SREGS) { + return hexagon_sregnames[reg]; } +#endif + return "???"; } static const char *creg2str(unsigned int reg) { - return sreg2str(reg + HEX_REG_SA0); + return hexagon_regnames[reg + HEX_REG_SA0]; } static void snprintinsn(GString *buf, Insn *insn) diff --git a/target/hexagon/reg_fields_def.h.inc b/target/hexagon/reg_fields_def.h.inc index f2a58d486c55..50b8c26f8bfa 100644 --- a/target/hexagon/reg_fields_def.h.inc +++ b/target/hexagon/reg_fields_def.h.inc @@ -39,3 +39,110 @@ DEF_REG_FIELD(USR_FPDBZE, 26, 1) DEF_REG_FIELD(USR_FPOVFE, 27, 1) DEF_REG_FIELD(USR_FPUNFE, 28, 1) DEF_REG_FIELD(USR_FPINPE, 29, 1) + +DEF_REG_FIELD(IPENDAD_IAD, 16, 16) +DEF_REG_FIELD(IPENDAD_IPEND, 0, 16) + +DEF_REG_FIELD(SCHEDCFG_EN, 8, 1) +DEF_REG_FIELD(SCHEDCFG_INTNO, 0, 4) +DEF_REG_FIELD(BESTWAIT_PRIO, 0, 10) + + +/* PTE (aka TLB entry) fields */ +DEF_REG_FIELD(PTE_PPD, 0, 24) +DEF_REG_FIELD(PTE_C, 24, 4) +DEF_REG_FIELD(PTE_U, 28, 1) +DEF_REG_FIELD(PTE_R, 29, 1) +DEF_REG_FIELD(PTE_W, 30, 1) +DEF_REG_FIELD(PTE_X, 31, 1) +DEF_REG_FIELD(PTE_VPN, 32, 20) +DEF_REG_FIELD(PTE_ASID, 52, 7) +DEF_REG_FIELD(PTE_ATR0, 59, 1) +DEF_REG_FIELD(PTE_ATR1, 60, 1) +DEF_REG_FIELD(PTE_PA35, 61, 1) +DEF_REG_FIELD(PTE_G, 62, 1) +DEF_REG_FIELD(PTE_V, 63, 1) + +/* 
SYSCFG fields */ +DEF_REG_FIELD(SYSCFG_MMUEN, 0, 1) +DEF_REG_FIELD(SYSCFG_ICEN, 1, 1) +DEF_REG_FIELD(SYSCFG_DCEN, 2, 1) +DEF_REG_FIELD(SYSCFG_ISDBTRUSTED, 3, 1) +DEF_REG_FIELD(SYSCFG_GIE, 4, 1) +DEF_REG_FIELD(SYSCFG_ISDBREADY, 5, 1) +DEF_REG_FIELD(SYSCFG_PCYCLEEN, 6, 1) +DEF_REG_FIELD(SYSCFG_V2X, 7, 1) +DEF_REG_FIELD(SYSCFG_IGNOREDABORT, 8, 1) +DEF_REG_FIELD(SYSCFG_PM, 9, 1) +DEF_REG_FIELD(SYSCFG_TLBLOCK, 11, 1) +DEF_REG_FIELD(SYSCFG_K0LOCK, 12, 1) +DEF_REG_FIELD(SYSCFG_BQ, 13, 1) +DEF_REG_FIELD(SYSCFG_PRIO, 14, 1) +DEF_REG_FIELD(SYSCFG_DMT, 15, 1) +DEF_REG_FIELD(SYSCFG_L2CFG, 16, 3) +DEF_REG_FIELD(SYSCFG_ITCM, 19, 1) +DEF_REG_FIELD(SYSCFG_L2NWA, 21, 1) +DEF_REG_FIELD(SYSCFG_L2NRA, 22, 1) +DEF_REG_FIELD(SYSCFG_L2WB, 23, 1) +DEF_REG_FIELD(SYSCFG_L2P, 24, 1) +DEF_REG_FIELD(SYSCFG_SLVCTL0, 25, 2) +DEF_REG_FIELD(SYSCFG_SLVCTL1, 27, 2) +DEF_REG_FIELD(SYSCFG_L2PARTSIZE, 29, 2) +DEF_REG_FIELD(SYSCFG_L2GCA, 31, 1) + +/* SSR fields */ +DEF_REG_FIELD(SSR_CAUSE, 0, 8) +DEF_REG_FIELD(SSR_ASID, 8, 7) +DEF_REG_FIELD(SSR_UM, 16, 1) +DEF_REG_FIELD(SSR_EX, 17, 1) +DEF_REG_FIELD(SSR_IE, 18, 1) +DEF_REG_FIELD(SSR_GM, 19, 1) +DEF_REG_FIELD(SSR_V0, 20, 1) +DEF_REG_FIELD(SSR_V1, 21, 1) +DEF_REG_FIELD(SSR_BVS, 22, 1) +DEF_REG_FIELD(SSR_CE, 23, 1) +DEF_REG_FIELD(SSR_PE, 24, 1) +DEF_REG_FIELD(SSR_BP, 25, 1) +DEF_REG_FIELD(SSR_XE2, 26, 1) +DEF_REG_FIELD(SSR_XA, 27, 3) +DEF_REG_FIELD(SSR_SS, 30, 1) +DEF_REG_FIELD(SSR_XE, 31, 1) + +/* misc registers */ +DEF_REG_FIELD(IMASK_MASK, 0, 16) + +DEF_REG_FIELD(STID_PRIO, 16, 8) +DEF_REG_FIELD(STID_STID, 0, 8) + +/* MODECTL fields */ +DEF_REG_FIELD(MODECTL_E, 0, 8) +DEF_REG_FIELD(MODECTL_W, 16, 8) + +DEF_REG_FIELD(CCR_L1ICP, 0, 2) +DEF_REG_FIELD(CCR_L1DCP, 3, 2) +DEF_REG_FIELD(CCR_L2CP, 6, 2) + +DEF_REG_FIELD(CCR_HFI, 16, 1) +DEF_REG_FIELD(CCR_HFD, 17, 1) +DEF_REG_FIELD(CCR_HFIL2, 18, 1) +DEF_REG_FIELD(CCR_HFDL2, 19, 1) +DEF_REG_FIELD(CCR_SFD, 20, 1) + +DEF_REG_FIELD(CCR_GIE, 24, 1) +DEF_REG_FIELD(CCR_GTE, 25, 1) +DEF_REG_FIELD(CCR_GEE, 26, 1) +DEF_REG_FIELD(CCR_GRE, 27, 1) +DEF_REG_FIELD(CCR_VV1, 29, 1) +DEF_REG_FIELD(CCR_VV2, 30, 1) +DEF_REG_FIELD(CCR_VV3, 31, 1) + +/* ISDB ST fields */ +DEF_REG_FIELD(ISDBST_WAITRUN, 24, 8) +DEF_REG_FIELD(ISDBST_ONOFF, 16, 8) +DEF_REG_FIELD(ISDBST_DEBUGMODE, 8, 8) +DEF_REG_FIELD(ISDBST_STUFFSTATUS, 5, 1) +DEF_REG_FIELD(ISDBST_CMDSTATUS, 4, 1) +DEF_REG_FIELD(ISDBST_PROCMODE, 3, 1) +DEF_REG_FIELD(ISDBST_MBXINSTATUS, 2, 1) +DEF_REG_FIELD(ISDBST_MBXOUTSTATUS, 1, 1) +DEF_REG_FIELD(ISDBST_READY, 0, 1) diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h new file mode 100644 index 000000000000..e5dc1ce0ab9f --- /dev/null +++ b/target/hexagon/sys_macros.h @@ -0,0 +1,238 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HEXAGON_SYS_MACROS_H +#define HEXAGON_SYS_MACROS_H + +/* + * Macro definitions for Hexagon system mode + */ + +#ifndef CONFIG_USER_ONLY + +#define READ_SREG(NUM) arch_get_system_reg(env, NUM) +#define READ_SGP0() arch_get_system_reg(env, HEX_SREG_SGP0) +#define READ_SGP1() arch_get_system_reg(env, HEX_SREG_SGP1) +#define READ_SGP10() ((uint64_t)arch_get_system_reg(env, HEX_SREG_SGP0) | \ + ((uint64_t)arch_get_system_reg(env, HEX_SREG_SGP1) << 32)) + +#define WRITE_SREG(NUM, VAL) log_sreg_write(env, NUM, VAL, slot) +#define WRITE_SGP0(VAL) log_sreg_write(env, HEX_SREG_SGP0, VAL, slot) +#define WRITE_SGP1(VAL) log_sreg_write(env, HEX_SREG_SGP1, VAL, slot) +#define WRITE_SGP10(VAL) \ + do { \ + log_sreg_write(env, HEX_SREG_SGP0, (VAL) & 0xFFFFFFFF, slot); \ + log_sreg_write(env, HEX_SREG_SGP1, (VAL) >> 32, slot); \ + } while (0) + +#ifdef QEMU_GENERATE +#define GET_SSR_FIELD(RES, FIELD) \ + GET_FIELD(RES, FIELD, hex_t_sreg[HEX_SREG_SSR]) +#else + +#define GET_SSR_FIELD(FIELD, REGIN) \ + (uint32_t)GET_FIELD(FIELD, REGIN) +#define GET_SYSCFG_FIELD(FIELD, REGIN) \ + (uint32_t)GET_FIELD(FIELD, REGIN) +#define SET_SYSTEM_FIELD(ENV, REG, FIELD, VAL) \ + do { \ + uint32_t regval = arch_get_system_reg(ENV, REG); \ + fINSERT_BITS(regval, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)); \ + arch_set_system_reg(ENV, REG, regval); \ + } while (0) +#define SET_SSR_FIELD(ENV, FIELD, VAL) \ + SET_SYSTEM_FIELD(ENV, HEX_SREG_SSR, FIELD, VAL) +#define SET_SYSCFG_FIELD(ENV, FIELD, VAL) \ + SET_SYSTEM_FIELD(ENV, HEX_SREG_SYSCFG, FIELD, VAL) + +#define CCR_FIELD_SET(ENV, FIELD) \ + (!!GET_FIELD(FIELD, arch_get_system_reg(ENV, HEX_SREG_CCR))) + +/* + * Direct-to-guest is not implemented yet, continuing would cause unexpected + * behavior, so we abort. 
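+ * (Guest-mode delivery would be selected by the CCR.GTE/GEE/GIE bits
+ * checked below, so the asserts fail fast if a guest enables them.)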
+ */ +#define ASSERT_DIRECT_TO_GUEST_UNSET(ENV, EXCP) \ + do { \ + switch (EXCP) { \ + case HEX_EVENT_TRAP0: \ + g_assert(!CCR_FIELD_SET(ENV, CCR_GTE)); \ + break; \ + case HEX_EVENT_IMPRECISE: \ + case HEX_EVENT_PRECISE: \ + case HEX_EVENT_FPTRAP: \ + g_assert(!CCR_FIELD_SET(ENV, CCR_GEE)); \ + break; \ + default: \ + if ((EXCP) >= HEX_EVENT_INT0) { \ + g_assert(!CCR_FIELD_SET(ENV, CCR_GIE)); \ + } \ + break; \ + } \ + } while (0) +#endif + +#define fREAD_ELR() (READ_SREG(HEX_SREG_ELR)) + +#define fLOAD_PHYS(NUM, SIZE, SIGN, SRC1, SRC2, DST) { \ + const uintptr_t rs = ((unsigned long)(unsigned)(SRC1)) & 0x7ff; \ + const uintptr_t rt = ((unsigned long)(unsigned)(SRC2)) << 11; \ + const uintptr_t addr = rs + rt; \ + cpu_physical_memory_read(addr, &DST, sizeof(uint32_t)); \ +} + +#define fPOW2_HELP_ROUNDUP(VAL) \ + ((VAL) | \ + ((VAL) >> 1) | \ + ((VAL) >> 2) | \ + ((VAL) >> 4) | \ + ((VAL) >> 8) | \ + ((VAL) >> 16)) +#define fPOW2_ROUNDUP(VAL) (fPOW2_HELP_ROUNDUP((VAL) - 1) + 1) + +#define fFRAMECHECK(ADDR, EA) g_assert_not_reached(); + +#define fTRAP(TRAPTYPE, IMM) \ + register_trap_exception(env, TRAPTYPE, IMM, PC) + +#define fVIRTINSN_SPSWAP(IMM, REG) +#define fVIRTINSN_GETIE(IMM, REG) { REG = 0xdeafbeef; } +#define fVIRTINSN_SETIE(IMM, REG) +#define fVIRTINSN_RTE(IMM, REG) +#define fGRE_ENABLED() GET_FIELD(CCR_GRE, READ_SREG(HEX_SREG_CCR)) +#define fTRAP1_VIRTINSN(IMM) \ + (fGRE_ENABLED() && \ + (((IMM) == 1) || ((IMM) == 3) || ((IMM) == 4) || ((IMM) == 6))) + +/* Not modeled in qemu */ + +#define MARK_LATE_PRED_WRITE(RNUM) +#define fICINVIDX(REG) +#define fICKILL() +#define fDCKILL() +#define fL2KILL() +#define fL2UNLOCK() +#define fL2CLEAN() +#define fL2CLEANINV() +#define fL2CLEANPA(REG) +#define fL2CLEANINVPA(REG) +#define fL2CLEANINVIDX(REG) +#define fL2CLEANIDX(REG) +#define fL2INVIDX(REG) +#define fL2TAGR(INDEX, DST, DSTREG) +#define fL2UNLOCKA(VA) ((void) VA) +#define fL2TAGW(INDEX, PART2) +#define fDCCLEANIDX(REG) +#define fDCCLEANINVIDX(REG) + +/* Always succeed: */ +#define fL2LOCKA(EA, PDV, PDN) ((void) EA, PDV = 0xFF) +#define fCLEAR_RTE_EX() \ + do { \ + uint32_t tmp = 0; \ + tmp = arch_get_system_reg(env, HEX_SREG_SSR); \ + fINSERT_BITS(tmp, reg_field_info[SSR_EX].width, \ + reg_field_info[SSR_EX].offset, 0); \ + log_sreg_write(env, HEX_SREG_SSR, tmp, slot); \ + } while (0) + +#define fDCINVIDX(REG) +#define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */ + +#define fSET_TLB_LOCK() hex_tlb_lock(env); +#define fCLEAR_TLB_LOCK() hex_tlb_unlock(env); + +#define fSET_K0_LOCK() hex_k0_lock(env); +#define fCLEAR_K0_LOCK() hex_k0_unlock(env); + +#define fTLB_IDXMASK(INDEX) \ + ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1)) + +#define fTLB_NONPOW2WRAP(INDEX) \ + (((INDEX) >= env_archcpu(env)->num_tlbs) ? 
\ + ((INDEX) - env_archcpu(env)->num_tlbs) : \ + (INDEX)) + + +#define fTLBW(INDEX, VALUE) \ + hex_tlbw(env, (INDEX), (VALUE)) +#define fTLBW_EXTENDED(INDEX, VALUE) \ + hex_tlbw(env, (INDEX), (VALUE)) +#define fTLB_ENTRY_OVERLAP(VALUE) \ + (hex_tlb_check_overlap(env, VALUE, -1) != -2) +#define fTLB_ENTRY_OVERLAP_IDX(VALUE) \ + hex_tlb_check_overlap(env, VALUE, -1) +#define fTLBR(INDEX) \ + (env->hex_tlb->entries[fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))]) +#define fTLBR_EXTENDED(INDEX) \ + (env->hex_tlb->entries[fTLB_NONPOW2WRAP(fTLB_IDXMASK(INDEX))]) +#define fTLBP(TLBHI) \ + hex_tlb_lookup(env, ((TLBHI) >> 12), ((TLBHI) << 12)) +#define iic_flush_cache(p) + +#define fIN_DEBUG_MODE(TNUM) \ + ((GET_FIELD(ISDBST_DEBUGMODE, arch_get_system_reg(env, HEX_SREG_ISDBST)) \ + & (0x1 << (TNUM))) != 0) + +#define fIN_DEBUG_MODE_NO_ISDB(TNUM) false +#define fIN_DEBUG_MODE_WARN(TNUM) false + +#ifdef QEMU_GENERATE + +/* + * Read tags back as zero for now: + * + * tag value in RD[31:10] for 32k, RD[31:9] for 16k + */ +#define fICTAGR(RS, RD, RD2) \ + do { \ + RD = ctx->zero; \ + } while (0) +#define fICTAGW(RS, RD) +#define fICDATAR(RS, RD) \ + do { \ + RD = ctx->zero; \ + } while (0) +#define fICDATAW(RS, RD) + +#define fDCTAGW(RS, RT) +/* tag: RD[23:0], state: RD[30:29] */ +#define fDCTAGR(INDEX, DST, DST_REG_NUM) \ + do { \ + DST = ctx->zero; \ + } while (0) +#else + +/* + * Read tags back as zero for now: + * + * tag value in RD[31:10] for 32k, RD[31:9] for 16k + */ +#define fICTAGR(RS, RD, RD2) \ + do { \ + RD = 0x00; \ + } while (0) +#define fICTAGW(RS, RD) +#define fICDATAR(RS, RD) \ + do { \ + RD = 0x00; \ + } while (0) +#define fICDATAW(RS, RD) + +#define fDCTAGW(RS, RT) +/* tag: RD[23:0], state: RD[30:29] */ +#define fDCTAGR(INDEX, DST, DST_REG_NUM) \ + do { \ + DST = HEX_DC_STATE_INVALID | 0x00; \ + } while (0) +#endif + +#endif + +#define NUM_TLB_REGS(x) (env_archcpu(env)->num_tlbs) + +#endif diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index fe7858703c8c..35765d48ba11 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -49,6 +49,7 @@ static const AnalyzeInsn opcode_analyze[XX_LAST_OPCODE] = { TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; TCGv hex_pred[NUM_PREGS]; TCGv hex_slot_cancelled; +TCGv hex_next_PC; TCGv hex_new_value_usr; TCGv hex_store_addr[STORES_MAX]; TCGv hex_store_width[STORES_MAX]; @@ -57,9 +58,19 @@ TCGv_i64 hex_store_val64[STORES_MAX]; TCGv hex_llsc_addr; TCGv hex_llsc_val; TCGv_i64 hex_llsc_val_i64; +TCGv_i64 hex_cycle_count; TCGv hex_vstore_addr[VSTORES_MAX]; TCGv hex_vstore_size[VSTORES_MAX]; TCGv hex_vstore_pending[VSTORES_MAX]; +static bool need_next_PC(DisasContext *ctx); + +#ifndef CONFIG_USER_ONLY +TCGv hex_greg[NUM_GREGS]; +TCGv hex_t_sreg[NUM_SREGS]; +TCGv_ptr hex_g_sreg_ptr; +TCGv hex_g_sreg[NUM_SREGS]; +TCGv hex_cause_code; +#endif static const char * const hexagon_prednames[] = { "p0", "p1", "p2", "p3" @@ -113,11 +124,28 @@ intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, return offset; } -static void gen_exception_raw(int excp) +static void gen_exception(int excp, target_ulong PC) +{ + gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp), + tcg_constant_tl(PC)); +} + +#ifndef CONFIG_USER_ONLY +static inline void gen_precise_exception(int excp, target_ulong PC) { - gen_helper_raise_exception(tcg_env, tcg_constant_i32(excp)); + tcg_gen_movi_tl(hex_cause_code, excp); + gen_exception(HEX_EVENT_PRECISE, PC); } +static inline void gen_pcycle_counters(DisasContext *ctx) +{ + if (ctx->pcycle_enabled) { + 
tcg_gen_addi_i64(hex_cycle_count, hex_cycle_count, ctx->num_cycles);
+        ctx->num_cycles = 0;
+    }
+}
+#endif
+
 static void gen_exec_counters(DisasContext *ctx)
 {
     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_PKT_CNT],
@@ -126,6 +154,10 @@ static void gen_exec_counters(DisasContext *ctx)
                     hex_gpr[HEX_REG_QEMU_INSN_CNT], ctx->num_insns);
     tcg_gen_addi_tl(hex_gpr[HEX_REG_QEMU_HVX_CNT],
                     hex_gpr[HEX_REG_QEMU_HVX_CNT], ctx->num_hvx_insns);
+
+#ifndef CONFIG_USER_ONLY
+    gen_pcycle_counters(ctx);
+#endif
 }
 
 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -156,6 +188,9 @@ static void gen_end_tb(DisasContext *ctx)
 
     gen_exec_counters(ctx);
 
+    if (ctx->need_next_pc) {
+        tcg_gen_mov_tl(hex_gpr[HEX_REG_PC], hex_next_PC);
+    }
     if (ctx->branch_cond != TCG_COND_NEVER) {
         if (ctx->branch_cond != TCG_COND_ALWAYS) {
             TCGLabel *skip = gen_new_label();
@@ -185,13 +220,14 @@ static void gen_end_tb(DisasContext *ctx)
     ctx->base.is_jmp = DISAS_NORETURN;
 }
 
-static void gen_exception_end_tb(DisasContext *ctx, int excp)
+void hex_gen_exception_end_tb(DisasContext *ctx, int excp)
 {
-    gen_exec_counters(ctx);
-    tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
-    gen_exception_raw(excp);
+#ifdef CONFIG_USER_ONLY
+    gen_exception(excp, ctx->pkt->pc);
+#else
+    gen_precise_exception(excp, ctx->pkt->pc);
+#endif
     ctx->base.is_jmp = DISAS_NORETURN;
-
 }
 
 static int read_packet_words(CPUHexagonState *env, DisasContext *ctx,
@@ -232,6 +268,18 @@ static bool check_for_attrib(Packet *pkt, int attrib)
     return false;
 }
 
+#ifndef CONFIG_USER_ONLY
+static bool check_for_opcode(Packet *pkt, uint16_t opcode)
+{
+    for (int i = 0; i < pkt->num_insns; i++) {
+        if (pkt->insn[i].opcode == opcode) {
+            return true;
+        }
+    }
+    return false;
+}
+#endif
+
 static bool need_slot_cancelled(Packet *pkt)
 {
     /* We only need slot_cancelled for conditional store instructions */
@@ -245,21 +293,111 @@ static bool need_slot_cancelled(Packet *pkt)
     return false;
 }
 
-static bool need_next_PC(DisasContext *ctx)
+#ifndef CONFIG_USER_ONLY
+static bool sreg_write_to_global(int reg_num)
 {
-    Packet *pkt = ctx->pkt;
+    return reg_num == HEX_SREG_SSR ||
+           reg_num == HEX_SREG_STID ||
+           reg_num == HEX_SREG_IMASK ||
+           reg_num == HEX_SREG_IPENDAD ||
+           reg_num == HEX_SREG_BESTWAIT ||
+           reg_num == HEX_SREG_SCHEDCFG;
+}
 
-    /* Check for conditional control flow or HW loop end */
+static bool has_sreg_write_to_global(Packet const *pkt)
+{
     for (int i = 0; i < pkt->num_insns; i++) {
-        uint16_t opcode = pkt->insn[i].opcode;
-        if (GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) {
-            return true;
+        Insn const *insn = &pkt->insn[i];
+        uint16_t opcode = insn->opcode;
+        if (opcode == Y2_tfrsrcr) {
+            /* Write to a single sreg */
+            int reg_num = insn->regno[0];
+            if (sreg_write_to_global(reg_num)) {
+                return true;
+            }
+        } else if (opcode == Y4_tfrspcp) {
+            /* Write to a sreg pair */
+            int reg_num = insn->regno[0];
+            if (sreg_write_to_global(reg_num)) {
+                return true;
+            }
+            if (sreg_write_to_global(reg_num + 1)) {
+                return true;
+            }
         }
-        if (GET_ATTRIB(opcode, A_HWLOOP0_END) ||
-            GET_ATTRIB(opcode, A_HWLOOP1_END)) {
-            return true;
+    }
+    return false;
+}
+#endif
+
+static bool pkt_ends_tb(Packet *pkt)
+{
+    if (pkt->pkt_has_cof) {
+        return true;
+    }
+#ifndef CONFIG_USER_ONLY
+    /* System mode instructions that end the TB */
+    if (check_for_opcode(pkt, Y2_swi) ||
+        check_for_opcode(pkt, Y2_cswi) ||
+        check_for_opcode(pkt, Y2_ciad) ||
+        check_for_opcode(pkt, Y4_siad) ||
+        check_for_opcode(pkt, Y2_wait) ||
+        check_for_opcode(pkt, Y2_resume) ||
+        check_for_opcode(pkt, Y2_iassignw) ||
+        
check_for_opcode(pkt, Y2_setimask) || + check_for_opcode(pkt, Y4_nmi) || + check_for_opcode(pkt, Y2_setprio) || + check_for_opcode(pkt, Y2_start) || + check_for_opcode(pkt, Y2_stop) || + check_for_opcode(pkt, Y2_k0lock) || + check_for_opcode(pkt, Y2_k0unlock) || + check_for_opcode(pkt, Y2_tlblock) || + check_for_opcode(pkt, Y2_tlbunlock) || + check_for_opcode(pkt, Y2_break) || + check_for_opcode(pkt, Y2_isync) || + check_for_opcode(pkt, Y2_syncht) || + check_for_opcode(pkt, Y2_tlbp) || + check_for_opcode(pkt, Y2_tlbw) || + check_for_opcode(pkt, Y5_ctlbw) || + check_for_opcode(pkt, Y5_tlbasidi)) { + return true; + } + + /* + * Check for sreg writes that would end the TB + */ + if (check_for_attrib(pkt, A_IMPLICIT_WRITES_SSR)) { + return true; + } + if (has_sreg_write_to_global(pkt)) { + return true; + } +#endif + return false; +} + + +static bool need_next_PC(DisasContext *ctx) +{ + Packet *pkt = ctx->pkt; + if (pkt->pkt_has_cof || ctx->pkt_ends_tb) { + for (int i = 0; i < pkt->num_insns; i++) { + uint16_t opcode = pkt->insn[i].opcode; + if ((GET_ATTRIB(opcode, A_CONDEXEC) && GET_ATTRIB(opcode, A_COF)) || + GET_ATTRIB(opcode, A_HWLOOP0_END) || + GET_ATTRIB(opcode, A_HWLOOP1_END)) { + return true; + } } } + /* + * We end the TB on some instructions that do not change the flow (for + * other reasons). In these cases, we must set pc too, as the insn won't + * do it themselves. + */ + if (ctx->pkt_ends_tb && !check_for_attrib(pkt, A_COF)) { + return true; + } return false; } @@ -291,6 +429,16 @@ static void mark_implicit_reg_write(DisasContext *ctx, int attrib, int rnum) } } +#ifndef CONFIG_USER_ONLY +static void mark_implicit_sreg_write(DisasContext *ctx, int attrib, int snum) +{ + uint16_t opcode = ctx->insn->opcode; + if (GET_ATTRIB(opcode, attrib)) { + ctx_log_sreg_write(ctx, snum); + } +} +#endif + static void mark_implicit_reg_writes(DisasContext *ctx) { mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_FP, HEX_REG_FP); @@ -302,6 +450,12 @@ static void mark_implicit_reg_writes(DisasContext *ctx) mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_SA1, HEX_REG_SA1); mark_implicit_reg_write(ctx, A_IMPLICIT_WRITES_USR, HEX_REG_USR); mark_implicit_reg_write(ctx, A_FPOP, HEX_REG_USR); + +#ifndef CONFIG_USER_ONLY + mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SGP0, HEX_SREG_SGP0); + mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SGP1, HEX_SREG_SGP1); + mark_implicit_sreg_write(ctx, A_IMPLICIT_WRITES_SSR, HEX_SREG_SSR); +#endif } static void mark_implicit_pred_write(DisasContext *ctx, int attrib, int pnum) @@ -400,7 +554,14 @@ static void analyze_packet(DisasContext *ctx) static void gen_start_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; +#ifndef CONFIG_USER_ONLY + target_ulong next_PC = (check_for_opcode(pkt, Y2_k0lock) || + check_for_opcode(pkt, Y2_tlblock)) ? + ctx->base.pc_next : + ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; +#else target_ulong next_PC = ctx->base.pc_next + pkt->encod_pkt_size_in_bytes; +#endif int i; /* Clear out the disassembly context */ @@ -408,6 +569,10 @@ static void gen_start_packet(DisasContext *ctx) ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); +#ifndef CONFIG_USER_ONLY + ctx->greg_log_idx = 0; + ctx->sreg_log_idx = 0; +#endif ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); ctx->future_vregs_idx = 0; @@ -440,6 +605,25 @@ static void gen_start_packet(DisasContext *ctx) * gen phase, so clear it again. 
*/ bitmap_zero(ctx->pregs_written, NUM_PREGS); +#ifndef CONFIG_USER_ONLY + for (i = 0; i < NUM_SREGS; i++) { + ctx->t_sreg_new_value[i] = NULL; + } + for (i = 0; i < ctx->sreg_log_idx; i++) { + int reg_num = ctx->sreg_log[i]; + if (reg_num < HEX_SREG_GLB_START) { + ctx->t_sreg_new_value[reg_num] = tcg_temp_new(); + tcg_gen_mov_tl(ctx->t_sreg_new_value[reg_num], hex_t_sreg[reg_num]); + } + } + for (i = 0; i < NUM_GREGS; i++) { + ctx->greg_new_value[i] = NULL; + } + for (i = 0; i < ctx->greg_log_idx; i++) { + int reg_num = ctx->greg_log[i]; + ctx->greg_new_value[reg_num] = tcg_temp_new(); + } +#endif /* Initialize the runtime state for packet semantics */ if (need_slot_cancelled(pkt)) { @@ -448,12 +632,14 @@ static void gen_start_packet(DisasContext *ctx) ctx->branch_taken = NULL; if (pkt->pkt_has_cof) { ctx->branch_taken = tcg_temp_new(); - if (pkt->pkt_has_multi_cof) { - tcg_gen_movi_tl(ctx->branch_taken, 0); - } - if (need_next_PC(ctx)) { - tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], next_PC); - } + } + if (pkt->pkt_has_multi_cof) { + tcg_gen_movi_tl(ctx->branch_taken, 0); + } + ctx->pkt_ends_tb = pkt_ends_tb(pkt); + ctx->need_next_pc = need_next_PC(ctx); + if (ctx->need_next_pc) { + tcg_gen_movi_tl(hex_next_PC, next_PC); } /* Preload the predicated registers into get_result_gpr(ctx, i) */ @@ -558,7 +744,7 @@ static void gen_insn(DisasContext *ctx) ctx->insn->generate(ctx); mark_store_width(ctx); } else { - gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE); + hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_OPCODE); } } @@ -589,6 +775,59 @@ static void gen_reg_writes(DisasContext *ctx) } } +#ifndef CONFIG_USER_ONLY +static void gen_greg_writes(DisasContext *ctx) +{ + int i; + + for (i = 0; i < ctx->greg_log_idx; i++) { + int reg_num = ctx->greg_log[i]; + + tcg_gen_mov_tl(hex_greg[reg_num], ctx->greg_new_value[reg_num]); + } +} + + +static void gen_sreg_writes(DisasContext *ctx) +{ + int i; + + TCGv old_reg = tcg_temp_new(); + for (i = 0; i < ctx->sreg_log_idx; i++) { + int reg_num = ctx->sreg_log[i]; + + if (reg_num == HEX_SREG_SSR) { + tcg_gen_mov_tl(old_reg, hex_t_sreg[reg_num]); + tcg_gen_mov_tl(hex_t_sreg[reg_num], ctx->t_sreg_new_value[reg_num]); + gen_helper_modify_ssr(tcg_env, ctx->t_sreg_new_value[reg_num], + old_reg); + /* This can change processor state, so end the TB */ + ctx->base.is_jmp = DISAS_NORETURN; + } else if ((reg_num == HEX_SREG_STID) || + (reg_num == HEX_SREG_IMASK) || + (reg_num == HEX_SREG_IPENDAD)) { + if (reg_num < HEX_SREG_GLB_START) { + tcg_gen_mov_tl(old_reg, hex_t_sreg[reg_num]); + tcg_gen_mov_tl(hex_t_sreg[reg_num], + ctx->t_sreg_new_value[reg_num]); + } + /* This can change the interrupt state, so end the TB */ + gen_helper_pending_interrupt(tcg_env); + ctx->base.is_jmp = DISAS_NORETURN; + } else if ((reg_num == HEX_SREG_BESTWAIT) || + (reg_num == HEX_SREG_SCHEDCFG)) { + /* This can trigger resched interrupt, so end the TB */ + gen_helper_resched(tcg_env); + ctx->base.is_jmp = DISAS_NORETURN; + } + + if (reg_num < HEX_SREG_GLB_START) { + tcg_gen_mov_tl(hex_t_sreg[reg_num], ctx->t_sreg_new_value[reg_num]); + } + } +} +#endif + static void gen_pred_writes(DisasContext *ctx) { /* Early exit if not needed or the log is empty */ @@ -693,11 +932,11 @@ static void process_store_log(DisasContext *ctx) * the memory accesses overlap. 
*/ Packet *pkt = ctx->pkt; - if (pkt->pkt_has_store_s1) { + if (pkt->pkt_has_scalar_store_s1) { g_assert(!pkt->pkt_has_dczeroa); process_store(ctx, 1); } - if (pkt->pkt_has_store_s0) { + if (pkt->pkt_has_scalar_store_s0) { g_assert(!pkt->pkt_has_dczeroa); process_store(ctx, 0); } @@ -779,6 +1018,7 @@ static void gen_commit_hvx(DisasContext *ctx) } } +static const int PCYCLES_PER_PACKET = 3; static void update_exec_counters(DisasContext *ctx) { Packet *pkt = ctx->pkt; @@ -798,6 +1038,7 @@ static void update_exec_counters(DisasContext *ctx) } ctx->num_packets++; + ctx->num_cycles += PCYCLES_PER_PACKET; ctx->num_insns += num_real_insns; ctx->num_hvx_insns += num_hvx_insns; } @@ -822,8 +1063,9 @@ static void gen_commit_packet(DisasContext *ctx) * involved in committing the packet. */ Packet *pkt = ctx->pkt; - bool has_store_s0 = pkt->pkt_has_store_s0; - bool has_store_s1 = (pkt->pkt_has_store_s1 && !ctx->s1_store_processed); + bool has_store_s0 = pkt->pkt_has_scalar_store_s0; + bool has_store_s1 = + (pkt->pkt_has_scalar_store_s1 && !ctx->s1_store_processed); bool has_hvx_store = pkt_has_hvx_store(pkt); if (pkt->pkt_has_dczeroa) { /* @@ -886,6 +1128,10 @@ static void gen_commit_packet(DisasContext *ctx) process_store_log(ctx); gen_reg_writes(ctx); +#if !defined(CONFIG_USER_ONLY) + gen_greg_writes(ctx); + gen_sreg_writes(ctx); +#endif gen_pred_writes(ctx); if (pkt->pkt_has_hvx) { gen_commit_hvx(ctx); @@ -898,7 +1144,7 @@ static void gen_commit_packet(DisasContext *ctx) pkt->vhist_insn->generate(ctx); } - if (pkt->pkt_has_cof) { + if (ctx->pkt_ends_tb || ctx->base.is_jmp == DISAS_NORETURN) { gen_end_tb(ctx); } } @@ -912,7 +1158,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) nwords = read_packet_words(env, ctx, words); if (!nwords) { - gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); + hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); return; } @@ -927,7 +1173,7 @@ static void decode_and_translate_packet(CPUHexagonState *env, DisasContext *ctx) gen_commit_packet(ctx); ctx->base.pc_next += pkt.encod_pkt_size_in_bytes; } else { - gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); + hex_gen_exception_end_tb(ctx, HEX_CAUSE_INVALID_PACKET); } } @@ -938,13 +1184,16 @@ static void hexagon_tr_init_disas_context(DisasContextBase *dcbase, HexagonCPU *hex_cpu = env_archcpu(cpu_env(cs)); uint32_t hex_flags = dcbase->tb->flags; - ctx->mem_idx = MMU_USER_IDX; + ctx->mem_idx = FIELD_EX32(hex_flags, TB_FLAGS, MMU_INDEX); ctx->num_packets = 0; + ctx->num_cycles = 0; ctx->num_insns = 0; ctx->num_hvx_insns = 0; ctx->branch_cond = TCG_COND_NEVER; ctx->is_tight_loop = FIELD_EX32(hex_flags, TB_FLAGS, IS_TIGHT_LOOP); ctx->short_circuit = hex_cpu->short_circuit; + ctx->pcycle_enabled = FIELD_EX32(hex_flags, TB_FLAGS, PCYCLE_ENABLED); + ctx->need_next_pc = false; } static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -1050,6 +1299,26 @@ void hexagon_translate_init(void) opcode_init(); +#ifndef CONFIG_USER_ONLY + for (i = 0; i < NUM_GREGS; i++) { + hex_greg[i] = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, greg[i]), + hexagon_gregnames[i]); + } + hex_g_sreg_ptr = tcg_global_mem_new_ptr(tcg_env, + offsetof(CPUHexagonState, g_sreg), "hex_g_sreg_ptr"); + for (i = 0; i < NUM_SREGS; i++) { + if (i < HEX_SREG_GLB_START) { + hex_t_sreg[i] = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, t_sreg[i]), + hexagon_sregnames[i]); + } else { + hex_g_sreg[i] = tcg_global_mem_new(hex_g_sreg_ptr, + i * sizeof(target_ulong), + hexagon_sregnames[i]); + 
} + } +#endif for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) { hex_gpr[i] = tcg_global_mem_new(tcg_env, offsetof(CPUHexagonState, gpr[i]), @@ -1071,6 +1340,15 @@ void hexagon_translate_init(void) offsetof(CPUHexagonState, llsc_val), "llsc_val"); hex_llsc_val_i64 = tcg_global_mem_new_i64(tcg_env, offsetof(CPUHexagonState, llsc_val_i64), "llsc_val_i64"); + hex_cycle_count = tcg_global_mem_new_i64(tcg_env, + offsetof(CPUHexagonState, t_cycle_count), "t_cycle_count"); +#ifndef CONFIG_USER_ONLY + hex_cause_code = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, cause_code), "cause_code"); +#endif + hex_next_PC = tcg_global_mem_new(tcg_env, + offsetof(CPUHexagonState, next_PC), "next_PC"); + for (i = 0; i < STORES_MAX; i++) { snprintf(store_addr_names[i], NAME_LEN, "store_addr_%d", i); hex_store_addr[i] = tcg_global_mem_new(tcg_env, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index d251e2233fda..ad1a2f404534 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -39,6 +39,14 @@ typedef struct DisasContext { int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); +#ifndef CONFIG_USER_ONLY + int greg_log[GREG_WRITES_MAX]; + int greg_log_idx; + int sreg_log[SREG_WRITES_MAX]; + int sreg_log_idx; + TCGv t_sreg_new_value[NUM_SREGS]; + TCGv greg_new_value[NUM_GREGS]; +#endif int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); @@ -75,10 +83,42 @@ typedef struct DisasContext { TCGv new_pred_value[NUM_PREGS]; TCGv branch_taken; TCGv dczero_addr; + bool pcycle_enabled; + bool pkt_ends_tb; + bool need_next_pc; + uint32_t num_cycles; } DisasContext; bool is_gather_store_insn(DisasContext *ctx); +#ifndef CONFIG_USER_ONLY +static inline void ctx_log_greg_write(DisasContext *ctx, int rnum) +{ + if (rnum <= HEX_GREG_G3) { + ctx->greg_log[ctx->greg_log_idx] = rnum; + ctx->greg_log_idx++; + } +} + +static inline void ctx_log_greg_write_pair(DisasContext *ctx, int rnum) +{ + ctx_log_greg_write(ctx, rnum); + ctx_log_greg_write(ctx, rnum + 1); +} + +static inline void ctx_log_sreg_write(DisasContext *ctx, int rnum) +{ + ctx->sreg_log[ctx->sreg_log_idx] = rnum; + ctx->sreg_log_idx++; +} + +static inline void ctx_log_sreg_write_pair(DisasContext *ctx, int rnum) +{ + ctx_log_sreg_write(ctx, rnum); + ctx_log_sreg_write(ctx, rnum + 1); +} +#endif + static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) { if (!test_bit(pnum, ctx->pregs_written)) { @@ -267,6 +307,7 @@ static inline void ctx_log_qreg_read(DisasContext *ctx, } extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; +extern TCGv hex_next_PC; extern TCGv hex_pred[NUM_PREGS]; extern TCGv hex_slot_cancelled; extern TCGv hex_new_value_usr; @@ -280,6 +321,15 @@ extern TCGv_i64 hex_llsc_val_i64; extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX]; +#ifndef CONFIG_USER_ONLY +extern TCGv hex_greg[NUM_GREGS]; +extern TCGv hex_t_sreg[NUM_SREGS]; +extern TCGv_ptr hex_g_sreg_ptr; +extern TCGv hex_g_sreg[NUM_SREGS]; +#endif + + +void hex_gen_exception_end_tb(DisasContext *ctx, int excp); void process_store(DisasContext *ctx, int slot_num); diff --git a/target/riscv/common-semi-target.h b/target/riscv/common-semi-target.h index 7c8a59e0cc3c..ef6929bdfc5a 100644 --- a/target/riscv/common-semi-target.h +++ b/target/riscv/common-semi-target.h @@ -11,6 +11,17 @@ #ifndef TARGET_RISCV_COMMON_SEMI_TARGET_H #define TARGET_RISCV_COMMON_SEMI_TARGET_H 
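+/*
+ * Fetch the arg_num'th word of the semihosting argument block; 64-bit
+ * guests use 8-byte argument slots, 32-bit guests 4-byte slots.
+ */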
+static inline bool common_semi_read_arg_word(CPUArchState *env,
+                                             target_ulong *save_to,
+                                             target_ulong args_addr,
+                                             int arg_num)
+{
+    if (is_64bit_semihosting(env)) {
+        return get_user_u64(*save_to, args_addr + (arg_num) * 8);
+    }
+    return get_user_u32(*save_to, args_addr + (arg_num) * 4);
+}
+
 static inline target_ulong common_semi_arg(CPUState *cs, int argno)
 {
     RISCVCPU *cpu = RISCV_CPU(cs);
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index 3fd2652c0782..7e361c68dd90 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -140,6 +140,14 @@ tests_i386_system_quick = [
   'migration',
 ]
 
+test_timeouts += {
+  'hexagon_minivm': 180,
+}
+
+tests_hexagon_system_quick = [
+  'hexagon_minivm',
+]
+
 tests_i386_system_thorough = [
   'i386_tuxrun',
 ]
diff --git a/tests/functional/test_hexagon_minivm.py b/tests/functional/test_hexagon_minivm.py
new file mode 100755
index 000000000000..2ba92bcce383
--- /dev/null
+++ b/tests/functional/test_hexagon_minivm.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+#
+# Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import os
+from glob import glob
+from qemu_test import QemuSystemTest, Asset
+from qemu_test import wait_for_console_pattern
+
+class MiniVMTest(QemuSystemTest):
+
+    timeout = 180
+    GUEST_ENTRY = 0xc0000000
+
+    REPO = 'https://artifacts.codelinaro.org/artifactory'
+    ASSET_TARBALL = \
+        Asset(f'{REPO}/codelinaro-toolchain-for-hexagon/'
+              '19.1.5/hexagon_minivm_2024_Dec_15.tar.gz',
+              'd7920b5ff14bed5a10b23ada7d4eb927ede08635281f25067e0d5711feee2c2a')
+
+    def test_minivm(self):
+        self.set_machine('virt')
+        self.archive_extract(self.ASSET_TARBALL)
+        rootfs_path = f'{self.workdir}/hexagon-unknown-linux-musl-rootfs'
+        kernel_path = f'{rootfs_path}/boot/minivm'
+
+        assert(os.path.exists(kernel_path))
+        for test_bin_path in glob(f'{rootfs_path}/boot/test_*'):
+            print(f'# Testing "{os.path.basename(test_bin_path)}"')
+
+            vm = self.get_vm()
+            vm.add_args('-kernel', kernel_path,
+                        '-device',
+                        f'loader,addr={hex(self.GUEST_ENTRY)},file={test_bin_path}')
+            vm.launch()
+            vm.wait()
+            self.assertEqual(vm.exitcode(), 0)
+
+if __name__ == '__main__':
+    QemuSystemTest.main()
diff --git a/tests/tcg/hexagon/Makefile.softmmu-target b/tests/tcg/hexagon/Makefile.softmmu-target
new file mode 100644
index 000000000000..0b12f7485b62
--- /dev/null
+++ b/tests/tcg/hexagon/Makefile.softmmu-target
@@ -0,0 +1,114 @@
+##
+## Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## +## SPDX-License-Identifier: GPL-2.0-or-later +## + +# -*- Mode: makefile -*- +# +# Hexagon SoftMMU tests - included from tests/tcg/Makefile +# + +HEXAGON_SYSTEM_SRC=$(SRC_PATH)/tests/tcg/hexagon/system + +# Set search path for all sources +VPATH += $(HEXAGON_SYSTEM_SRC) + +########### Compiling options +# We force -O0 to avoid optimizations that would break the +# libc simplifications we made at min_libc.c +# +CFLAGS=-mv73 -U__linux__ -G0 -nodefaultlibs -nostdlib -static -fno-PIC -O0 -g -Werror +LDFLAGS=-lclang_rt.builtins-hexagon + +########### QEMU options +QEMU_BASE_MACHINE=-M V66G_1024 -semihosting-config usefs=$(SRC_PATH)/tests/tcg/hexagon/system +QEMU_OPTS+=-display none + +QEMU_OPTS+=$(QEMU_BASE_MACHINE) -kernel + +crt0.o: crt0/crt0.S crt0/crt0.inc +crt0_standalone.o: crt0/crt0_standalone.S crt0/crt0.inc +pte.o: crt0/pte.S +min_libc.o: crt0/min_libc.c +tlb.o: crt0/tlb.c + +CRT0_OBJS=crt0.o crt0_standalone.o pte.o min_libc.o tlb.o + +TESTS_BUILT_WITH_DEFAULT_RULES = \ + semihost \ + mmu_overlap \ + mmu_asids \ + standalone_hw \ + ciad-siad \ + badva \ + vid_reg \ + hvx-multi \ + standalone_vec \ + fastl2vic \ + int_range \ + $() + +TESTS += \ + $(TESTS_BUILT_WITH_DEFAULT_RULES) \ + tlb-miss-tlblock \ + $() + +$(TESTS_BUILT_WITH_DEFAULT_RULES): $(CRT0_OBJS) + +# Build and link the tests +echo-and-run = echo $(1) && $(1) +define build_fn + @if test "$(3)" = LINK; then extra="$(LDFLAGS)"; else extra=-c; fi && \ + $(call echo-and-run, $(CC) $(CFLAGS) $(1) -o $(2) $$extra) +endef + +$(CRT0_OBJS): + $(call build_fn,$<,$@) +$(TESTS_BUILT_WITH_DEFAULT_RULES): + $(call build_fn,$^,$@,LINK) + +%.o: %.S + $(call build_fn,$<,$@) +%.o: %.c + $(call build_fn,$<,$@) + +mmu.h: ../hex_test.h + +semihost.o: semihost.c strutils.h +semihost: semihost.o +mmu_overlap.o: mmu_overlap.c mmu.h +mmu_overlap: mmu_overlap.o +mmu_asids.o: mmu_asids.c mmu.h +mmu_asids: mmu_asids.o +ciad-siad: ciad-siad.o +standalone_hw: standalone_hw.o monitor_insts.o +vid_reg: vid_reg.o +hvx-multi.o: hvx-multi.c ../hvx_misc.h +hvx-multi: hvx-multi.o +standalone_vec.o: standalone_vec.c cfgtable.h +standalone_vec: standalone_vec.o +badva.o: badva.c ../hex_test.h crt0/hexagon_standalone.h +badva: badva.o +fastl2vic.o: fastl2vic.c cfgtable.h +fastl2vic: fastl2vic.o +int_range.o: int_range.c cfgtable.h +int_range: int_range.o + +############# Custom build options + +standalone_vec.o: CFLAGS+= -mv69 -O2 -mhvx -fvectorize +hvx-multi.o: CFLAGS+= -O2 -mhvx + +# We don't want to link this one with crt0 files +tlb-miss-tlblock: tlb-miss-tlblock.o + $(CC) $(CFLAGS) $< -o $@ -nostartfiles -Wl,-Ttext,0x9b800000 -Wl,-entry,0x9b800000 + +############# Custom test rules + +run-semihost: semihost + mkdir -p _semihost_dir + touch _semihost_dir/fileA _semihost_dir/fileB + $(call run-test, $<, $(QEMU) --append "arg1 arg2" $(QEMU_OPTS) $< \ + > $<.stdout) + $(call quiet-command, grep -q "PASS" $<.stdout, "GREP", "PASS") diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target index e5182c01d8a0..44dd927b5937 100644 --- a/tests/tcg/hexagon/Makefile.target +++ b/tests/tcg/hexagon/Makefile.target @@ -52,6 +52,7 @@ HEX_TESTS += hvx_misc HEX_TESTS += hvx_histogram HEX_TESTS += invalid-slots HEX_TESTS += unaligned_pc +HEX_TESTS += utimer run-and-check-exception = $(call run-test,$2,$3 2>$2.stderr; \ test $$? 
-eq 1 && grep -q "exception $(strip $1)" $2.stderr) @@ -109,6 +110,7 @@ preg_alias: preg_alias.c hex_test.h read_write_overlap: read_write_overlap.c hex_test.h reg_mut: reg_mut.c hex_test.h unaligned_pc: unaligned_pc.c +utimer: utimer.c hex_test.h # This test has to be compiled for the -mv67t target usr: usr.c hex_test.h diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c index 90c3733da071..319d7c0dd052 100644 --- a/tests/tcg/hexagon/hvx_misc.c +++ b/tests/tcg/hexagon/hvx_misc.c @@ -495,6 +495,28 @@ void test_store_new() check_output_w(__LINE__, 1); } +void test_qfloat() +{ + asm volatile( + "r0 = #0xf\n" + "v0 = vsplat(r0)\n" + "v1 = vsplat(r0)\n" + "{\n" + " v2.qf16 = vadd(v0.qf16, v1.qf16)\n" + "}\n" + "vmem(%0) = v2\n" + : + : "r"(&output[0]) + : "r0", "v0", "v1", "v2", "memory" + ); + + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[0].w[i] = 0x10010; + } + + check_output_w(__LINE__, 1); +} + int main() { init_buffers(); @@ -538,6 +560,8 @@ int main() test_store_new(); + test_qfloat(); + puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; } diff --git a/tests/tcg/hexagon/reg_mut.c b/tests/tcg/hexagon/reg_mut.c index c5a39e55100d..45db9ae5cd15 100644 --- a/tests/tcg/hexagon/reg_mut.c +++ b/tests/tcg/hexagon/reg_mut.c @@ -77,10 +77,10 @@ static inline void write_control_registers(void) check32(result, 0x00000000); WRITE_REG_NOCLOBBER(result, "utimerlo", 0xffffffff); - check32(result, 0x00000000); + check32_ne(result, 0xffffffff); WRITE_REG_NOCLOBBER(result, "utimerhi", 0xffffffff); - check32(result, 0x00000000); + check32_ne(result, 0xffffffff); /* * PC is special. Setting it to these values @@ -107,7 +107,7 @@ static inline void write_control_register_pairs(void) check64(result, 0x0000000000000000); WRITE_REG_NOCLOBBER(result, "c31:30", 0xffffffffffffffff); - check64(result, 0x0000000000000000); + check64_ne(result, 0xffffffffffffffff); WRITE_REG_PAIR_ENCODED(result, "c9:8", (uint64_t) 0x0000000000000000, C9_8_EQ_R1_0); diff --git a/tests/tcg/hexagon/system/badva.c b/tests/tcg/hexagon/system/badva.c new file mode 100644 index 000000000000..1351269d1077 --- /dev/null +++ b/tests/tcg/hexagon/system/badva.c @@ -0,0 +1,335 @@ +/* + * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "crt0/hexagon_standalone.h" + +#define DEBUG 0 + +int err; +#include "../hex_test.h" + +/* volatile because it is written through different MMU mappings */ +typedef volatile int mmu_variable; +mmu_variable data0 = 0xdeadbeef; +mmu_variable data1 = 0xabcdef01; + +#define ONE_MB (1 << 20) +#define INVALID_BADVA 0xbadabada + +static uint32_t read_badva(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = badva\n\t" : "=r"(ret)); + return ret; +} + +static uint32_t read_badva0(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = badva0\n\t" : "=r"(ret)); + return ret; +} + +static uint32_t read_badva1(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = badva1\n\t" : "=r"(ret)); + return ret; +} + +static uint32_t read_ssr(void) +{ + uint32_t ret; + __asm__ __volatile__("%0 = ssr\n\t" : "=r"(ret)); + return ret; +} + +static void write_badva0(uint32_t val) +{ + __asm__ __volatile__("badva0=%0;" : : "r"(val)); + return; +} + +static void write_badva1(uint32_t val) +{ + __asm__ __volatile__("badva1=%0;" : : "r"(val)); + return; +} + +#define SSR_V0_BIT 20 +#define SSR_V1_BIT 21 +#define SSR_BVS_BIT 21 + +static uint32_t read_ssr_v0(void) +{ + return (read_ssr() >> SSR_V0_BIT) & 0x1; +} + +static uint32_t read_ssr_v1(void) +{ + return (read_ssr() >> SSR_V1_BIT) & 0x1; +} + +static uint32_t read_ssr_bvs(void) +{ + return (read_ssr() >> SSR_BVS_BIT) & 0x1; +} + +static void dual_store(mmu_variable *p, mmu_variable *q, uint32_t pval, + uint32_t qval) +{ +#if DEBUG + printf("dual_store:\t0x%p, 0x%p, 0x%lx, 0x%lx\n", p, q, pval, qval); +#endif + + __asm__ __volatile__("r6 = #0\n\t" + "badva0 = r6\n\t" + "badva1 = r6\n\t" + "r6 = ssr\n\t" + "r6 = clrbit(r6, #%4) // V0\n\t" + "r6 = clrbit(r6, #%5) // V1\n\t" + "r6 = clrbit(r6, #%6) // BVS\n\t" + "ssr = r6\n\t" + "{\n\t" + " memw(%0) = %2 // slot 1\n\t" + " memw(%1) = %3 // slot 0\n\t" + "}\n\t" + : "=m"(*p), "=m"(*q) + : "r"(pval), "r"(qval), "i"(SSR_V0_BIT), + "i"(SSR_V1_BIT), "i"(SSR_BVS_BIT) + : "r6"); +} + +static void dual_load(mmu_variable *p, mmu_variable *q, uint32_t *pval, + uint32_t *qval) +{ + uint32_t val0, val1; + +#if DEBUG + printf("dual_load:\t0x%p, 0x%p\n", p, q); +#endif + + __asm__ __volatile__("r6 = #0\n\t" + "badva0 = r6\n\t" + "badva1 = r6\n\t" + "r6 = ssr\n\t" + "r6 = clrbit(r6, #%4) // V0\n\t" + "r6 = clrbit(r6, #%5) // V1\n\t" + "r6 = clrbit(r6, #%6) // BVS\n\t" + "ssr = r6\n\t" + "{\n\t" + " %1 = memw(%3) // slot 1\n\t" + " %0 = memw(%2) // slot 0\n\t" + "}\n\t" + : "=r"(val0), "=r"(val1) + : "m"(*p), "m"(*q), "i"(SSR_V0_BIT), "i"(SSR_V1_BIT), + "i"(SSR_BVS_BIT) + : "r6"); + +#if DEBUG + printf("\t\t0x%lx, 0x%lx\n", val0, val1); +#endif + + *pval = val0; + *qval = val1; +} + +static void load_store(mmu_variable *p, mmu_variable *q, uint32_t *pval, + uint32_t qval) +{ + uint32_t val; + +#if DEBUG + printf("load_store:\t0x%p, 0x%p, 0x%lx\n", p, q, qval); +#endif + + __asm__ __volatile__("r6 = #0\n\t" + "badva0 = r6\n\t" + "badva1 = r6\n\t" + "r6 = ssr\n\t" + "r6 = clrbit(r6, #%4) // V0\n\t" + "r6 = clrbit(r6, #%5) // V1\n\t" + "r6 = clrbit(r6, #%6) // BVS\n\t" + "ssr = r6\n\t" + "{\n\t" + " %0 = memw(%2) // slot 1\n\t" + " memw(%1) = %3 // slot 0\n\t" + "}\n\t" + : "=r"(val), "=m"(*q) + : "m"(*p), "r"(qval), "i"(SSR_V0_BIT), "i"(SSR_V1_BIT), + "i"(SSR_BVS_BIT) + : "r6"); + +#if DEBUG + printf("\t\t0x%lx\n", val); +#endif + + *pval = val; +} + +enum { + TLB_U = (1 << 0), + TLB_R = (1 << 1), + TLB_W = (1 << 2), + TLB_X = (1 << 3), +}; + +uint32_t 
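+/*
+ * Returns the page-size encoding that basic_entry() in crt0/tlb.c ORs into
+ * the low bits of the physical page number; e.g. a 20-bit (1MB) page maps
+ * to 16, i.e. PAGE_1M.
+ */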
add_trans_pgsize(uint32_t page_size_bits) +{ + switch (page_size_bits) { + case 12: /* 4KB */ + return 1; + case 14: /* 16KB */ + return 2; + case 16: /* 64KB */ + return 4; + case 18: /* 256KB */ + return 8; + case 20: /* 1MB */ + return 16; + case 22: /* 4MB */ + return 32; + case 24: /* 16MB */ + return 64; + default: + return 1; + } +} + +int mb_counter = 1; + +static mmu_variable *map_data_address(mmu_variable *p, uint32_t data_offset) +{ + uint32_t page_size_bits = 12; + uint32_t page_size = 1 << page_size_bits; + uint32_t page_align = ~(page_size - 1); + + uint32_t data_addr = (uint32_t)p; + uint32_t data_page = data_addr & page_align; + + uint32_t new_data_page = data_page + data_offset; + uint32_t read_data_addr = data_addr + data_offset; + unsigned int data_perm = TLB_X | TLB_W | TLB_U; + add_translation((void *)new_data_page, (void *)data_page, 0); + + return (mmu_variable *)read_data_addr; +} + +static void test_dual_store(void) +{ + data0 = 0x12345678; + data1 = 0x87654321; + + mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB); + mb_counter++; + mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB); + mb_counter++; + + dual_store(new_data0, new_data1, 0x1, 0x2); + if (read_badva() == (uint32_t)new_data0) { + check32(read_badva0(), (uint32_t)new_data0); + check32(read_badva1(), INVALID_BADVA); + check32(read_ssr_v0(), 1); + check32(read_ssr_v1(), 0); + check32(read_ssr_bvs(), 0); + } else if (read_badva() == (uint32_t)new_data1) { + check32(read_badva0(), INVALID_BADVA); + check32(read_badva1(), (uint32_t)new_data1); + check32(read_ssr_v0(), 0); + check32(read_ssr_v1(), 1); + check32(read_ssr_bvs(), 1); + } else { + /* Something went wrong! */ + check32(0, 1); + } + check32(data0, 0x1); + check32(data1, 0x2); +} + +static void test_dual_load(void) +{ + uint32_t val0, val1; + + data0 = 0xaabbccdd; + data1 = 0xeeff0011; + + mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB); + mb_counter++; + mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB); + mb_counter++; + + dual_load(new_data0, new_data1, &val0, &val1); + if (read_badva() == (uint32_t)new_data0) { + check32(read_badva0(), (uint32_t)new_data0); + check32(read_badva1(), INVALID_BADVA); + check32(read_ssr_v0(), 1); + check32(read_ssr_v1(), 0); + check32(read_ssr_bvs(), 0); + } else if (read_badva() == (uint32_t)new_data1) { + check32(read_badva0(), INVALID_BADVA); + check32(read_badva1(), (uint32_t)new_data1); + check32(read_ssr_v0(), 0); + check32(read_ssr_v1(), 1); + check32(read_ssr_bvs(), 1); + } else { + /* Something went wrong! */ + check32(0, 1); + } + check32(val0, 0xaabbccdd); + check32(val1, 0xeeff0011); +} + +static void test_load_store(void) +{ + uint32_t val; + + data0 = 0x11223344; + data1 = 0x55667788; + + mmu_variable *new_data0 = map_data_address(&data0, mb_counter * ONE_MB); + mb_counter++; + mmu_variable *new_data1 = map_data_address(&data1, mb_counter * ONE_MB); + mb_counter++; + + load_store(new_data0, new_data1, &val, 0x123); + if (read_badva() == (uint32_t)new_data1) { + check32(read_badva0(), (uint32_t)new_data1); + check32(read_badva1(), INVALID_BADVA); + check32(read_ssr_v0(), 1); + check32(read_ssr_v1(), 0); + check32(read_ssr_bvs(), 0); + } else if (read_badva() == (uint32_t)new_data0) { + check32(read_badva0(), INVALID_BADVA); + check32(read_badva1(), (uint32_t)new_data0); + check32(read_ssr_v0(), 0); + check32(read_ssr_v1(), 1); + check32(read_ssr_bvs(), 1); + } else { + /* Something went wrong! 
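+           badva matched neither of the two new mappings, so record a
+           failure unconditionally.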
 */
+        check32(0, 1);
+    }
+    check32(val, 0x11223344);
+    check32(data1, 0x123);
+}
+static void test_badva_write(void)
+{
+    uint32_t va = 0x11223344;
+    write_badva0(va);
+    check32(read_badva(), va);
+}
+
+int main()
+{
+    puts("Hexagon badva test");
+
+    test_dual_store();
+    test_dual_load();
+    test_load_store();
+    test_badva_write();
+
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/cfgtable.h b/tests/tcg/hexagon/system/cfgtable.h
new file mode 100644
index 000000000000..fff84ef56950
--- /dev/null
+++ b/tests/tcg/hexagon/system/cfgtable.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef CFGTABLE_H
+#define CFGTABLE_H
+
+#include <stdint.h>
+
+static uint32_t read_cfgtable_field(uint32_t offset)
+{
+    uint32_t val;
+    asm volatile("r0 = cfgbase\n\t"
+                 "r0 = asl(r0, #5)\n\t"
+                 "%0 = memw_phys(%1, r0)\n\t"
+                 : "=r"(val)
+                 : "r"(offset)
+                 : "r0");
+    return val;
+}
+
+#define GET_SUBSYSTEM_BASE() (read_cfgtable_field(0x8) << 16)
+#define GET_FASTL2VIC_BASE() (read_cfgtable_field(0x28) << 16)
+
+static uintptr_t get_vtcm_base(void)
+{
+#if __HEXAGON_ARCH__ == 65
+    return 0xD8200000L;
+#elif __HEXAGON_ARCH__ >= 66
+    int vtcm_offset = 0x038;
+    return read_cfgtable_field(vtcm_offset) << 16;
+#else
+#error "unsupported hexagon revision"
+#endif
+}
+
+#endif /* CFGTABLE_H */
diff --git a/tests/tcg/hexagon/system/ciad-siad.c b/tests/tcg/hexagon/system/ciad-siad.c
new file mode 100644
index 000000000000..e3fbb7a506dc
--- /dev/null
+++ b/tests/tcg/hexagon/system/ciad-siad.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+static inline void siad(uint32_t val)
+{
+    asm volatile ("siad(%0);"
+                  : : "r"(val));
+    return;
+}
+static inline void ciad(uint32_t val)
+{
+    asm volatile ("ciad(%0);"
+                  : : "r"(val));
+    return;
+}
+
+static inline uint32_t getipendad()
+{
+    uint32_t reg;
+    asm volatile ("%0=s20;"
+                  : "=r"(reg));
+    return reg;
+}
+int
+main(int argc, char *argv[])
+{
+    siad(4);
+    int ipend = getipendad();
+    if (ipend != (0x4 << 16)) {
+        goto fail;
+    }
+    ciad(4);
+    ipend = getipendad();
+    if (ipend) {
+        goto fail;
+    }
+
+    printf("PASS\n");
+    return 0;
+fail:
+    printf("FAIL\n");
+    return 1;
+}
diff --git a/tests/tcg/hexagon/system/crt0/crt0.S b/tests/tcg/hexagon/system/crt0/crt0.S
new file mode 100644
index 000000000000..8a40e39536eb
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/crt0.S
@@ -0,0 +1,103 @@
+/*
+ * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "crt0.inc"
+    .equ DEFAULT_HEAP_SIZE,  0x4000000  /* 64MB */
+    .equ DEFAULT_STACK_SIZE, 0x100000   /* 1MB */
+
+    .section .start, "ax", @progbits
+    .subsection 0
+    .org 0
+
+    .global _start
+    .type _start, @function
+    .p2align 5
+_start:
+    jump hexagon_start_init
+    jump hexagon_start_main
+    .size _start, . - _start
+
+/*----------------------------------------------------------------------------*/
+
+    .global hexagon_pre_main
+    .type hexagon_pre_main, @function
+
+hexagon_pre_main:
+    /* Mark first stack frame. */
+    fp = #0
+
+    ReadFrom heapBase, r4
+
+    AddrOf DEFAULT_HEAP_SIZE
+    r5 = r0
+
+    r5 = add (r4, r5)       /* Calculate aligned heap top. */
+    r5 = add (r5, #15)
+    r5 = and (r5, #-16)
+    WriteTo heapLimit, r5
+
+    /* Set up stack.
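+       The stack is carved out directly above the heap; both sizes come
+       from the DEFAULT_* constants at the top of this file.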
*/ + AddrOf DEFAULT_STACK_SIZE + r7 = r0 + + r6 = add (r5, r7) /* Assume stack after heap. */ + r6 = and (r6, #-16) + + WriteTo stackBase, r6 + + ReadFrom stackBase, r6 + + r7 = sub (r6, r7) /* Desired stack size. */ + r7 = add (r7, #15) + r7 = and (r7, #-16) + WriteTo stackLimit, r7 + + /* Set stack up. */ + ReadFrom stackBase, r0 + sp = and (r0, #-16) /* Align top of stack. */ + + /* Zero up BSS. */ + AddrOf __bss_start, r0 + AddrOf _end, r2 + AddrOf memset, r28 /* bzero () is deprecated. */ + { r1 = #0 + r2 = sub (r2, r0) + callr r28 } + .size hexagon_pre_main, . - hexagon_pre_main + +/*----------------------------------------------------------------------------*/ + + .global hexagon_start_main + .type hexagon_start_main, @function +hexagon_start_main: + AddrOf _start_main, r28 + callr r28 + /*Stop all threads to terminate execution */ + r0 = #0x3f + stop (r0) + .size hexagon_start_main, . - hexagon_start_main + +/*----------------------------------------------------------------------------*/ + + .data + .global heapBase + .global heapLimit + .global stackBase + .global stackLimit + .global setHeapAngelCallParams + +.HeapParams: +heapBase: + .word end /* Provided by the linker script. */ +heapLimit: + .word end + (DEFAULT_HEAP_SIZE & -16) +stackBase: + .word 0 +stackLimit: + .word end + ((DEFAULT_HEAP_SIZE + 15) & -16) + +setHeapAngelCallParams: + .word .HeapParams diff --git a/tests/tcg/hexagon/system/crt0/crt0.inc b/tests/tcg/hexagon/system/crt0/crt0.inc new file mode 100755 index 000000000000..a28d68c51cd5 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/crt0.inc @@ -0,0 +1,25 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + + .macro AddrOf Var, To = r0 + \To\() = ## (\Var) + .endm + + .macro ReadFrom Var, To = r0 + AddrOf \Var, \To + \To = memw (\To) + .endm + + .macro WriteTo Var, From = r0, Ptr = r1 + .ifnc "\From", "\Ptr" + AddrOf \Var, \Ptr + memw (\Ptr) = \From + \From = memw (\Ptr) + .else + .print "Macro arguments \"From\" and \"Ptr\" cannot be the same." + .err + .endif + .endm diff --git a/tests/tcg/hexagon/system/crt0/crt0_standalone.S b/tests/tcg/hexagon/system/crt0/crt0_standalone.S new file mode 100644 index 000000000000..a3ca6ea95da2 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/crt0_standalone.S @@ -0,0 +1,1206 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "crt0.inc" + .equ TLB_FIXED_ENTRIES, 6 + + .org 0x20 /* This must be at address 0x20 */ +EventVectorBase: + .word .EventVectors + +/* This can vary based on the revid of the part: + 64, 128, 192. Most are 128 */ +_NumTLBEntries: + .word 127 + +TLBMapTable: + .word UPTE_START + +CoreDump: + .word RegDump + + .subsection 0 + + /* Make sure that data and code don't end up in the same L2 cache-line. */ + .p2align 6, 0 + + .global hexagon_start_init + .type hexagon_start_init, @function +hexagon_start_init: +.Init: + /* Clean up house (make sure that R0 is initialized before DCKILL). */ + dckill + isync + ickill + isync + +.InitSSR: + /* SFD = 0, IE = 0, UM = 0, EX = 0, ASID = 0 */ + r0 = #0 + ssr = r0 + isync + + /* Setup events */ +.InitVector: + ReadFrom EventVectorBase + evb = r0 + +.InitStack: + ReadFrom exc_stack_tops + sgp0 = r0 + +.InitFramekey: + r0 = #0 + framekey = r0 + + /* Configure cycle counter. 
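+       (the insert below sets SYSCFG bit 6, the pcycle counter enable)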
*/ +.InitPcycle: + r1 = #1 + r0 = syscfg + r0 = insert (r1, #1, #6) + syscfg = r0 + + /* Configure IMT/DMT. */ +.InitDMT: + r1 = #1 + r0 = syscfg + r0 = insert (r1, #1, #15) + syscfg = r0 +.InitQoS: + r1 = #1 + r0 = syscfg + r0 = insert (r1, #1, #13) + syscfg = r0 +1: +.InitXE: + r1 = #1 + r0 = ssr + r0 = insert (r1, #1, #31) + ssr = r0 + + //{ 0x4066, 0x4, 0x7F, 0, 4 }, // v66a_512 + { + r0 = #0x2c // JTLB size + r2 = cfgbase + } + r1 = asl(r2, #5) + r0 = memw_phys(r0, r1) + { + r0 = add(r0, #-1); + memw(##_tlbmax) = r0.new + } + + { + r0 = #0x40 // L2 Tag size + r2 = cfgbase + } + r0 = memw_phys(r0, r1) + r1 = #0; + p0 = cmp.eq(r0, #0x400) + { + if (p0) r1 = #5 + if (p0) jump 1f + } + p0 = cmp.eq(r0, #0x200) + { + if (p0) r1 = #4 + if (p0) jump 1f + } + p0 = cmp.eq(r0, #0x100) + { + if (p0) r1 = #3 + if (p0) jump 1f + } + p0 = cmp.eq(r0, #0x080) + { + if (p0) r1 = #2 + if (p0) jump 1f + } +1: + memw(##_l2cfg) = r1 + +/* L2 config sequence: + * 1 - Disable prefetching by clearing HFd/i bits in ssr/ccr + */ + r0 = ccr + r3 = #0 + r0 = insert (r3, #4, #16) /* Clear HFi, HFd, HFiL2 HFdL2 bits */ + ccr = r0 + + /* Configure L2 cache. */ + r0 = syscfg + r0 = insert (r3, #3, #16) /* Set L2 size to 0 via L2CFG. */ + + +/* L2 config sequence: + * 2 - execute an isync which is aligned to a 32byte boundary. + */ + .p2alignl 5, 0x7f00c000 + isync + +/* L2 config sequence: + * 3 - execute an syncht insn to insure there are no outstanding + * memory transactions. + */ + syncht + +/* L2 config sequence: + * 4 - Set the desired L2 size for < V4 (set to 0 for >= V4). + */ + syscfg = r0 + isync + +/* L2 config sequence: + * 5 - Execute the L2KILL insn to initiate the cache. + */ + l2kill + syncht + +/* L2 config sequence: + * 6 - Set the desired L2 size. + */ + r2 = memw(##_l2cfg) + r3 = #0x5 + r3 = min (r2, r3) /* min between desired and hwmax */ + r0 = insert (r3, #4, #16) /* Set L2 size via L2CFG. */ + syscfg = r0 + isync + + /* Configure L1 caches. */ +.InitCache: + r1 = #0 + r1 = #1 + r2 = syscfg + r2 = insert (r1, #1, #1) + r2 = insert (r0, #1, #2) + + r1 = #1 + r2 = insert (r1, #1, #23) + + syscfg = r2 + isync + + /* BEGIN code to turn on translation */ +.InitTLB: + // V65 an later use a table for this stuff, should get a table for all of it! + r0 = memw(##_tlbmax) + + /* Clear TLB and store the number of TLBs */ + { + r3:2 = combine(#0,#0) + memw(##_NumTLBEntries) = r0 + } + + loop0(.InitTLBLoop, r0) +.falign +.InitTLBLoop: + tlbw(r3:2,r0) + r0 = add (r0, #-1) + {}:endloop0 + isync + +.InitTLBGlobal: /* Fixed entry for everything. */ + AddrOf _start, r2 + r2 = lsr (r2, #12) + + AddrOf 0xc3f00000, r1 /* Global, 1-1 mapping. */ + AddrOf 0xf7000000, r0 /* Full perms, fully cacheable WB */ + r1 = or (r1, r2) /* 1M translation */ + r0 |= asl (r2,#1) + r0 = setbit(r0,#4) + r0 = and(r0,#-16) + r2 = #0 + tlbw(r1:0,r3) + + /* TODO Should there be a TLB entry for TCM too? */ + + r0 = syscfg + r0 = setbit (r0, #0) /* Turn the MMU on. */ + syscfg = r0 + isync + +.InitInt: + /* Set up rising edge triggered interrupts */ + r0 = #0 + imask = r0 + r1 = #-1 + cswi (r1) + + /* Enable interrupts globally. */ + r0 = ssr + r0 = setbit (r0, #18) + ssr= r0 + + r0 = syscfg + r0 = setbit (r0, #4) + syscfg = r0 + isync + + /* Set up input params to Angel call */ + r0 = #22 + AddrOf setHeapAngelCallParams, r1 + trap0 (#0) + +.PreMain: + AddrOf hexagon_pre_main, r28 + jumpr r28 + .size hexagon_start_init, . 
- hexagon_start_init + +.global qdsp6_start_init +.set qdsp6_start_init, \ + hexagon_start_init + +/* (At this point the machine is mostly ready for normal execution */ + + /* This code is jumped to when we start a new thread. */ + /* It reads some values out of memory and uses them */ + /* to begin execution. */ + /* The code supports going to a function of the type: */ + /* void foo (void *arg); */ + /* or */ + /* void foo (int arg); */ + /* All we have to do is get the location of "foo", the */ + /* value for "arg", and set up the stack. */ + /* This stuff has been set up for us by thread_create, below.*/ + /* Under the OS, we have no need for this, it is merely for */ + /* trying multithreaded applications on the raw hardware. */ + + .p2align 4 + .weak thread_stop + .type thread_stop, @function +thread_stop: +{ + r0 = htid + r1 = #1 +} + r1 = lsl (r1, r0) + stop (r1) + + .p2align 4 + + .type event_handle_reset, @function + +event_handle_reset: + r1 = htid /* do not alter until final register initialization */ + + { + r28 = ##(start_pc) + r29 = ##(start_sp) + } + + r2 = #0 /* UM = 0 EX = 0 IE = 0 ASID = 0 */ + ssr = r2 + isync + imask = r2 + + r2 = ##(exc_stack_tops) + r2 = memw (r2+r1<<#2) + sgp0 = r2 + + /* Initialize GP to the start of the global data area. */ + //r2 = ##(_SDA_BASE_) + //gp = r2 + + r2.h = #4 + r2.l = #0 + ssr = r2 /* Turn on interrupts */ + + r3 = #1 + r2 = ssr + r2 = insert (r3, #1, #31) + ssr = r2 + + r2.h = #0x1 /* Enable cache fetching */ + usr = r2 + + r0 = #1 + r2 = #1 + r0 |= asl (r2, #1) + r2 = ccr + r2 = insert (r0, #2, #16) + /* Enable dcfetch and l2fetch. */ + r2 = setbit (r2, #20) + ccr = r2 + + isync + + { + r2 = ##framekey_tbl + r3 = ##stack_size + } + { + r2 = memw(r2+r1<<#2) /* load framekey from memory array */ + r3 = memw(r3+r1<<#2) /* load stack_size from memory array */ + } + { + framekey = r2 /* store into framekey register */ + r2 = memw (sp+r1<<#2) + } + r3 = sub(r2, r3) /* framelimt = sp-stack_size) */ + framelimit = r3 /* store into framelimit register */ + + { + r28 = memw (r28+r1<<#2) + sp = memw (sp+r1<<#2) + fp = #0 + } + + { + r0 = ##(start_param) + lr = ##(thread_stop) + } + fp = #0 + r1 = htid + r0 = memw (r0+r1<<#2) + + jump thread_start + + .size event_handle_reset, . - event_handle_reset + + .global __coredump + .type coredump, @function + .set __coredump, coredump +coredump: + r0 = ssr + r0 = clrbit (r0, #16) /* UM = 0 */ + r0 = clrbit (r0, #17) /* EX = 0 */ + ssr = r0 + isync + r0 = #0xCD + trap0 (#0) + r2 = #-1 + r0 = #-1 + stop (r0) + .size event_core_dump, . - event_core_dump + + .type event_handle_nmi, @function +event_handle_nmi: + r0 = #1 + stid = r0 + jump coredump + .size event_handle_nmi, . - event_handle_nmi + + .type event_handle_error, @function +event_handle_error: + r0 = #2 + stid = r0 + jump coredump + .size event_handle_error, . - event_handle_error + + .type event_handle_rsvd, @function +event_handle_rsvd: + r0.h = #0xdead + r0.l = #0xbeef + stid = r0 + jump coredump + .size event_handle_rsvd, . - event_handle_rsvd + + .global thread_start + .type thread_start, @function +thread_start: + jumpr r28 + .size thread_start, . - thread_start + + /* TLB HANDLING */ + /* There are a few strategies we have tried for TLB handling. */ + /* The first is just to map every page 1:1 for virtual:physical */ + /* This means we have nothing to look up but no flexibility */ + /* The strategy implemented here is to divide memory into */ + /* a bunch of 1MB pages. 
Each page is by default set to the */ + /* corresponding physical 1M page, but the translation (and the */ + /* cacheability) can be changed with the add_translation function*/ + /* below. */ + /* We have to keep the table in memory, and it's down in the data*/ + /* section. */ + /* The page at address 0 is always kept in the TLB. */ + /* You will run into problems if the data gets pushed out into */ + /* another page, because you don't have a translation for the */ + /* data you need to do the translation! */ + /* The solution is to put the translation table (and probably */ + /* the TLB fill code) in special section (s) that go near address 0 */ + /* You can set that up in the linker script. */ + /* TLB miss because of eXecution */ + /* See HEXAGON Architecture System-Level Spec for more information */ + + + + .subsection 0 + + .p2align 6 + .global event_handle_tlbmissx + .type event_handle_tlbmissx, @function + +event_handle_tlbmissx: + crswap (sp, sgp0) + sp = add (sp, #-64) + /* Save off state */ + { + memd (sp + #0) = r1:0 + memd (sp + #8) = r3:2 + } + { + memd (sp + #16) = r5:4 + memd (sp + #24) = r7:6 + } + { + memd (sp + #32) = r9:8 + r9 = p3:0 + } + r8 = ssr + r7 = elr + p1 = tstbit (r8, #0) + { + /* Calculate 4K page index */ + r7 = lsr (r7, #12) + /* Check for next page hit */ + if (!p1) jump 1f + r0 = ##(__tlb_idx) + } + r7 = add (r7, #1) +1: + { + r1 = memw(##_tlb_fixed_entries) /* First non-fixed entry. */ + r3 = memw(##_NumTLBEntries) + } + /* Atomically increment index */ + /* NEVER overwrite fixed entries */ +1: + r6 = memw_locked (r0) + { + r6 = add (r6, #1) + /* This was hard coded to p0 = cmp.ge(r6, #NUM_TLB_ENTRIES) + Now we are using 2 registers so switch to the equivalent + p0 = !cmp.gt(r3, r6) */ + p0 = !cmp.gt (r3, r6) + } + /* Will never store a number greater than + _NumTLBEntries in &__tlb_idx */ + r6 = mux (p0, r1, r6) + memw_locked (r0, p0) = r6 + if (!p0) jump 1b /* Retry, lost reservation. */ + + { + r7 = lsr (r7, #8) /* 1M page index */ + r3 = memw (##TLBMapTable) + } + r3 = addasl (r3, r7, #1) + { + r3 = memh (r3) + r7 = asl (r7, #8) /* VPN */ + } + r5 = extractu (r3, #12, #4) + { + r4 = extractu (r3, #4, #0) + r0 = #0x0010 /* 1M */ + r1 = #0 + } + { + r4 = asl (r4, #24) + r1.h = #0xc000 + r0.h = #0xf000 + } +1: + { + r1 = or (r1, r7) /* c000_0000 + VPN */ + r0 |= asl(r5,#9) /* f000_0000 + PPD */ + } + r0 = or (r0, r4) + /* Get Lock */ + tlblock + r5 = tlbp(r1) + p0 = tstbit (r5, #31) + if (!p0) jump 1f + + tlbw(r1:0,r6) + isync + +1: + tlbunlock + + p3:0 = r9 + { + r9:8 = memd (sp + #32) + r7:6 = memd (sp + #24) + } + { + r5:4 = memd (sp + #16) + r3:2 = memd (sp + #8) + } + { + r1:0 = memd (sp + #0) + sp = add (sp, #64) + } + crswap (sp, sgp0) + rte + + .size .event_handle_tlbmissx, . - event_handle_tlbmissx + + /* TLB Miss RW */ + /* Basically the same as TLB MissX, but we get */ + /* The address from BADVA instead of EVB... see the */ + /* HEXAGON Architecture System-level Spec for more details. 
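+       Like the missx handler above, the refill cycles __tlb_idx over
+       the non-fixed entries and takes the translation from the 16-bit
+       PTE table.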
*/ + + .p2align 6 + + .global event_handle_tlbmissrw + .type event_handle_tlbmissrw, @function + +event_handle_tlbmissrw: + crswap (sp, sgp0) + sp = add (sp, #-64) + { + memd (sp + #0) = r1:0 + memd (sp + #8) = r3:2 + } + { + memd (sp + #16) = r5:4 + memd (sp + #24) = r7:6 + } + { + memd (sp + #32) = r9:8 + r8 = ssr + } + r7 = badva + r9 = p3:0 + { + r0 = ##__tlb_idx + r1 = memw(##_tlb_fixed_entries) + } + { + r7 = lsr (r7, #20) + r3 = memw(##_NumTLBEntries) /* 31, 63, 127, or 191 */ + } + /* Atomically increment index */ + /* NEVER overwrite entry 0 */ +1: + r6 = memw_locked (r0) + { + r6 = add (r6, #1) + /* This was hard coded to p0 = cmp.ge(r6, #NUM_TLB_ENTRIES) + Now we are using 2 registers so switch to the equivalent + p0 = !cmp.gt(r3, r6) */ + p0 = !cmp.gt (r3, r6) + } + /* Will never store a number greater than + _NumTLBEntries in &__tlb_idx */ + r6 = mux (p0, r1, r6) + memw_locked (r0, p0) = r6 + if (!p0) jump 1b /* Retry, lost reservation. */ + + r3 = memw (##TLBMapTable) + r3 = addasl (r3, r7, #1) + { + r3 = memh (r3) + r7 = asl (r7, #8) /* VPN */ + } + + r4 = extractu (r3, #4, #0) +.L_OK: + { + r5 = extractu (r3, #12, #4) + r0 = #0x0010 /* 1M */ + r1 = #0 + } + { + r4 = asl (r4, #24) + r1.h = #0xc000 + r0.h = #0xf000 + } +1: + { + r1 = or (r1, r7) /* R5: VPN | C000_0000 */ + r0 |= asl(r5,#9) /* R4: PPD | F000_0000 */ + } + r0 = or (r0, r4) + + tlblock + r5 = tlbp(r1) + p0 = tstbit (r5, #31) + if (!p0) jump 1f + + tlbw(r1:0,r6) + isync + jump 2f +1: + // If we take a miss around a user defined page they need to + // manually create another page or not touch the regions above + // and below their page within a 1M boundary. + r4 = memw(##_tlb_fixed_entries) + p0 = cmp.gt(r4, r5) // r4>r5 == r5 0k L2 cache */ + .byte 0x2 /* rev: 0x1xxx: 128K L2 -> 128k L2 cache */ + .byte 0x3 /* rev: 0x2xxx: 256K L2 -> 256k L2 cache */ + .byte 0x3 /* rev: 0x3xxx: Not valid at this time */ + .byte 0x4 /* rev: 0x4xxx: 512K L2 -> 512k L2 cache */ + .byte 0x4 /* rev: 0x5xxx: Not valid at this time */ + .byte 0x4 /* rev: 0x6xxx: 768K L2 -> 512k L2 cache */ + .byte 0x4 /* rev: 0x7xxx: Not valid at this time */ + .byte 0x5 /* rev: 0x8xxx: 1024K L2 -> 1024 L2 cache */ + .byte 0x4 /* rev: 0x9xxx: Not valid at this time */ + .byte 0x5 /* rev: 0xAxxx: 1536K L2 -> 1024 L2 cache */ + .byte 0x4 /* rev: 0xBxxx: Not valid at this time */ + .byte 0x4 /* rev: 0xCxxx: Not valid at this time */ + .byte 0x4 /* rev: 0xDxxx: Not valid at this time */ + .byte 0x4 /* rev: 0xExxx: Not valid at this time */ + .byte 0x4 /* rev: 0xFxxx: Not valid at this time */ + + + /* Data used for TLB refill */ + + .p2align 6, 0 + + .global __tlb_lock + .set __tlb_lock, tlb_lock +tlb_lock: + .word 0 + .global __tlb_idx + .set __tlb_idx, tlb_idx +tlb_idx: + .word TLB_FIXED_ENTRIES - 1 + + .global _tlb_fixed_entries +_tlb_fixed_entries: + .word TLB_FIXED_ENTRIES diff --git a/tests/tcg/hexagon/system/crt0/hexagon_standalone.h b/tests/tcg/hexagon/system/crt0/hexagon_standalone.h new file mode 100644 index 000000000000..01ca41349f0f --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/hexagon_standalone.h @@ -0,0 +1,103 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include +#include + +#ifndef _TLB_H +#define _TLB_H + +typedef enum { + SHIFT_4K = 0, + SHIFT_16K, + SHIFT_64K, + SHIFT_256K, + SHIFT_1M, + SHIFT_4M, + SHIFT_16M, + SHIFT_64M, + SHIFT_256M, + SHIFT_1G, +} PageShift; + +typedef enum { + PAGE_4K = 1 << SHIFT_4K, + PAGE_16K = 1 << SHIFT_16K, + PAGE_64K = 1 << SHIFT_64K, + PAGE_256K = 1 << SHIFT_256K, + PAGE_1M = 1 << SHIFT_1M, + PAGE_4M = 1 << SHIFT_4M, + PAGE_16M = 1 << SHIFT_16M, + PAGE_64M = 1 << SHIFT_64M, + PAGE_256M = 1 << SHIFT_256M, + PAGE_1G = 1 << SHIFT_1G, +} PageSize; + + +/* + * TLB entry format: + * + * TLBHI: + * 63 | 62 | 61 | 60:59 | 58 -- 52 | 51 -------- 32 | + * V | G | EP PPNex | ASID | Virtual Page # | + * ------------------------------------------- + * + * V - Valid bit. + * G - Global bit. If set ASID is ignored and the page + * is globally accessible. + * EP - Extra Physical Bit + * PPNex - Extended Physical Page. (V73 and beyond) + * ASID - Address Space Identifier. + * Virtual Page - Virtual Page number. It has a minimum 4K alignment. + * This means the input value is right shifted 12 bits + * and that is what is placed into this field. + * + * TLBLO: + * 31 | 30 | 29 | 28 | 27 -- 24 | 23 --------- 1 | 0 | + * X | W | R | U | C | Physical Page # | S | + * ---------------------------------------------------- + * + * X - Execute Enabled + * W - Write Enabled + * R - Read Enabled + * U - User mode accessible + * C - Cacheablilty attributes: L1/L2 Cacheable Writeback/thru + * Physical Page - Physical Page # + * + */ + +typedef union { + struct { + uint64_t S:1; + uint64_t PPN:23; + uint64_t CacheAttr:4; + uint64_t XWRU:4; + uint64_t VirtualPage:20; + uint64_t ASID:7; +#if __HEXAGON_ARCH__ < 73 + uint64_t A0:1; + uint64_t A1:1; +#else + uint64_t PPN_EX:2; +#endif + uint64_t EP:1; + uint64_t VG:2; + }; + uint64_t raw; +} TLBEntry; + + +#define TLB_NOT_FOUND 0x80000000 + +int add_translation_extended(int index, void *va, uint64_t pa, + unsigned int page_size, unsigned int xwru, + unsigned int cccc, unsigned int asid, + unsigned int aa, unsigned int vg); +void add_translation_fixed(int index, void *va, void *pa, int cccc, + int permissions); +void add_translation(void *va, void *pa, int cccc); + +#endif /* _TLB_H */ diff --git a/tests/tcg/hexagon/system/crt0/min_libc.c b/tests/tcg/hexagon/system/crt0/min_libc.c new file mode 100644 index 000000000000..f44ee49f8f44 --- /dev/null +++ b/tests/tcg/hexagon/system/crt0/min_libc.c @@ -0,0 +1,359 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * Small cheat: take size_t, NULL, and other type/symbol definitions from the + * hexagon toolchain. We cannot link with the libc, though, as the actual + * implementation for functions like printf and open are defined for Linux, and + * we are running on "bare metal". + */ +#include +#include +#include +#include + +FILE *const stdout = (FILE *)1; + +void exit(int code) +{ + asm volatile( + "r2 = %0\n" + "stop(r0)\n" + : + : "r"(code) + : "r2"); + __builtin_unreachable(); +} + +/* The assert() macro will use this. 
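+   It prints the failing expression and its location, then terminates
+   via exit(), i.e. the stop instruction above.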
 */
+void __assert_fail(const char *assertion, const char *file, int line,
+                   const char *function)
+{
+    printf("ASSERT fail '%s' at file '%s' line %d function %s\n",
+           assertion, file, line, function);
+    exit(1);
+}
+
+void *memset(void *b, int c, size_t len)
+{
+    for (size_t i = 0; i < len; i++) {
+        ((unsigned char *)b)[i] = (unsigned char)c;
+    }
+    return b;
+}
+
+int memcmp(const void *p1, const void *p2, size_t n)
+{
+    const char *s1 = p1;
+    const char *s2 = p2;
+    for ( ; n && (*s1 == *s2); s1++, s2++, n--) {
+        /* empty */
+    }
+    return n ? *(unsigned char *)s1 - *(unsigned char *)s2 : 0;
+}
+
+int bcmp(const void *s1, const void *s2, size_t n)
+{
+    return __builtin_bcmp(s1, s2, n);
+}
+
+
+#define HEX_SYS_WRITEC 0x03
+#define HEX_SYS_WRITE0 0x04
+#define HEX_SYS_GET_CMDLINE 0x15
+
+/*
+ * Macro flavors:
+ * - DIRECT_SWI takes up to two args and puts them in r1 and r2.
+ * - SWI takes up to four args and puts them in an array, placing the
+ *   array address at r1.
+ */
+
+static int swi_ret, swi_err, swi_args[4];
+#define DO_SWI(CODE, ARG0, ARG1) \
+    do { \
+        asm volatile( \
+            "r0 = %2\n" \
+            "r1 = %3\n" \
+            "r2 = %4\n" \
+            "trap0(#0)\n" \
+            "%0 = r0\n" \
+            "%1 = r1\n" \
+            : "=r"(swi_ret), "=r"(swi_err) \
+            : "r"(CODE), "r"(ARG0), "r"(ARG1) \
+            : "r0", "r1", "r2", "memory" \
+        ); \
+    } while (0)
+
+#define SWI0(CODE) DO_SWI(CODE, swi_args, 0)
+#define SWI1(CODE, ARG0) \
+    do { swi_args[0] = (uint32_t)(ARG0); SWI0(CODE); } while (0)
+#define SWI2(CODE, ARG0, ARG1) \
+    do { swi_args[1] = (uint32_t)(ARG1); SWI1(CODE, ARG0); } while (0)
+#define SWI3(CODE, ARG0, ARG1, ARG2) \
+    do { swi_args[2] = (uint32_t)(ARG2); SWI2(CODE, ARG0, ARG1); } while (0)
+#define SWI4(CODE, ARG0, ARG1, ARG2, ARG3) \
+    do { swi_args[3] = (uint32_t)(ARG3); SWI3(CODE, ARG0, ARG1, ARG2); } while (0)
+
+#define GET_MACRO_5(_1, _2, _3, _4, _5, NAME, ...) NAME
+#define SWI(...) \
+    ({ GET_MACRO_5(__VA_ARGS__, SWI4, SWI3, SWI2, SWI1, SWI0)(__VA_ARGS__); \
+       swi_ret; })
+
+#define DIRECT_SWI0(CODE) DO_SWI(CODE, 0, 0)
+#define DIRECT_SWI1(CODE, ARG1) DO_SWI(CODE, ARG1, 0)
+#define DIRECT_SWI2(CODE, ARG1, ARG2) DO_SWI(CODE, ARG1, ARG2)
+
+#define GET_MACRO_3(_1, _2, _3, NAME, ...) NAME
+#define DIRECT_SWI(...) \
+    ({ GET_MACRO_3(__VA_ARGS__, DIRECT_SWI2, DIRECT_SWI1, DIRECT_SWI0)(__VA_ARGS__); \
+       swi_ret; })
+
+int puts(const char *str)
+{
+    DIRECT_SWI(HEX_SYS_WRITE0, str);
+    DIRECT_SWI(HEX_SYS_WRITE0, "\n");
+    return 0;
+}
+
+int fputs(const char *str, FILE *f)
+{
+    assert(f == stdout); /* Only stdout is supported. */
+    DIRECT_SWI(HEX_SYS_WRITE0, str);
+    return 0;
+}
+
+size_t fwrite(const void *ptr, size_t size, size_t nitems, FILE *f)
+{
+    assert(f == stdout); /* Only stdout is supported.
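+                            (stdout is just the constant handle defined
+                            at the top of this file)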
*/ + for (size_t i = 0; i < size * nitems; i++) { + DIRECT_SWI(HEX_SYS_WRITEC, &ptr[i]); + } + return size * nitems; +} + +int putchar(int c) +{ + DIRECT_SWI(HEX_SYS_WRITEC, &c); + return c; +} + +static char *num_to_s(uint64_t signed_num, uint64_t base) +{ + static char buffer[1024]; + char *bptr = buffer; + uint64_t num; + + if (base == 16) { + num = signed_num; + } else if (base == 10) { + if (signed_num < 0) { + *bptr++ = '-'; + signed_num *= -1; + } + num = signed_num; + } else { + puts("fatal: num_to_s expects base 16 or 10"); + exit(1); + } + + if (!num) { + return "0"; + } + + uint64_t divider = 1; + for (uint64_t n = num; n >= base; n /= base) { + divider *= base; + } + + while (num) { + unsigned int digit = num / divider; + if (digit) { + num %= divider; + divider /= base; + if (digit >= 10) { + *bptr++ = 'a' + (digit - 10); + } else { + *bptr++ = '0' + digit; + } + while (num < divider) { + *bptr++ = '0'; + divider /= base; + } + } else { + divider /= base; + } + } + + *bptr = '\0'; + return buffer; +} + +static int advance_prefix(const char **str_ptr, char *prefix) +{ + const char *str = *str_ptr; + while (*str && *str == *prefix) { + str++; + prefix++; + } + str--; + if (!*prefix) { + *str_ptr = str; + return 1; + } + return 0; +} + +static char *pad0(char *str, int n) +{ + static char buffer[1024]; + int len = strlen(str); + assert(n < 1024); + + int i; + for (i = 0; i < n - len; i++) { + buffer[i] = '0'; + } + strcpy(&buffer[i], str); + return buffer; +} + +/* + * Very simple implementation. No error checking. + * Supported formats are: + * %d, %s, %c, %x, %016llx + */ +int printf(const char *format, ...) +{ + va_list ap; + __builtin_va_start(ap, format); + for (const char *ptr = format; *ptr; ptr++) { + if (*ptr == '%') { + ptr++; + switch (*ptr) { + case 'd': + case 'x': + case 'p': + { + int num = __builtin_va_arg(ap, int); + fputs(num_to_s(num, *ptr == 'd' ? 10 : 16), stdout); + break; + } + case 's': + fputs(__builtin_va_arg(ap, char *), stdout); + break; + case 'c': + putchar(__builtin_va_arg(ap, int)); + break; + case '%': + putchar('%'); + break; + case '0': + if (advance_prefix(&ptr, "016llx")) { + uint64_t num = __builtin_va_arg(ap, uint64_t); + fputs(pad0(num_to_s(num, 16), 16), stdout); + break; + } + /* else: fallthrough */ + default: + fputs("fatal: unknown printf modifier '", stdout); + putchar(*ptr); + puts("'"); + exit(1); + } + } else { + putchar(*ptr); + } + } + __builtin_va_end(ap); + return 1; +} + +size_t strlen(const char *s) +{ + size_t len = 0; + for ( ; *s; s++) { + len++; + } + return len; +} + +char *strcpy(char *dst, const char *src) +{ + int i; + for (i = 0; src[i]; i++) { + dst[i] = src[i]; + } + dst[i] = '\0'; + return dst; +} + +int strcmp(const char *s1, const char *s2) +{ + for ( ; *s1 && (*s1 == *s2); s1++, s2++) { + /* empty */ + } + return *(unsigned char *)s1 - *(unsigned char *)s2; +} + +char *strrchr(const char *s, int c) +{ + for (int i = strlen(s) - 1; i >= 0; i--) { + if (s[i] == c) { + return (char *)&s[i]; + } + } + return NULL; +} + +#define MAX_ARGS 15 +/* + * Very simplistic implementation, using static buffers, and assuming no + * args will contain spaces. 
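+ * The command line comes from the HEX_SYS_GET_CMDLINE semihosting call
+ * and is split in place, turning each space into a terminator.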
+ */
+static inline char **getcmdline(int *argc)
+{
+    static char *args[MAX_ARGS] = { NULL };
+    static char buf[4096]; /* static: args[] keeps pointing into it */
+    char *c;
+    int id = 0;
+
+    assert(!SWI(HEX_SYS_GET_CMDLINE, buf, sizeof(buf)));
+
+    *argc = 1;
+    for (c = buf; *c; c++) {
+        if (*c == ' ' && *(c + 1)) {
+            (*argc)++;
+        }
+    }
+    assert(*argc <= MAX_ARGS);
+
+    if (*argc == 0) {
+        return args;
+    }
+
+    args[id++] = buf;
+    for (c = buf; *c; c++) {
+        if (*c == ' ') {
+            *c = '\0';
+            if (id < *argc) {
+                args[id++] = c + 1;
+            }
+        }
+    }
+    return args;
+}
+
+int main(int argc, char **argv, char **envp);
+void _start_main(void)
+{
+    int argc;
+    char **argv = getcmdline(&argc);
+    /* For now, we ignore envp */
+    char *envp[] = { NULL };
+    exit(main(argc, argv, envp));
+    exit(1);
+}
diff --git a/tests/tcg/hexagon/system/crt0/pte.S b/tests/tcg/hexagon/system/crt0/pte.S
new file mode 100644
index 000000000000..406e45389118
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/pte.S
@@ -0,0 +1,80 @@
+/*
+ * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+    .section .start, "awx", @progbits
+    .p2align 3
+    .subsection 1
+/* This is the translation table */
+/* We make a table of 2^12 entries */
+/* Each entry is a .hword (16 bits) */
+/* Each entry is initialized to 0 in the 4 LSBs (cached WB, see system spec) */
+/* Each entry is initialized to 1:1 Virtual:Physical in the upper 12 bits. */
+/* We use the preprocessor to avoid copy-paste errors and to avoid */
+/* an 8192-line addition to the file. */
+
+    .set __UPTE_START, UPTE_START
+    .weak __UPTE_START, UPTE_START
+UPTE_START:
+#define TLBENTRY(X) .hword ((((X) >> 16) & (0xfff0)) | 0x7);
+
+#define TLB_1M(X) TLBENTRY ((X) << 20)
+#define TLB_16M(X) \
+    TLB_1M (((X) << 4) + 0) \
+    TLB_1M (((X) << 4) + 1) \
+    TLB_1M (((X) << 4) + 2) \
+    TLB_1M (((X) << 4) + 3) \
+    TLB_1M (((X) << 4) + 4) \
+    TLB_1M (((X) << 4) + 5) \
+    TLB_1M (((X) << 4) + 6) \
+    TLB_1M (((X) << 4) + 7) \
+    TLB_1M (((X) << 4) + 8) \
+    TLB_1M (((X) << 4) + 9) \
+    TLB_1M (((X) << 4) + 10) \
+    TLB_1M (((X) << 4) + 11) \
+    TLB_1M (((X) << 4) + 12) \
+    TLB_1M (((X) << 4) + 13) \
+    TLB_1M (((X) << 4) + 14) \
+    TLB_1M (((X) << 4) + 15)
+
+#define TLB_256M(X) \
+    TLB_16M (((X) << 4) + 0) \
+    TLB_16M (((X) << 4) + 1) \
+    TLB_16M (((X) << 4) + 2) \
+    TLB_16M (((X) << 4) + 3) \
+    TLB_16M (((X) << 4) + 4) \
+    TLB_16M (((X) << 4) + 5) \
+    TLB_16M (((X) << 4) + 6) \
+    TLB_16M (((X) << 4) + 7) \
+    TLB_16M (((X) << 4) + 8) \
+    TLB_16M (((X) << 4) + 9) \
+    TLB_16M (((X) << 4) + 10) \
+    TLB_16M (((X) << 4) + 11) \
+    TLB_16M (((X) << 4) + 12) \
+    TLB_16M (((X) << 4) + 13) \
+    TLB_16M (((X) << 4) + 14) \
+    TLB_16M (((X) << 4) + 15)
+
+#define TLB_4G \
+    TLB_256M (0) \
+    TLB_256M (1) \
+    TLB_256M (2) \
+    TLB_256M (3) \
+    TLB_256M (4) \
+    TLB_256M (5) \
+    TLB_256M (6) \
+    TLB_256M (7) \
+    TLB_256M (8) \
+    TLB_256M (9) \
+    TLB_256M (10) \
+    TLB_256M (11) \
+    TLB_256M (12) \
+    TLB_256M (13) \
+    TLB_256M (14) \
+    TLB_256M (15)
+
+TLB_4G
+
+    .size UPTE_START, . - UPTE_START
diff --git a/tests/tcg/hexagon/system/crt0/tlb.c b/tests/tcg/hexagon/system/crt0/tlb.c
new file mode 100644
index 000000000000..00e07761dbe9
--- /dev/null
+++ b/tests/tcg/hexagon/system/crt0/tlb.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdint.h>
+#include "hexagon_standalone.h"
+
+/*
+ * The following 2 functions use global addressing mode
+ * to avoid GP relative overflows.
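+ * (The ## prefix forces a full 32-bit constant extender instead of a
+ * GP-relative access.)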
+ */ +static inline uint32_t get_tlb_fixed_entries(void) +{ + uint32_t *addr; + asm volatile ("%0=##_tlb_fixed_entries\n\t" + : "=r"(addr)); + return *addr; +} +static inline uint32_t *get_UPTE_START(void) +{ + uint32_t addr; + asm volatile ("%0=##__UPTE_START\n\t" + : "=r"(addr)); + return (uint32_t *)addr; +} + +static inline uint32_t get_ssr(void) +{ + uint32_t reg; + asm volatile ("%0=ssr\n\t" + : "=r"(reg)); + return reg; +} + + +static inline int64_t read_tlb_entry(int index) +{ + uint64_t reg; + asm volatile ("%[reg]=tlbr(%[index])" + : [reg] "=r" (reg) + : [index] "r" (index)); + asm volatile ("isync"); + return reg; +} + + +static inline void write_tlb_entry(TLBEntry tlb, int index) +{ + uint64_t entry = tlb.raw; + asm volatile ("tlblock\n" + "tlbw(%[entry], %[index])\n" + "isync\n" + "tlbunlock\n" + : + : [entry] "r" (entry), [index] "r" (index)); +} + +static inline int32_t tlb_probe(uint32_t va) +{ + uint32_t VirtualPageNumber = va >> 12; + uint32_t ASID = (get_ssr() >> 8) & 0x7f; + uint32_t probe = ((ASID << 20) | VirtualPageNumber) & 0x7ffffff; + uint32_t result = 0; + asm volatile ("%[result]=tlbp(%[probe])" + : [result] "=r" (result) + : [probe] "r" (probe)); + + return result; +} + + +static inline void tlb_invalidate(uint32_t va) +{ + int entry = tlb_probe(va); + if (entry == TLB_NOT_FOUND) { + return; + } + + TLBEntry tlb; + tlb.raw = read_tlb_entry(entry); + tlb.raw = tlb.raw & ~(1ull << 63); /* Clear the V bit. */ + write_tlb_entry(tlb, entry); +} + + +static inline TLBEntry basic_entry(uint32_t va, uint64_t pa, PageSize pagesize) +{ + TLBEntry T; + uint64_t PPN; + T.raw = 0ull; + T.VirtualPage = va >> 12; /* 63-51 */ +#if __HEXAGON_ARCH__ > 72 + T.PPN_EX = (pa & (3ull << 36)) >> 36; +#endif + T.EP = (pa & (1ull << 35)) >> 35; + PPN = pa >> 12ull; + PPN = (PPN << 1ull) | pagesize; + if (pagesize == 1) { + T.S = 1; + } + T.raw |= PPN; + return T; +} +/* + * function: mkentry + * description: + * - Given just a Physical Address (pa) and a Virtual Address (va) + * create a default entry. + * - A user wanting to change the cache attributes or permissions + * can do so prior to writing the entry. + */ +static TLBEntry mkentry(uint32_t va, uint64_t pa, PageSize pagesize) +{ + + /* Make an entry and set some reasonable defaults */ + TLBEntry T = basic_entry(va, pa, pagesize); + + T.CacheAttr = 0x7; + T.XWRU = 0x6; + T.VG = 0x3; + return T; +} + +int add_translation_extended(int index, void *va, uint64_t pa, + unsigned int page_size, unsigned int xwru, + unsigned int cccc, unsigned int asid, + unsigned int aa, unsigned int vg) +{ + uint32_t num_entries = get_tlb_fixed_entries(); + + if ((index < 1) || (index > (num_entries - 1))) { + return -1; + } + + tlb_invalidate((uint32_t)va); + TLBEntry T; + T = basic_entry((uint32_t)va, pa, page_size); + T.ASID = ((uint64_t)asid & 0x7f); + T.CacheAttr = ((uint64_t)cccc & 0xf); + T.XWRU = ((uint64_t)xwru & 0xf); + T.VG = ((uint64_t)vg & 0x3); +#if __HEXAGON_ARCH__ < 73 + T.raw |= ((uint64_t)aa & 0x3) << 59ull; +#endif + write_tlb_entry(T, index); + + return 0; +} + + +void add_translation_fixed(int index, void *va, void *pa, int cccc, + int permissions) +{ + tlb_invalidate((uint32_t)va); + add_translation_extended(index, va, (uint64_t)pa, PAGE_1M, permissions, cccc, + 0, 0, 3); +} + +/* + * The following deals with the PTE software structure. The actual entry will + * not be placed into the TLB until an address fault occurrs. 
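+ * Each 16-bit entry packs a 12-bit physical page number with 4 cache
+ * attribute bits; see SMALL_PTE below.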
+ */ + +typedef union { + struct { + uint16_t cache:4; + uint16_t pa:12; + }; + uint16_t PTE_raw; +} SMALL_PTE; + +static SMALL_PTE *findPTEAddr(uint32_t va) +{ + uint32_t *PTE = get_UPTE_START(); + int index = va >> 20; + return (SMALL_PTE *)PTE + index; +} +static SMALL_PTE findPTEValue(uint32_t va) +{ + SMALL_PTE *A = findPTEAddr(va); + return *A; +} + +/* This function adds a translation into the mapping table, see above */ +/* Because we use 1MB pages, we only need to translate 12 bits. */ +/* We keep those 12 bits plus 4 bits (where we keep the C field, */ +/* see the System-level architecture spec on TLB entries) in */ +/* a 16-bit entry in the table. */ +/* We index into the table using the upper 12 bits. */ +/* As a note, 2 bytes x 2^12 entries == 8KB table */ +void add_translation(void *va, void *pa, int cccc) +{ + SMALL_PTE *S = findPTEAddr((uint32_t)va); + S->pa = (uint32_t)pa >> 20; + S->cache = cccc; +} diff --git a/tests/tcg/hexagon/system/fastl2vic.c b/tests/tcg/hexagon/system/fastl2vic.c new file mode 100644 index 000000000000..a115ae73f799 --- /dev/null +++ b/tests/tcg/hexagon/system/fastl2vic.c @@ -0,0 +1,73 @@ +/* + * Copyright(c) 2024-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * Test the fastl2vic interface. + * + * hexagon-sim a.out --subsystem_base=0xfab0 --cosim_file q6ss.cfg + */ + +#include "crt0/hexagon_standalone.h" + +#include "cfgtable.h" + +#define CSR_BASE 0xfab00000 +#define L2VIC_BASE ((CSR_BASE) + 0x10000) +#define L2VIC_INT_ENABLE(b, n) \ + ((unsigned int *) ((b) + 0x100 + 4 * (n / 32))) +#define L2VIC_INT_ENABLE_SET(b, n) \ + ((unsigned int *) ((b) + 0x200 + 4 * (n / 32))) + +int main() +{ + int ret = 0; + unsigned int irq_bit; + + /* setup the fastl2vic interface and setup an indirect mapping */ + volatile uint32_t *A = (uint32_t *)0x888e0000; + add_translation_extended(3, (void *)A, GET_FASTL2VIC_BASE(), 16, 7, 4, 0, 0, 3); + + uint32_t l2vic_base = GET_SUBSYSTEM_BASE() + 0x10000; + + /* set and verify an interrupt using the L2VIC_BASE */ + irq_bit = (1 << (66 % 32)); + *L2VIC_INT_ENABLE_SET(l2vic_base, 66) = irq_bit; + if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x4) { + ret = __LINE__; + } + + /* set and verify an interrupt using the FASTL2VIC interface */ + *A = 68; + if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x14) { + ret = __LINE__; + } + *A = 67; + if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x1C) { + ret = __LINE__; + } + + + /* Now clear the lines */ + *A = ((1 << 16) | 68); + if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0xC) { + ret = __LINE__; + } + *A = ((1 << 16) | 66); + if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x8) { + ret = __LINE__; + } + *A = ((1 << 16) | 67); + if (*L2VIC_INT_ENABLE(l2vic_base, 64) != 0x0) { + ret = __LINE__; + } + + if (ret) { + printf("%s: FAIL, last failure near line %d\n", __FILE__, ret); + } else { + printf("PASS\n"); + } + return ret; +} diff --git a/tests/tcg/hexagon/system/hvx-multi.c b/tests/tcg/hexagon/system/hvx-multi.c new file mode 100644 index 000000000000..0d2e90c2c79b --- /dev/null +++ b/tests/tcg/hexagon/system/hvx-multi.c @@ -0,0 +1,119 @@ +/* + * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved. 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+int err;
+
+#include "../hvx_misc.h"
+
+void set_hvx_context(int n)
+{
+    uint32_t ssr_context_bits = n << 27;
+    asm volatile(
+        "r1 = ssr\n"
+        "r1 = and(r1, ##0xc7ffffff)\n"
+        "r1 = or(r1, %0)\n"
+        "ssr = r1\n"
+        "isync\n"
+        :
+        : "r"(ssr_context_bits)
+        : "r1"
+    );
+}
+
+void setv0(int n)
+{
+    asm volatile(
+        "v0 = vsplat(%0)\n"
+        : : "r"(n) : "v0"
+    );
+}
+
+void store_v0(MMVector *v)
+{
+    asm volatile(
+        "vmemu(%0) = v0\n"
+        :
+        : "r"(v)
+        : "memory"
+    );
+}
+
+uint32_t get_num_contexts(void)
+{
+    const int EXT_CONTEXT_OFFSET = 13;
+    unsigned int cfgbase;
+    asm volatile("%0 = cfgbase\n" : "=r"(cfgbase));
+    uint32_t *cfgtable = (uint32_t *)(cfgbase << 16);
+    return *(cfgtable + EXT_CONTEXT_OFFSET);
+}
+
+uint32_t get_rev(void)
+{
+    uint32_t rev;
+    asm volatile("%0 = rev\n" : "=r"(rev));
+    return rev;
+}
+
+/*
+ * This test verifies that each new context is properly selected and is
+ * independent of the thread.
+ */
+int main()
+{
+    int num_contexts = get_num_contexts();
+    printf("rev=v%x, HVX-contexts=%d\n", (int)(get_rev() & 0xff), num_contexts);
+    memset(&output[0], 0, 8 * sizeof(MMVector));
+
+    /* First set v0 on all the contexts. */
+    for (int i = 0; i < num_contexts; i++) {
+        set_hvx_context(i);
+        setv0(i + 1);
+    }
+
+    /*
+     * Now each context should have its own v0 value. Save it to memory. We
+     * check all possible SSR.XA values to make sure the "aliases" are
+     * implemented correctly.
+     */
+    for (int i = 0; i < 8; i++) {
+        set_hvx_context(i);
+        store_v0(&output[i]);
+    }
+
+
+    /*
+     * Set expected values:
+     *
+     *                             num contexts
+     * SSR.XA        2               4               6               8
+     *  000    HVX Context 0   HVX Context 0   HVX Context 0   HVX Context 0
+     *  001    HVX Context 1   HVX Context 1   HVX Context 1   HVX Context 1
+     *  010    HVX Context 0   HVX Context 2   HVX Context 2   HVX Context 2
+     *  011    HVX Context 1   HVX Context 3   HVX Context 3   HVX Context 3
+     *  100    HVX Context 0   HVX Context 0   HVX Context 4   HVX Context 4
+     *  101    HVX Context 1   HVX Context 1   HVX Context 5   HVX Context 5
+     *  110    HVX Context 0   HVX Context 2   HVX Context 2   HVX Context 6
+     *  111    HVX Context 1   HVX Context 3   HVX Context 3   HVX Context 7
+     */
+    for (int i = 0; i < 8; i++) {
+        int expected = (i % num_contexts) + 1;
+        /* Exception for num_contexts=6 */
+        if (num_contexts == 6 && i >= 6) {
+            expected = (i - 6 + 2) + 1;
+        }
+        for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
+            expect[i].w[j] = expected;
+        }
+    }
+
+    check_output_w(__LINE__, 8);
+    puts(err ? "FAIL" : "PASS");
+    return !!err;
+}
diff --git a/tests/tcg/hexagon/system/int_range.c b/tests/tcg/hexagon/system/int_range.c
new file mode 100644
index 000000000000..688355886362
--- /dev/null
+++ b/tests/tcg/hexagon/system/int_range.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test the range of the l2vic interface.
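+ * All 1024 interrupt enable bits are set, read back, cleared, and
+ * re-checked through the per-word L2VIC_INT_ENABLE* banks.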
+ */
+
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include "cfgtable.h"
+
+#define L2VIC_INT_ENABLE(b, n) \
+    ((volatile unsigned int *)((b) + 0x100 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_ENABLE_SET(b, n) \
+    ((volatile unsigned int *)((b) + 0x200 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_ENABLE_CLEAR(b, n) \
+    ((volatile unsigned int *)((b) + 0x180 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_SOFT_INT_SET(b, n) \
+    ((volatile unsigned int *)((b) + 0x480 + 4 * (n / 32))) /* device mem */
+
+#define L2VIC_INT_TYPE(b, n) \
+    ((volatile unsigned int *)((b) + 0x280 + 4 * (n / 32))) /* device mem */
+
+volatile int pass; /* must use volatile */
+int g_irq;
+volatile uint32_t g_l2vic_base; /* must use volatile */
+
+
+/*
+ * When complete the irqlog will contain the value of the vid when the
+ * handler was active.
+ */
+#define INTMAX 1024
+#define LEFT_SET 666
+
+int main()
+{
+    unsigned int irq_bit;
+    unsigned int left_set = 0;
+    int ret = 0;
+
+    /* setup the fastl2vic interface and setup an indirect mapping */
+    g_l2vic_base = GET_SUBSYSTEM_BASE() + 0x10000;
+
+    /* Setup interrupts */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        irq_bit = (1 << (irq % 32));
+        *L2VIC_INT_ENABLE(g_l2vic_base, irq) |= irq_bit;
+    }
+
+    /* Read them all back and check */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        if ((*L2VIC_INT_ENABLE(g_l2vic_base, irq) & (1 << (irq % 32))) !=
+            (1 << irq % 32)) {
+            printf("%d: ERROR: irq: %d: 0x%x\n", __LINE__, irq,
+                   *L2VIC_INT_ENABLE(g_l2vic_base, irq));
+            ret = 1;
+        }
+    }
+    /* Clear them all, except LEFT_SET (test) */
+    for (int irq = 1; irq < INTMAX; irq++) {
+        if (!(irq % LEFT_SET)) {
+            continue;
+        }
+        irq_bit = (1 << (irq % 32));
+        *L2VIC_INT_ENABLE_CLEAR(g_l2vic_base, irq) |= irq_bit;
+    }
+
+    /* make sure just LEFT_SET is set */
+    for (int irq = 0; irq < INTMAX; irq++) {
+        if ((*L2VIC_INT_ENABLE(g_l2vic_base, irq) & (1 << (irq % 32))) !=
+            (0 << irq % 32)) {
+            if (irq != LEFT_SET) {
+                printf("%d: ERROR: irq: %d: 0x%x\n", __LINE__, irq,
+                       *L2VIC_INT_ENABLE(g_l2vic_base, irq));
+                ret = 1;
+            } else {
+                left_set = irq;
+            }
+        }
+    }
+    if (left_set == LEFT_SET) {
+        printf("PASS\n");
+    }
+    return ret;
+}
diff --git a/tests/tcg/hexagon/system/mmu.h b/tests/tcg/hexagon/system/mmu.h
new file mode 100644
index 000000000000..0856c94ab5dd
--- /dev/null
+++ b/tests/tcg/hexagon/system/mmu.h
@@ -0,0 +1,718 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef MMU_H
+#define MMU_H
+#include <assert.h>
+#include <stdio.h>
+#include <stdint.h>
+#include "crt0/hexagon_standalone.h"
+
+/*
+ * Helpers for MMU tests
+ */
+
+#define TARGET_PAGE_BITS 12
+#ifndef TLB_NOT_FOUND
+#define TLB_NOT_FOUND (1 << 31)
+#endif
+
+static inline uint32_t page_start(uint32_t addr, uint32_t page_size_bits)
+{
+    uint32_t page_size = 1 << page_size_bits;
+    uint32_t page_align = ~(page_size - 1);
+    return addr & page_align;
+}
+
+/*
+ * The Hexagon standalone runtime leaves TLB entries 1-5 reserved for
+ * user-defined entries.
We'll set them up to map virtual addresses at + * 1MB offsets above the actual physical address + * PA == VA - (entry_num * 1MB) + * + * We'll define some macros/functions to help with the manipulation + */ + +#define ONE_MB (1 << 20) +#define TWO_MB (2 * ONE_MB) +#define THREE_MB (3 * ONE_MB) +#define FOUR_MB (4 * ONE_MB) +#define FIVE_MB (5 * ONE_MB) + +#define ONE_MB_ENTRY 1 +#define TWO_MB_ENTRY 2 +#define THREE_MB_ENTRY 3 +#define FOUR_MB_ENTRY 4 +#define FIVE_MB_ENTRY 5 + +static inline uint32_t tlb_entry_num(uint32_t va) +{ + return va >> 20; +} + +#define fZXTN(N, M, VAL) ((VAL) & ((1LL << (N)) - 1)) +#define fEXTRACTU_BITS(INREG, WIDTH, OFFSET) \ + (fZXTN(WIDTH, 32, (INREG >> OFFSET))) + +#define fINSERT_BITS(REG, WIDTH, OFFSET, INVAL) \ + do { \ + REG = ((REG) & ~(((1LL << (WIDTH)) - 1) << (OFFSET))) | \ + (((INVAL) & ((1LL << (WIDTH)) - 1)) << (OFFSET)); \ + } while (0) + +#define GET_FIELD(ENTRY, FIELD) \ + fEXTRACTU_BITS(ENTRY, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset) +#define SET_FIELD(ENTRY, FIELD, VAL) \ + fINSERT_BITS(ENTRY, reg_field_info[FIELD].width, \ + reg_field_info[FIELD].offset, (VAL)) + +typedef struct { + int offset; + int width; +} reg_field_t; + +enum reg_fields_enum { +#define DEF_REG_FIELD(TAG, NAME, START, WIDTH, DESCRIPTION) \ + TAG, +#include "reg_fields_def.h" + NUM_REG_FIELDS +#undef DEF_REG_FIELD +}; + +static const reg_field_t reg_field_info[] = { +#define DEF_REG_FIELD(TAG, NAME, START, WIDTH, DESCRIPTION) \ + { START, WIDTH }, + +#include "reg_fields_def.h" + + { 0, 0 } +#undef DEF_REG_FIELD +}; + +/* + * PPD (physical page descriptor) is formed by putting the PTE_PA35 field + * in the MSB of the PPD + */ +#define GET_PPD(ENTRY) \ + ((GET_FIELD((ENTRY), PTE_PPD) | \ + (GET_FIELD((ENTRY), PTE_PA35) << reg_field_info[PTE_PPD].width))) + +#define NUM_PGSIZE_TYPES (SHIFT_1G + 1) + +static const char *pgsize_str(PageSize pgsize) +{ + static const char *size_str[NUM_PGSIZE_TYPES] = { + "4K", + "16K", + "64K", + "256K", + "1M", + "4M", + "16M", + "64M", + "256M", + "1G" + }; + assert(pgsize); + return size_str[__builtin_ctz(pgsize)]; +} + +static const uint64_t encmask_2_mask[] = { + 0x0fffLL, /* 4k, 0000 */ + 0x3fffLL, /* 16k, 0001 */ + 0xffffLL, /* 64k, 0010 */ + 0x3ffffLL, /* 256k, 0011 */ + 0xfffffLL, /* 1m, 0100 */ + 0x3fffffLL, /* 4m, 0101 */ + 0xffffffLL, /* 16M, 0110 */ + 0xffffffffLL, /* RSVD, 0111 */ +}; + +static inline int hex_tlb_pgsize(uint64_t entry) +{ + assert(entry != 0); + int size = __builtin_ctzll(entry); + assert(size < NUM_PGSIZE_TYPES); + return size; +} + +static inline uint32_t hex_tlb_page_size(uint64_t entry) +{ + return 1 << (TARGET_PAGE_BITS + 2 * hex_tlb_pgsize(entry)); +} + +static inline uint64_t hex_tlb_phys_page_num(uint64_t entry) +{ + uint32_t ppd = GET_PPD(entry); + return ppd >> 1; +} + +static inline uint64_t hex_tlb_phys_addr(uint64_t entry) +{ + uint64_t pagemask = encmask_2_mask[hex_tlb_pgsize(entry)]; + uint64_t pagenum = hex_tlb_phys_page_num(entry); + uint64_t PA = (pagenum << TARGET_PAGE_BITS) & (~pagemask); + return PA; +} + +static inline uint64_t hex_tlb_virt_addr(uint64_t entry) +{ + return GET_FIELD(entry, PTE_VPN) << TARGET_PAGE_BITS; +} + +static inline uint64_t create_mmu_entry(uint8_t G, uint8_t A0, uint8_t A1, + uint8_t ASID, uint32_t VA, + uint8_t X, int8_t W, uint8_t R, + uint8_t U, uint8_t C, uint64_t PA, + PageSize SZ) +{ + uint64_t entry = 0; + SET_FIELD(entry, PTE_V, 1); + SET_FIELD(entry, PTE_G, G); + SET_FIELD(entry, PTE_ATR0, A0); + SET_FIELD(entry, PTE_ATR1, A1); + 
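+    /* ASID and VPN form the key that tlbp() matches on; the remaining
+       fields are attribute and permission bits. */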
SET_FIELD(entry, PTE_ASID, ASID); + SET_FIELD(entry, PTE_VPN, VA >> TARGET_PAGE_BITS); + SET_FIELD(entry, PTE_X, X); + SET_FIELD(entry, PTE_W, W); + SET_FIELD(entry, PTE_R, R); + SET_FIELD(entry, PTE_U, U); + SET_FIELD(entry, PTE_C, C); + SET_FIELD(entry, PTE_PA35, (PA >> (TARGET_PAGE_BITS + 35)) & 1); + SET_FIELD(entry, PTE_PPD, ((PA >> (TARGET_PAGE_BITS - 1)))); + entry |= SZ; + return entry; +} + +static inline uint64_t tlbr(uint32_t i) +{ + uint64_t ret; + asm volatile ("%0 = tlbr(%1)\n\t" : "=r"(ret) : "r"(i)); + return ret; +} + +static inline uint32_t ctlbw(uint64_t entry, uint32_t idx) +{ + uint32_t ret; + asm volatile ("%0 = ctlbw(%1, %2)\n\t" : "=r"(ret) : "r"(entry), "r"(idx)); + return ret; +} + +static inline uint32_t tlbp(uint32_t asid, uint32_t VA) +{ + uint32_t x = ((asid & 0x7f) << 20) | ((VA >> 12) & 0xfffff); + uint32_t ret; + asm volatile ("%0 = tlbp(%1)\n\t" : "=r"(ret) : "r"(x)); + return ret; +} + +static inline void tlbw(uint64_t entry, uint32_t idx) +{ + asm volatile ("tlbw(%0, %1)\n\t" :: "r"(entry), "r"(idx)); +} + +static inline uint32_t tlboc(uint64_t entry) +{ + uint32_t ret; + asm volatile ("%0 = tlboc(%1)\n\t" : "=r"(ret) : "r"(entry)); + return ret; +} + +void tlbinvasid(uint32_t entry_hi) +{ + asm volatile ("tlbinvasid(%0)\n\t" :: "r"(entry_hi)); +} + +static inline void enter_user_mode(void) +{ + asm volatile ("r0 = ssr\n\t" + "r0 = clrbit(r0, #17) // EX\n\t" + "r0 = setbit(r0, #16) // UM\n\t" + "r0 = clrbit(r0, #19) // GM\n\t" + "ssr = r0\n\t" : : : "r0"); +} + +static inline void enter_kernel_mode(void) +{ + asm volatile ("r0 = ssr\n\t" + "r0 = clrbit(r0, #17) // EX\n\t" + "r0 = clrbit(r0, #16) // UM\n\t" + "r0 = clrbit(r0, #19) // GM\n\t" + "ssr = r0\n\t" : : : "r0"); +} + +static inline uint32_t *getevb() +{ + uint32_t reg; + asm volatile ("%0 = evb\n\t" : "=r"(reg)); + return (uint32_t *)reg; +} + +static inline void setevb(void *new_evb) +{ + asm volatile("evb = %0\n\t" : : "r"(new_evb)); +} + +static inline uint32_t getbadva() +{ + uint32_t badva; + asm volatile ("%0 = badva\n\t" : "=r"(badva)); + return badva; +} + +static void inc_elr(uint32_t inc) +{ + + asm volatile ("r1 = %0\n\t" + "r2 = elr\n\t" + "r1 = add(r2, r1)\n\t" + "elr = r1\n\t" + : : "r"(inc) : "r1", "r2"); +} + +static inline void do_coredump(void) +{ + asm volatile("r0 = #2\n\t" + "stid = r0\n\t" + "jump __coredump\n\t" : : : "r0"); +} + +static inline uint32_t getssr(void) +{ + uint32_t ret; + asm volatile ("%0 = ssr\n\t" : "=r"(ret)); + return ret; +} + +static inline void setssr(uint32_t new_ssr) +{ + asm volatile ("ssr = %0\n\t" :: "r"(new_ssr)); +} + +static inline void set_asid(uint32_t asid) +{ + uint32_t ssr = getssr(); + SET_FIELD(ssr, SSR_ASID, asid); + setssr(ssr); +} + +int err; +#include "../hex_test.h" + +static void *old_evb; + +typedef uint64_t exception_vector[2]; +static exception_vector my_exceptions; + +static inline void clear_exception_vector(exception_vector excp) +{ + excp[0] = 0; + excp[1] = 0; +} + +static inline void set_exception_vector_bit(exception_vector excp, uint32_t bit) +{ + if (bit < 64) { + excp[0] |= 1LL << bit; + } else if (bit < 128) { + excp[1] |= 1LL << (bit - 64); + } +} + +#define check_exception_vector(excp, expect) \ + do { \ + check64(excp[0], expect[0]); \ + check64(excp[1], expect[1]); \ + } while (0) + +static inline void print_exception_vector(exception_vector excp) +{ + printf("exceptions (0x%016llx 0x%016llx):", excp[1], excp[0]); + for (int i = 0; i < 64; i++) { + if (excp[0] & (1LL << i)) { + printf(" 0x%x", i); + } + } + for 
(int i = 0; i < 64; i++) { + if (excp[1] & (1LL << i)) { + printf(" 0x%x", i + 64); + } + } + printf("\n"); +} + +/* volatile because it is written through different MMU mappings */ +typedef volatile int mmu_variable; +mmu_variable data = 0xdeadbeef; + +typedef int (*func_t)(void); +/* volatile because it will be invoked via different MMU mappings */ +typedef volatile func_t mmu_func_t; + +/* + * Create a function that returns its (virtual) address + * Write it fully in assembly so we don't have to worry about + * which optimization level we are compiled with + */ +extern int func_return_pc(void); +asm( +".global func_return_pc\n" +".balign 4\n" +".type func_return_pc, @function\n" +"func_return_pc:\n" +" r0 = pc\n" +" jumpr r31\n" +".size func_return_pc, . - func_return_pc\n" +); + +enum { + TLB_U = (1 << 0), + TLB_R = (1 << 1), + TLB_W = (1 << 2), + TLB_X = (1 << 3), +}; + +#define HEX_CAUSE_FETCH_NO_XPAGE 0x011 +#define HEX_CAUSE_FETCH_NO_UPAGE 0x012 +#define HEX_CAUSE_PRIV_NO_READ 0x022 +#define HEX_CAUSE_PRIV_NO_WRITE 0x023 +#define HEX_CAUSE_PRIV_NO_UREAD 0x024 +#define HEX_CAUSE_PRIV_NO_UWRITE 0x025 +#define HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH 0x044 +#define HEX_CAUSE_TLBMISSX_NORMAL 0x060 +#define HEX_CAUSE_TLBMISSX_NEXTPAGE 0x061 +#define HEX_CAUSE_TLBMISSRW_READ 0x070 +#define HEX_CAUSE_TLBMISSRW_WRITE 0x071 + +/* + * The following lets us override the default exception handlers + * This can be handy for adding code to check that they are called as well + * as special handling needed for the test to succeed. + * + * MY_EVENT_HANDLE Use this to define your own event handler + * DEFAULT_EVENT_HANDLE Use this to point to the default handler + * my_event_vectors New event vector table + * install_my_event_vectors Change from the default event handlers + */ + +extern void *my_event_vectors; + +#define MY_EVENT_HANDLE(name, helper) \ +void name(void) \ +{ \ + asm volatile("crswap(sp, sgp0)\n\t" \ + "memd(sp++#8) = r1:0\n\t" \ + "memd(sp++#8) = r3:2\n\t" \ + "memd(sp++#8) = r5:4\n\t" \ + "memd(sp++#8) = r7:6\n\t" \ + "memd(sp++#8) = r9:8\n\t" \ + "memd(sp++#8) = r11:10\n\t" \ + "memd(sp++#8) = r13:12\n\t" \ + "memd(sp++#8) = r15:14\n\t" \ + "memd(sp++#8) = r17:16\n\t" \ + "memd(sp++#8) = r19:18\n\t" \ + "memd(sp++#8) = r21:20\n\t" \ + "memd(sp++#8) = r23:22\n\t" \ + "memd(sp++#8) = r25:24\n\t" \ + "memd(sp++#8) = r27:26\n\t" \ + "memd(sp++#8) = r31:30\n\t" \ + "r0 = ssr\n\t" \ + "call " #helper "\n\t" \ + "sp = add(sp, #-8)\n\t" \ + "r31:30 = memd(sp++#-8)\n\t" \ + "r27:26 = memd(sp++#-8)\n\t" \ + "r25:24 = memd(sp++#-8)\n\t" \ + "r23:22 = memd(sp++#-8)\n\t" \ + "r21:20 = memd(sp++#-8)\n\t" \ + "r19:18 = memd(sp++#-8)\n\t" \ + "r17:16 = memd(sp++#-8)\n\t" \ + "r15:14 = memd(sp++#-8)\n\t" \ + "r13:12 = memd(sp++#-8)\n\t" \ + "r11:10 = memd(sp++#-8)\n\t" \ + "r9:8 = memd(sp++#-8)\n\t" \ + "r7:6 = memd(sp++#-8)\n\t" \ + "r5:4 = memd(sp++#-8)\n\t" \ + "r3:2 = memd(sp++#-8)\n\t" \ + "r1:0 = memd(sp)\n\t" \ + "crswap(sp, sgp0);\n\t" \ + "rte\n\t"); \ +} + +#ifndef NO_DEFAULT_EVENT_HANDLES + +#define DEFAULT_EVENT_HANDLE(name, offset) \ +void name(void) \ +{ \ + asm volatile("r0 = %0\n\t" \ + "r0 = add(r0, #" #offset ")\n\t" \ + "jumpr r0\n\t" \ + : : "r"(old_evb) : "r0"); \ +} + + +/* Use these values as the offset for DEFAULT_EVENT_HANDLE */ +asm ( +".set HANDLE_RESET_OFFSET, 0x00\n\t" +".set HANDLE_NMI_OFFSET, 0x04\n\t" +".set HANDLE_ERROR_OFFSET, 0x08\n\t" +".set HANDLE_RSVD_OFFSET, 0x0c\n\t" +".set HANDLE_TLBMISSX_OFFSET, 0x10\n\t" +".set HANDLE_TLBMISSRW_OFFSET, 0x18\n\t" +".set 
HANDLE_TRAP0_OFFSET, 0x20\n\t" +".set HANDLE_TRAP1_OFFSET, 0x24\n\t" +".set HANDLE_FPERROR_OFFSET, 0x28\n\t" +".set HANDLE_INT_OFFSET, 0x40\n\t" +); + +asm( +".align 0x1000\n\t" +"my_event_vectors:\n\t" + "jump my_event_handle_reset\n\t" + "jump my_event_handle_nmi\n\t" + "jump my_event_handle_error\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_tlbmissx\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_tlbmissrw\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_trap0\n\t" + "jump my_event_handle_trap1\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_fperror\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_rsvd\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" + "jump my_event_handle_int\n\t" +); + +#define DEFAULT_EVENT_HANDLES \ +DEFAULT_EVENT_HANDLE(my_event_handle_error, HANDLE_ERROR_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_nmi, HANDLE_NMI_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_tlbmissrw, HANDLE_TLBMISSRW_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_tlbmissx, HANDLE_TLBMISSX_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_reset, HANDLE_RESET_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_rsvd, HANDLE_RSVD_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_trap0, HANDLE_TRAP0_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_trap1, HANDLE_TRAP1_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_int, HANDLE_INT_OFFSET) \ +DEFAULT_EVENT_HANDLE(my_event_handle_fperror, HANDLE_FPERROR_OFFSET) + +#endif /* NO_DEFAULT_EVENT_HANDLES */ + +/* When a permission error happens, add the permission to the TLB entry */ +void my_event_handle_error_helper(uint32_t ssr) +{ + uint32_t cause = GET_FIELD(ssr, SSR_CAUSE); + uint32_t badva = getbadva(); + uint32_t entry_num = tlb_entry_num(badva); + uint64_t entry; + + set_exception_vector_bit(my_exceptions, cause); + + switch (cause) { + case HEX_CAUSE_FETCH_NO_XPAGE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_X, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_FETCH_NO_UPAGE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_U, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_PRIV_NO_READ: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_R, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_PRIV_NO_WRITE: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_W, 1); + tlbw(entry, entry_num); + break; + case HEX_CAUSE_PRIV_NO_UREAD: + entry = tlbr(entry_num); + SET_FIELD(entry, PTE_U, 1); + tlbw(entry, entry_num); + break; + case 
HEX_CAUSE_PRIV_NO_UWRITE:
+        entry = tlbr(entry_num);
+        SET_FIELD(entry, PTE_U, 1);
+        tlbw(entry, entry_num);
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+void my_event_handle_nmi_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_IMPRECISE_MULTI_TLB_MATCH:
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+/*
+ * When a TLB miss happens, create a mapping.
+ * We'll set different read/write/execute permissions
+ * for different entry numbers.
+ */
+void my_event_handle_tlbmissrw_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+    uint32_t badva = getbadva();
+    uint32_t entry_num = tlb_entry_num(badva);
+    uint32_t VA = page_start(badva, TARGET_PAGE_BITS);
+    uint32_t PA = VA - (entry_num * ONE_MB);
+
+    uint64_t entry =
+        create_mmu_entry(1, 0, 0, 0, VA, 0, 0, 0, 1, 0x3, PA, PAGE_4K);
+    if (entry_num == TWO_MB_ENTRY) {
+        SET_FIELD(entry, PTE_R, 1);
+    }
+    if (entry_num == THREE_MB_ENTRY) {
+        SET_FIELD(entry, PTE_W, 1);
+    }
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_TLBMISSRW_READ:
+        tlbw(entry, entry_num);
+        break;
+    case HEX_CAUSE_TLBMISSRW_WRITE:
+        tlbw(entry, entry_num);
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+void my_event_handle_tlbmissx_helper(uint32_t ssr)
+{
+    uint32_t cause = GET_FIELD(ssr, SSR_CAUSE);
+    uint32_t badva = getbadva();
+    uint32_t entry_num = tlb_entry_num(badva);
+    uint32_t VA = page_start(badva, TARGET_PAGE_BITS);
+    uint32_t PA = VA - (entry_num * ONE_MB);
+
+    uint64_t entry =
+        create_mmu_entry(1, 0, 0, 0, VA, 0, 0, 0, 1, 0x3, PA, PAGE_4K);
+
+    set_exception_vector_bit(my_exceptions, cause);
+
+    switch (cause) {
+    case HEX_CAUSE_TLBMISSX_NORMAL:
+        tlbw(entry, entry_num);
+        break;
+    default:
+        do_coredump();
+        break;
+    }
+}
+
+static inline void install_my_event_vectors(void)
+{
+    old_evb = getevb();
+    setevb(&my_event_vectors);
+}
+
+#define MAKE_GOTO(name) \
+void goto_##name(void) \
+{ \
+    asm volatile("r0 = ##" #name "\n\t" \
+                 "jumpr r0\n\t" \
+                 : : : "r0"); \
+}
+
+#define MAKE_ERR_HANDLER(name, helper_fn) \
+    MY_EVENT_HANDLE(name, helper_fn) \
+    MAKE_GOTO(name)
+
+#define INSTALL_ERR_HANDLER(name) do { \
+    /*                                                           \
+     * Install our own privilege exception handler.              \
+     * The normal behavior is to coredump.                       \
+     * Read and decode the jump displacements from evb.          \
+     * ASSUME a negative displacement, which is the standard.    \
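+     * (Illustrative note: evb + 2 words is the error vector,   \
+     * matching HANDLE_ERROR_OFFSET 0x08 above; the 12-byte      \
+     * memcpy below is sized for the goto_##name stub from       \
+     * MAKE_GOTO -- an immediate-extender word, "r0 = ##name",   \
+     * and "jumpr r0".)                                          \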
+     */ \
+    uint32_t *evb_err = getevb() + 2; \
+    uint32_t err_distance = -(0xfe000000 | *evb_err) << 1; \
+    uint32_t err_handler = (uint32_t)evb_err - err_distance; \
+    memcpy((void *)err_handler, goto_##name, 12); \
+} while (0)
+
+static inline void remove_trans(int index)
+{
+    uint64_t entry = tlbr(index);
+    SET_FIELD(entry, PTE_V, 0);
+    tlbw(entry, index);
+}
+
+static inline void clear_overlapping_entry(unsigned int asid, uint32_t va)
+{
+    int32_t index = tlbp(asid, va);
+    if (index != TLB_NOT_FOUND) {
+        remove_trans(index);
+    }
+}
+
+static void add_trans(int index, uint32_t va, uint64_t pa,
+                      PageSize page_size, uint8_t xwru,
+                      unsigned int asid, uint8_t V, uint8_t G)
+{
+    if (V) {
+        clear_overlapping_entry(asid, va);
+    }
+    assert(!add_translation_extended(index, (void *)va, pa, page_size,
+                                     xwru, 0, asid, 0,
+                                     ((V & 1) << 1) | (G & 1)));
+}
+
+#endif
diff --git a/tests/tcg/hexagon/system/mmu_asids.c b/tests/tcg/hexagon/system/mmu_asids.c
new file mode 100644
index 000000000000..34f25c25a3d7
--- /dev/null
+++ b/tests/tcg/hexagon/system/mmu_asids.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#define DEBUG 0
+
+#include "mmu.h"
+
+DEFAULT_EVENT_HANDLES
+
+void test_asids(void)
+{
+    uint32_t addr = (uint32_t)&data;
+    uint32_t page = page_start(addr, TARGET_PAGE_BITS);
+    uint32_t offset = FIVE_MB;
+    uint32_t new_addr = addr + offset;
+    uint32_t new_page = page + offset;
+    uint64_t entry =
+        create_mmu_entry(0, 0, 0, 1, new_page, 1, 1, 1, 0, 7, page, PAGE_4K);
+    /*
+     * Create a TLB entry for ASID=1
+     * Write it at index 1
+     * Check that it is present
+     * Invalidate the ASID
+     * Check that it is not found
+     */
+    tlbw(entry, 1);
+    check32(tlboc(entry), 1);
+    tlbinvasid(entry >> 32);
+    check32(tlboc(entry), TLB_NOT_FOUND);
+
+    /*
+     * Re-install the entry
+     * Put ourselves in ASID=1
+     * Do a load and a store
+     */
+    data = 0xdeadbeef;
+    tlbw(entry, 1);
+    set_asid(1);
+    check32(*(mmu_variable *)new_addr, 0xdeadbeef);
+    *(mmu_variable *)new_addr = 0xcafebabe;
+    check32(data, 0xcafebabe);
+
+    /*
+     * Make sure a load from ASID 2 gets a different value.
+     * The standalone runtime will create a VA==PA entry on
+     * a TLB miss, so the load will be reading from uninitialized
+     * memory.
+     */
+    set_asid(2);
+    data = 0xdeadbeef;
+    check32_ne(*(mmu_variable *)new_addr, 0xdeadbeef);
+
+    /*
+     * Invalidate the ASID and make sure a load from ASID 1
+     * gets a different value.
+     */
+    tlbinvasid(entry >> 32);
+    set_asid(1);
+    data = 0xcafebabe;
+    check32_ne(*(mmu_variable *)new_addr, 0xcafebabe);
+}
+
+int main()
+{
+    puts("Hexagon MMU ASID test");
+
+    test_asids();
+
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/mmu_overlap.c b/tests/tcg/hexagon/system/mmu_overlap.c
new file mode 100644
index 000000000000..73d0565abed4
--- /dev/null
+++ b/tests/tcg/hexagon/system/mmu_overlap.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
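+
+/*
+ * (Editorial summary: this test exercises TLB overlap detection --
+ * tlboc reports whether an entry collides with an installed
+ * translation, and ctlbw writes a new entry only when no overlap
+ * exists.)
+ */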
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#define DEBUG 0
+
+#include "mmu.h"
+
+DEFAULT_EVENT_HANDLES
+
+void test_overlap(void)
+{
+    uint32_t addr = (uint32_t)&data;
+    uint32_t page = page_start(addr, 20);
+    uint32_t offset = FIVE_MB;
+    uint32_t new_page = page + offset;
+    uint32_t new_addr = addr + offset;
+    uint8_t data_perm = TLB_X | TLB_W | TLB_R | TLB_U;
+    uint64_t entry;
+
+    add_trans(1, new_page, page, PAGE_1M, data_perm, 0, 1, 1);
+    check32(tlbp(0, new_addr), 1);
+
+    /* Check an entry that overlaps with the one we just created */
+    entry =
+        create_mmu_entry(1, 0, 0, 0, new_page, 1, 1, 1, 0, 7, page, PAGE_4K);
+    check32(tlboc(entry), 1);
+    /* Check that conditional TLB write (ctlbw) does NOT write the new entry */
+    check32(ctlbw(entry, 2), 0x1);
+
+    /* Create an entry that does not overlap with the one we just created */
+    entry = create_mmu_entry(1, 0, 0, 0, new_page + ONE_MB, 1, 1, 1, 0, 7, page,
+                             PAGE_4K);
+    check32(tlboc(entry), TLB_NOT_FOUND);
+    /* Check that conditional TLB write (ctlbw) does write the new entry */
+    check32(ctlbw(entry, 2), TLB_NOT_FOUND);
+
+    /* Create an entry that overlaps both of these entries */
+    entry =
+        create_mmu_entry(1, 0, 0, 0, new_page, 1, 1, 1, 0, 7, page, PAGE_4M);
+    check32(tlboc(entry), 0xffffffff);
+
+    /* Clear the TLB entries */
+    remove_trans(1);
+    check32(tlbp(0, new_addr), TLB_NOT_FOUND);
+    remove_trans(2);
+    check32(tlbp(0, (new_addr + ONE_MB)), TLB_NOT_FOUND);
+}
+
+int main()
+{
+    puts("Hexagon MMU overlap test");
+
+    test_overlap();
+
+    printf("%s\n", ((err) ? "FAIL" : "PASS"));
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/monitor_insts.S b/tests/tcg/hexagon/system/monitor_insts.S
new file mode 100644
index 000000000000..8027068511f1
--- /dev/null
+++ b/tests/tcg/hexagon/system/monitor_insts.S
@@ -0,0 +1,18 @@
+/*
+ * Copyright(c) 2020-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+    .text
+    .type test_set_prio, @function
+    .global test_set_prio
+
+test_set_prio:
+    r0 = #3
+    r1 = #1
+    p0 = cmp.eq(r0,r1)
+    setprio(p0, r0)
+    jumpr lr
+
+    .size test_set_prio, . - test_set_prio
diff --git a/tests/tcg/hexagon/system/reg_fields_def.h b/tests/tcg/hexagon/system/reg_fields_def.h
new file mode 100644
index 000000000000..ff2769a1399d
--- /dev/null
+++ b/tests/tcg/hexagon/system/reg_fields_def.h
@@ -0,0 +1,87 @@
+/* PTE (aka TLB entry) fields */
+DEF_REG_FIELD(PTE_PPD,
+    "PPD", 0, 24,
+    "Physical page number that the corresponding virtual page maps to.")
+DEF_REG_FIELD(PTE_C,
+    "C", 24, 4,
+    "Cacheability attributes for the page.")
+DEF_REG_FIELD(PTE_U,
+    "U", 28, 1,
+    "User mode permitted.")
+DEF_REG_FIELD(PTE_R,
+    "R", 29, 1,
+    "Read-enable.")
+DEF_REG_FIELD(PTE_W,
+    "W", 30, 1,
+    "Write-enable.")
+DEF_REG_FIELD(PTE_X,
+    "X", 31, 1,
+    "Execute-enable.")
+DEF_REG_FIELD(PTE_VPN,
+    "VPN", 32, 20,
+    "Virtual page number that is matched against the load or store address.")
+DEF_REG_FIELD(PTE_ASID,
+    "ASID", 52, 7,
+    "7-bit address space identifier (tag extender)")
+DEF_REG_FIELD(PTE_ATR0,
+    "ATR0", 59, 1,
+    "General purpose attribute bit kept as an attribute of each cache line.")
+DEF_REG_FIELD(PTE_ATR1,
+    "ATR1", 60, 1,
+    "General purpose attribute bit kept as an attribute of each cache line.")
+DEF_REG_FIELD(PTE_PA35,
+    "PA35", 61, 1,
+    "The Extra Physical bit is the most-significant physical address bit.")
+DEF_REG_FIELD(PTE_G,
+    "G", 62, 1,
+    "Global bit. If set, then the ASID is ignored in the match.")
+DEF_REG_FIELD(PTE_V,
+    "V", 63, 1,
+    "Valid bit. Indicates whether this entry should be used for matching.")
+
+/* SSR fields */
+DEF_REG_FIELD(SSR_CAUSE,
+    "cause", 0, 8,
+    "8-bit field that contains the reason for various exceptions.")
+DEF_REG_FIELD(SSR_ASID,
+    "asid", 8, 7,
+    "7-bit field that contains the Address Space Identifier.")
+DEF_REG_FIELD(SSR_UM,
+    "um", 16, 1,
+    "read-write bit.")
+DEF_REG_FIELD(SSR_EX,
+    "ex", 17, 1,
+    "set when an interrupt or exception is accepted.")
+DEF_REG_FIELD(SSR_IE,
+    "ie", 18, 1,
+    "indicates whether the global interrupt is enabled.")
+DEF_REG_FIELD(SSR_GM,
+    "gm", 19, 1,
+    "Guest mode bit.")
+DEF_REG_FIELD(SSR_V0,
+    "v0", 20, 1,
+    "if BADVA0 register contents are from a valid slot 0 instruction.")
+DEF_REG_FIELD(SSR_V1,
+    "v1", 21, 1,
+    "if BADVA1 register contents are from a valid slot 1 instruction.")
+DEF_REG_FIELD(SSR_BVS,
+    "bvs", 22, 1,
+    "BADVA Selector.")
+DEF_REG_FIELD(SSR_CE,
+    "ce", 23, 1,
+    "grants user or guest read permissions to the PCYCLE register aliases.")
+DEF_REG_FIELD(SSR_PE,
+    "pe", 24, 1,
+    "grants guest read permissions to the PMU register aliases.")
+DEF_REG_FIELD(SSR_BP,
+    "bp", 25, 1,
+    "Internal Bus Priority bit.")
+DEF_REG_FIELD(SSR_XA,
+    "xa", 27, 3,
+    "Extension Active, which controls operation of an attached coprocessor.")
+DEF_REG_FIELD(SSR_SS,
+    "ss", 30, 1,
+    "Single Step, which enables single-step exceptions.")
+DEF_REG_FIELD(SSR_XE,
+    "xe", 31, 1,
+    "Coprocessor Enable, which enables use of an attached coprocessor.")
diff --git a/tests/tcg/hexagon/system/semihost.c b/tests/tcg/hexagon/system/semihost.c
new file mode 100644
index 000000000000..7a0fa0cb73ff
--- /dev/null
+++ b/tests/tcg/hexagon/system/semihost.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "strutils.h"
+
+/* Defines in order of testing */
+
+/* env/CLI-related */
+#define HEX_SYS_GET_CMDLINE 0x15
+#define HEX_SYS_GETCWD 0x104
+
+/* File manipulation */
+#define HEX_SYS_TMPNAM 0x0d
+#define HEX_SYS_OPEN 0x01
+#define HEX_SYS_ACCESS 0x105
+#define HEX_SYS_ISTTY 0x09
+#define HEX_SYS_WRITE 0x05
+#define HEX_SYS_SEEK 0x0a
+#define HEX_SYS_READ 0x06
+#define HEX_SYS_FTELL 0x100
+#define HEX_SYS_FSTAT 0x101
+#define HEX_SYS_FTRUNC 0x186
+#define HEX_SYS_FLEN 0x0c
+#define HEX_SYS_CLOSE 0x02
+#define HEX_SYS_ERRNO 0x13
+#define HEX_SYS_RENAME 0x0f
+#define HEX_SYS_STAT 0x103
+#define HEX_SYS_REMOVE 0x0e
+
+/* Time */
+#define HEX_SYS_CLOCK 0x10
+#define HEX_SYS_TIME 0x11
+
+/* dirent */
+#define HEX_SYS_OPENDIR 0x180
+#define HEX_SYS_CLOSEDIR 0x181
+#define HEX_SYS_READDIR 0x182
+
+/* STDOUT */
+#define HEX_SYS_WRITEC 0x03
+#define HEX_SYS_WRITE0 0x04
+#define HEX_SYS_WRITECREG 0x43
+
+static uint32_t ret, err, args[4];
+
+/*
+ * Macro flavors:
+ *  - DIRECT_SWI takes up to two args and puts them in r1 and r2.
+ *  - SWI takes up to four args and puts them in an array, placing the
+ *    array address in r1.
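+ *
+ * (Illustrative expansion: SWI(HEX_SYS_WRITE, fd, buf, n) stores fd,
+ * buf and n into args[0..2] and traps with r0 = 0x05 (the op code) and
+ * r1 = &args; the result comes back in ret (r0) and the error code in
+ * err (r1).)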
+ */ + +#define DO_SWI(CODE, ARG0, ARG1) \ + do { \ + asm volatile( \ + "r0 = %2\n" \ + "r1 = %3\n" \ + "r2 = %4\n" \ + "trap0(#0)\n" \ + "%0 = r0\n" \ + "%1 = r1\n" \ + : "=r"(ret), "=r"(err) \ + : "r"(CODE), "r"(ARG0), "r"(ARG1) \ + : "r0", "r1", "r2", "memory" \ + ); \ + } while (0) + +#define SWI0(CODE) DO_SWI(CODE, args, 0) +#define SWI1(CODE, ARG0) \ + do { args[0] = (uint32_t)(ARG0); SWI0(CODE); } while (0) +#define SWI2(CODE, ARG0, ARG1) \ + do { args[1] = (uint32_t)(ARG1); SWI1(CODE, ARG0); } while (0) +#define SWI3(CODE, ARG0, ARG1, ARG2) \ + do { args[2] = (uint32_t)(ARG2); SWI2(CODE, ARG0, ARG1); } while (0) +#define SWI4(CODE, ARG0, ARG1, ARG2, ARG3) \ + do { args[3] = (uint32_t)(ARG3); SWI3(CODE, ARG0, ARG1, ARG2); } while (0) + +#define GET_MACRO_5(_1, _2, _3, _4, _5, NAME, ...) NAME +#define SWI(...) \ + GET_MACRO_5(__VA_ARGS__, SWI4, SWI3, SWI2, SWI1, SWI0)(__VA_ARGS__) + +#define DIRECT_SWI0(CODE) DO_SWI(CODE, 0, 0) +#define DIRECT_SWI1(CODE, ARG1) DO_SWI(CODE, ARG1, 0) +#define DIRECT_SWI2(CODE, ARG1, ARG2) DO_SWI(CODE, ARG1, ARG2) + +#define GET_MACRO_3(_1, _2, _3, NAME, ...) NAME +#define DIRECT_SWI(...) \ + GET_MACRO_3(__VA_ARGS__, DIRECT_SWI2, DIRECT_SWI1, DIRECT_SWI0)(__VA_ARGS__) + +#define is_path_sep(C) ((C) == '/' || (C) == '\\') + +static int path_ends_with(const char *str, const char *suffix) +{ + const char *str_cursor = str + strlen(str) - 1; + const char *suffix_cursor = suffix + strlen(suffix) - 1; + while (str_cursor >= str && suffix_cursor >= suffix) { + /* is_path_sep handles the semihosting-on-Windows case */ + if (*str_cursor != *suffix_cursor && + !(is_path_sep(*str_cursor) && is_path_sep(*suffix_cursor))) { + return 0; + } + str_cursor--; + suffix_cursor--; + } + return 1; +} + +/* + * This must match the caller's definition, it would be in the + * caller's angel.h or equivalent header. 
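+ * (Editorial note: the FSTAT/STAT checks later in this test read the
+ * size, mode and timestamp fields, so the layout below has to stay in
+ * sync with what the semihosting host writes.)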
+ */ +struct __SYS_STAT { + uint64_t dev; + uint64_t ino; + uint32_t mode; + uint32_t nlink; + uint64_t rdev; + uint32_t size; + uint32_t __pad1; + uint32_t atime; + uint32_t mtime; + uint32_t ctime; + uint32_t __pad2; +}; + +int main(int argc, char **argv) +{ + /* GET_CMDLINE */ + char argv_concat[1024]; + char *cursor = argv_concat; + for (int i = 0; i < argc; i++) { + strcpy(cursor, argv[i]); + cursor += strlen(argv[i]); + *cursor = ' '; + cursor++; + } + *(cursor - 1) = '\0'; + char buf[4096]; + SWI(HEX_SYS_GET_CMDLINE, buf, sizeof(buf)); + assert(!ret && !strcmp(buf, argv_concat)); + + /* GETCWD */ + const char *expected_cwd = "tests/tcg/hexagon-softmmu"; + SWI(HEX_SYS_GETCWD, buf, sizeof(buf)); + assert(ret && path_ends_with(buf, expected_cwd)); + + /* TMPNAM */ + char fname[4096]; + SWI(HEX_SYS_TMPNAM, fname, 0, sizeof(fname)); + assert(!ret); + + /* OPEN */ + /* 13 is O_RDWR | O_CREAT | O_EXCL */ + SWI(HEX_SYS_OPEN, fname, 13, strlen(fname)); + int fd = (int)ret; + assert(fd >= 0); + + /* ACCESS */ + SWI(HEX_SYS_ACCESS, fname, R_OK); + assert(!ret); + /* ACCESS with error */ + SWI(HEX_SYS_ACCESS, "non-existent-semihost-file", R_OK); + assert(ret); + assert(err == ENOENT); + + /* ISTTY */ + SWI(HEX_SYS_ISTTY, fd); + assert(!ret); + + /* WRITE */ + char *str = "hello"; + SWI(HEX_SYS_WRITE, fd, str, strlen(str)); + assert(!ret); + + /* SEEK */ + SWI(HEX_SYS_SEEK, fd, 0); + assert(!ret); + + /* READ */ + int n = strlen(str); + SWI(HEX_SYS_READ, fd, buf, n); + buf[n] = '\0'; + assert(!ret && !strcmp(str, buf)); + + /* FTELL */ + SWI(HEX_SYS_FTELL, fd); + assert(ret == strlen(str)); + + /* FSTAT */ + struct __SYS_STAT st; + SWI(HEX_SYS_FSTAT, fd, &st); + assert(!ret); + assert(st.atime && st.ctime && st.mtime); + assert(st.size == strlen(str)); + assert((st.mode & S_IFMT) == S_IFREG); + + /* FTRUNC */ + SWI(HEX_SYS_FTRUNC, fd, 1, 0); + assert(!ret); + + /* FLEN */ + SWI(HEX_SYS_FLEN, fd); + assert(ret == 1); + + /* CLOSE */ + SWI(HEX_SYS_CLOSE, fd); + assert(!ret); + + /* CLOSE w/ error && ERRNO */ + SWI(HEX_SYS_CLOSE, fd); + assert(ret); + assert(err == EBADF); + SWI(HEX_SYS_ERRNO); + assert(ret == EBADF); + + /* RENAME */ + char ogfname[4096]; + int len = strlen(fname); + strcpy(ogfname, fname); + fname[len - 1] = (fname[len - 1] == 'a' ? 
'b' : 'a');
+    SWI(HEX_SYS_RENAME, ogfname, len, fname, len);
+    assert(!ret);
+
+    /* STAT */
+    SWI(HEX_SYS_STAT, fname, &st);
+    assert(!ret);
+    assert(st.atime && st.ctime && st.mtime);
+    assert(st.size == 1);
+    assert((st.mode & S_IFMT) == S_IFREG);
+
+    /* REMOVE */
+    SWI(HEX_SYS_REMOVE, fname, strlen(fname));
+    assert(!ret);
+
+    /* STAT w/ error */
+    SWI(HEX_SYS_STAT, fname, &st);
+    assert(ret);
+    assert(err == ENOENT);
+
+    /* TIME && CLOCK */
+    SWI(HEX_SYS_TIME);
+    assert(ret);
+    SWI(HEX_SYS_CLOCK);
+    assert(ret);
+
+    /* OPENDIR */
+    char *dname = "./_semihost_dir";
+    DIRECT_SWI(HEX_SYS_OPENDIR, dname);
+    assert(ret);
+    int dir_index = ret;
+
+    /* READDIR */
+    char *expected_files[4] = { ".", "..", "fileA", "fileB" };
+    char found_files_buffer[4][256];
+    char *found_files[4];
+    for (int i = 0; 1; i++) {
+        struct __attribute__((__packed__)) {
+            int32_t _;
+            char d_name[256];
+        } dirent;
+        DIRECT_SWI(HEX_SYS_READDIR, dir_index, &dirent);
+        if (!ret) {
+            break;
+        }
+        assert(i < 4);
+        found_files[i] = found_files_buffer[i];
+        strcpy(found_files[i], dirent.d_name);
+    }
+
+    sort_str_arr(found_files, 4);
+    for (int i = 0; i < 4; i++) {
+        assert(!strcmp(found_files[i], expected_files[i]));
+    }
+
+    /* CLOSEDIR */
+    DIRECT_SWI(HEX_SYS_CLOSEDIR, dir_index);
+    assert(!ret);
+
+    /* WRITEC, WRITECREG, WRITE0 */
+    /* We use DIRECT_SWI here to bypass the args array */
+    char *pass = "PASS\n";
+    DIRECT_SWI(HEX_SYS_WRITEC, &pass[0]);
+    DIRECT_SWI(HEX_SYS_WRITECREG, pass[1]);
+    DIRECT_SWI(HEX_SYS_WRITE0, &pass[2]);
+
+    return 0;
+}
diff --git a/tests/tcg/hexagon/system/standalone_hw.c b/tests/tcg/hexagon/system/standalone_hw.c
new file mode 100644
index 000000000000..c67343204a80
--- /dev/null
+++ b/tests/tcg/hexagon/system/standalone_hw.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <assert.h>
+
+void test_set_prio();
+
+void inst_test()
+{
+    asm volatile("dczeroa(r0)\n\t"
+                 "dccleanidx(r0)\n\t"
+                 "dcinvidx(r0)\n\t"
+                 "r1 = dctagr(r0)\n\t"
+                 "dctagw(r0, r1)\n\t"
+                 "dcfetch(r0)\n\t"
+                 "dccleaninvidx(r0)\n\t"
+                 "l2gclean\n\t"
+                 "l2gclean(r1:0)\n\t"
+                 "l2gcleaninv\n\t"
+                 "l2gcleaninv(r1:0)\n\t"
+                 "l2gunlock\n\t"
+                 "l2kill\n\t"
+                 "trace(r0)\n\t"
+                 "pause(#1)\n\t"
+                 );
+
+    asm volatile("r0 = #0\n\t"
+                 "r1 = iassignr(r0)\n\t"
+                 /* Set interrupt 0 to disabled on all threads */
+                 "r0 = #0\n\t"
+                 "iassignw(r0)\n\t");
+
+    test_set_prio();
+    printf("Executed monitor mode instructions\n");
+}
+
+int main(int argc, const char *argv[])
+{
+    inst_test();
+    printf("Hello, World: (argc: %d)\n", argc);
+    assert(argc >= 1);
+    for (int i = 0; i < argc; i++) {
+        printf("\t> '%s'\n", argv[i]);
+    }
+}
diff --git a/tests/tcg/hexagon/system/standalone_vec.c b/tests/tcg/hexagon/system/standalone_vec.c
new file mode 100644
index 000000000000..eb1b2ef4830c
--- /dev/null
+++ b/tests/tcg/hexagon/system/standalone_vec.c
@@ -0,0 +1,1419 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
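+
+/*
+ * (Editorial summary: every HVX scatter/gather into VTCM below is
+ * mirrored by a scalar C reference implementation, and check_buffer()
+ * compares the two results byte by byte.)
+ */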
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <hexagon_types.h>
+#include <hexagon_protos.h>
+
+#include "cfgtable.h"
+
+int err;
+
+#ifdef __linux__
+#define VTCM_SIZE_KB (2048)
+#define VTCM_BYTES_PER_KB (1024)
+
+static char vtcm_buffer[VTCM_SIZE_KB * VTCM_BYTES_PER_KB]
+    __attribute__((aligned(0x10000)));
+#endif
+
+/* define the number of rows/cols in a square matrix */
+#define MATRIX_SIZE 64
+
+/* define the size of the scatter buffer */
+#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE)
+
+#define SCATTER16_BUF_SIZE (2 * SCATTER_BUFFER_SIZE)
+#define SCATTER32_BUF_SIZE (4 * SCATTER_BUFFER_SIZE)
+
+#define GATHER16_BUF_SIZE (2 * MATRIX_SIZE)
+#define GATHER32_BUF_SIZE (4 * MATRIX_SIZE)
+
+uintptr_t VTCM_BASE_ADDRESS;
+uintptr_t VTCM_SCATTER16_ADDRESS;
+uintptr_t VTCM_GATHER16_ADDRESS;
+uintptr_t VTCM_SCATTER32_ADDRESS;
+uintptr_t VTCM_GATHER32_ADDRESS;
+uintptr_t VTCM_SCATTER16_32_ADDRESS;
+uintptr_t VTCM_GATHER16_32_ADDRESS;
+
+/* the vtcm base address */
+unsigned char *vtcm_base;
+
+/* scatter gather 16 bit elements using 16 bit offsets */
+unsigned short *vscatter16;
+unsigned short *vgather16;
+unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE];
+unsigned short vgather16_ref[MATRIX_SIZE];
+
+/* scatter gather 32 bit elements using 32 bit offsets */
+unsigned int *vscatter32;
+unsigned int *vgather32;
+unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE];
+unsigned int vgather32_ref[MATRIX_SIZE];
+
+/* scatter gather 16 bit elements using 32 bit offsets */
+unsigned short *vscatter16_32;
+unsigned short *vgather16_32;
+unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE];
+unsigned short vgather16_32_ref[MATRIX_SIZE];
+
+
+/* declare the arrays of offsets */
+unsigned short half_offsets[MATRIX_SIZE];
+unsigned int word_offsets[MATRIX_SIZE];
+
+/* declare the arrays of values */
+unsigned short half_values[MATRIX_SIZE];
+unsigned short half_acc_values[MATRIX_SIZE];
+unsigned short half_q_values[MATRIX_SIZE];
+unsigned int word_values[MATRIX_SIZE];
+unsigned int word_acc_values[MATRIX_SIZE];
+unsigned int word_q_values[MATRIX_SIZE];
+
+/* declare the array of predicates */
+unsigned short half_predicates[MATRIX_SIZE];
+unsigned int word_predicates[MATRIX_SIZE];
+
+/* make this big enough for all the intrinsics */
+unsigned int region_len = 4 * SCATTER_BUFFER_SIZE - 1;
+
+/* optionally add sync instructions */
+#define SYNC_VECTOR 1
+
+/* optionally print cycle counts */
+#define PRINT_CYCLE_COUNTS 0
+
+#if PRINT_CYCLE_COUNTS
+unsigned long long start_cycles;
+#define START_CYCLES start_cycles = hexagon_sim_read_pcycles();
+#define PRINT_CYCLES(x) printf(x, hexagon_sim_read_pcycles() - start_cycles);
+#else
+#define START_CYCLES
+#define PRINT_CYCLES(x)
+#endif
+
+/* define a scratch area for debug and prefill */
+#define SCRATCH_SIZE 0x8800
+
+#define FILL_CHAR '.'
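+
+/*
+ * (Editorial note: FILL_CHAR seeds both the VTCM scratch area and the
+ * scalar reference buffers, so bytes untouched by a scatter compare
+ * equal in check_buffer().)
+ */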
+
+/* fill the vtcm scratch area with the fill char */
+void prefill_vtcm_scratch(void)
+{
+    memset((void *)VTCM_BASE_ADDRESS, FILL_CHAR, SCRATCH_SIZE * sizeof(char));
+}
+
+/* print vtcm scratch buffer in half words */
+void print_vtcm_scratch_16(void)
+{
+    unsigned short *vtmp = (unsigned short *)VTCM_BASE_ADDRESS;
+
+    printf("\n\nPrinting the vtcm scratch in half words");
+
+    for (int i = 0; i < SCRATCH_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (char)((vtmp[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+}
+
+/* print vtcm scratch buffer in words */
+void print_vtcm_scratch_32(void)
+{
+    unsigned int *vtmp = (unsigned int *)VTCM_BASE_ADDRESS;
+
+    printf("\n\nPrinting the vtcm scratch in words");
+
+    for (int i = 0; i < SCRATCH_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+        for (int j = 0; j < 4; j++) {
+            printf("%c", (char)((vtmp[i] >> j * 8) & 0xff));
+        }
+
+        printf(" ");
+    }
+}
+
+
+/* create byte offsets to be a diagonal of the matrix with 16 bit elements */
+void create_offsets_and_values_16(void)
+{
+    unsigned short half_element = 0;
+    unsigned short half_q_element = 0;
+    char letter = 'A';
+    char q_letter = '@';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        half_offsets[i] = i * (2 * MATRIX_SIZE + 2);
+
+        half_element = 0;
+        half_q_element = 0;
+        for (int j = 0; j < 2; j++) {
+            half_element |= letter << j * 8;
+            half_q_element |= q_letter << j * 8;
+        }
+
+        half_values[i] = half_element;
+        half_acc_values[i] = ((i % 10) << 8) + (i % 10);
+        half_q_values[i] = half_q_element;
+
+        letter++;
+        /* reset to 'A' */
+        if (letter == 'M') {
+            letter = 'A';
+        }
+    }
+}
+
+/* create a predicate mask for the half word scatter */
+void create_preds_16()
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0;
+    }
+}
+
+
+/* create byte offsets to be a diagonal of the matrix with 32 bit elements */
+void create_offsets_and_values_32(void)
+{
+    unsigned int word_element = 0;
+    unsigned int word_q_element = 0;
+    char letter = 'A';
+    char q_letter = '&';
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        word_offsets[i] = i * (4 * MATRIX_SIZE + 4);
+
+        word_element = 0;
+        word_q_element = 0;
+        for (int j = 0; j < 4; j++) {
+            word_element |= letter << j * 8;
+            word_q_element |= q_letter << j * 8;
+        }
+
+        word_values[i] = word_element;
+        word_acc_values[i] = ((i % 10) << 8) + (i % 10);
+        word_q_values[i] = word_q_element;
+
+        letter++;
+        /* reset to 'A' */
+        if (letter == 'M') {
+            letter = 'A';
+        }
+    }
+}
+
+/* create a predicate mask for the word scatter */
+void create_preds_32()
+{
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ?
~0 : 0; + } +} + + +void dump_buf(char *str, void *addr, int element_size, int byte_len) + +{ + unsigned short *sptr = addr; + unsigned int *ptr = addr; + + printf("\n\nBuffer: %s\n", str); + for (int i = 0; i < byte_len / element_size; ++ptr, ++sptr, ++i) { + if (i != 0 && (i % 16) == 0) { + printf("\n"); + } + if (element_size == 2) { + printf("%c ", *sptr); + } else if (element_size == 4) { + printf("%4.4x ", *ptr); + } + } +} + +/* + * create byte offsets to be a diagonal of the matrix with 16 bit elements and + * 32 bit offsets + */ +void create_offsets_and_values_16_32(void) +{ + unsigned int half_element = 0; + unsigned short half_q_element = 0; + char letter = 'D'; + char q_letter = '$'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_q_element = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_q_element |= q_letter << j * 8; + } + + half_values[i] = half_element; + half_acc_values[i] = ((i % 10) << 8) + (i % 10); + half_q_values[i] = half_q_element; + + letter++; + /* reset to 'A' */ + if (letter == 'P') { + letter = 'D'; + } + } + + /* + * dump_buf("word_offsets", word_offsets, sizeof(*word_offsets), + * sizeof(word_offsets)); dump_buf("half_offsets", half_offsets, + * sizeof(*half_offsets), sizeof(half_offsets)); + */ +} + +void create_preds_16_32() +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? ~0 : 0; + } +} + +#define SCATTER_RELEASE(ADDR) \ + asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(ADDR)); + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values; + + /* do the scatter */ + Q6_vscatter_RMVhV(VTCM_SCATTER16_ADDRESS, region_len, offsets, values); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16); + /* + * This dummy load from vscatter16 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16; +#endif + + PRINT_CYCLES("\nVector Scatter 16 cycles = %llu\n"); +} + +/* scatter-accumulate the 16 bit elements using intrinsics */ +void vector_scatter_acc_16(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_acc_values; + + /* do the scatter */ + Q6_vscatteracc_RMVhV(VTCM_SCATTER16_ADDRESS, region_len, offsets, values); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16); + /* + * This dummy load from vscatter16 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
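+     * (Editorial note: the scatter_release above posts the sync marker;
+     * this read from the scattered region forces the test to wait for
+     * completion here before the results are checked.)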
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16; +#endif + + PRINT_CYCLES("\nVector Scatter Acc 16 cycles = %llu\n"); +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_q_16(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_q_values; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + /* do the scatter */ + Q6_vscatter_QRMVhV(preds, VTCM_SCATTER16_ADDRESS, region_len, offsets, + values); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16); + /* + * This dummy load from vscatter16 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16; +#endif + + PRINT_CYCLES("\nVector Scatter Q 16 cycles = %llu\n"); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2]; + + /* do the scatter */ + Q6_vscatter_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetslo, valueslo); + Q6_vscatter_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetshi, valueshi); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter32); + /* + * This dummy load from vscatter32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32; +#endif + + PRINT_CYCLES("\nVector Scatter 32 cycles = %llu\n"); +} + +/* scatter-acc the 32 bit elements using intrinsics */ +void vector_scatter_acc_32(void) +{ + START_CYCLES; + + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_acc_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_acc_values[MATRIX_SIZE / 2]; + + /* do the scatter */ + Q6_vscatteracc_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetslo, + valueslo); + Q6_vscatteracc_RMVwV(VTCM_SCATTER32_ADDRESS, region_len, offsetshi, + valueshi); + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter32); + /* + * This dummy load from vscatter32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Acc 32 cycles = %llu\n");
+}
+
+/* predicated scatter of the 32 bit elements using intrinsics */
+void vector_scatter_q_32(void)
+{
+    START_CYCLES;
+
+    /* copy the offsets and values to vectors */
+    HVX_Vector offsetslo = *(HVX_Vector *)word_offsets;
+    HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2];
+    HVX_Vector valueslo = *(HVX_Vector *)word_q_values;
+    HVX_Vector valueshi = *(HVX_Vector *)&word_q_values[MATRIX_SIZE / 2];
+    HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates;
+    HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2];
+    HVX_VectorPred predslo = Q6_Q_vand_VR(pred_reglo, ~0);
+    HVX_VectorPred predshi = Q6_Q_vand_VR(pred_reghi, ~0);
+
+    /* do the scatter */
+    Q6_vscatter_QRMVwV(predslo, VTCM_SCATTER32_ADDRESS, region_len, offsetslo,
+                       valueslo);
+    Q6_vscatter_QRMVwV(predshi, VTCM_SCATTER32_ADDRESS, region_len, offsetshi,
+                       valueshi);
+
+#if SYNC_VECTOR
+    /* do the sync operation on the 32 bit region this scatter wrote */
+    SCATTER_RELEASE(vscatter32);
+    /*
+     * This dummy load from vscatter32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+     */
+    volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter32;
+#endif
+
+    PRINT_CYCLES("\nVector Scatter Q 32 cycles = %llu\n");
+}
+
+void print_vector(char *str, HVX_Vector *v)
+{
+    unsigned char *ptr = (unsigned char *)v;
+
+    printf("\n\nVector: %s\n", str);
+    for (int i = 0; i < sizeof(HVX_Vector) * 4; ++ptr, ++i) {
+        if (i != 0 && (i % 16) == 0) {
+            printf("\n");
+        }
+        printf("%c ", *ptr);
+    }
+    printf("\n");
+}
+
+void print_vectorpair(char *str, HVX_VectorPair *v)
+{
+    unsigned char *ptr = (unsigned char *)v;
+
+    printf("\n\nVectorPair: %s\n", str);
+    for (int i = 0; i < sizeof(HVX_VectorPair); ++ptr, ++i) {
+        if (i != 0 && (i % 16) == 0) {
+            printf("\n");
+        }
+        printf("%c ", *ptr);
+    }
+    printf("\n");
+}
+
+/* scatter the 16 bit elements with 32 bit offsets using intrinsics */
+void vector_scatter_16_32(void)
+{
+    START_CYCLES;
+
+    /* get the word offsets in a vector pair */
+    HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets;
+    /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */
+
+    /* these values need to be shuffled for the RMWwV scatter */
+    HVX_Vector values = *(HVX_Vector *)half_values;
+    values = Q6_Vh_vshuff_Vh(values);
+    /* print_vector("values", (HVX_Vector *)&values); */
+
+    /* do the scatter */
+    Q6_vscatter_RMWwV(VTCM_SCATTER16_32_ADDRESS, region_len, offsets, values);
+    /* print_vector("scatter16_32_address", (HVX_Vector */
+    /* *)VTCM_SCATTER16_32_ADDRESS); */
+
+#if SYNC_VECTOR
+    /* do the sync operation */
+    SCATTER_RELEASE(vscatter16_32);
+    /*
+     * This dummy load from vscatter16_32 is to complete the synchronization.
+     * Normally this load would be deferred as long as possible to minimize
+     * stalls.
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32; +#endif + + PRINT_CYCLES("\nVector Scatter 16_32 cycles = %llu\n"); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_acc_16_32(void) +{ + START_CYCLES; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */ + + /* these values need to be shuffled for the RMWwV scatter */ + HVX_Vector values = *(HVX_Vector *)half_acc_values; + values = Q6_Vh_vshuff_Vh(values); + /* print_vector("values", (HVX_Vector *)&values); */ + + /* do the scatter */ + Q6_vscatteracc_RMWwV(VTCM_SCATTER16_32_ADDRESS, region_len, offsets, + values); + /* print_vector("scatter16_32_address", (HVX_Vector */ + /* *)VTCM_SCATTER16_32_ADDRESS); */ + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16_32); + /* + * This dummy load from vscatter16_32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. + */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32; +#endif + + PRINT_CYCLES("\nVector Scatter Acc 16_32 cycles = %llu\n"); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_q_16_32(void) +{ + START_CYCLES; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + /* print_vectorpair("word_offsets", (HVX_VectorPair *)&word_offsets); */ + + /* these values need to be shuffled for the RMWwV scatter */ + HVX_Vector values = *(HVX_Vector *)half_q_values; + values = Q6_Vh_vshuff_Vh(values); + /* print_vector("values", (HVX_Vector *)&values); */ + + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = Q6_Vh_vshuff_Vh(pred_reg); + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + /* do the scatter */ + Q6_vscatter_QRMWwV(preds, VTCM_SCATTER16_32_ADDRESS, region_len, offsets, + values); + /* print_vector("scatter16_32_address", (HVX_Vector */ + /* *)VTCM_SCATTER16_32_ADDRESS); */ + +#if SYNC_VECTOR + /* do the sync operation */ + SCATTER_RELEASE(vscatter16_32); + /* + * This dummy load from vscatter16_32 is to complete the synchronization. + * Normally this load would be deferred as long as possible to minimize + * stalls. 
+ */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vscatter16_32; +#endif + + PRINT_CYCLES("\nVector Scatter Q 16_32 cycles = %llu\n"); +} + + +/* gather the elements from the scatter16 buffer */ +void vector_gather_16(void) +{ + START_CYCLES; + + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_ADDRESS; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + + /* do the gather to the gather16 buffer */ + Q6_vgather_ARMVh(vgather, VTCM_SCATTER16_ADDRESS, region_len, offsets); + + +#if SYNC_VECTOR + /* This dummy read of vgather will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgather; +#endif + + PRINT_CYCLES("\nVector Gather 16 cycles = %llu\n"); +} + +static unsigned short gather_q_16_init(void) +{ + char letter = '?'; + return letter | (letter << 8); +} + +void vector_gather_q_16(void) +{ + START_CYCLES; + + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_ADDRESS; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + *vgather = Q6_Vh_vsplat_R(gather_q_16_init()); + /* do the gather to the gather16 buffer */ + Q6_vgather_AQRMVh(vgather, preds, VTCM_SCATTER16_ADDRESS, region_len, + offsets); + + +#if SYNC_VECTOR + /* This dummy read of vgather will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgather; +#endif + + PRINT_CYCLES("\nVector Gather Q 16 cycles = %llu\n"); +} + + +/* gather the elements from the scatter32 buffer */ +void vector_gather_32(void) +{ + START_CYCLES; + + HVX_Vector *vgatherlo = (HVX_Vector *)VTCM_GATHER32_ADDRESS; + HVX_Vector *vgatherhi = + (HVX_Vector *)(VTCM_GATHER32_ADDRESS + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + + /* do the gather to vgather */ + Q6_vgather_ARMVw(vgatherlo, VTCM_SCATTER32_ADDRESS, region_len, offsetslo); + Q6_vgather_ARMVw(vgatherhi, VTCM_SCATTER32_ADDRESS, region_len, offsetshi); + +#if SYNC_VECTOR + /* This dummy read of vgatherhi will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgatherhi; +#endif + + PRINT_CYCLES("\nVector Gather 32 cycles = %llu\n"); +} + +static unsigned int gather_q_32_init(void) +{ + char letter = '?'; + return letter | (letter << 8) | (letter << 16) | (letter << 24); +} + +void vector_gather_q_32(void) +{ + START_CYCLES; + + HVX_Vector *vgatherlo = (HVX_Vector *)VTCM_GATHER32_ADDRESS; + HVX_Vector *vgatherhi = + (HVX_Vector *)(VTCM_GATHER32_ADDRESS + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_VectorPred predslo = Q6_Q_vand_VR(pred_reglo, ~0); + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predshi = Q6_Q_vand_VR(pred_reghi, ~0); + + *vgatherlo = Q6_Vh_vsplat_R(gather_q_32_init()); + *vgatherhi = Q6_Vh_vsplat_R(gather_q_32_init()); + /* do the gather to vgather */ + Q6_vgather_AQRMVw(vgatherlo, predslo, VTCM_SCATTER32_ADDRESS, region_len, + offsetslo); + Q6_vgather_AQRMVw(vgatherhi, predshi, VTCM_SCATTER32_ADDRESS, region_len, + offsetshi); + +#if SYNC_VECTOR + /* This dummy read of vgatherhi will stall until completion */ + volatile HVX_Vector vDummy = *(HVX_Vector *)vgatherhi; +#endif + + PRINT_CYCLES("\nVector Gather Q 32 cycles = %llu\n"); +} + +/* gather the elements from the scatter16_32 buffer */ +void 
vector_gather_16_32(void) +{ + START_CYCLES; + + /* get the vtcm address to gather from */ + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_32_ADDRESS; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + + /* do the gather to vgather */ + Q6_vgather_ARMWw(vgather, VTCM_SCATTER16_32_ADDRESS, region_len, offsets); + + /* the read of gather will stall until completion */ + volatile HVX_Vector values = *(HVX_Vector *)vgather; + + /* deal the elements to get the order back */ + values = Q6_Vh_vdeal_Vh(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; + + + PRINT_CYCLES("\nVector Gather 16_32 cycles = %llu\n"); +} + +void vector_gather_q_16_32(void) +{ + START_CYCLES; + + /* get the vtcm address to gather from */ + HVX_Vector *vgather = (HVX_Vector *)VTCM_GATHER16_32_ADDRESS; + + /* get the word offsets in a vector pair */ + HVX_VectorPair offsets = *(HVX_VectorPair *)word_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = Q6_Vh_vshuff_Vh(pred_reg); + HVX_VectorPred preds = Q6_Q_vand_VR(pred_reg, ~0); + + *vgather = Q6_Vh_vsplat_R(gather_q_16_init()); + /* do the gather to vgather */ + Q6_vgather_AQRMWw(vgather, preds, VTCM_SCATTER16_32_ADDRESS, region_len, + offsets); + + /* the read of gather will stall until completion */ + volatile HVX_Vector values = *(HVX_Vector *)vgather; + + /* deal the elements to get the order back */ + values = Q6_Vh_vdeal_Vh(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; + + + PRINT_CYCLES("\nVector Gather Q 16_32 cycles = %llu\n"); +} + + +static void check_buffer(const char *name, void *c, void *r, size_t size) +{ + char *check = (char *)c; + char *ref = (char *)r; + /* printf("check buffer %s 0x%x, 0x%x, %d\n", name, check, ref, size); */ + for (int i = 0; i < size; i++) { + if (check[i] != ref[i]) { + printf("Error %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i, check[i], + check[i], ref[i], ref[i]); + err++; + } + } +} + + +/* + * These scalar functions are the C equivalents of the vector functions that + * use HVX + */ + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16(unsigned short *vscatter16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] = half_values[i]; + } + + PRINT_CYCLES("\nScalar Scatter 16 cycles = %llu\n"); +} + +void check_scatter_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + check_buffer("check_scatter_16", vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_acc_16(unsigned short *vscatter16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] += half_acc_values[i]; + } + + PRINT_CYCLES("\nScalar Scatter Acc 16 cycles = %llu\n"); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_q_16(unsigned short *vscatter16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16[half_offsets[i] / 2] = half_q_values[i]; + } + } + + PRINT_CYCLES("\nScalar Scatter Q 16 cycles = %llu\n"); +} + + +void check_scatter_acc_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_acc_16(vscatter16_ref); + check_buffer("check_scatter_acc_16", vscatter16, vscatter16_ref, + 
SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void check_scatter_q_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_acc_16(vscatter16_ref); + scalar_scatter_q_16(vscatter16_ref); + check_buffer("check_scatter_q_16", vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32(unsigned int *vscatter32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] = word_values[i]; + } + + PRINT_CYCLES("\n\nScalar Scatter 32 cycles = %llu\n"); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_acc_32(unsigned int *vscatter32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] += word_acc_values[i]; + } + + PRINT_CYCLES("\nScalar Scatter Acc 32 cycles = %llu\n"); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_q_32(unsigned int *vscatter32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; i++) { + if (word_predicates[i]) { + vscatter32[word_offsets[i] / 4] = word_q_values[i]; + } + } + + PRINT_CYCLES("\nScalar Scatter Q 32 cycles = %llu\n"); +} + +void check_scatter_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + check_buffer("check_scatter_32", vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +void check_scatter_acc_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_acc_32(vscatter32_ref); + check_buffer("check_scatter_acc_32", vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +void check_scatter_q_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_acc_32(vscatter32_ref); + scalar_scatter_q_32(vscatter32_ref); + check_buffer("check_scatter_q_32", vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_16_32(unsigned short *vscatter16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] = half_values[i]; + } + + PRINT_CYCLES("\n\nScalar Scatter 16_32 cycles = %llu\n"); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatteracc_16_32(unsigned short *vscatter16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] += half_acc_values[i]; + } + + PRINT_CYCLES("\n\nScalar Scatter Acc 16_32 cycles = %llu\n"); +} + +void scalar_scatter_q_16_32(unsigned short *vscatter16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16_32[word_offsets[i] / 2] = half_q_values[i]; + } + } + + PRINT_CYCLES("\nScalar Scatter Q 16_32 cycles = %llu\n"); +} + +void check_scatter_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + check_buffer("check_scatter_16_32", vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void check_scatter_acc_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + 
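+    /* (replays the plain scatter, then the accumulating one on top) */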
scalar_scatteracc_16_32(vscatter16_32_ref); + check_buffer("check_scatter_acc_16_32", vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void check_scatter_q_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatteracc_16_32(vscatter16_32_ref); + scalar_scatter_q_16_32(vscatter16_32_ref); + check_buffer("check_scatter_q_16_32", vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16(unsigned short *vgather16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16[i] = vscatter16[half_offsets[i] / 2]; + } + + PRINT_CYCLES("\n\nScalar Gather 16 cycles = %llu\n"); +} + +void scalar_gather_q_16(unsigned short *vgather16) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16[i] = vscatter16[half_offsets[i] / 2]; + } + } + + PRINT_CYCLES("\n\nScalar Gather Q 16 cycles = %llu\n"); +} + +void check_gather_16() +{ + memset(vgather16_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16(vgather16_ref); + check_buffer("check_gather_16", vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void check_gather_q_16() +{ + memset(vgather16_ref, gather_q_16_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_q_16(vgather16_ref); + check_buffer("check_gather_q_16", vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_32(unsigned int *vgather32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather32[i] = vscatter32[word_offsets[i] / 4]; + } + + PRINT_CYCLES("\n\nScalar Gather 32 cycles = %llu\n"); +} + +void scalar_gather_q_32(unsigned int *vgather32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (word_predicates[i]) { + vgather32[i] = vscatter32[word_offsets[i] / 4]; + } + } + + PRINT_CYCLES("\n\nScalar Gather Q 32 cycles = %llu\n"); +} + + +void check_gather_32(void) +{ + memset(vgather32_ref, 0, MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32(vgather32_ref); + check_buffer("check_gather_32", vgather32, vgather32_ref, + MATRIX_SIZE * sizeof(unsigned int)); +} + +void check_gather_q_32(void) +{ + memset(vgather32_ref, gather_q_32_init(), + MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_q_32(vgather32_ref); + check_buffer("check_gather_q_32", vgather32, vgather32_ref, + MATRIX_SIZE * sizeof(unsigned int)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16_32(unsigned short *vgather16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16_32[i] = vscatter16_32[word_offsets[i] / 2]; + } + + PRINT_CYCLES("\n\nScalar Gather 16_32 cycles = %llu\n"); +} + +void scalar_gather_q_16_32(unsigned short *vgather16_32) +{ + START_CYCLES; + + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16_32[i] = vscatter16_32[word_offsets[i] / 2]; + } + } + + PRINT_CYCLES("\n\nScalar Gather Q 16_32 cycles = %llu\n"); +} + +void check_gather_16_32(void) +{ + memset(vgather16_32_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32(vgather16_32_ref); + check_buffer("check_gather_16_32", vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void check_gather_q_16_32(void) +{ + 
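+    /* seed the reference with the same '?' fill the masked gather uses */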
+    memset(vgather16_32_ref, gather_q_16_init(),
+           MATRIX_SIZE * sizeof(unsigned short));
+    scalar_gather_q_16_32(vgather16_32_ref);
+    check_buffer("check_gather_q_16_32", vgather16_32, vgather16_32_ref,
+                 MATRIX_SIZE * sizeof(unsigned short));
+}
+
+/* These functions print the buffers to the display */
+
+/* print the scatter16 buffer */
+void print_scatter16_buffer(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16 bit scatter buffer at 0x%08x",
+     *        VTCM_SCATTER16_ADDRESS);
+     */
+    printf("\n\nPrinting the 16 bit scatter buffer");
+
+    for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (unsigned char)((vscatter16[i] >> (j * 8)) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the gather 16 buffer */
+void print_gather_result_16(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16 bit gather result at 0x%08x\n",
+     *        VTCM_GATHER16_ADDRESS);
+     */
+    printf("\n\nPrinting the 16 bit gather result\n");
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (unsigned char)((vgather16[i] >> (j * 8)) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the scatter32 buffer */
+void print_scatter32_buffer(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 32 bit scatter buffer at 0x%08x",
+     *        VTCM_SCATTER32_ADDRESS);
+     */
+    printf("\n\nPrinting the 32 bit scatter buffer");
+
+    for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+
+        for (int j = 0; j < 4; j++) {
+            printf("%c", (unsigned char)((vscatter32[i] >> (j * 8)) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the gather 32 buffer */
+void print_gather_result_32(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 32 bit gather result at 0x%08x\n",
+     *        VTCM_GATHER32_ADDRESS);
+     */
+    printf("\n\nPrinting the 32 bit gather result\n");
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        for (int j = 0; j < 4; j++) {
+            printf("%c", (unsigned char)((vgather32[i] >> (j * 8)) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the scatter16_32 buffer */
+void print_scatter16_32_buffer(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16_32 bit scatter buffer at 0x%08x",
+     *        VTCM_SCATTER16_32_ADDRESS);
+     */
+    printf("\n\nPrinting the 16_32 bit scatter buffer");
+
+    for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) {
+        if ((i % MATRIX_SIZE) == 0) {
+            printf("\n");
+        }
+
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (unsigned char)((vscatter16_32[i] >> (j * 8)) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/* print the gather 16_32 buffer */
+void print_gather_result_16_32(void)
+{
+#if PRINT_DATA
+    /*
+     * printf("\n\nPrinting the 16_32 bit gather result at 0x%08x\n",
+     *        VTCM_GATHER16_32_ADDRESS);
+     */
+    printf("\n\nPrinting the 16_32 bit gather result\n");
+
+    for (int i = 0; i < MATRIX_SIZE; i++) {
+        for (int j = 0; j < 2; j++) {
+            printf("%c", (unsigned char)((vgather16_32[i] >> (j * 8)) & 0xff));
+        }
+
+        printf(" ");
+    }
+    printf("\n");
+#endif
+}
+
+/*
+ * Set up the TCM address translation.
+ * Note: this method is only for the standalone environment;
+ * SDK users should use the "VTCM Manager" to allocate VTCM.
+ */
+void setup_tcm(void)
+{
+    VTCM_BASE_ADDRESS = get_vtcm_base();
+
+    VTCM_SCATTER16_ADDRESS = VTCM_BASE_ADDRESS;
+    VTCM_GATHER16_ADDRESS = VTCM_BASE_ADDRESS + SCATTER16_BUF_SIZE;
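+    /* the remaining buffers are packed back to back in VTCM */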
+    VTCM_SCATTER32_ADDRESS = VTCM_GATHER16_ADDRESS + GATHER16_BUF_SIZE;
+    VTCM_GATHER32_ADDRESS = VTCM_SCATTER32_ADDRESS + SCATTER32_BUF_SIZE;
+    VTCM_SCATTER16_32_ADDRESS = VTCM_GATHER32_ADDRESS + GATHER32_BUF_SIZE;
+    VTCM_GATHER16_32_ADDRESS = VTCM_SCATTER16_32_ADDRESS + SCATTER16_BUF_SIZE;
+
+    /* the vtcm base address */
+    vtcm_base = (unsigned char *)VTCM_BASE_ADDRESS;
+
+    /* scatter gather 16 bit elements using 16 bit offsets */
+    vscatter16 = (unsigned short *)VTCM_SCATTER16_ADDRESS;
+    vgather16 = (unsigned short *)VTCM_GATHER16_ADDRESS;
+
+    /* scatter gather 32 bit elements using 32 bit offsets */
+    vscatter32 = (unsigned int *)VTCM_SCATTER32_ADDRESS;
+    vgather32 = (unsigned int *)VTCM_GATHER32_ADDRESS;
+
+    /* scatter gather 16 bit elements using 32 bit offsets */
+    vscatter16_32 = (unsigned short *)VTCM_SCATTER16_32_ADDRESS;
+    vgather16_32 = (unsigned short *)VTCM_GATHER16_32_ADDRESS;
+}
+
+void inst_test(void)
+{
+    /* Should NOT raise an error when paranoid-commit-state is turned on */
+    uint32_t R = 0;
+    asm volatile("release(%0):at\n\t" : : "r"(R));
+}
+
+int main(void)
+{
+    setup_tcm();
+    prefill_vtcm_scratch();
+
+    /* 16 bit elements with 16 bit offsets */
+    create_offsets_and_values_16();
+    create_preds_16();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_16(vscatter16);
+#endif
+    vector_scatter_16();
+    print_scatter16_buffer();
+    check_scatter_16();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_16(vgather16);
+#endif
+    vector_gather_16();
+    print_gather_result_16();
+    check_gather_16();
+
+    vector_gather_q_16();
+    print_gather_result_16();
+    check_gather_q_16();
+
+    vector_scatter_acc_16();
+    print_scatter16_buffer();
+    check_scatter_acc_16();
+
+    vector_scatter_q_16();
+    print_scatter16_buffer();
+    check_scatter_q_16();
+
+    /* 32 bit elements with 32 bit offsets */
+    create_offsets_and_values_32();
+    create_preds_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_32(vscatter32);
+#endif
+    vector_scatter_32();
+    print_scatter32_buffer();
+    check_scatter_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_32(vgather32);
+#endif
+    vector_gather_32();
+    print_gather_result_32();
+    check_gather_32();
+
+    vector_gather_q_32();
+    print_gather_result_32();
+    check_gather_q_32();
+
+    vector_scatter_acc_32();
+    print_scatter32_buffer();
+    check_scatter_acc_32();
+
+    vector_scatter_q_32();
+    print_scatter32_buffer();
+    check_scatter_q_32();
+
+    /* 16 bit elements with 32 bit offsets */
+    create_offsets_and_values_16_32();
+    create_preds_16_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_scatter_16_32(vscatter16_32);
+#endif
+    vector_scatter_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_16_32();
+
+#if PRINT_CYCLE_COUNTS
+    scalar_gather_16_32(vgather16_32);
+#endif
+    vector_gather_16_32();
+    print_gather_result_16_32();
+    check_gather_16_32();
+
+    vector_gather_q_16_32();
+    print_gather_result_16_32();
+    check_gather_q_16_32();
+
+    vector_scatter_acc_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_acc_16_32();
+
+    vector_scatter_q_16_32();
+    print_scatter16_32_buffer();
+    check_scatter_q_16_32();
+
+    inst_test();
+    printf("%s\n", err ? "FAIL" : "PASS");
+    return err;
+}
diff --git a/tests/tcg/hexagon/system/strutils.h b/tests/tcg/hexagon/system/strutils.h
new file mode 100644
index 000000000000..14f4a290b817
--- /dev/null
+++ b/tests/tcg/hexagon/system/strutils.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright(c) 2023-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef STRUTILS_H
+#define STRUTILS_H
+
+#include <string.h>
+
+static inline void sort_str_arr(char **arr, size_t n)
+{
+    for (size_t i = 0; i + 1 < n; i++) {
+        for (size_t j = 0; j + i + 1 < n; j++) {
+            if (strcmp(arr[j], arr[j + 1]) > 0) {
+                char *tmp = arr[j];
+                arr[j] = arr[j + 1];
+                arr[j + 1] = tmp;
+            }
+        }
+    }
+}
+
+#endif
diff --git a/tests/tcg/hexagon/system/tlb-miss-tlblock.S b/tests/tcg/hexagon/system/tlb-miss-tlblock.S
new file mode 100644
index 000000000000..fe07aca47b37
--- /dev/null
+++ b/tests/tcg/hexagon/system/tlb-miss-tlblock.S
@@ -0,0 +1,156 @@
+/*
+ * Copyright(c) 2019-2025 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+/*
+ * Test Purpose:
+ * Verify that tlbmissx and tlbmissrw do not set the syscfg.tl bit.
+ * The HW spec says:
+ * "TLBLOCK is acquired automatically whenever a hardware thread raises a
+ * TLB miss-RW or TLBmiss-X exception."
+ * A casual reader might assume that a miss handler therefore
+ * implicitly holds the lock; apparently that
+ * isn't the case.
+ */
+
+.global start
+start:
+    r0 = ##evb
+    evb = r0
+    r0 = ##0
+    ssr = r0
+    jump #setup
+
+#define tlb_index r11
+#define stack r29
+#define data r18
+tlb_index = ##0x00000007
+
+.org 0x100
+
+evb:
+    jump #reset
+    jump #nmi
+    jump #error
+    jump #0
+    jump #tlbmissx
+    jump #0
+    jump #tlbmissrw
+
+
+setup:
+    {
+        r1 = ##0xc009b800
+        r0 = ##0xf7137010
+    }
+    tlb_index = add(tlb_index, #1)
+    tlbw(r1:0,tlb_index)
+
+/* Enable MMU */
+    r2 = ##0x0085a07f
+    syscfg = r2
+
+/* Test setup */
+    r12 = #0x12
+    r0 = #0x6
+    r7 = ##0x77777777
+    r6 = ##0x66666666
+    data = ##0xf2000000
+    stack = ##0x9ba01000
+    jump ##.L_server_loop
+
+/* event vector handlers */
+reset:
+    r2 = #1
+    stop(r0)
+nmi:
+    r2 = #1
+    stop(r0)
+error:
+    r2 = #1
+    stop(r0)
+
+
+/*
+ * Can only handle a single execute (X) fault.
+ */
+tlbmissx:
+    r0 = syscfg
+    r1 = #0x800
+/*
+ * Fail if we automatically start setting SYSCFG:TL again.
+ */
+    r0 = and(r0, r1)
+    {
+        p0 = cmp.eq(r0, r1); if (p0.new) jump:t .Lfailmissx
+    }
+    {
+        r1 = ##0xc009b900
+        r0 = ##0xf7137210
+    }
+    tlb_index = add(tlb_index, #1)
+    tlbw(r1:0,tlb_index)
+    tlbunlock
+    rte
+    stop(r0);
+.Lfailmissx:
+    r2 = #1
+    stop(r2);
+
+/*
+ * Can only handle one stack fault and one data fault.
+ */
+tlbmissrw:
+    r0 = syscfg
+    r1 = #0x800
+/*
+ * Fail if we automatically start setting SYSCFG:TL again.
+ */
+    r0 = and(r0, r1)
+    {
+        p0 = cmp.eq(r0, r1); if (p0.new) jump:t .Lfailmissrw
+    }
+    r0 = badva
+    p0 = cmp.eq(stack, r0)    // missed the stack
+    if (!p0) jump .Ldata
+    {
+        r1 = ##0xc009ba00
+        r0 = ##0xf7137210
+    }
+    jump #.Ldone
+.Ldata:
+    {
+        r1 = ##0xc00f2000
+        r0 = ##0xf71e4010
+    }
+.Ldone:
+    tlb_index = add(tlb_index, #1)
+    tlbw(r1:0,tlb_index)
+    tlbunlock
+    rte
+.Lfailmissrw:
+    r2 = #1
+    stop(r2);
+
+
+
+.org 0x100000
+    nop
+.Lpass:
+    r2 = #0
+    stop(r0);
+    trap0(#0x18)
+.L_server_loop:
+{
+    p0 = cmp.eq(r0,#-0x1)
+    if (!p0.new) jump:t .Lpass
+    memd(stack) = r7:6;    // S1 store to stack will also fault
+    memw(data) = r12;      // S0 store will fault
+}
+/*
+ * We should not get here:
+ */
+    r2 = #1
+    stop(r0);
diff --git a/tests/tcg/hexagon/system/vid_reg.c b/tests/tcg/hexagon/system/vid_reg.c
new file mode 100644
index 000000000000..25f266f98b2d
--- /dev/null
+++ b/tests/tcg/hexagon/system/vid_reg.c
@@ -0,0 +1,36 @@
+/*
+ * Verify vid reads/writes really update the register.
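+ * A write of L2VIC_NO_PENDING (0xffffffff) must leave the value unchanged.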
+ *
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+
+static inline uint32_t getvid(void)
+{
+    uint32_t reg;
+    asm volatile("%0=vid;" : "=r"(reg));
+    return reg;
+}
+static inline void setvid(uint32_t val)
+{
+    asm volatile("vid=%0;" : : "r"(val));
+    return;
+}
+int main(void)
+{
+    uint32_t testval = 0x3ff03ff;
+    setvid(testval);
+    if (testval != getvid()) {
+        printf("ERROR: vid read returned: 0x%x\n", getvid());
+    }
+    assert(testval == getvid());
+
+    /* L2VIC_NO_PENDING (0xffffffff) should not update the vid */
+    setvid(0xffffffff);
+    if (testval != getvid()) {
+        printf("ERROR: vid read returned: 0x%x\n", getvid());
+    }
+
+    assert(testval == getvid());
+}
diff --git a/tests/tcg/hexagon/utimer.c b/tests/tcg/hexagon/utimer.c
new file mode 100644
index 000000000000..ae3bca320192
--- /dev/null
+++ b/tests/tcg/hexagon/utimer.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+static int err;
+
+#include "hex_test.h"
+
+static uint64_t get_time(void)
+{
+    uint64_t time;
+    asm volatile("%0 = utimer\n\t"
+                 : "=r"(time)
+                 :
+                 :
+                 );
+    return time;
+}
+
+static uint64_t get_time_from_regs(void)
+{
+    uint32_t time_low;
+    uint32_t time_high;
+    /* note: reading the utimerhi/utimerlo pair piecewise is not atomic */
+    asm volatile("%0 = utimerhi\n\t"
+                 "%1 = utimerlo\n\t"
+                 : "=r"(time_high), "=r"(time_low)
+                 :
+                 :
+                 );
+    return ((uint64_t)time_high << 32) | (uint64_t)time_low;
+}
+
+
+int main(void)
+{
+    err = 0;
+
+    uint64_t t0 = get_time();
+    check64_ne(t0, 0);
+
+    uint64_t t1 = get_time_from_regs();
+    check64_ne(t1, 0);
+
+    puts(err ? "FAIL" : "PASS");
+    return err;
+}