From 3a8f11ce7df388ce98b5c4c9b30111217ab1771c Mon Sep 17 00:00:00 2001
From: Sean Maas
Date: Thu, 12 Dec 2024 23:45:55 -0500
Subject: [PATCH] Lay groundwork for HLEing the DSP-1 coprocessor

---
 README.md      |   1 +
 src/cop_dsp1.S | 189 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/memory.S   |  81 +++++++++++++++++----
 3 files changed, 256 insertions(+), 15 deletions(-)
 create mode 100644 src/cop_dsp1.S

diff --git a/README.md b/README.md
index 05513f3..fb75d55 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ its build system. With that set up, run `make` in the project root directory to
 * [Fullsnes](https://problemkaputt.de/fullsnes.htm) - The main source of information on SNES hardware
 * [Anomie Docs](https://www.romhacking.net/community/548) - More detailed documentation for certain components
 * [6502 Tutorials](http://6502.org/tutorials/) - Has articles thoroughly covering the CPU and its quirks
+* [bsnes](https://github.com/bsnes-emu/bsnes) - Reference for the HLE DSP-1 coprocessor commands
 
 ### Other Links
 * [Hydra's Lair](https://hydr8gon.github.io) - Blog where I may or may not write about things
diff --git a/src/cop_dsp1.S b/src/cop_dsp1.S
new file mode 100644
index 0000000..2c7fa28
--- /dev/null
+++ b/src/cop_dsp1.S
@@ -0,0 +1,189 @@
+/*
+    Copyright 2021-2024 Hydr8gon
+
+    This file is part of sodium64.
+
+    sodium64 is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published
+    by the Free Software Foundation, either version 3 of the License,
+    or (at your option) any later version.
+
+    sodium64 is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+    General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with sodium64. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include <regdef.h>
+
+.globl read_dsp1sr
+.globl read_dsp1dr
+.globl write_dsp1dr
+
+.data
+
+.align 4
+input_buf: .hword 0:8 // Command inputs; dsp1_execute stores each at index (inputs left - 1)
+output_buf: .hword 0:8 // Command outputs; dsp1_execute reads index (outputs left - 1) & 7
+
+.align 4
+cmd_func: .word 0 // Current command function; loaded/stored together with cmd_count (ld/sd)
+cmd_count: .word 0 // Halfword +0: inputs left, +2: outputs left (assumes big-endian; confirm)
+dsp1_sr: .hword 0x8400 // Status register; DRC (0x400) starts set, so a command is awaited
+dsp1_dr: .hword 0x80 // Data register; 0x80 is also the value finish_cmd reports
+sr_toggle: .byte 0 // Byte offset used by the last read_dsp1sr; flips on every read
+
+.align 4
+dsp1_cmds: // Lookup table of {function, parameter counts} pairs for DSP1 commands 0x00-0x3F
+    .word dsp1_unimp, 0x20001, dsp1_unimp, 0x40000, dsp1_unimp, 0x70004, dsp1_unimp, 0x30003 // 0x00-0x03
+    .word dsp1_unimp, 0x20002, dsp1_unimp, 0x40000, dsp1_unimp, 0x30003, dsp1_unimp, 0x10001 // 0x04-0x07
+    .word dsp1_unimp, 0x30002, dsp1_unimp, 0x30003, dsp1_raster, 0x10004, dsp1_unimp, 0x30001 // 0x08-0x0B
+    .word dsp1_unimp, 0x30002, dsp1_unimp, 0x30003, dsp1_unimp, 0x20002, dsp1_unimp, 0x10001 // 0x0C-0x0F
+    .word dsp1_unimp, 0x20002, dsp1_unimp, 0x40000, dsp1_unimp, 0x70004, dsp1_unimp, 0x30003 // 0x10-0x13
+    .word dsp1_unimp, 0x60003, dsp1_unimp, 0x40000, dsp1_unimp, 0x30003, dsp1_unimp, 0x10400 // 0x14-0x17
+    .word dsp1_unimp, 0x40001, dsp1_unimp, 0x30003, dsp1_unimp, 0x00000, dsp1_unimp, 0x30001 // 0x18-0x1B
+    .word dsp1_unimp, 0x60003, dsp1_unimp, 0x30003, dsp1_unimp, 0x20002, dsp1_unimp, 0x10400 // 0x1C-0x1F
+    .word dsp1_unimp, 0x20001, dsp1_unimp, 0x40000, dsp1_unimp, 0x70004, dsp1_unimp, 0x30003 // 0x20-0x23
+    .word dsp1_unimp, 0x20002, dsp1_unimp, 0x40000, dsp1_unimp, 0x30003, dsp1_unimp, 0x10001 // 0x24-0x27
+    .word dsp1_unimp, 0x30001, dsp1_unimp, 0x30003, dsp1_unimp, 0x00000, dsp1_unimp, 0x30001 // 0x28-0x2B
+    .word dsp1_unimp, 0x30002, dsp1_unimp, 0x30003, dsp1_unimp, 0x20002, dsp1_unimp, 0x10001 // 0x2C-0x2F
+    .word dsp1_unimp, 0x20002, dsp1_unimp, 0x40000, dsp1_unimp, 0x70004, dsp1_unimp, 0x30003 // 0x30-0x33
+    .word dsp1_unimp, 0x60003, dsp1_unimp, 0x40000, dsp1_unimp, 0x30003, dsp1_unimp, 0x10400 // 0x34-0x37
+    .word dsp1_unimp, 0x40001, dsp1_unimp, 0x30003, dsp1_unimp, 0x00000, dsp1_unimp, 0x30001 // 0x38-0x3B
+    .word dsp1_unimp, 0x60003, dsp1_unimp, 0x30003, dsp1_unimp, 0x20002, dsp1_unimp, 0x10400 // 0x3C-0x3F
+
+.text
+.set noreorder
+
+.align 5
+dsp1_execute: // a0: status (dsp1_sr value); returns via ra; clobbers a0, t0, t1, v1, ra
+    // Check if the data register is in 8-bit mode, waiting for a command
+    andi t0, a0, 0x400 // DRC
+    beqz t0, check_input
+    move v1, ra // Delay slot: keep the return address in v1, as jalr below overwrites ra
+
+    // Look up and reload a DSP1 command function and its parameter counts
+    lbu t0, dsp1_dr + 1 // Byte at offset 1, which write_dsp1dr fills while DRS is clear
+    andi t1, t0, 0xC0
+    bnez t1, skip_cmd // Only values 0x00-0x3F have table entries
+    sll t0, t0, 3 // Delay slot: each table entry is 8 bytes
+    ld t0, dsp1_cmds(t0) // Function and counts as one doubleword
+    xori a0, a0, 0x400 // DRC
+    sd t0, cmd_func // Fills cmd_func and the adjacent cmd_count
+    sh a0, dsp1_sr
+    jr v1
+    nop // Explicit delay slot, so check_input's xori no longer runs on this return path
+check_input:
+    // Do nothing unless the upper byte was accessed
+    xori a0, a0, 0x1000 // DRS
+    sh a0, dsp1_sr
+    andi t0, a0, 0x1000 // DRS
+    bnez t0, skip_cmd
+    nop
+
+    // Write a value to the input buffer if there's anything to send
+    lhu t0, cmd_count + 0 // Inputs left
+    beqz t0, check_output
+    addi t0, t0, -1
+    sh t0, cmd_count + 0
+    lhu t1, dsp1_dr
+    sll t0, t0, 1 // Halfword index to byte offset
+    sh t1, input_buf(t0)
+
+    // Execute a command once all input has been sent
+    bnez t0, skip_cmd
+    nop
+loop_cmd:
+    lw t0, cmd_func
+    jalr t0 // Overwrites ra; the caller's return address is held in v1
+    nop
+
+check_output:
+    // Read a value from the output buffer if there's anything to receive
+    lhu t0, cmd_count + 2 // Outputs left
+    beqz t0, check_cmd
+    addi t0, t0, -1
+    sh t0, cmd_count + 2
+    sll t0, t0, 1
+    andi t0, t0, 0xE // Keep the byte offset inside the 8-halfword output buffer
+    lhu t1, output_buf(t0)
+    sh t1, dsp1_dr
+    jr v1
+    nop
+
+check_cmd:
+    // Check if running a raster command, or if an end value was sent
+    lw t0, cmd_func
+    la t1, dsp1_raster
+    bne t0, t1, finish_cmd
+    li t1, 0x8000 // Delay slot: the end value to compare against
+    lhu t0, dsp1_dr
+    beq t0, t1, finish_cmd
+    nop
+
+    // Increment the input and loop the raster command
+    lhu t0, input_buf + 0
+    li t1, 4
+    addi t0, t0, 1
+    sh t0, input_buf + 0
+    sh t1, cmd_count + 2 // Four more outputs for the next pass
+    b loop_cmd
+    nop
+
+finish_cmd:
+    // Report command completion and wait for the next one
+    li t0, 0x80
+    sh t0, dsp1_dr
+    ori a0, a0, 0x400 // DRC
+    sh a0, dsp1_sr
+skip_cmd:
+    jr v1
+    nop
+
+.align 5
+dsp1_raster:
+    // Stub the raster command just to differentiate it
+    jr ra
+    nop
+
+.align 5
+dsp1_unimp:
+    // Do nothing for unimplemented DSP1 commands
+    jr ra
+    nop
+
+.align 5
+read_dsp1sr: // v0: value
+    // Alternate between reading the status register's low and high bytes
+    lbu t0, sr_toggle
+    xori t0, t0, 0x1 // Flip the byte offset: 1 on the first read, then 0, then 1, ...
+    sb t0, sr_toggle
+    lbu v0, dsp1_sr(t0)
+    jr ra
+    nop
+
+.align 5
+read_dsp1dr: // v0: value
+    // Read from the data register's low or high byte and run the DSP1
+    lhu a0, dsp1_sr // Status, handed to dsp1_execute in a0
+    andi t0, a0, 0x1000 // DRS
+    srl t0, t0, 12
+    xori t0, t0, 0x1 // Byte offset: 1 while DRS is clear, 0 while it is set
+    lbu v0, dsp1_dr(t0)
+    j dsp1_execute // Tail jump: ra is untouched, so dsp1_execute returns to our caller
+    nop
+
+
+.align 5
+write_dsp1dr: // a1: value
+    // Write to the data register's low or high byte and run the DSP1
+    lhu a0, dsp1_sr // Status, handed to dsp1_execute in a0
+    andi t0, a0, 0x1000 // DRS
+    srl t0, t0, 12
+    xori t0, t0, 0x1 // Byte offset: 1 while DRS is clear, 0 while it is set
+    sb a1, dsp1_dr(t0)
+    j dsp1_execute // Tail jump: ra is untouched, so dsp1_execute returns to our caller
+    nop
diff --git a/src/memory.S b/src/memory.S
index a168809..6f3cec3 100644
--- a/src/memory.S
+++ b/src/memory.S
@@ -35,15 +35,21 @@
 sram: .byte 0:0x8000
 wram: .byte 0:0x20000
 empty: .byte 0:0x2000
-wmadd: .word 0
 
 .align 4
 memory_map: .word 0:0x800
 rom_addrs: .word 0:0x100
 rom_entries: .word 0:0x100
+
+.align 4
+wmadd: .word 0
 rom_pointer: .byte 0
 sram_dirty: .byte 0
 
+.align 4
+dsp1_reads: .word read_dsp1dr, read_dsp1sr // Byte offset 0: data register, 4: status register
+dsp1_writes: .word write_dsp1dr, write_unk // Same offsets; status register writes are unknown
+
 .align 4
 read_iomap:
     .word read_unk:(0x2133 - 0x20FF) // 0x2100-0x2133
@@ -222,14 +228,20 @@ map_lorom:
     li t2, 0x2000 // Block size (8KB)
 
     // Set the ROM mask using the header size
-    lw t4, 0xA0007FD4(t8)
+    lw t5, 0xA0007FD4(t8)
     li t7, 0x7F0000
     li t3, 0x800
-    andi t4, t4, 0xFF
+    andi t4, t5, 0xFF
     sll t4, t3, t4
     addi t4, t4, -1
     and t7, t7, t4
 
+    // Check if the DSP-1 coprocessor should be mapped
+    srl t5, t5, 8 // Second-lowest byte of the header word loaded above
+    andi t5, t5, 0xFF
+    addi t5, t5, -0x03
+    sltiu t6, t5, 0x04 // t6 = 1 if that byte is 0x03-0x06, else 0
+
     // Set the SRAM mask using the header size
     lw t9, 0xA0007FD8(t8)
     srl t9, t9, 24
@@ -276,6 +288,14 @@ lowram_area:
     ori t3, t3, 1 // Write bit
 
 lorom_area:
+    // Map I/O registers to banks 0x30-0x3F if DSP-1 is enabled
+    srl t3, t0, 16
+    sub t3, t3, 0x30
+    sltiu t3, t3, 0x10 // 1 if (t0 >> 16) is 0x30-0x3F
+    and t3, t3, t6 // ...and the DSP-1 flag from map_lorom is set
+    bnez t3, loio_area
+    nop
+
     // Special case LoROM; the exception handler will dynamically load blocks
     and t4, t7, t0 // Mirrored bank
     srl t4, t4, 1 // 32KB every 64KB bank
@@ -341,14 +361,20 @@ map_hirom:
     li t2, 0x2000 // Block size (8KB)
 
     // Set the ROM mask using the header size
-    lw t4, 0xA000FFD4(t8)
+    lw t5, 0xA000FFD4(t8)
     li t7, 0x3FFFFF
     li t3, 0x400
-    andi t4, t4, 0xFF
+    andi t4, t5, 0xFF
     sll t4, t3, t4
     addi t4, t4, -1
     and t7, t7, t4
 
+    // Check if the DSP-1 coprocessor should be mapped
+    srl t5, t5, 8 // Second-lowest byte of the header word loaded above
+    andi t5, t5, 0xFF
+    addi t5, t5, -0x03
+    sltiu t6, t5, 0x04 // t6 = 1 if that byte is 0x03-0x06, else 0
+
     // Set the SRAM mask using the header size
     lw t9, 0xA000FFD8(t8)
     srl t9, t9, 24
@@ -399,6 +425,13 @@ hirom_area:
     add t3, t8, t4
 
 hisram_area:
+    // Map I/O registers to banks 0x00-0x0F if DSP-1 is enabled
+    srl t3, t0, 16
+    sltiu t3, t3, 0x10 // 1 if (t0 >> 16) is 0x00-0x0F
+    and t3, t3, t6 // ...and the DSP-1 flag from map_hirom is set
+    bnez t3, hiio_area
+    nop
+
     // Special case SRAM so writes can be tracked
     beqz t9, hiempty_area
     nop
@@ -550,24 +583,33 @@ tlbl_io:
     mfc0 k0, $14 // EPC
     addi ra, k0, 4
 
-    // Look up and return to an I/O read function if no memory is mapped
+    // Look up an I/O read function if no memory is mapped
     andi a0, a0, 0xFFFF
     addi t0, a0, -0x2100
     bgeu t0, 0x4380 - 0x2100, tlbl_unk
     sll t0, t0, 2
     lw k0, read_iomap(t0)
+
+tlbl_ret:
+    // Return to a function and adjust cycle count for an I/O read
     mtc0 k0, $14 // EPC
     addi s5, s5, RAM_CYCLE - IO_CYCLE
     nop
     eret
 
 tlbl_unk:
-    // Return to the unknown I/O read function if no register is mapped
+    // Use a DSP-1 read function if enabled, or fall back to an unknown I/O read
     la k0, read_unk
-    mtc0 k0, $14 // EPC
-    addi s5, s5, RAM_CYCLE - IO_CYCLE
+    addi t0, a0, -0x6000 // Only used to range-check the address
+    bgeu t0, 0xA000, tlbl_ret // Address below 0x6000: keep read_unk
+    sltiu t1, t0, 0x2000 // HiROM
+    sll t1, t1, 1
+    sll t0, a0, t1 // Shift the address itself: bit 12 (HiROM) or bit 14 (LoROM) selects
+    srl t0, t0, 12
+    andi t0, t0, 0x4 // Offset 0: data register, 4: status register
+    lw k0, dsp1_reads(t0)
+    b tlbl_ret
     nop
-    eret
 
 .align 5
 tlbl_sram:
@@ -619,24 +661,33 @@ tlbs_io:
     mfc0 k0, $14 // EPC
     addi ra, k0, 4
 
-    // Look up and return to an I/O write function if no memory is mapped
+    // Look up an I/O write function if no memory is mapped
     andi a0, a0, 0xFFFF
     addi t0, a0, -0x2100
     bgeu t0, 0x4380 - 0x2100, tlbs_unk
     sll t0, t0, 2
     lw k0, write_iomap(t0)
+
+tlbs_ret:
+    // Return to a function and adjust cycle count for an I/O write
     mtc0 k0, $14 // EPC
     addi s5, s5, RAM_CYCLE - IO_CYCLE
     nop
     eret
 
 tlbs_unk:
-    // Return to the unknown I/O write function if no register is mapped
+    // Use a DSP-1 write function if enabled, or fall back to an unknown I/O write
     la k0, write_unk
-    mtc0 k0, $14 // EPC
-    addi s5, s5, RAM_CYCLE - IO_CYCLE
+    addi t0, a0, -0x6000 // Only used to range-check the address
+    bgeu t0, 0xA000, tlbs_ret // Address below 0x6000: keep write_unk
+    sltiu t1, t0, 0x2000 // HiROM
+    sll t1, t1, 1
+    sll t0, a0, t1 // Shift the address itself: bit 12 (HiROM) or bit 14 (LoROM) selects
+    srl t0, t0, 12
+    andi t0, t0, 0x4 // Offset 0: data register, 4: status register (write_unk)
+    lw k0, dsp1_writes(t0)
+    b tlbs_ret
     nop
-    eret
 
 .align 5
 tlbs_sram: