Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use F3DEX3 memset to clear Z buffer for RDP time savings #130

Merged
merged 7 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified F3DEX3/F3DEX3_BrW.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC_PA.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC_PA.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC_PB.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC_PB.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC_PC.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_NOC_PC.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_PA.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_PA.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_PB.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_PB.data.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_PC.code.bps
Binary file not shown.
Binary file modified F3DEX3/F3DEX3_BrW_PC.data.bps
Binary file not shown.
1 change: 1 addition & 0 deletions include/functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,7 @@ Gfx* Gfx_TwoTexScrollEnvColor(GraphicsContext* gfxCtx, s32 tile1, u32 x1, u32 y1
u32 x2, u32 y2, s32 width2, s32 height2, s32 r, s32 g, s32 b, s32 a);
Gfx* Gfx_EnvColor(GraphicsContext* gfxCtx, s32 r, s32 g, s32 b, s32 a);
void Gfx_SetupFrame(GraphicsContext* gfxCtx, s32 clearFB, u8 r, u8 g, u8 b);
void Gfx_ClearZBuffer(GraphicsContext* gfxCtx);
void func_80095974(GraphicsContext* gfxCtx);
void func_80095AA0(PlayState* play, Room* room, Input* input, s32 arg3);
void Room_DrawBackground2D(Gfx** gfxP, void* tex, void* tlut, u16 width, u16 height, u8 fmt, u8 siz, u16 tlutMode,
Expand Down
20 changes: 20 additions & 0 deletions include/ultra64/gbi.f3dex3.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ of warnings if you use -Wpedantic. */
/*#define G_SPECIAL_3 0xD3 no-op in F3DEX2 */
/*#define G_SPECIAL_2 0xD4 no-op in F3DEX2 */
/*#define G_SPECIAL_1 0xD5 triggered MVP recalculation, not supported in F3DEX3 */
/* F3DEX3 memset command, emitted by gSPMemset / gsSPMemset. Takes over opcode
 * 0xD5, which was G_SPECIAL_1 (not supported in F3DEX3). */
#define G_MEMSET 0xD5
#define G_DMA_IO 0xD6
#define G_TEXTURE 0xD7
#define G_POPMTX 0xD8
Expand Down Expand Up @@ -2385,6 +2386,25 @@ _DW({ \
#define gSPDmaWrite(pkt,dmem,dram,size) gSPDma_io((pkt),1,(dmem),(dram),(size))
#define gsSPDmaWrite(dmem,dram,size) gsSPDma_io( 1,(dmem),(dram),(size))

/**
* Use RSP DMAs to set a region of memory to a repeated 16-bit value. This can
* clear the color framebuffer or Z-buffer faster than the RDP can in fill mode.
* SPMemset overwrites the DMEM vertex buffer, so vertices loaded before this
* command cannot be used after it (though this would not normally be done).
*
* Expands to two commands written through pkt: a G_RDPHALF_1 carrying the
* fill value, followed by a G_MEMSET carrying the address and size. pkt
* appears once per command in the expansion, so an argument with side effects
* (e.g. POLY_OPA_DISP++) is evaluated twice.
*
* pkt: Display list write position, e.g. POLY_OPA_DISP++.
* dram: Segmented or physical start address. Must be aligned to 16 bytes.
* value: 16-bit value to fill the memory with. e.g. 0 for color, 0xFFFC for Z.
* Only the low 16 bits are kept (masked with 0xFFFF).
* size: Size in bytes to fill, must be nonzero and a multiple of 16 bytes.
* Masked with 0xFFFFF0, so the low 4 bits and anything above 24 bits are
* silently dropped rather than rounded or reported.
*/
#define gSPMemset(pkt, dram, value, size) \
_DW({ \
gImmp1(pkt, G_RDPHALF_1, ((value) & 0xFFFF)); \
gDma0p(pkt, G_MEMSET, (dram), ((size) & 0xFFFFF0)); \
})

/**
* Static (initializer-list) form of gSPMemset. Expands to two comma-separated
* entries: a G_RDPHALF_1 carrying the low 16 bits of value, followed by a
* G_MEMSET carrying dram and size masked with 0xFFFFF0. The alignment and size
* requirements are the same as for gSPMemset.
*
* NOTE(review): pkt is accepted but never referenced in the expansion, unlike
* gsSPDmaWrite, which takes no pkt argument. Callers currently have to pass a
* placeholder first argument; consider dropping the parameter once it is
* confirmed that no caller depends on the four-argument form.
*/
#define gsSPMemset(pkt, dram, value, size) \
gsImmp1(G_RDPHALF_1, ((value) & 0xFFFF)), \
gsDma0p(G_MEMSET, (dram), ((size) & 0xFFFFF0))

/*
* RSP short command (no DMA required) macros
Expand Down
8 changes: 1 addition & 7 deletions include/variables.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,7 @@ extern volatile s8 gLoadedF3DEX3Version;
extern volatile s8 gF3DEX3ProfVersion;
extern volatile s8 gF3DEX3NOCVersion;
extern s8 gF3DEX3OccMode;
#endif

#if ENABLE_F3DEX3
extern u8 gF3DEX3TextBuffer[];
extern volatile s8 gF3DEX3ProfVersion;
extern volatile s8 gF3DEX3NOCVersion;
extern s8 gF3DEX3OccMode;
extern u8 gUseMemsetForZBuffer;
#endif

extern SfxBankEntry D_8016BAD0[9];
Expand Down
11 changes: 10 additions & 1 deletion src/code/z_play.c
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@ void Play_Draw(PlayState* this) {
clearG = this->lightCtx.fogColor[1];
clearB = this->lightCtx.fogColor[2];
}
// Clear the fb only if we aren't drawing a skybox, but always clear zb
// Clear the fb only if we aren't drawing a skybox
Gfx_SetupFrame(gfxCtx, clearFB, clearR, clearG, clearB);
}

Expand Down Expand Up @@ -1378,6 +1378,15 @@ void Play_Draw(PlayState* this) {
Environment_DrawSkyboxFilters(this);
}

// The Z buffer has to be cleared at some point before anything using it
// is drawn (the lightning strike is the first thing that does). But if we are
// using F3DEX3's SPMemset to clear it, it should be done as late as
// possible, after the RSP has already sent commands to the RDP for the
// skybox or framebuffer clear. This is so that the RSP can clear the Z
// buffer while the RDP is working on the framebuffer, without making
// the RDP wait for new work to be available.
Gfx_ClearZBuffer(gfxCtx);

if (!IS_DEBUG || (R_HREG_MODE != HREG_MODE_PLAY) || (R_PLAY_DRAW_ENV_FLAGS & PLAY_ENV_DRAW_LIGHTNING)) {
Environment_UpdateLightningStrike(this);
Environment_DrawLightning(this, 0);
Expand Down
1 change: 1 addition & 0 deletions src/code/z_prenmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ void PreNMI_Draw(PreNMIState* this) {

gSPSegment(POLY_OPA_DISP++, 0x00, NULL);
Gfx_SetupFrame(gfxCtx, true, 0, 0, 0);
Gfx_ClearZBuffer(gfxCtx);
Gfx_SetupDL_36Opa(gfxCtx);
gDPSetFillColor(POLY_OPA_DISP++, (GPACK_RGBA5551(255, 255, 255, 1) << 16) | GPACK_RGBA5551(255, 255, 255, 1));
gDPFillRectangle(POLY_OPA_DISP++, 0, this->timer + 100, SCREEN_WIDTH - 1, this->timer + 100);
Expand Down
44 changes: 34 additions & 10 deletions src/code/z_rcp.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#include "global.h"

#if ENABLE_F3DEX3
// Selects how Gfx_ClearZBuffer clears the Z buffer: nonzero uses the F3DEX3
// gSPMemset command, zero uses an RDP fill rectangle. Defaults to memset.
u8 gUseMemsetForZBuffer = 1;
#endif

Gfx sSetupDL[SETUPDL_MAX][6] = {
{
/* SETUPDL_0 */
Expand Down Expand Up @@ -1472,7 +1476,6 @@ void Gfx_SetupFrame(GraphicsContext* gfxCtx, s32 clearFB, u8 r, u8 g, u8 b) {

// Set up the framebuffer, primitives will be drawn here
gDPSetColorImage(POLY_OPA_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gfxCtx->curFrameBuffer);
gDPSetColorImage(POLY_OPA_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gfxCtx->curFrameBuffer);
gDPSetColorImage(POLY_XLU_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gfxCtx->curFrameBuffer);
gDPSetColorImage(OVERLAY_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gfxCtx->curFrameBuffer);

Expand Down Expand Up @@ -1529,17 +1532,8 @@ void Gfx_SetupFrame(GraphicsContext* gfxCtx, s32 clearFB, u8 r, u8 g, u8 b) {
}
#endif

// Set the whole z buffer to maximum depth
// Don't bother with pixels that are being covered by the letterbox
gDPSetColorImage(POLY_OPA_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gZBuffer);
gDPSetRenderMode(POLY_OPA_DISP++, G_RM_NOOP, G_RM_NOOP2);
gDPSetFillColor(POLY_OPA_DISP++, (GPACK_ZDZ(G_MAXFBZ, 0) << 16) | GPACK_ZDZ(G_MAXFBZ, 0));
gDPFillRectangle(POLY_OPA_DISP++, 0, letterboxSize, gScreenWidth - 1, gScreenHeight - letterboxSize - 1);
gDPPipeSync(POLY_OPA_DISP++);

// Fill the whole screen with the base color, only done when there is no skybox or if it is a solid color.
// Don't bother with pixels that are being covered by the letterbox
gDPSetColorImage(POLY_OPA_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gfxCtx->curFrameBuffer);
if (clearFB) {
gDPSetRenderMode(POLY_OPA_DISP++, G_RM_NOOP, G_RM_NOOP2);
gDPSetFillColor(POLY_OPA_DISP++, (GPACK_RGBA5551(r, g, b, 1) << 16) | GPACK_RGBA5551(r, g, b, 1));
Expand All @@ -1560,6 +1554,36 @@ void Gfx_SetupFrame(GraphicsContext* gfxCtx, s32 clearFB, u8 r, u8 g, u8 b) {
CLOSE_DISPS(gfxCtx, "../z_rcp.c", 2497);
}

/**
 * Resets the Z buffer to maximum depth, skipping the rows hidden behind the
 * letterbox bars. The commands are appended to POLY_OPA_DISP.
 *
 * With F3DEX3 enabled and gUseMemsetForZBuffer nonzero, the clear is done by
 * the RSP through gSPMemset. Otherwise the RDP fills a rectangle with the
 * Z buffer temporarily bound as the color image, and the color image is
 * pointed back at gfxCtx->curFrameBuffer afterwards.
 *
 * @param gfxCtx Graphics context whose display lists receive the commands.
 */
void Gfx_ClearZBuffer(GraphicsContext* gfxCtx) {
    s32 letterboxSize = Letterbox_GetSize();

    OPEN_DISPS(gfxCtx, "../z_rcp.c", __LINE__);

#if ENABLE_F3DEX3
    if (gUseMemsetForZBuffer) {
        // The Z buffer holds 2 bytes per pixel.
        s32 bytesPerRow = gScreenWidth * 2;
        // An out-of-range letterbox size is treated as "no letterbox" so the
        // start address and byte count passed to the memset stay sane.
        s32 skippedRows = ((letterboxSize < 0) || (letterboxSize > 100)) ? 0 : letterboxSize;

        gSPMemset(POLY_OPA_DISP++, (u8*)gZBuffer + skippedRows * bytesPerRow, GPACK_ZDZ(G_MAXFBZ, 0),
                  (gScreenHeight - 2 * skippedRows) * bytesPerRow);
    } else
#endif
    {
        // RDP path: run sFillSetupDL, bind the Z buffer as the color image and
        // fill the visible rows with the maximum depth value.
        gSPDisplayList(POLY_OPA_DISP++, sFillSetupDL);
        gDPSetColorImage(POLY_OPA_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gZBuffer);
        gDPSetRenderMode(POLY_OPA_DISP++, G_RM_NOOP, G_RM_NOOP2);
        gDPSetFillColor(POLY_OPA_DISP++, (GPACK_ZDZ(G_MAXFBZ, 0) << 16) | GPACK_ZDZ(G_MAXFBZ, 0));
        gDPFillRectangle(POLY_OPA_DISP++, 0, letterboxSize, gScreenWidth - 1, gScreenHeight - letterboxSize - 1);
        gDPPipeSync(POLY_OPA_DISP++);

        // Later primitives must draw to the framebuffer again, not the Z buffer.
        gDPSetColorImage(POLY_OPA_DISP++, G_IM_FMT_RGBA, G_IM_SIZ_16b, gScreenWidth, gfxCtx->curFrameBuffer);
    }

    CLOSE_DISPS(gfxCtx, "../z_rcp.c", __LINE__);
}

void func_80095974(GraphicsContext* gfxCtx) {
OPEN_DISPS(gfxCtx, "../z_rcp.c", 2503);

Expand Down
1 change: 1 addition & 0 deletions src/code/z_sample.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ void Sample_Draw(SampleState* this) {
gSPSegment(POLY_OPA_DISP++, 0x01, this->staticSegment);

Gfx_SetupFrame(gfxCtx, true, 0, 0, 0);
Gfx_ClearZBuffer(gfxCtx);

view->flags = VIEW_VIEWING | VIEW_VIEWPORT | VIEW_PROJECTION_PERSPECTIVE;
View_Apply(view, VIEW_ALL);
Expand Down
3 changes: 3 additions & 0 deletions src/overlays/gamestates/ovl_file_choose/z_file_choose.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ void FileSelect_InitModeDraw(GameState* thisx) {
FileSelectState* this = (FileSelectState*)thisx;

Gfx_SetupFrame(this->state.gfxCtx, true, 0, 0, 0);
Gfx_ClearZBuffer(this->state.gfxCtx);
}

/**
Expand Down Expand Up @@ -1136,6 +1137,7 @@ void FileSelect_ConfigModeDraw(GameState* thisx) {
Gfx_SetupFrame(this->state.gfxCtx, false, 0, 0, 0);
Skybox_Draw(&this->skyboxCtx, this->state.gfxCtx, NULL, SKYBOX_NORMAL_SKY, this->envCtx.skyboxBlend, eyeX, eyeY,
eyeZ);
Gfx_ClearZBuffer(this->state.gfxCtx);
gDPSetTextureLUT(POLY_OPA_DISP++, G_TT_NONE);
ZREG(11) += ZREG(10);
Environment_UpdateSkybox(SKYBOX_NORMAL_SKY, &this->envCtx, &this->skyboxCtx);
Expand Down Expand Up @@ -1551,6 +1553,7 @@ void FileSelect_SelectModeDraw(GameState* thisx) {
Gfx_SetupFrame(this->state.gfxCtx, false, 0, 0, 0);
Skybox_Draw(&this->skyboxCtx, this->state.gfxCtx, NULL, SKYBOX_NORMAL_SKY, this->envCtx.skyboxBlend, eyeX, eyeY,
eyeZ);
Gfx_ClearZBuffer(this->state.gfxCtx);
gDPSetTextureLUT(POLY_OPA_DISP++, G_TT_NONE);
ZREG(11) += ZREG(10);
Environment_UpdateSkybox(SKYBOX_NORMAL_SKY, &this->envCtx, &this->skyboxCtx);
Expand Down
1 change: 1 addition & 0 deletions src/overlays/gamestates/ovl_opening/z_opening.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ void TitleSetup_Main(GameState* thisx) {
TitleSetupState* this = (TitleSetupState*)thisx;

Gfx_SetupFrame(this->state.gfxCtx, true, 0, 0, 0);
Gfx_ClearZBuffer(this->state.gfxCtx);
TitleSetup_SetupTitleScreen(this);
func_80803C5C(this);
}
Expand Down
13 changes: 1 addition & 12 deletions src/overlays/gamestates/ovl_select/z_select.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ void MapSelect_Draw(MapSelectState* this) {
Gfx_SetupFrame(gfxCtx, true, 0, 0, 0);
SET_FULLSCREEN_VIEWPORT(&this->view);
View_Apply(&this->view, VIEW_ALL);
Gfx_SetupDL_28Opa(gfxCtx);

if (!this->state.running) {
MapSelect_DrawLoadingScreen(this);
Expand Down Expand Up @@ -333,12 +334,6 @@ void MapSelect_DrawMenu(MapSelectState* this) {

OPEN_DISPS(gfxCtx, __FILE__, __LINE__);

gSPSegment(POLY_OPA_DISP++, 0x00, NULL);
Gfx_SetupFrame(gfxCtx, true, 0, 0, 0);
SET_FULLSCREEN_VIEWPORT(&this->view);
View_Apply(&this->view, VIEW_ALL);
Gfx_SetupDL_28Opa(gfxCtx);

printer = alloca(sizeof(GfxPrint));
GfxPrint_Init(printer);
GfxPrint_Open(printer, POLY_OPA_DISP);
Expand Down Expand Up @@ -366,12 +361,6 @@ void MapSelect_DrawLoadingScreen(MapSelectState* this) {

OPEN_DISPS(gfxCtx, __FILE__, __LINE__);

gSPSegment(POLY_OPA_DISP++, 0x00, NULL);
Gfx_SetupFrame(gfxCtx, true, 0, 0, 0);
SET_FULLSCREEN_VIEWPORT(&this->view);
View_Apply(&this->view, VIEW_ALL);
Gfx_SetupDL_28Opa(gfxCtx);

printer = alloca(sizeof(GfxPrint));
GfxPrint_Init(printer);
GfxPrint_Open(printer, POLY_OPA_DISP);
Expand Down
1 change: 1 addition & 0 deletions src/overlays/gamestates/ovl_title/z_title.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ void ConsoleLogo_Main(GameState* thisx) {
gSPSegment(POLY_OPA_DISP++, 0, NULL);
gSPSegment(POLY_OPA_DISP++, 1, this->staticSegment);
Gfx_SetupFrame(this->state.gfxCtx, true, 0, 0, 0);
Gfx_ClearZBuffer(this->state.gfxCtx);
ConsoleLogo_Calc(this);
ConsoleLogo_Draw(this);

Expand Down
Loading