diff --git a/include/ultra64/gbi.f3dex3.h b/include/ultra64/gbi.f3dex3.h
index 689d058ec7..15d01f9447 100644
--- a/include/ultra64/gbi.f3dex3.h
+++ b/include/ultra64/gbi.f3dex3.h
@@ -263,6 +263,7 @@ longer a multiple of 8 (DMA word). This was not used in any command anyway. */
 #define G_MWO_ATTR_OFFSET_Z      0x14
 #define G_MWO_ALPHA_COMPARE_CULL 0x16
 #define G_MWO_NORMALS_MODE       0x18
+#define G_MWO_LAST_MAT_DL_ADDR   0x1A
 
 /*
  * RDP command argument defines
@@ -1191,6 +1192,16 @@ typedef struct {
     short kc;
 } OcclusionPlane_t;
 
+typedef struct {
+    /* Four vertices of a quad, XYZ components in world space */
+    struct {
+        short x;
+        short y;
+        short z;
+    } v[4];
+    float weight; /* Higher if there's a lot of stuff behind it */
+} OcclusionPlaneCandidate;
+
 typedef union {
     Light_t l;
     long long int force_structure_alignment[2];
@@ -2844,6 +2855,38 @@ _DW({ \
 #define gsSPNormalsMode(mode) \
     gsMoveHalfwd(G_MW_FX, G_MWO_NORMALS_MODE, (mode) & 0xFF)
 
+/*
+ * F3DEX3 has a basic auto-batched rendering system. At a high level, if a
+ * material display list being run is the same as the last material, the texture
+ * loads are automatically skipped the second time as they should already be in
+ * TMEM.
+ *
+ * This design generally works, but can break if you call a display list twice
+ * but in between change a segment mapping so that a referenced image inside is
+ * actually different the two times. In these cases, run the below command
+ * between the two calls (e.g. when you change the segment) and the microcode
+ * will not skip the second texture loads.
+ *
+ * Internally, a material is defined to start with any set image command, and
+ * end on any of the following: call, branch, return, vertex, all tri commands,
+ * modify vertex, branch Z/W, or cull. The physical address of the display list
+ * --not the address of the image--is stored when a material is started. If a
+ * material starts and its physical address is the same as the stored last start
+ * address, i.e. we're executing the same material display list as the last
+ * material, material cull mode is set. In this mode, load block, load tile, and
+ * load TLUT all are skipped. This mode is cleared when the material ends.
+ *
+ * This design has the benefit that it works correctly even with complex
+ * materials, e.g. with two CI4 textures (four loads), whereas it would be
+ * difficult to implement tracking all these loads separately. Furthermore, a
+ * design based on tracking the image addresses could break if you loaded
+ * different tile sections of the same image in consecutive materials.
+ */
+#define gSPDontSkipTexLoadsAcross(pkt) \
+    gMoveWd(pkt, G_MW_FX, G_MWO_LAST_MAT_DL_ADDR, 0xFFFFFFFF)
+#define gsSPDontSkipTexLoadsAcross() \
+    gsMoveWd(G_MW_FX, G_MWO_LAST_MAT_DL_ADDR, 0xFFFFFFFF)
+
 typedef union {
     struct {
         s16 intPart[3][4]; /* Fourth row containing translations is omitted. */
diff --git a/src/overlays/actors/ovl_En_Ex_Ruppy/z_en_ex_ruppy.c b/src/overlays/actors/ovl_En_Ex_Ruppy/z_en_ex_ruppy.c
index ea7d242248..86ee9e21b8 100644
--- a/src/overlays/actors/ovl_En_Ex_Ruppy/z_en_ex_ruppy.c
+++ b/src/overlays/actors/ovl_En_Ex_Ruppy/z_en_ex_ruppy.c
@@ -379,6 +379,13 @@ void EnExRuppy_Draw(Actor* thisx, PlayState* play) {
     static void* rupeeTextures[] = {
         gRupeeGreenTex, gRupeeBlueTex, gRupeeRedTex, gRupeePinkTex, gRupeeOrangeTex,
     };
+#if ENABLE_F3DEX3
+    // It might seem that we'd need to ensure this is reset every frame. But we
+    // actually only care about when this changes within a frame, as the texture
+    // loads would only ever be skipped between two or more rupees drawn
+    // consecutively.
+    static s16 lastColorIdx = -1;
+#endif
     s32 pad;
     EnExRuppy* this = (EnExRuppy*)thisx;
 
@@ -390,6 +397,17 @@ void EnExRuppy_Draw(Actor* thisx, PlayState* play) {
         gSPMatrix(POLY_OPA_DISP++, MATRIX_NEW(play->state.gfxCtx, "../z_en_ex_ruppy.c", 780),
                   G_MTX_NOPUSH | G_MTX_LOAD | G_MTX_MODELVIEW);
         gSPSegment(POLY_OPA_DISP++, 0x08, SEGMENTED_TO_VIRTUAL(rupeeTextures[this->colorIdx]));
+#if ENABLE_F3DEX3
+        // If we have consecutive rupees rendering with different textures,
+        // F3DEX3's optimizer will incorrectly believe the texture loads can be
+        // skipped, so this command tells it not to skip them. However, if the
+        // rupee really is the same as last time, then we can let the optimizer
+        // skip the load.
+        if (this->colorIdx != lastColorIdx) {
+            gSPDontSkipTexLoadsAcross(POLY_OPA_DISP++);
+            lastColorIdx = this->colorIdx;
+        }
+#endif
         gSPDisplayList(POLY_OPA_DISP++, gRupeeDL);
 
         CLOSE_DISPS(play->state.gfxCtx, "../z_en_ex_ruppy.c", 784);
diff --git a/src/overlays/effects/ovl_Effect_Ss_Stone1/z_eff_ss_stone1.c b/src/overlays/effects/ovl_Effect_Ss_Stone1/z_eff_ss_stone1.c
index 45846446fc..f7295b3791 100644
--- a/src/overlays/effects/ovl_Effect_Ss_Stone1/z_eff_ss_stone1.c
+++ b/src/overlays/effects/ovl_Effect_Ss_Stone1/z_eff_ss_stone1.c
@@ -55,6 +55,13 @@ void EffectSsStone1_Draw(PlayState* play, u32 index, EffectSs* this) {
     Vec3f mfVec;
     f32 mfW;
     f32 scale;
+#if ENABLE_F3DEX3
+    // It might seem that we'd need to ensure this is reset every frame. But we
+    // actually only care about when this changes within a frame, as the texture
+    // loads would only ever be skipped between two or more particles drawn
+    // consecutively.
+    static s16 lastTextureIndex = -1;
+#endif
 
     OPEN_DISPS(gfxCtx, "../z_eff_ss_stone1.c", 154);
 
@@ -67,6 +74,17 @@ void EffectSsStone1_Draw(PlayState* play, u32 index, EffectSs* this) {
               G_MTX_NOPUSH | G_MTX_LOAD | G_MTX_MODELVIEW);
     Gfx_SetupDL_61Xlu(gfxCtx);
     gSPSegment(POLY_XLU_DISP++, 0x08, SEGMENTED_TO_VIRTUAL(drawParams->texture));
+#if ENABLE_F3DEX3
+    // If we have consecutive particles rendering with different textures,
+    // F3DEX3's optimizer will incorrectly believe the texture loads can be
+    // skipped, so this command tells it not to skip them. However, if the
+    // particle really is the same as last time, then we can let the optimizer
+    // skip the load.
+    if (this->life != lastTextureIndex) {
+        gSPDontSkipTexLoadsAcross(POLY_XLU_DISP++);
+        lastTextureIndex = this->life;
+    }
+#endif
     gDPSetPrimColor(POLY_XLU_DISP++, 0, 0, drawParams->primColor.r, drawParams->primColor.g, drawParams->primColor.b,
                     255);
     gDPSetEnvColor(POLY_XLU_DISP++, drawParams->envColor.r, drawParams->envColor.g, drawParams->envColor.b, 255);