Skip to content

Commit 4765db8

Browse files
committed
gspgpu: add PerfLog API
1 parent a88ab66 commit 4765db8

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

libctru/include/3ds/services/gspgpu.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,29 @@ typedef enum
6262
GSPGPU_EVENT_MAX, ///< Used to know how many events there are.
6363
} GSPGPU_Event;
6464

65+
/**
66+
* GSPGPU performance log entry.
67+
*
68+
* Use the lastDurationUs field when benchmarking single GPU operations; this is usually meant
69+
* for 3D library writers.
70+
*
71+
* Use the difference between two totalDurationUs when using a GPU library (e.g. citro3d), as
72+
* there can be multiple GPU operations (e.g. P3D, PPF) per render pass, or per frame, and so on.
73+
* Don't use totalDurationUs as-is (rather, take the difference as just described), because it
74+
* can overflow.
75+
*/
76+
typedef struct
77+
{
78+
u32 lastDurationUs; ///< Duration of the last corresponding PICA200 operation (time between the operation being started and its IRQ being received).
79+
u32 totalDurationUs; ///< Sum of lastDurationUs for the corresponding PICA200 operation. Can overflow, so only the difference between two readings is meaningful.
80+
} GSPGPU_PerfLogEntry;
81+
82+
/// GSPGPU performance log, as written to the output argument of GSPGPU_GetPerfLog().
83+
typedef struct
84+
{
85+
GSPGPU_PerfLogEntry entries[GSPGPU_EVENT_MAX]; ///< Performance log entries (one per operation/"event"); presumably indexed by \ref GSPGPU_Event values -- confirm against the sysmodule.
86+
} GSPGPU_PerfLog;
87+
6588
/**
6689
* @brief Gets the number of bytes per pixel for the specified format.
6790
* @param format See \ref GSPGPU_FramebufferFormat.
@@ -268,3 +291,39 @@ Result GSPGPU_TriggerCmdReqQueue(void);
268291
* @param disable False = 3D LED enable, true = 3D LED disable.
269292
*/
270293
Result GSPGPU_SetLedForceOff(bool disable);
294+
295+
/**
296+
* @brief Enables or disables the performance log and clears
297+
* its state to zero.
298+
* @param enabled Whether to enable the performance log.
299+
* @note It is assumed that no GPU operation is in progress when calling this function.
300+
* @bug The official sysmodule forgets to clear the "start tick" states to 0, though
301+
* this should not be much of an issue (as per the note above).
302+
*/
303+
Result GSPGPU_SetPerfLogMode(bool enabled);
304+
305+
/**
306+
* @brief Retrieves the performance log.
307+
* @param[out] outPerfLog Pointer to output the performance log to.
308+
* @note Use the difference between two totalDurationUs when using a GPU library (e.g. citro3d), as
309+
* there can be multiple GPU operations (e.g. P3D, PPF) per render pass, or per frame, and so on.
310+
* Don't use totalDurationUs as-is (rather, take the difference as just described), because it
311+
* can overflow.
312+
* @note For a MemoryFill operation that uses both PSC0 and PSC1, take the maximum
313+
* of the two "last duration" entries.
314+
* @note For PDC0/PDC1 (VBlank0/1), the "last duration" entry corresponds to the time between
315+
* the current PDC (VBlank) IRQ and the previous one. The official GSP sysmodule
316+
* assumes both PDC0 and PDC1 IRQs happen at the same rate (this is almost always
317+
* the case, but not always if the user changes PDC timings), and sets both entries
318+
* in the PDC0 handler.
319+
* @bug The official sysmodule doesn't handle the PDC0/1 entries correctly after init. On the first
320+
* frame \ref GSPGPU_SetPerfLogMode is enabled, "last duration" will have a nonsensical
321+
* value; and "total duration" stays nonsensical. This isn't much of a problem, except for the
322+
* first frame, because "total duration" is not supposed to be used as-is (you are supposed
323+
* to take the difference of this field between two time points of your choosing, instead).
324+
* @bug Since it is running at approx. 3.25 GiB/s per bank, some small PSC operations might
325+
* complete before the official GSP has time to record the start time.
326+
* @bug The official sysmodule doesn't properly handle data synchronization for the perflog,
327+
* in practice this should be fine, however.
328+
*/
329+
Result GSPGPU_GetPerfLog(GSPGPU_PerfLog *outPerfLog);

libctru/source/services/gspgpu.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,34 @@ Result GSPGPU_TriggerCmdReqQueue(void)
553553
return cmdbuf[1];
554554
}
555555

556+
// Enables or disables the GSPGPU performance log (see the declaration in gspgpu.h).
// Sends one request on gspGpuHandle carrying the enable flag as its single parameter word.
// Returns the svcSendSyncRequest() error if the request itself fails, otherwise
// the result code the service placed in cmdbuf[1].
Result GSPGPU_SetPerfLogMode(bool enabled)
557+
{
558+
u32 *cmdbuf = getThreadCommandBuffer();
559+
560+
cmdbuf[0] = IPC_MakeHeader(0x11,1,0); // 0x110040
561+
cmdbuf[1] = enabled ? 1 : 0; // Normalize the bool to exactly 0 or 1 before sending it.
562+
563+
Result ret=0;
564+
// Transport-level failure: bail out without reading the reply.
if (R_FAILED(ret = svcSendSyncRequest(gspGpuHandle))) return ret;
565+
566+
// cmdbuf is reused for the reply; word 1 now holds the service's result code.
return cmdbuf[1];
567+
}
568+
569+
// Retrieves the GSPGPU performance log (see the declaration in gspgpu.h).
// Sends a parameterless request on gspGpuHandle, then copies sizeof(GSPGPU_PerfLog)
// bytes of the reply, starting at cmdbuf[2], into *outPerfLog.
// Returns the svcSendSyncRequest() error if the request itself fails (in which case
// *outPerfLog is left untouched), otherwise the result code found in cmdbuf[1].
// NOTE(review): outPerfLog is not checked for NULL here; callers must pass a valid pointer.
Result GSPGPU_GetPerfLog(GSPGPU_PerfLog *outPerfLog)
570+
{
571+
u32 *cmdbuf = getThreadCommandBuffer();
572+
573+
cmdbuf[0] = IPC_MakeHeader(0x12,0,0); // 0x120000
574+
575+
Result ret=0;
576+
// Transport-level failure: bail out without reading the reply.
if (R_FAILED(ret = svcSendSyncRequest(gspGpuHandle))) return ret;
577+
578+
// The copy happens regardless of the result code in cmdbuf[1], so on a service-side
// failure the contents written to *outPerfLog should not be relied upon.
memcpy(outPerfLog, &cmdbuf[2], sizeof(GSPGPU_PerfLog));
579+
580+
return cmdbuf[1];
581+
}
582+
583+
556584
Result GSPGPU_RegisterInterruptRelayQueue(Handle eventHandle, u32 flags, Handle* outMemHandle, u8* threadID)
557585
{
558586
u32* cmdbuf=getThreadCommandBuffer();

0 commit comments

Comments
 (0)