@@ -62,6 +62,29 @@ typedef enum
6262 GSPGPU_EVENT_MAX , ///< Used to know how many events there are.
6363} GSPGPU_Event ;
6464
65+ /**
66+ * GSPGPU performance log entry.
67+ *
68+ * Use the lastDurationUs field when benchmarking single GPU operations; this is usually meant
69+ * for 3D library writers.
70+ *
71+ * Use the difference between two totalDurationUs values when using a GPU library (e.g. citro3d), as
72+ * there can be multiple GPU operations (e.g. P3D, PPF) per render pass, or per frame, and so on.
73+ * Don't use totalDurationUs as-is (rather, take the difference as just described), because it
74+ * can overflow.
75+ */
76+ typedef struct
77+ {
78+ u32 lastDurationUs ; ///< Duration of the last corresponding PICA200 operation (time between the operation being started and its IRQ being received).
79+ u32 totalDurationUs ; ///< Sum of lastDurationUs for the corresponding PICA200 operation. Can overflow.
80+ } GSPGPU_PerfLogEntry ;
81+
82+ /// GSPGPU performance log, as output by GSPGPU_GetPerfLog.
83+ typedef struct
84+ {
85+ GSPGPU_PerfLogEntry entries [GSPGPU_EVENT_MAX ]; ///< Performance log entries (one per operation/"event", i.e. one per \ref GSPGPU_Event value).
86+ } GSPGPU_PerfLog ;
87+
6588/**
6689 * @brief Gets the number of bytes per pixel for the specified format.
6790 * @param format See \ref GSPGPU_FramebufferFormat.
@@ -268,3 +291,39 @@ Result GSPGPU_TriggerCmdReqQueue(void);
268291 * @param disable False = 3D LED enable, true = 3D LED disable.
269292 */
270293Result GSPGPU_SetLedForceOff (bool disable );
294+
295+ /**
296+ * @brief Enables or disables the performance log and clears
297+ * its state to zero.
298+ * @param enabled Whether to enable the performance log.
299+ * @note It is assumed that no GPU operation is in progress when calling this function.
300+ * @bug The official sysmodule forgets to clear the "start tick" states to 0, though
301+ * this should not be much of an issue (as per the note above).
302+ */
303+ Result GSPGPU_SetPerfLogMode (bool enabled );
304+
305+ /**
306+ * @brief Retrieves the performance log.
307+ * @param[out] outPerfLog Pointer to output the performance log to.
308+ * @note Use the difference between two totalDurationUs values when using a GPU library (e.g. citro3d), as
309+ * there can be multiple GPU operations (e.g. P3D, PPF) per render pass, or per frame, and so on.
310+ * Don't use totalDurationUs as-is (rather, take the difference as just described), because it
311+ * can overflow.
312+ * @note For a MemoryFill operation that uses both PSC0 and PSC1, take the maximum
313+ * of the two "last duration" entries.
314+ * @note For PDC0/PDC1 (VBlank0/1), the "last duration" entry corresponds to the time between
315+ * the current PDC (VBlank) IRQ and the previous one. The official GSP sysmodule
316+ * assumes the PDC0 and PDC1 IRQs happen at the same rate (this is almost always
317+ * the case, but not always if the user changes PDC timings), and sets both entries
318+ * in the PDC0 handler.
319+ * @bug The official sysmodule doesn't handle the PDC0/1 entries correctly after init. On the first
320+ * frame after the log is enabled via \ref GSPGPU_SetPerfLogMode, "last duration" will have a nonsensical
321+ * value; and "total duration" stays nonsensical. This isn't much of a problem, except for the
322+ * first frame, because "total duration" is not supposed to be used as-is (you are supposed
323+ * to take the difference of this field between two time points of your choosing, instead).
324+ * @bug Since the PSC units run at approx. 3.25 GiB/s per bank, some small PSC operations might
325+ * complete before the official GSP has time to record the start time.
326+ * @bug The official sysmodule doesn't properly handle data synchronization for the perflog;
327+ * in practice, however, this should be fine.
328+ */
329+ Result GSPGPU_GetPerfLog (GSPGPU_PerfLog * outPerfLog );
0 commit comments