From b226a45caf8183daccf65d9138e18f9b2e1c537f Mon Sep 17 00:00:00 2001 From: Kai Ninomiya Date: Thu, 2 Nov 2023 09:18:45 -0700 Subject: [PATCH] Audit TypedArray allocations (#3114) * memory_model: remove unnecessary zero-init (no speedup) * flow_control: replace a slice() with a subarray() (no speedup) * Comment on some cold functions --- .../memory_sync/buffer/buffer_sync_test.ts | 4 ++ .../shader/execution/flow_control/harness.ts | 5 +-- .../memory_model/memory_model_setup.ts | 40 +++++++++---------- .../reftests/canvas_colorspace.html.ts | 2 + .../reftests/canvas_complex.html.ts | 1 + 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/webgpu/api/operation/memory_sync/buffer/buffer_sync_test.ts b/src/webgpu/api/operation/memory_sync/buffer/buffer_sync_test.ts index e18dc59abf2e..6dda4b7205d9 100644 --- a/src/webgpu/api/operation/memory_sync/buffer/buffer_sync_test.ts +++ b/src/webgpu/api/operation/memory_sync/buffer/buffer_sync_test.ts @@ -285,6 +285,7 @@ export class BufferSyncTest extends GPUTest { // Create a 1x1 texture, and initialize it to a specified value for all elements. async createTextureWithValue(initValue: number): Promise { + // This is not hot in profiles; optimize if this gets used more heavily. const data = new Uint32Array(1).fill(initValue); const texture = this.trackForCleanup( this.device.createTexture({ @@ -446,6 +447,7 @@ export class BufferSyncTest extends GPUTest { // Write buffer via writeBuffer API on queue writeByWriteBuffer(buffer: GPUBuffer, value: number) { + // This is not hot in profiles; optimize if this gets used more heavily. const data = new Uint32Array(1).fill(value); this.device.queue.writeBuffer(buffer, 0, data); } @@ -919,12 +921,14 @@ export class BufferSyncTest extends GPUTest { } verifyData(buffer: GPUBuffer, expectedValue: number) { + // This is not hot in profiles; optimize if this gets used more heavily. const bufferData = new Uint32Array(1); bufferData[0] = expectedValue; this.expectGPUBufferValuesEqual(buffer, bufferData); } verifyDataTwoValidValues(buffer: GPUBuffer, expectedValue1: number, expectedValue2: number) { + // This is not hot in profiles; optimize if this gets used more heavily. const bufferData1 = new Uint32Array(1); bufferData1[0] = expectedValue1; const bufferData2 = new Uint32Array(1); diff --git a/src/webgpu/shader/execution/flow_control/harness.ts b/src/webgpu/shader/execution/flow_control/harness.ts index 94dfcdfc4d8f..885e32bd4aec 100644 --- a/src/webgpu/shader/execution/flow_control/harness.ts +++ b/src/webgpu/shader/execution/flow_control/harness.ts @@ -221,9 +221,8 @@ ${main_wgsl.extra} // returns a string that shows the outputted values to help understand the whole trace. const print_output_value = () => { - return `Output values (length: ${outputCount}): ${outputs.data - .slice(1, outputCount + 1) - .join(', ')}`; + const subarray = outputs.data.subarray(1, outputCount + 1); + return `Output values (length: ${outputCount}): ${subarray.join(', ')}`; }; // returns a colorized string of the expect_order() call, highlighting diff --git a/src/webgpu/shader/execution/memory_model/memory_model_setup.ts b/src/webgpu/shader/execution/memory_model/memory_model_setup.ts index d2dab32ecbb1..f8e5b9034cb4 100644 --- a/src/webgpu/shader/execution/memory_model/memory_model_setup.ts +++ b/src/webgpu/shader/execution/memory_model/memory_model_setup.ts @@ -203,10 +203,10 @@ export class MemoryModelTester { size: testLocationsSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, }), - srcBuf: this.test.makeBufferWithContents( - new Uint32Array(testLocationsSize).fill(0), - GPUBufferUsage.COPY_SRC - ), + srcBuf: this.test.device.createBuffer({ + size: testLocationsSize, + usage: GPUBufferUsage.COPY_SRC, + }), size: testLocationsSize, }; @@ -216,10 +216,10 @@ export class MemoryModelTester { size: readResultsSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, }), - srcBuf: this.test.makeBufferWithContents( - new Uint32Array(readResultsSize).fill(0), - GPUBufferUsage.COPY_SRC - ), + srcBuf: this.test.device.createBuffer({ + size: readResultsSize, + usage: GPUBufferUsage.COPY_SRC, + }), size: readResultsSize, }; @@ -229,10 +229,10 @@ export class MemoryModelTester { size: testResultsSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, }), - srcBuf: this.test.makeBufferWithContents( - new Uint32Array(testResultsSize).fill(0), - GPUBufferUsage.COPY_SRC - ), + srcBuf: this.test.device.createBuffer({ + size: testResultsSize, + usage: GPUBufferUsage.COPY_SRC, + }), size: testResultsSize, }; @@ -255,10 +255,10 @@ export class MemoryModelTester { size: barrierSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, }), - srcBuf: this.test.makeBufferWithContents( - new Uint32Array(barrierSize).fill(0), - GPUBufferUsage.COPY_SRC - ), + srcBuf: this.test.device.createBuffer({ + size: barrierSize, + usage: GPUBufferUsage.COPY_SRC, + }), size: barrierSize, }; @@ -268,10 +268,10 @@ export class MemoryModelTester { size: scratchpadSize, usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, }), - srcBuf: this.test.makeBufferWithContents( - new Uint32Array(scratchpadSize).fill(0), - GPUBufferUsage.COPY_SRC - ), + srcBuf: this.test.device.createBuffer({ + size: scratchpadSize, + usage: GPUBufferUsage.COPY_SRC, + }), size: scratchpadSize, }; diff --git a/src/webgpu/web_platform/reftests/canvas_colorspace.html.ts b/src/webgpu/web_platform/reftests/canvas_colorspace.html.ts index d4b5b24e1e82..3a763e8c28b3 100644 --- a/src/webgpu/web_platform/reftests/canvas_colorspace.html.ts +++ b/src/webgpu/web_platform/reftests/canvas_colorspace.html.ts @@ -5,6 +5,7 @@ import { kCanvasAlphaModes, kCanvasColorSpaces } from '../../capability_info.js' import { runRefTest } from './gpu_ref_test.js'; function bgra8UnormFromRgba8Unorm(rgba8Unorm: Uint8Array) { + // This is used only once. May need to optimize if reused. const bgra8Unorm = rgba8Unorm.slice(); for (let i = 0; i < bgra8Unorm.length; i += 4) { [bgra8Unorm[i], bgra8Unorm[i + 2]] = [bgra8Unorm[i + 2], bgra8Unorm[i]]; @@ -13,6 +14,7 @@ function bgra8UnormFromRgba8Unorm(rgba8Unorm: Uint8Array) { } function rgba16floatFromRgba8unorm(rgba8Unorm: Uint8Array) { + // This is used only once. May need to optimize if reused. const rgba16Float = new Float16Array(rgba8Unorm.length); for (let i = 0; i < rgba8Unorm.length; ++i) { rgba16Float[i] = rgba8Unorm[i] / 255; diff --git a/src/webgpu/web_platform/reftests/canvas_complex.html.ts b/src/webgpu/web_platform/reftests/canvas_complex.html.ts index a8da9cb5b36d..2c17be88754e 100644 --- a/src/webgpu/web_platform/reftests/canvas_complex.html.ts +++ b/src/webgpu/web_platform/reftests/canvas_complex.html.ts @@ -55,6 +55,7 @@ export function run( size: rows * bytesPerRow, usage: GPUBufferUsage.COPY_SRC, }); + // These are run only once per test, so there are no wasted reallocations below. let red: Uint8Array | Uint16Array; let green: Uint8Array | Uint16Array; let blue: Uint8Array | Uint16Array;