From 38ce61bc51fd524cfe83d093d674a769f94cbd6f Mon Sep 17 00:00:00 2001 From: Brandon Jones Date: Wed, 1 Nov 2023 10:48:56 -0700 Subject: [PATCH 1/4] Speed up shader,execution,expression tests with async pipelines Some of these tests are among the longest running that we have, so hopefully this has a dramatic impact on runtimes. Profiling shows that almost all the time is being spent waiting for pipeline creation. By using createComputePipelineAsync, however, we ensure that multiple shaders in a batch can be compiling at once, which drops test run times significantly. --- .../shader/execution/expression/expression.ts | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/webgpu/shader/execution/expression/expression.ts b/src/webgpu/shader/execution/expression/expression.ts index fb0b417930c2..10d46c2e97dd 100644 --- a/src/webgpu/shader/execution/expression/expression.ts +++ b/src/webgpu/shader/execution/expression/expression.ts @@ -353,6 +353,20 @@ export async function run( } }; + const processBatch = async (batchCases: CaseList) => { + const checkBatch = await submitBatch( + t, + shaderBuilder, + parameterTypes, + resultType, + batchCases, + cfg.inputSource, + pipelineCache + ); + checkBatch(); + void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback); + } + for (let i = 0; i < cases.length; i += casesPerBatch) { const batchCases = cases.slice(i, Math.min(i + casesPerBatch, cases.length)); @@ -365,17 +379,7 @@ export async function run( } batchesInFlight += 1; - const checkBatch = submitBatch( - t, - shaderBuilder, - parameterTypes, - resultType, - batchCases, - cfg.inputSource, - pipelineCache - ); - checkBatch(); - void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback); + processBatch(batchCases); } } @@ -391,7 +395,7 @@ export async function run( * @param pipelineCache the cache of compute pipelines, shared between batches * @returns a function that checks the results are as expected */ -function submitBatch( +async function submitBatch( t: GPUTest, shaderBuilder: ShaderBuilder, parameterTypes: Array, @@ -399,7 +403,7 @@ function submitBatch( cases: CaseList, inputSource: InputSource, pipelineCache: PipelineCache -): () => void { +): Promise<() => void> { // Construct a buffer to hold the results of the expression tests const outputBufferSize = cases.length * valueStride(resultType); const outputBuffer = t.device.createBuffer({ @@ -407,7 +411,7 @@ function submitBatch( usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, }); - const [pipeline, group] = buildPipeline( + const [pipeline, group] = await buildPipeline( t, shaderBuilder, parameterTypes, @@ -1003,7 +1007,7 @@ ${body} * @param outputBuffer the buffer that will hold the output values of the tests * @param pipelineCache the cache of compute pipelines, shared between batches */ -function buildPipeline( +async function buildPipeline( t: GPUTest, shaderBuilder: ShaderBuilder, parameterTypes: Array, @@ -1012,7 +1016,7 @@ function buildPipeline( inputSource: InputSource, outputBuffer: GPUBuffer, pipelineCache: PipelineCache -): [GPUComputePipeline, GPUBindGroup] { +): Promise<[GPUComputePipeline, GPUBindGroup]> { cases.forEach(c => { const inputTypes = c.input instanceof Array ? c.input.map(i => i.type) : [c.input.type]; if (!objectEquals(inputTypes, parameterTypes)) { @@ -1032,7 +1036,7 @@ function buildPipeline( const module = t.device.createShaderModule({ code: source }); // build the pipeline - const pipeline = t.device.createComputePipeline({ + const pipeline = await t.device.createComputePipelineAsync({ layout: 'auto', compute: { module, entryPoint: 'main' }, }); From d5bd63ea186df8d28043377b2dad1082a6bd24c7 Mon Sep 17 00:00:00 2001 From: Brandon Jones Date: Wed, 1 Nov 2023 11:26:32 -0700 Subject: [PATCH 2/4] Lint fixes --- src/webgpu/shader/execution/expression/expression.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/webgpu/shader/execution/expression/expression.ts b/src/webgpu/shader/execution/expression/expression.ts index 10d46c2e97dd..e3dbe95d6e23 100644 --- a/src/webgpu/shader/execution/expression/expression.ts +++ b/src/webgpu/shader/execution/expression/expression.ts @@ -365,7 +365,7 @@ export async function run( ); checkBatch(); void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback); - } + }; for (let i = 0; i < cases.length; i += casesPerBatch) { const batchCases = cases.slice(i, Math.min(i + casesPerBatch, cases.length)); @@ -379,7 +379,7 @@ export async function run( } batchesInFlight += 1; - processBatch(batchCases); + void processBatch(batchCases); } } From 5ad5b3b7ca2996a0e8ccc21b1bd44c7e4ae2f7d3 Mon Sep 17 00:00:00 2001 From: Brandon Jones Date: Wed, 1 Nov 2023 12:45:26 -0700 Subject: [PATCH 3/4] Fix async tests not waiting --- src/webgpu/shader/execution/expression/expression.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/webgpu/shader/execution/expression/expression.ts b/src/webgpu/shader/execution/expression/expression.ts index e3dbe95d6e23..f85516f29bdd 100644 --- a/src/webgpu/shader/execution/expression/expression.ts +++ b/src/webgpu/shader/execution/expression/expression.ts @@ -367,6 +367,8 @@ export async function run( void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback); }; + const pendingBatches = []; + for (let i = 0; i < cases.length; i += casesPerBatch) { const batchCases = cases.slice(i, Math.min(i + casesPerBatch, cases.length)); @@ -379,8 +381,10 @@ export async function run( } batchesInFlight += 1; - void processBatch(batchCases); + pendingBatches.push(processBatch(batchCases)); } + + await Promise.all(pendingBatches); } /** From cc35561b74333fbe493cff49d7c7501de97a0c20 Mon Sep 17 00:00:00 2001 From: Brandon Jones Date: Thu, 2 Nov 2023 14:24:23 -0700 Subject: [PATCH 4/4] Extended the async pipeline pattern to two more sets of long running tests --- src/webgpu/shader/execution/robust_access.spec.ts | 12 ++++++------ src/webgpu/shader/execution/zero_init.spec.ts | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/webgpu/shader/execution/robust_access.spec.ts b/src/webgpu/shader/execution/robust_access.spec.ts index 03b58265661a..965dd283dd16 100644 --- a/src/webgpu/shader/execution/robust_access.spec.ts +++ b/src/webgpu/shader/execution/robust_access.spec.ts @@ -23,14 +23,14 @@ const kMinI32 = -0x8000_0000; * Non-test bindings are in bind group 1, including: * - `constants.zero`: a dynamically-uniform `0u` value. */ -function runShaderTest( +async function runShaderTest( t: GPUTest, stage: GPUShaderStageFlags, testSource: string, layout: GPUPipelineLayout, testBindings: GPUBindGroupEntry[], dynamicOffsets?: number[] -): void { +): Promise { assert(stage === GPUShaderStage.COMPUTE, 'Only know how to deal with compute for now'); // Contains just zero (for now). @@ -62,7 +62,7 @@ fn main() { t.debug(source); const module = t.device.createShaderModule({ code: source }); - const pipeline = t.device.createComputePipeline({ + const pipeline = await t.device.createComputePipelineAsync({ layout, compute: { module, entryPoint: 'main' }, }); @@ -172,7 +172,7 @@ g.test('linear_memory') .expand('baseType', supportedScalarTypes) .expandWithParams(generateTypes) ) - .fn(t => { + .fn(async t => { const { addressSpace, storageMode, @@ -448,7 +448,7 @@ fn runTest() -> u32 { ); // Run the shader, accessing the buffer. - runShaderTest( + await runShaderTest( t, GPUShaderStage.COMPUTE, testSource, @@ -475,6 +475,6 @@ fn runTest() -> u32 { bufferBindingEnd ); } else { - runShaderTest(t, GPUShaderStage.COMPUTE, testSource, layout, []); + await runShaderTest(t, GPUShaderStage.COMPUTE, testSource, layout, []); } }); diff --git a/src/webgpu/shader/execution/zero_init.spec.ts b/src/webgpu/shader/execution/zero_init.spec.ts index fe298a161a66..e03a72f8df56 100644 --- a/src/webgpu/shader/execution/zero_init.spec.ts +++ b/src/webgpu/shader/execution/zero_init.spec.ts @@ -227,7 +227,7 @@ g.test('compute,zero_init') }) ) .batch(15) - .fn(t => { + .fn(async t => { const { workgroupSize } = t.params; const { maxComputeInvocationsPerWorkgroup } = t.device.limits; const numWorkgroupInvocations = workgroupSize.reduce((a, b) => a * b); @@ -446,7 +446,7 @@ g.test('compute,zero_init') ], }); - const fillPipeline = t.device.createComputePipeline({ + const fillPipeline = await t.device.createComputePipelineAsync({ layout: t.device.createPipelineLayout({ bindGroupLayouts: [fillLayout] }), label: 'Workgroup Fill Pipeline', compute: { @@ -495,7 +495,7 @@ g.test('compute,zero_init') t.queue.submit([e.finish()]); } - const pipeline = t.device.createComputePipeline({ + const pipeline = await t.device.createComputePipelineAsync({ layout: 'auto', compute: { module: t.device.createShaderModule({