From 38ce61bc51fd524cfe83d093d674a769f94cbd6f Mon Sep 17 00:00:00 2001
From: Brandon Jones <bajones@chromium.org>
Date: Wed, 1 Nov 2023 10:48:56 -0700
Subject: [PATCH 1/4] Speed up shader,execution,expression tests with async
 pipelines

Some of these tests are among the longest-running that we have, so
hopefully this has a dramatic impact on run times. Profiling shows
that almost all of the time is spent waiting for pipeline creation.
By using createComputePipelineAsync instead, we ensure that multiple
shaders in a batch can compile at once, which drops test run times
significantly.
---
 .../shader/execution/expression/expression.ts | 38 ++++++++++---------
 1 file changed, 21 insertions(+), 17 deletions(-)
diff --git a/src/webgpu/shader/execution/expression/expression.ts b/src/webgpu/shader/execution/expression/expression.ts
index fb0b417930c2..10d46c2e97dd 100644
--- a/src/webgpu/shader/execution/expression/expression.ts
+++ b/src/webgpu/shader/execution/expression/expression.ts
@@ -353,6 +353,20 @@ export async function run(
     }
   };
 
+  const processBatch = async (batchCases: CaseList) => {
+    const checkBatch = await submitBatch(
+      t,
+      shaderBuilder,
+      parameterTypes,
+      resultType,
+      batchCases,
+      cfg.inputSource,
+      pipelineCache
+    );
+    checkBatch();
+    void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback);
+  }
+
   for (let i = 0; i < cases.length; i += casesPerBatch) {
     const batchCases = cases.slice(i, Math.min(i + casesPerBatch, cases.length));
 
@@ -365,17 +379,7 @@ export async function run(
     }
     batchesInFlight += 1;
 
-    const checkBatch = submitBatch(
-      t,
-      shaderBuilder,
-      parameterTypes,
-      resultType,
-      batchCases,
-      cfg.inputSource,
-      pipelineCache
-    );
-    checkBatch();
-    void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback);
+    processBatch(batchCases);
   }
 }
 
@@ -391,7 +395,7 @@ export async function run(
  * @param pipelineCache the cache of compute pipelines, shared between batches
  * @returns a function that checks the results are as expected
  */
-function submitBatch(
+async function submitBatch(
   t: GPUTest,
   shaderBuilder: ShaderBuilder,
   parameterTypes: Array<Type>,
@@ -399,7 +403,7 @@ function submitBatch(
   cases: CaseList,
   inputSource: InputSource,
   pipelineCache: PipelineCache
-): () => void {
+): Promise<() => void> {
   // Construct a buffer to hold the results of the expression tests
   const outputBufferSize = cases.length * valueStride(resultType);
   const outputBuffer = t.device.createBuffer({
@@ -407,7 +411,7 @@ function submitBatch(
     usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
   });
 
-  const [pipeline, group] = buildPipeline(
+  const [pipeline, group] = await buildPipeline(
     t,
     shaderBuilder,
     parameterTypes,
@@ -1003,7 +1007,7 @@ ${body}
  * @param outputBuffer the buffer that will hold the output values of the tests
  * @param pipelineCache the cache of compute pipelines, shared between batches
  */
-function buildPipeline(
+async function buildPipeline(
   t: GPUTest,
   shaderBuilder: ShaderBuilder,
   parameterTypes: Array<Type>,
@@ -1012,7 +1016,7 @@ function buildPipeline(
   inputSource: InputSource,
   outputBuffer: GPUBuffer,
   pipelineCache: PipelineCache
-): [GPUComputePipeline, GPUBindGroup] {
+): Promise<[GPUComputePipeline, GPUBindGroup]> {
   cases.forEach(c => {
     const inputTypes = c.input instanceof Array ? c.input.map(i => i.type) : [c.input.type];
     if (!objectEquals(inputTypes, parameterTypes)) {
@@ -1032,7 +1036,7 @@ function buildPipeline(
       const module = t.device.createShaderModule({ code: source });
 
       // build the pipeline
-      const pipeline = t.device.createComputePipeline({
+      const pipeline = await t.device.createComputePipelineAsync({
         layout: 'auto',
         compute: { module, entryPoint: 'main' },
       });

From d5bd63ea186df8d28043377b2dad1082a6bd24c7 Mon Sep 17 00:00:00 2001
From: Brandon Jones <bajones@chromium.org>
Date: Wed, 1 Nov 2023 11:26:32 -0700
Subject: [PATCH 2/4] Lint fixes

---
 src/webgpu/shader/execution/expression/expression.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/webgpu/shader/execution/expression/expression.ts b/src/webgpu/shader/execution/expression/expression.ts
index 10d46c2e97dd..e3dbe95d6e23 100644
--- a/src/webgpu/shader/execution/expression/expression.ts
+++ b/src/webgpu/shader/execution/expression/expression.ts
@@ -365,7 +365,7 @@ export async function run(
     );
     checkBatch();
     void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback);
-  }
+  };
 
   for (let i = 0; i < cases.length; i += casesPerBatch) {
     const batchCases = cases.slice(i, Math.min(i + casesPerBatch, cases.length));
@@ -379,7 +379,7 @@ export async function run(
     }
     batchesInFlight += 1;
 
-    processBatch(batchCases);
+    void processBatch(batchCases);
   }
 }
 

From 5ad5b3b7ca2996a0e8ccc21b1bd44c7e4ae2f7d3 Mon Sep 17 00:00:00 2001
From: Brandon Jones <bajones@chromium.org>
Date: Wed, 1 Nov 2023 12:45:26 -0700
Subject: [PATCH 3/4] Fix run() not waiting for async batches to finish

---
 src/webgpu/shader/execution/expression/expression.ts | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/webgpu/shader/execution/expression/expression.ts b/src/webgpu/shader/execution/expression/expression.ts
index e3dbe95d6e23..f85516f29bdd 100644
--- a/src/webgpu/shader/execution/expression/expression.ts
+++ b/src/webgpu/shader/execution/expression/expression.ts
@@ -367,6 +367,8 @@ export async function run(
     void t.queue.onSubmittedWorkDone().finally(batchFinishedCallback);
   };
 
+  const pendingBatches = [];
+
   for (let i = 0; i < cases.length; i += casesPerBatch) {
     const batchCases = cases.slice(i, Math.min(i + casesPerBatch, cases.length));
 
@@ -379,8 +381,10 @@ export async function run(
     }
     batchesInFlight += 1;
 
-    void processBatch(batchCases);
+    pendingBatches.push(processBatch(batchCases));
   }
+
+  await Promise.all(pendingBatches);
 }
 
 /**

From cc35561b74333fbe493cff49d7c7501de97a0c20 Mon Sep 17 00:00:00 2001
From: Brandon Jones <bajones@chromium.org>
Date: Thu, 2 Nov 2023 14:24:23 -0700
Subject: [PATCH 4/4] Extend the async pipeline pattern to two more sets of
 long-running tests

---
 src/webgpu/shader/execution/robust_access.spec.ts | 12 ++++++------
 src/webgpu/shader/execution/zero_init.spec.ts     |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/webgpu/shader/execution/robust_access.spec.ts b/src/webgpu/shader/execution/robust_access.spec.ts
index 03b58265661a..965dd283dd16 100644
--- a/src/webgpu/shader/execution/robust_access.spec.ts
+++ b/src/webgpu/shader/execution/robust_access.spec.ts
@@ -23,14 +23,14 @@ const kMinI32 = -0x8000_0000;
  * Non-test bindings are in bind group 1, including:
  * - `constants.zero`: a dynamically-uniform `0u` value.
  */
-function runShaderTest(
+async function runShaderTest(
   t: GPUTest,
   stage: GPUShaderStageFlags,
   testSource: string,
   layout: GPUPipelineLayout,
   testBindings: GPUBindGroupEntry[],
   dynamicOffsets?: number[]
-): void {
+): Promise<void> {
   assert(stage === GPUShaderStage.COMPUTE, 'Only know how to deal with compute for now');
 
   // Contains just zero (for now).
@@ -62,7 +62,7 @@ fn main() {
 
   t.debug(source);
   const module = t.device.createShaderModule({ code: source });
-  const pipeline = t.device.createComputePipeline({
+  const pipeline = await t.device.createComputePipelineAsync({
     layout,
     compute: { module, entryPoint: 'main' },
   });
@@ -172,7 +172,7 @@ g.test('linear_memory')
       .expand('baseType', supportedScalarTypes)
       .expandWithParams(generateTypes)
   )
-  .fn(t => {
+  .fn(async t => {
     const {
       addressSpace,
       storageMode,
@@ -448,7 +448,7 @@ fn runTest() -> u32 {
       );
 
       // Run the shader, accessing the buffer.
-      runShaderTest(
+      await runShaderTest(
         t,
         GPUShaderStage.COMPUTE,
         testSource,
@@ -475,6 +475,6 @@ fn runTest() -> u32 {
         bufferBindingEnd
       );
     } else {
-      runShaderTest(t, GPUShaderStage.COMPUTE, testSource, layout, []);
+      await runShaderTest(t, GPUShaderStage.COMPUTE, testSource, layout, []);
     }
   });
diff --git a/src/webgpu/shader/execution/zero_init.spec.ts b/src/webgpu/shader/execution/zero_init.spec.ts
index fe298a161a66..e03a72f8df56 100644
--- a/src/webgpu/shader/execution/zero_init.spec.ts
+++ b/src/webgpu/shader/execution/zero_init.spec.ts
@@ -227,7 +227,7 @@ g.test('compute,zero_init')
       })
   )
   .batch(15)
-  .fn(t => {
+  .fn(async t => {
     const { workgroupSize } = t.params;
     const { maxComputeInvocationsPerWorkgroup } = t.device.limits;
     const numWorkgroupInvocations = workgroupSize.reduce((a, b) => a * b);
@@ -446,7 +446,7 @@ g.test('compute,zero_init')
         ],
       });
 
-      const fillPipeline = t.device.createComputePipeline({
+      const fillPipeline = await t.device.createComputePipelineAsync({
         layout: t.device.createPipelineLayout({ bindGroupLayouts: [fillLayout] }),
         label: 'Workgroup Fill Pipeline',
         compute: {
@@ -495,7 +495,7 @@ g.test('compute,zero_init')
       t.queue.submit([e.finish()]);
     }
 
-    const pipeline = t.device.createComputePipeline({
+    const pipeline = await t.device.createComputePipelineAsync({
       layout: 'auto',
       compute: {
         module: t.device.createShaderModule({