From 63658e137bf2efcb6199c849afc8be1b4995f2d2 Mon Sep 17 00:00:00 2001
From: Greggman <github@greggman.com>
Date: Wed, 8 Jan 2025 04:29:35 +0900
Subject: [PATCH 1/7] Add "enforceDefaultLimits" flag (#4124)

I'm not sure what to do about this but dealing with dependent
limits is kind of a PITA.

Tests that deal with storage buffers and storage textures need to
take into account that they might have 0. But, they also need to
be tested with the maximum number of storage buffers/textures.

So, if you set the test to use max storage buffers/textures then,
unless you have a device that supports 0 you have no easy way to
test that the test functions when the limit is 0.

While refactoring the tests I start without requesting the limit
so on compat I get zero. Once that works I add `MaxLimitsTestMixin`
or similar to request the maximum limits, otherwise a compat device
would get no coverage.

But, now I have the issue that if I'm modifying the test I need to
remember to test with 0 so I have to go manually comment out the code
that's requesting max limits.

So, I thought I'd add this option.
---
 src/common/framework/test_config.ts       |   6 +
 src/common/runtime/cmdline.ts             |   2 +
 src/common/runtime/helper/options.ts      |   6 +
 src/common/runtime/helper/utils_worker.ts |   1 +
 src/common/runtime/server.ts              |   2 +
 src/common/runtime/standalone.ts          |   1 +
 src/common/util/navigator_gpu.ts          |  84 +++++++++
 src/resources/cache/hashes.json           | 220 +++++++++++-----------
 8 files changed, 212 insertions(+), 110 deletions(-)
diff --git a/src/common/framework/test_config.ts b/src/common/framework/test_config.ts
index 072aaf736027..48d090c45185 100644
--- a/src/common/framework/test_config.ts
+++ b/src/common/framework/test_config.ts
@@ -44,6 +44,11 @@ export type TestConfig = {
    */
   forceFallbackAdapter: boolean;
 
+  /**
+   * Enforce the default limits on the adapter
+   */
+  enforceDefaultLimits: boolean;
+
   /**
    * Whether to enable the `logToWebSocket` function used for out-of-band test logging.
    */
@@ -59,5 +64,6 @@ export const globalTestConfig: TestConfig = {
   unrollConstEvalLoops: false,
   compatibility: false,
   forceFallbackAdapter: false,
+  enforceDefaultLimits: false,
   logToWebSocket: false,
 };
diff --git a/src/common/runtime/cmdline.ts b/src/common/runtime/cmdline.ts
index b2d220ec8ef9..635c30eb1410 100644
--- a/src/common/runtime/cmdline.ts
+++ b/src/common/runtime/cmdline.ts
@@ -107,6 +107,8 @@ for (let i = 0; i < sys.args.length; ++i) {
       globalTestConfig.compatibility = true;
     } else if (a === '--force-fallback-adapter') {
       globalTestConfig.forceFallbackAdapter = true;
+    } else if (a === '--enforce-default-limits') {
+      globalTestConfig.enforceDefaultLimits = true;
     } else if (a === '--log-to-websocket') {
       globalTestConfig.logToWebSocket = true;
     } else {
diff --git a/src/common/runtime/helper/options.ts b/src/common/runtime/helper/options.ts
index 4a82c7d2928e..2bea817d44a1 100644
--- a/src/common/runtime/helper/options.ts
+++ b/src/common/runtime/helper/options.ts
@@ -53,6 +53,7 @@ export interface CTSOptions {
   debug: boolean;
   compatibility: boolean;
   forceFallbackAdapter: boolean;
+  enforceDefaultLimits: boolean;
   unrollConstEvalLoops: boolean;
   powerPreference: GPUPowerPreference | null;
   logToWebSocket: boolean;
@@ -63,6 +64,7 @@ export const kDefaultCTSOptions: CTSOptions = {
   debug: true,
   compatibility: false,
   forceFallbackAdapter: false,
+  enforceDefaultLimits: false,
   unrollConstEvalLoops: false,
   powerPreference: null,
   logToWebSocket: false,
@@ -100,6 +102,10 @@ export const kCTSOptionsInfo: OptionsInfos<CTSOptions> = {
   debug: { description: 'show more info' },
   compatibility: { description: 'run in compatibility mode' },
   forceFallbackAdapter: { description: 'pass forceFallbackAdapter: true to requestAdapter' },
+  enforceDefaultLimits: {
+    description: `force the adapter limits to the default limits.
+Note: May fail on tests for low-power/high-performance`,
+  },
   unrollConstEvalLoops: { description: 'unroll const eval loops in WGSL' },
   powerPreference: {
     description: 'set default powerPreference for some tests',
diff --git a/src/common/runtime/helper/utils_worker.ts b/src/common/runtime/helper/utils_worker.ts
index 5886839f6c33..7054be317c13 100644
--- a/src/common/runtime/helper/utils_worker.ts
+++ b/src/common/runtime/helper/utils_worker.ts
@@ -19,6 +19,7 @@ export function setupWorkerEnvironment(ctsOptions: CTSOptions): Logger {
   globalTestConfig.enableDebugLogs = ctsOptions.debug;
   globalTestConfig.unrollConstEvalLoops = ctsOptions.unrollConstEvalLoops;
   globalTestConfig.compatibility = compatibility;
+  globalTestConfig.enforceDefaultLimits = ctsOptions.enforceDefaultLimits;
   globalTestConfig.logToWebSocket = ctsOptions.logToWebSocket;
 
   const log = new Logger();
diff --git a/src/common/runtime/server.ts b/src/common/runtime/server.ts
index d908ce89ba7a..bb68b0413353 100644
--- a/src/common/runtime/server.ts
+++ b/src/common/runtime/server.ts
@@ -96,6 +96,8 @@ for (let i = 0; i < sys.args.length; ++i) {
       emitCoverage = true;
     } else if (a === '--force-fallback-adapter') {
       globalTestConfig.forceFallbackAdapter = true;
+    } else if (a === '--enforce-default-limits') {
+      globalTestConfig.enforceDefaultLimits = true;
     } else if (a === '--log-to-websocket') {
       globalTestConfig.logToWebSocket = true;
     } else if (a === '--gpu-provider') {
diff --git a/src/common/runtime/standalone.ts b/src/common/runtime/standalone.ts
index a079ac28dd98..d5b51b11c6f5 100644
--- a/src/common/runtime/standalone.ts
+++ b/src/common/runtime/standalone.ts
@@ -52,6 +52,7 @@ const { runnow, powerPreference, compatibility, forceFallbackAdapter } = options
 globalTestConfig.enableDebugLogs = options.debug;
 globalTestConfig.unrollConstEvalLoops = options.unrollConstEvalLoops;
 globalTestConfig.compatibility = compatibility;
+globalTestConfig.enforceDefaultLimits = options.enforceDefaultLimits;
 globalTestConfig.logToWebSocket = options.logToWebSocket;
 
 const logger = new Logger();
diff --git a/src/common/util/navigator_gpu.ts b/src/common/util/navigator_gpu.ts
index 4e58797097ed..728b8cdadf54 100644
--- a/src/common/util/navigator_gpu.ts
+++ b/src/common/util/navigator_gpu.ts
@@ -1,4 +1,6 @@
+// eslint-disable-next-line import/no-restricted-paths
 import { TestCaseRecorder } from '../framework/fixture.js';
+import { globalTestConfig } from '../framework/test_config.js';
 
 import { ErrorWithExtra, assert, objectEquals } from './util.js';
 
@@ -31,6 +33,7 @@ export function setGPUProvider(provider: GPUProvider) {
 }
 
 let impl: GPU | undefined = undefined;
+let s_defaultLimits: Record<string, GPUSize64> | undefined = undefined;
 
 let defaultRequestAdapterOptions: GPURequestAdapterOptions | undefined;
 
@@ -49,6 +52,14 @@ export function getDefaultRequestAdapterOptions() {
   return defaultRequestAdapterOptions;
 }
 
+function copyLimits(objLike: GPUSupportedLimits) {
+  const obj: Record<string, number> = {};
+  for (const key in objLike) {
+    obj[key] = (objLike as unknown as Record<string, number>)[key];
+  }
+  return obj;
+}
+
 /**
  * Finds and returns the `navigator.gpu` object (or equivalent, for non-browser implementations).
  * Throws an exception if not found.
@@ -60,6 +71,79 @@ export function getGPU(recorder: TestCaseRecorder | null): GPU {
 
   impl = gpuProvider();
 
+  if (globalTestConfig.enforceDefaultLimits) {
+    // eslint-disable-next-line @typescript-eslint/unbound-method
+    const origRequestAdapterFn = impl.requestAdapter;
+    // eslint-disable-next-line @typescript-eslint/unbound-method
+    const origRequestDeviceFn = GPUAdapter.prototype.requestDevice;
+
+    impl.requestAdapter = async function (options?: GPURequestAdapterOptions) {
+      if (!s_defaultLimits) {
+        const tempAdapter = await origRequestAdapterFn.call(this, {
+          ...defaultRequestAdapterOptions,
+          ...options,
+        });
+        // eslint-disable-next-line no-restricted-syntax
+        const tempDevice = await tempAdapter?.requestDevice();
+        s_defaultLimits = copyLimits(tempDevice!.limits);
+        tempDevice?.destroy();
+      }
+      const adapter = await origRequestAdapterFn.call(this, {
+        ...defaultRequestAdapterOptions,
+        ...options,
+      });
+      if (adapter) {
+        const limits = Object.fromEntries(
+          Object.entries(s_defaultLimits).map(([key, v]) => [key, v])
+        );
+
+        Object.defineProperty(adapter, 'limits', {
+          get() {
+            return limits;
+          },
+        });
+      }
+      return adapter;
+    };
+
+    const enforceDefaultLimits = (adapter: GPUAdapter, desc: GPUDeviceDescriptor | undefined) => {
+      if (desc?.requiredLimits) {
+        for (const [key, value] of Object.entries(desc.requiredLimits)) {
+          const limit = s_defaultLimits![key];
+          if (limit !== undefined && value !== undefined) {
+            const [beyondLimit, condition] = key.startsWith('max')
+              ? [value > limit, 'greater']
+              : [value < limit, 'less'];
+            if (beyondLimit) {
+              throw new DOMException(
+                `requestedLimit ${value} for ${key} is ${condition} than adapter limit ${limit}`,
+                'OperationError'
+              );
+            }
+          }
+        }
+      }
+    };
+
+    GPUAdapter.prototype.requestDevice = async function (
+      this: GPUAdapter,
+      desc?: GPUDeviceDescriptor | undefined
+    ) {
+      // We need to enforce the default limits because even though we patched the adapter to
+      // show defaults for adapter.limits, there are tests that test we throw when we request more than the max.
+      // In other words.
+      //
+      //   adapter.requestDevice({ requiredLimits: {
+      //     maxXXX: addapter.limits.maxXXX + 1,  // should throw
+      //   });
+      //
+      // But unless we enforce this manually, it won't actual through if the adapter's
+      // true limits are higher than we patched above.
+      enforceDefaultLimits(this, desc);
+      return await origRequestDeviceFn.call(this, desc);
+    };
+  }
+
   if (defaultRequestAdapterOptions) {
     // eslint-disable-next-line @typescript-eslint/unbound-method
     const oldFn = impl.requestAdapter;
diff --git a/src/resources/cache/hashes.json b/src/resources/cache/hashes.json
index 16ed2d6f855f..7f214ed4d33b 100644
--- a/src/resources/cache/hashes.json
+++ b/src/resources/cache/hashes.json
@@ -1,112 +1,112 @@
 {
-  "webgpu/shader/execution/binary/af_addition.bin": "e5d3d0ec",
-  "webgpu/shader/execution/binary/af_logical.bin": "4cadb0c4",
-  "webgpu/shader/execution/binary/af_division.bin": "24d5c047",
-  "webgpu/shader/execution/binary/af_matrix_addition.bin": "27b5045f",
-  "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "e51ed65c",
-  "webgpu/shader/execution/binary/af_multiplication.bin": "ccdd9db3",
-  "webgpu/shader/execution/binary/af_remainder.bin": "7ba7561",
-  "webgpu/shader/execution/binary/af_subtraction.bin": "77ae32fd",
-  "webgpu/shader/execution/binary/f16_addition.bin": "7cf4d65a",
-  "webgpu/shader/execution/binary/f16_logical.bin": "b9e93570",
-  "webgpu/shader/execution/binary/f16_division.bin": "9ec5ad6a",
-  "webgpu/shader/execution/binary/f16_matrix_addition.bin": "e89ed3cb",
-  "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "375d5af3",
-  "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "9bd3af4e",
-  "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "43350bb3",
-  "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "1edb6630",
-  "webgpu/shader/execution/binary/f16_multiplication.bin": "7485e77b",
-  "webgpu/shader/execution/binary/f16_remainder.bin": "ec817ba",
-  "webgpu/shader/execution/binary/f16_subtraction.bin": "aae1c57a",
-  "webgpu/shader/execution/binary/f32_addition.bin": "7134e85",
-  "webgpu/shader/execution/binary/f32_logical.bin": "24ba1a54",
-  "webgpu/shader/execution/binary/f32_division.bin": "f51e95fe",
-  "webgpu/shader/execution/binary/f32_matrix_addition.bin": "97f4a153",
-  "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "70677010",
-  "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "355c0954",
-  "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "85135fad",
-  "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "a3fa2750",
-  "webgpu/shader/execution/binary/f32_multiplication.bin": "ba41fc13",
-  "webgpu/shader/execution/binary/f32_remainder.bin": "142765dd",
-  "webgpu/shader/execution/binary/f32_subtraction.bin": "c1523f1c",
-  "webgpu/shader/execution/binary/i32_arithmetic.bin": "bcff0350",
-  "webgpu/shader/execution/binary/i32_comparison.bin": "396f7bad",
-  "webgpu/shader/execution/binary/u32_arithmetic.bin": "7357b8e9",
-  "webgpu/shader/execution/binary/u32_comparison.bin": "374b246a",
-  "webgpu/shader/execution/abs.bin": "1ab55590",
-  "webgpu/shader/execution/acos.bin": "d85ca70",
-  "webgpu/shader/execution/acosh.bin": "e28fd395",
-  "webgpu/shader/execution/asin.bin": "eced623b",
-  "webgpu/shader/execution/asinh.bin": "223989ea",
-  "webgpu/shader/execution/atan.bin": "d896ec78",
-  "webgpu/shader/execution/atan2.bin": "b394e737",
-  "webgpu/shader/execution/atanh.bin": "9e4a5a",
-  "webgpu/shader/execution/bitcast.bin": "51bb8e9f",
-  "webgpu/shader/execution/ceil.bin": "11d3b9aa",
-  "webgpu/shader/execution/clamp.bin": "b2860448",
-  "webgpu/shader/execution/cos.bin": "c965712f",
-  "webgpu/shader/execution/cosh.bin": "89f1ce32",
-  "webgpu/shader/execution/cross.bin": "8b3df30b",
-  "webgpu/shader/execution/degrees.bin": "182a521e",
-  "webgpu/shader/execution/determinant.bin": "d5b3ed92",
-  "webgpu/shader/execution/distance.bin": "d21d853b",
-  "webgpu/shader/execution/dot.bin": "c91e716",
-  "webgpu/shader/execution/exp.bin": "4240da7",
-  "webgpu/shader/execution/exp2.bin": "11938883",
-  "webgpu/shader/execution/faceForward.bin": "a2e27acc",
-  "webgpu/shader/execution/floor.bin": "c06553ca",
-  "webgpu/shader/execution/fma.bin": "7dea01cf",
-  "webgpu/shader/execution/fract.bin": "677b1105",
-  "webgpu/shader/execution/frexp.bin": "3dbf112c",
-  "webgpu/shader/execution/inverseSqrt.bin": "8f6bb04e",
-  "webgpu/shader/execution/ldexp.bin": "287a7307",
-  "webgpu/shader/execution/length.bin": "908ff679",
-  "webgpu/shader/execution/log.bin": "8585f4ac",
-  "webgpu/shader/execution/log2.bin": "6847e7ca",
-  "webgpu/shader/execution/max.bin": "777cfa16",
-  "webgpu/shader/execution/min.bin": "51ae81f5",
-  "webgpu/shader/execution/mix.bin": "56c2fa65",
-  "webgpu/shader/execution/modf.bin": "4e106c58",
-  "webgpu/shader/execution/normalize.bin": "32c44235",
-  "webgpu/shader/execution/pack2x16float.bin": "133f1db3",
-  "webgpu/shader/execution/pow.bin": "370df2fa",
-  "webgpu/shader/execution/quantizeToF16.bin": "8a4fa4a8",
-  "webgpu/shader/execution/radians.bin": "c4684d86",
-  "webgpu/shader/execution/reflect.bin": "3620e893",
-  "webgpu/shader/execution/refract.bin": "f6bd722f",
-  "webgpu/shader/execution/round.bin": "765d4e71",
-  "webgpu/shader/execution/saturate.bin": "772a5d55",
-  "webgpu/shader/execution/sign.bin": "b99ea52d",
-  "webgpu/shader/execution/sin.bin": "69c8cce9",
-  "webgpu/shader/execution/sinh.bin": "3bc5f870",
-  "webgpu/shader/execution/smoothstep.bin": "bc6be719",
-  "webgpu/shader/execution/sqrt.bin": "c3960a52",
-  "webgpu/shader/execution/step.bin": "8a715292",
-  "webgpu/shader/execution/tan.bin": "5c531104",
-  "webgpu/shader/execution/tanh.bin": "ff9cadf1",
-  "webgpu/shader/execution/transpose.bin": "f9cebed4",
-  "webgpu/shader/execution/trunc.bin": "1c0edd1b",
-  "webgpu/shader/execution/unpack2x16float.bin": "1d4051b9",
-  "webgpu/shader/execution/unpack2x16snorm.bin": "f9a16118",
-  "webgpu/shader/execution/unpack2x16unorm.bin": "cad45023",
-  "webgpu/shader/execution/unpack4x8snorm.bin": "26f9cbc4",
-  "webgpu/shader/execution/unpack4x8unorm.bin": "76de2d56",
-  "webgpu/shader/execution/unary/af_arithmetic.bin": "7044a2b4",
-  "webgpu/shader/execution/unary/af_assignment.bin": "fca7094b",
-  "webgpu/shader/execution/unary/bool_conversion.bin": "ac26e6b8",
-  "webgpu/shader/execution/unary/f16_arithmetic.bin": "913af76",
-  "webgpu/shader/execution/unary/f16_conversion.bin": "1fb4a7a4",
-  "webgpu/shader/execution/unary/f32_arithmetic.bin": "7c274ba7",
-  "webgpu/shader/execution/unary/f32_conversion.bin": "1175ae48",
-  "webgpu/shader/execution/unary/i32_arithmetic.bin": "7bf685a3",
-  "webgpu/shader/execution/unary/i32_conversion.bin": "60437023",
-  "webgpu/shader/execution/unary/u32_conversion.bin": "3bc30fc0",
-  "webgpu/shader/execution/unary/ai_assignment.bin": "66d85afa",
-  "webgpu/shader/execution/binary/ai_arithmetic.bin": "3c6c91e3",
-  "webgpu/shader/execution/unary/ai_arithmetic.bin": "37cc249a",
-  "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "c22028c6",
-  "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "8fa6602",
-  "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "f1bc0050",
-  "webgpu/shader/execution/derivatives.bin": "d96a3465",
-  "webgpu/shader/execution/fwidth.bin": "c63db6a9"
+  "webgpu/shader/execution/binary/af_addition.bin": "a076fefd",
+  "webgpu/shader/execution/binary/af_logical.bin": "5ef95b51",
+  "webgpu/shader/execution/binary/af_division.bin": "2ee1f517",
+  "webgpu/shader/execution/binary/af_matrix_addition.bin": "46d1c536",
+  "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "1ba9140b",
+  "webgpu/shader/execution/binary/af_multiplication.bin": "f55ec87f",
+  "webgpu/shader/execution/binary/af_remainder.bin": "39607546",
+  "webgpu/shader/execution/binary/af_subtraction.bin": "a1e1671b",
+  "webgpu/shader/execution/binary/f16_addition.bin": "98ec8cbb",
+  "webgpu/shader/execution/binary/f16_logical.bin": "e1b101aa",
+  "webgpu/shader/execution/binary/f16_division.bin": "f24f41e6",
+  "webgpu/shader/execution/binary/f16_matrix_addition.bin": "a6c126ba",
+  "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "4810437f",
+  "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "33bd4e0b",
+  "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "2a42a145",
+  "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "17eeecc2",
+  "webgpu/shader/execution/binary/f16_multiplication.bin": "5ee924d2",
+  "webgpu/shader/execution/binary/f16_remainder.bin": "a371e824",
+  "webgpu/shader/execution/binary/f16_subtraction.bin": "c5e6d455",
+  "webgpu/shader/execution/binary/f32_addition.bin": "371675d2",
+  "webgpu/shader/execution/binary/f32_logical.bin": "6c691798",
+  "webgpu/shader/execution/binary/f32_division.bin": "11ed1f8d",
+  "webgpu/shader/execution/binary/f32_matrix_addition.bin": "662a8c2a",
+  "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "3bef3e82",
+  "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "4b0d7b28",
+  "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "c1b78a5f",
+  "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "3ae5663c",
+  "webgpu/shader/execution/binary/f32_multiplication.bin": "7c887b3c",
+  "webgpu/shader/execution/binary/f32_remainder.bin": "955b27f7",
+  "webgpu/shader/execution/binary/f32_subtraction.bin": "10a5d990",
+  "webgpu/shader/execution/binary/i32_arithmetic.bin": "d8d24c51",
+  "webgpu/shader/execution/binary/i32_comparison.bin": "97a65e83",
+  "webgpu/shader/execution/binary/u32_arithmetic.bin": "76af97a5",
+  "webgpu/shader/execution/binary/u32_comparison.bin": "107ae7dd",
+  "webgpu/shader/execution/abs.bin": "8702bbde",
+  "webgpu/shader/execution/acos.bin": "505d4e5c",
+  "webgpu/shader/execution/acosh.bin": "6d849181",
+  "webgpu/shader/execution/asin.bin": "3739abaa",
+  "webgpu/shader/execution/asinh.bin": "5f912ea9",
+  "webgpu/shader/execution/atan.bin": "d15dc231",
+  "webgpu/shader/execution/atan2.bin": "60eb6015",
+  "webgpu/shader/execution/atanh.bin": "f8b2fb79",
+  "webgpu/shader/execution/bitcast.bin": "af41ce05",
+  "webgpu/shader/execution/ceil.bin": "8da53d8b",
+  "webgpu/shader/execution/clamp.bin": "a02c15b",
+  "webgpu/shader/execution/cos.bin": "4f444ab6",
+  "webgpu/shader/execution/cosh.bin": "aaa40c4b",
+  "webgpu/shader/execution/cross.bin": "534a949c",
+  "webgpu/shader/execution/degrees.bin": "2d55f678",
+  "webgpu/shader/execution/determinant.bin": "5b49ee94",
+  "webgpu/shader/execution/distance.bin": "d6963680",
+  "webgpu/shader/execution/dot.bin": "e03347d6",
+  "webgpu/shader/execution/exp.bin": "a5affb43",
+  "webgpu/shader/execution/exp2.bin": "21d376a0",
+  "webgpu/shader/execution/faceForward.bin": "e0e2ad0e",
+  "webgpu/shader/execution/floor.bin": "3658073",
+  "webgpu/shader/execution/fma.bin": "a7cc5707",
+  "webgpu/shader/execution/fract.bin": "44a3775d",
+  "webgpu/shader/execution/frexp.bin": "241abedb",
+  "webgpu/shader/execution/inverseSqrt.bin": "766974b5",
+  "webgpu/shader/execution/ldexp.bin": "db0c0fcf",
+  "webgpu/shader/execution/length.bin": "c1240c03",
+  "webgpu/shader/execution/log.bin": "98aceda7",
+  "webgpu/shader/execution/log2.bin": "ffdc85d7",
+  "webgpu/shader/execution/max.bin": "a2c6c4b1",
+  "webgpu/shader/execution/min.bin": "344390ef",
+  "webgpu/shader/execution/mix.bin": "367c1ff3",
+  "webgpu/shader/execution/modf.bin": "7be6faa3",
+  "webgpu/shader/execution/normalize.bin": "b2b9eb0c",
+  "webgpu/shader/execution/pack2x16float.bin": "a66da753",
+  "webgpu/shader/execution/pow.bin": "a9858b9",
+  "webgpu/shader/execution/quantizeToF16.bin": "bf80d34e",
+  "webgpu/shader/execution/radians.bin": "cc7b8d0c",
+  "webgpu/shader/execution/reflect.bin": "cb0be6ee",
+  "webgpu/shader/execution/refract.bin": "501ac731",
+  "webgpu/shader/execution/round.bin": "b4ea1e61",
+  "webgpu/shader/execution/saturate.bin": "2783de66",
+  "webgpu/shader/execution/sign.bin": "30ad6ecf",
+  "webgpu/shader/execution/sin.bin": "9f8b5d9e",
+  "webgpu/shader/execution/sinh.bin": "d988cc09",
+  "webgpu/shader/execution/smoothstep.bin": "2e89af8e",
+  "webgpu/shader/execution/sqrt.bin": "55dd81cf",
+  "webgpu/shader/execution/step.bin": "f1bced79",
+  "webgpu/shader/execution/tan.bin": "a8354079",
+  "webgpu/shader/execution/tanh.bin": "fd1c38ee",
+  "webgpu/shader/execution/transpose.bin": "e8bdca54",
+  "webgpu/shader/execution/trunc.bin": "ffedffa",
+  "webgpu/shader/execution/unpack2x16float.bin": "9251ad61",
+  "webgpu/shader/execution/unpack2x16snorm.bin": "6133b78b",
+  "webgpu/shader/execution/unpack2x16unorm.bin": "291b47bd",
+  "webgpu/shader/execution/unpack4x8snorm.bin": "93230ee1",
+  "webgpu/shader/execution/unpack4x8unorm.bin": "99fd9a23",
+  "webgpu/shader/execution/unary/af_arithmetic.bin": "dc1de35b",
+  "webgpu/shader/execution/unary/af_assignment.bin": "6a907068",
+  "webgpu/shader/execution/unary/bool_conversion.bin": "4fb09ad6",
+  "webgpu/shader/execution/unary/f16_arithmetic.bin": "5443808d",
+  "webgpu/shader/execution/unary/f16_conversion.bin": "e6f6743",
+  "webgpu/shader/execution/unary/f32_arithmetic.bin": "980abd9d",
+  "webgpu/shader/execution/unary/f32_conversion.bin": "c666a6e8",
+  "webgpu/shader/execution/unary/i32_arithmetic.bin": "4c1bf2ef",
+  "webgpu/shader/execution/unary/i32_conversion.bin": "9d2e1411",
+  "webgpu/shader/execution/unary/u32_conversion.bin": "962b68ac",
+  "webgpu/shader/execution/unary/ai_assignment.bin": "d34f3811",
+  "webgpu/shader/execution/binary/ai_arithmetic.bin": "b4811a5c",
+  "webgpu/shader/execution/unary/ai_arithmetic.bin": "d203a070",
+  "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "1405c422",
+  "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "c24e7f75",
+  "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "e36fcfd",
+  "webgpu/shader/execution/derivatives.bin": "e8c5ea73",
+  "webgpu/shader/execution/fwidth.bin": "cb050a6f"
 }
\ No newline at end of file

From 40867c3c83526e442350dfca7bbb8ee332a3431e Mon Sep 17 00:00:00 2001
From: Greggman <github@greggman.com>
Date: Wed, 8 Jan 2025 04:41:11 +0900
Subject: [PATCH 2/7] Compat: refactor storage_texture tests (#4099)

1. Skip them if there are no storage textures

2. Refactor them so they don't need storage buffers to test.

Also did some minor JavaScript styling. I hope that's okay.
While the CTS doesn't have a style guide, JS is generally
methods, properties, and variable names are camelCase and classes are
CamelCase.
---
 .../storage_texture/read_only.spec.ts         | 276 +++++++++---------
 .../storage_texture/read_write.spec.ts        |  34 ++-
 2 files changed, 157 insertions(+), 153 deletions(-)

diff --git a/src/webgpu/api/operation/storage_texture/read_only.spec.ts b/src/webgpu/api/operation/storage_texture/read_only.spec.ts
index b5224eec2b9d..769036751258 100644
--- a/src/webgpu/api/operation/storage_texture/read_only.spec.ts
+++ b/src/webgpu/api/operation/storage_texture/read_only.spec.ts
@@ -15,7 +15,7 @@ import {
   kColorTextureFormats,
   kTextureFormatInfo,
 } from '../../../format_info.js';
-import { GPUTest } from '../../../gpu_test.js';
+import { GPUTest, MaxLimitsTestMixin } from '../../../gpu_test.js';
 import { kValidShaderStages, TValidShaderStage } from '../../../util/shader.js';
 
 function ComponentCount(format: ColorTextureFormat): number {
@@ -47,7 +47,7 @@ function ComponentCount(format: ColorTextureFormat): number {
 }
 
 class F extends GPUTest {
-  InitTextureAndGetExpectedOutputBufferData(
+  initTextureAndGetExpectedOutputBufferData(
     storageTexture: GPUTexture,
     format: ColorTextureFormat
   ): ArrayBuffer {
@@ -59,10 +59,10 @@ class F extends GPUTest {
     const depthOrArrayLayers = storageTexture.depthOrArrayLayers;
 
     const texelData = new ArrayBuffer(bytesPerBlock * width * height * depthOrArrayLayers);
-    const texelTypedDataView = this.GetTypedArrayBufferViewForTexelData(texelData, format);
+    const texelTypedDataView = this.getTypedArrayBufferViewForTexelData(texelData, format);
     const componentCount = ComponentCount(format);
     const outputBufferData = new ArrayBuffer(4 * 4 * width * height * depthOrArrayLayers);
-    const outputBufferTypedData = this.GetTypedArrayBufferForOutputBufferData(
+    const outputBufferTypedData = this.getTypedArrayBufferForOutputBufferData(
       outputBufferData,
       format
     );
@@ -174,7 +174,7 @@ class F extends GPUTest {
     return outputBufferData;
   }
 
-  GetTypedArrayBufferForOutputBufferData(arrayBuffer: ArrayBuffer, format: ColorTextureFormat) {
+  getTypedArrayBufferForOutputBufferData(arrayBuffer: ArrayBuffer, format: ColorTextureFormat) {
     switch (kTextureFormatInfo[format].color.type) {
       case 'uint':
         return new Uint32Array(arrayBuffer);
@@ -186,7 +186,7 @@ class F extends GPUTest {
     }
   }
 
-  GetTypedArrayBufferViewForTexelData(arrayBuffer: ArrayBuffer, format: ColorTextureFormat) {
+  getTypedArrayBufferViewForTexelData(arrayBuffer: ArrayBuffer, format: ColorTextureFormat) {
     switch (format) {
       case 'r32uint':
       case 'rg32uint':
@@ -219,7 +219,7 @@ class F extends GPUTest {
     }
   }
 
-  GetOutputBufferWGSLType(format: ColorTextureFormat) {
+  getOutputBufferWGSLType(format: ColorTextureFormat) {
     switch (kTextureFormatInfo[format].color.type) {
       case 'uint':
         return 'vec4u';
@@ -234,7 +234,7 @@ class F extends GPUTest {
     }
   }
 
-  DoTransform(
+  doTransform(
     storageTexture: GPUTexture,
     shaderStage: TValidShaderStage,
     format: ColorTextureFormat,
@@ -256,23 +256,22 @@ class F extends GPUTest {
     const textureDeclaration = `
     @group(0) @binding(0) var readOnlyTexture: ${declaration}<${format}, read>;
     `;
-    const bindingResourceDeclaration = `
-    ${textureDeclaration}
-    @group(0) @binding(1)
-    var<storage,read_write> outputBuffer : array<${this.GetOutputBufferWGSLType(format)}>;
-    `;
 
     const bindGroupEntries = [
       {
         binding: 0,
         resource: storageTexture.createView(),
       },
-      {
-        binding: 1,
-        resource: {
-          buffer: outputBuffer,
-        },
-      },
+      ...(shaderStage === 'compute'
+        ? [
+            {
+              binding: 1,
+              resource: {
+                buffer: outputBuffer,
+              },
+            },
+          ]
+        : []),
     ];
 
     const commandEncoder = this.device.createCommandEncoder();
@@ -296,7 +295,10 @@ class F extends GPUTest {
         }
 
         const computeShader = `
-      ${bindingResourceDeclaration}
+      ${textureDeclaration}
+      @group(0) @binding(1)
+      var<storage,read_write> outputBuffer : array<${this.getOutputBufferWGSLType(format)}>;
+
       @compute
       @workgroup_size(
         ${storageTexture.width}, ${storageTexture.height}, ${storageTexture.depthOrArrayLayers})
@@ -334,61 +336,42 @@ class F extends GPUTest {
             break;
           case '2d':
             textureLoadCoord =
-              storageTexture.depthOrArrayLayers > 1 ? 'textureCoord, z' : 'textureCoord';
+              storageTexture.depthOrArrayLayers > 1 ? 'textureCoord, coordZ' : 'textureCoord';
             break;
           case '3d':
-            textureLoadCoord = 'vec3u(textureCoord, z)';
+            textureLoadCoord = 'vec3u(textureCoord, coordZ)';
             break;
         }
 
-        const fragmentShader = `
-        ${bindingResourceDeclaration}
+        const shader = `
+        ${textureDeclaration}
         @fragment
-        fn main(@builtin(position) fragCoord: vec4f) -> @location(0) vec4f {
-          let textureCoord = vec2u(fragCoord.xy);
-          let storageTextureTexelCountPerImage = ${storageTexture.width * storageTexture.height}u;
-          for (var z = 0u; z < ${storageTexture.depthOrArrayLayers}; z++) {
-            let initialValue = textureLoad(readOnlyTexture, ${textureLoadCoord});
-            let outputIndex =
-              storageTextureTexelCountPerImage * z + textureCoord.y * ${storageTexture.width} +
-              textureCoord.x;
-            outputBuffer[outputIndex] = initialValue;
-          }
-          return vec4f(0.0, 1.0, 0.0, 1.0);
-        }`;
-        const vertexShader = `
-            @vertex
-            fn main(@builtin(vertex_index) vertexIndex : u32) -> @builtin(position) vec4f {
-                var pos = array(
-                  vec2f(-1.0, -1.0),
-                  vec2f(-1.0,  1.0),
-                  vec2f( 1.0, -1.0),
-                  vec2f(-1.0,  1.0),
-                  vec2f( 1.0, -1.0),
-                  vec2f( 1.0,  1.0));
-                return vec4f(pos[vertexIndex], 0.0, 1.0);
-            }
-          `;
+        fn fs(@builtin(position) fragCoord: vec4f) -> @location(0) vec4u {
+          let coordX = u32(fragCoord.x);
+          let coordY = u32(fragCoord.y) % ${storageTexture.height}u;
+          let coordZ = u32(fragCoord.y) / ${storageTexture.height}u;
+          let textureCoord = vec2u(coordX, coordY);
+          return bitcast<vec4u>(textureLoad(readOnlyTexture, ${textureLoadCoord}));
+        }
+
+        @vertex
+        fn vs(@builtin(vertex_index) vertexIndex : u32) -> @builtin(position) vec4f {
+            var pos = array(
+              vec2f(-1.0,  3.0),
+              vec2f( 3.0, -1.0),
+              vec2f(-1.0, -1.0));
+            return vec4f(pos[vertexIndex], 0.0, 1.0);
+        }
+        `;
+
+        const module = this.device.createShaderModule({
+          code: shader,
+        });
         const renderPipeline = this.device.createRenderPipeline({
           layout: 'auto',
-          vertex: {
-            module: this.device.createShaderModule({
-              code: vertexShader,
-            }),
-          },
-          fragment: {
-            module: this.device.createShaderModule({
-              code: fragmentShader,
-            }),
-            targets: [
-              {
-                format: 'rgba8unorm',
-              },
-            ],
-          },
-          primitive: {
-            topology: 'triangle-list',
-          },
+          vertex: { module },
+          fragment: { module, targets: [{ format: 'rgba32uint' }] },
+          primitive: { topology: 'triangle-list' },
         });
 
         const bindGroup = this.device.createBindGroup({
@@ -396,10 +379,14 @@ class F extends GPUTest {
           entries: bindGroupEntries,
         });
 
+        // This is just so our buffer compare is the same as the compute stage.
+        // Otherwise, we'd have to pad every row to a multiple of 256 bytes and
+        // change the comparison code to take that into account.
+        assert(storageTexture.width === 16, `width must be 16 because we require 256 bytesPerRow`);
         const placeholderColorTexture = this.createTextureTracked({
-          size: [storageTexture.width, storageTexture.height, 1],
-          usage: GPUTextureUsage.RENDER_ATTACHMENT,
-          format: 'rgba8unorm',
+          size: [storageTexture.width, storageTexture.height * storageTexture.depthOrArrayLayers],
+          usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+          format: 'rgba32uint',
         });
 
         const renderPassEncoder = commandEncoder.beginRenderPass({
@@ -407,54 +394,50 @@ class F extends GPUTest {
             {
               view: placeholderColorTexture.createView(),
               loadOp: 'clear',
-              clearValue: { r: 0, g: 0, b: 0, a: 0 },
               storeOp: 'store',
             },
           ],
         });
         renderPassEncoder.setPipeline(renderPipeline);
         renderPassEncoder.setBindGroup(0, bindGroup);
-        renderPassEncoder.draw(6);
+        renderPassEncoder.draw(3);
         renderPassEncoder.end();
+
+        commandEncoder.copyTextureToBuffer(
+          { texture: placeholderColorTexture },
+          {
+            buffer: outputBuffer,
+            bytesPerRow: 256,
+          },
+          placeholderColorTexture
+        );
         break;
       }
       case 'vertex': {
-        // For each texel location (coordX, coordY), draw one point at (coordX + 0.5, coordY + 0.5)
-        // in the storageTexture.width * storageTexture.height grid, and save all the texel values
-        // at (coordX, coordY, z) (z >= 0 && z < storageTexture.depthOrArrayLayers) into the
-        // corresponding vertex shader outputs.
-        let vertexOutputs = '';
-        for (let layer = 0; layer < storageTexture.depthOrArrayLayers; ++layer) {
-          vertexOutputs = vertexOutputs.concat(
-            `
-            @location(${layer + 1}) @interpolate(flat, either)
-            vertex_out${layer}: ${this.GetOutputBufferWGSLType(format)},`
-          );
-        }
-
+        // We draw storageTexture.Width by (storageTexture.height * storageTexture.depthOrArrayLayers)
+        // points via 'point-list' to a placeholderColorTexture of the same size.
+        //
+        // We use the @builtin(vertex_index) to compute a coord in the source texture
+        // and use that same coord to compute a place to render in the point in the placeholder.
         let loadFromTextureWGSL = '';
         switch (storageTexture.dimension) {
           case '1d':
             loadFromTextureWGSL = `
-              output.vertex_out0 = textureLoad(readOnlyTexture, coordX);`;
+              output.vertex_out = textureLoad(readOnlyTexture, coordX);`;
             break;
           case '2d':
             if (storageTexture.depthOrArrayLayers === 1) {
               loadFromTextureWGSL = `
-                output.vertex_out0 = textureLoad(readOnlyTexture, vec2u(coordX, coordY));`;
+                output.vertex_out = textureLoad(readOnlyTexture, vec2u(coordX, coordY));`;
             } else {
-              for (let z = 0; z < storageTexture.depthOrArrayLayers; ++z) {
-                loadFromTextureWGSL = loadFromTextureWGSL.concat(`
-                  output.vertex_out${z} =
-                    textureLoad(readOnlyTexture, vec2u(coordX, coordY), ${z});`);
-              }
+              loadFromTextureWGSL = loadFromTextureWGSL.concat(`
+                output.vertex_out =
+                  textureLoad(readOnlyTexture, vec2u(coordX, coordY), coordZ);`);
             }
             break;
           case '3d':
-            for (let z = 0; z < storageTexture.depthOrArrayLayers; ++z) {
-              loadFromTextureWGSL = loadFromTextureWGSL.concat(`
-                output.vertex_out${z} = textureLoad(readOnlyTexture, vec3u(coordX, coordY, ${z}));`);
-            }
+            loadFromTextureWGSL = loadFromTextureWGSL.concat(`
+              output.vertex_out = textureLoad(readOnlyTexture, vec3u(coordX, coordY, coordZ));`);
             break;
         }
 
@@ -470,57 +453,39 @@ class F extends GPUTest {
         }
 
         const shader = `
-        ${bindingResourceDeclaration}
+        ${textureDeclaration}
         struct VertexOutput {
           @builtin(position) my_pos: vec4f,
-          @location(0) @interpolate(flat, either) tex_coord: vec2u,
-          ${vertexOutputs}
+          @location(0) @interpolate(flat, either)
+            vertex_out: ${this.getOutputBufferWGSLType(format)},
         }
         @vertex
         fn vs_main(@builtin(vertex_index) vertexIndex : u32) -> VertexOutput {
             var output : VertexOutput;
             let coordX = vertexIndex % ${storageTexture.width}u;
-            let coordY = vertexIndex / ${storageTexture.width}u;
-            // Each vertex in the mesh take an even step along X axis from -1.0 to 1.0.
-            let posXStep = f32(${2.0 / storageTexture.width});
-            // As well as along Y axis.
-            let posYStep = f32(${2.0 / storageTexture.height});
-            // And the vertex located in the middle of the step, i.e. with a bias of 0.5 step.
-            let outputPosX = -1.0 + posXStep * 0.5 + posXStep * f32(coordX);
-            let outputPosY = -1.0 + posYStep * 0.5 + posYStep * f32(coordY);
-            output.my_pos = vec4f(outputPosX, outputPosY, 0.0, 1.0);
-            output.tex_coord = vec2u(coordX, coordY);
+            let coordY = vertexIndex / ${storageTexture.width}u % ${storageTexture.height}u;
+            let coordZ = vertexIndex / ${storageTexture.width * storageTexture.height}u;
+            let writePos = vec2f(f32(coordX), f32(coordY + coordZ * ${storageTexture.height}));
+            let destSize = vec2f(
+              ${storageTexture.width},
+              ${storageTexture.height * storageTexture.depthOrArrayLayers});
+            output.my_pos = vec4f((((writePos + 0.5) / destSize) * 2.0 - 1.0) * vec2f(1, -1), 0.0, 1.0);
             ${loadFromTextureWGSL}
             return output;
         }
         @fragment
-        fn fs_main(fragmentInput : VertexOutput) -> @location(0) vec4f {
-          let storageTextureTexelCountPerImage = ${storageTexture.width * storageTexture.height}u;
-          ${outputToBufferWGSL}
-          return vec4f(0.0, 1.0, 0.0, 1.0);
+        fn fs_main(fragmentInput : VertexOutput) -> @location(0) vec4u {
+          let v = fragmentInput.vertex_out;
+          return bitcast<vec4u>(v);
         }
         `;
 
+        const module = this.device.createShaderModule({ code: shader });
         const renderPipeline = this.device.createRenderPipeline({
           layout: 'auto',
-          vertex: {
-            module: this.device.createShaderModule({
-              code: shader,
-            }),
-          },
-          fragment: {
-            module: this.device.createShaderModule({
-              code: shader,
-            }),
-            targets: [
-              {
-                format: 'rgba8unorm',
-              },
-            ],
-          },
-          primitive: {
-            topology: 'point-list',
-          },
+          vertex: { module },
+          fragment: { module, targets: [{ format: 'rgba32uint' }] },
+          primitive: { topology: 'point-list' },
         });
 
         const bindGroup = this.device.createBindGroup({
@@ -529,9 +494,9 @@ class F extends GPUTest {
         });
 
         const placeholderColorTexture = this.createTextureTracked({
-          size: [storageTexture.width, storageTexture.height, 1],
-          usage: GPUTextureUsage.RENDER_ATTACHMENT,
-          format: 'rgba8unorm',
+          size: [storageTexture.width, storageTexture.height * storageTexture.depthOrArrayLayers],
+          usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+          format: 'rgba32uint',
         });
 
         const renderPassEncoder = commandEncoder.beginRenderPass({
@@ -546,8 +511,19 @@ class F extends GPUTest {
         });
         renderPassEncoder.setPipeline(renderPipeline);
         renderPassEncoder.setBindGroup(0, bindGroup);
-        renderPassEncoder.draw(storageTexture.width * storageTexture.height);
+        const texelCount =
+          storageTexture.width * storageTexture.height * storageTexture.depthOrArrayLayers;
+        renderPassEncoder.draw(texelCount);
         renderPassEncoder.end();
+
+        commandEncoder.copyTextureToBuffer(
+          { texture: placeholderColorTexture },
+          {
+            buffer: outputBuffer,
+            bytesPerRow: 256,
+          },
+          placeholderColorTexture
+        );
         break;
       }
     }
@@ -556,7 +532,7 @@ class F extends GPUTest {
   }
 }
 
-export const g = makeTestGroup(F);
+export const g = makeTestGroup(MaxLimitsTestMixin(F));
 
 g.test('basic')
   .desc(
@@ -586,7 +562,23 @@ g.test('basic')
   .fn(t => {
     const { format, shaderStage, dimension, depthOrArrayLayers } = t.params;
 
-    const kWidth = 8;
+    if (t.isCompatibility) {
+      if (shaderStage === 'fragment') {
+        t.skipIf(
+          !(t.device.limits.maxStorageTexturesInFragmentStage! > 0),
+          `maxStorageTexturesInFragmentStage(${t.device.limits
+            .maxStorageTexturesInFragmentStage!}) is not > 0`
+        );
+      } else if (shaderStage === 'vertex') {
+        t.skipIf(
+          !(t.device.limits.maxStorageTexturesInVertexStage! > 0),
+          `maxStorageTexturesInVertexStage(${t.device.limits
+            .maxStorageTexturesInVertexStage!}) is not > 0`
+        );
+      }
+    }
+
+    const kWidth = 16;
     const height = dimension === '1d' ? 1 : 8;
     const storageTexture = t.createTextureTracked({
       format,
@@ -595,14 +587,16 @@ g.test('basic')
       usage: GPUTextureUsage.COPY_SRC | GPUTextureUsage.COPY_DST | GPUTextureUsage.STORAGE_BINDING,
     });
 
-    const expectedData = t.InitTextureAndGetExpectedOutputBufferData(storageTexture, format);
+    const expectedData = t.initTextureAndGetExpectedOutputBufferData(storageTexture, format);
 
+    const bytesPerRow = 4 * 4 * kWidth;
+    assert(bytesPerRow === 256, 'bytesPerRow === 256');
     const outputBuffer = t.createBufferTracked({
-      size: 4 * 4 * kWidth * height * depthOrArrayLayers,
-      usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE,
+      size: bytesPerRow * height * depthOrArrayLayers,
+      usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
     });
 
-    t.DoTransform(storageTexture, shaderStage, format, outputBuffer);
+    t.doTransform(storageTexture, shaderStage, format, outputBuffer);
 
     switch (kTextureFormatInfo[format].color.type) {
       case 'uint':
diff --git a/src/webgpu/api/operation/storage_texture/read_write.spec.ts b/src/webgpu/api/operation/storage_texture/read_write.spec.ts
index 03f613284238..ad70d68352c4 100644
--- a/src/webgpu/api/operation/storage_texture/read_write.spec.ts
+++ b/src/webgpu/api/operation/storage_texture/read_write.spec.ts
@@ -9,7 +9,7 @@ import { makeTestGroup } from '../../../../common/framework/test_group.js';
 import { assert, unreachable } from '../../../../common/util/util.js';
 import { kTextureDimensions } from '../../../capability_info.js';
 import { kColorTextureFormats, kTextureFormatInfo } from '../../../format_info.js';
-import { GPUTest } from '../../../gpu_test.js';
+import { GPUTest, MaxLimitsTestMixin } from '../../../gpu_test.js';
 import { align } from '../../../util/math.js';
 
 const kShaderStagesForReadWriteStorageTexture = ['fragment', 'compute'] as const;
@@ -17,7 +17,7 @@ type ShaderStageForReadWriteStorageTexture =
   (typeof kShaderStagesForReadWriteStorageTexture)[number];
 
 class F extends GPUTest {
-  GetInitialData(storageTexture: GPUTexture): ArrayBuffer {
+  getInitialData(storageTexture: GPUTexture): ArrayBuffer {
     const format = storageTexture.format;
     const bytesPerBlock = kTextureFormatInfo[format].bytesPerBlock;
     assert(bytesPerBlock !== undefined);
@@ -26,7 +26,7 @@ class F extends GPUTest {
     const height = storageTexture.height;
     const depthOrArrayLayers = storageTexture.depthOrArrayLayers;
     const initialData = new ArrayBuffer(bytesPerBlock * width * height * depthOrArrayLayers);
-    const initialTypedData = this.GetTypedArrayBuffer(initialData, format);
+    const initialTypedData = this.getTypedArrayBuffer(initialData, format);
     for (let z = 0; z < depthOrArrayLayers; ++z) {
       for (let y = 0; y < height; ++y) {
         for (let x = 0; x < width; ++x) {
@@ -48,7 +48,7 @@ class F extends GPUTest {
     return initialData;
   }
 
-  GetTypedArrayBuffer(arrayBuffer: ArrayBuffer, format: GPUTextureFormat) {
+  getTypedArrayBuffer(arrayBuffer: ArrayBuffer, format: GPUTextureFormat) {
     switch (format) {
       case 'r32sint':
         return new Int32Array(arrayBuffer);
@@ -62,7 +62,7 @@ class F extends GPUTest {
     }
   }
 
-  GetExpectedData(
+  getExpectedData(
     shaderStage: ShaderStageForReadWriteStorageTexture,
     storageTexture: GPUTexture,
     initialData: ArrayBuffer
@@ -80,8 +80,8 @@ class F extends GPUTest {
     const expectedData = new ArrayBuffer(
       bytesPerRowAlignment * (height * depthOrArrayLayers - 1) + bytesPerBlock * width
     );
-    const expectedTypedData = this.GetTypedArrayBuffer(expectedData, format);
-    const initialTypedData = this.GetTypedArrayBuffer(initialData, format);
+    const expectedTypedData = this.getTypedArrayBuffer(expectedData, format);
+    const initialTypedData = this.getTypedArrayBuffer(initialData, format);
     for (let z = 0; z < depthOrArrayLayers; ++z) {
       for (let y = 0; y < height; ++y) {
         for (let x = 0; x < width; ++x) {
@@ -110,7 +110,7 @@ class F extends GPUTest {
     return expectedData;
   }
 
-  RecordCommandsToTransform(
+  recordCommandsToTransform(
     device: GPUDevice,
     shaderStage: ShaderStageForReadWriteStorageTexture,
     commandEncoder: GPUCommandEncoder,
@@ -298,7 +298,7 @@ class F extends GPUTest {
   }
 }
 
-export const g = makeTestGroup(F);
+export const g = makeTestGroup(MaxLimitsTestMixin(F));
 
 g.test('basic')
   .desc(
@@ -321,6 +321,16 @@ g.test('basic')
   .fn(t => {
     const { format, shaderStage, textureDimension, depthOrArrayLayers } = t.params;
 
+    if (t.isCompatibility) {
+      if (shaderStage === 'fragment') {
+        t.skipIf(
+          !(t.device.limits.maxStorageTexturesInFragmentStage! > 0),
+          `maxStorageTexturesInFragmentStage(${t.device.limits
+            .maxStorageTexturesInFragmentStage!}) is not > 0`
+        );
+      }
+    }
+
     // In compatibility mode the lowest maxComputeInvocationsPerWorkgroup is 128 vs non-compat which is 256
     // So in non-compat we get 16 * 8 * 2, vs compat where we get 8 * 8 * 2
     const kWidth = t.isCompatibility ? 8 : 16;
@@ -334,7 +344,7 @@ g.test('basic')
     });
 
     const bytesPerBlock = kTextureFormatInfo[format].bytesPerBlock;
-    const initialData = t.GetInitialData(storageTexture);
+    const initialData = t.getInitialData(storageTexture);
     t.queue.writeTexture(
       { texture: storageTexture },
       initialData,
@@ -347,9 +357,9 @@ g.test('basic')
 
     const commandEncoder = t.device.createCommandEncoder();
 
-    t.RecordCommandsToTransform(t.device, shaderStage, commandEncoder, storageTexture);
+    t.recordCommandsToTransform(t.device, shaderStage, commandEncoder, storageTexture);
 
-    const expectedData = t.GetExpectedData(shaderStage, storageTexture, initialData);
+    const expectedData = t.getExpectedData(shaderStage, storageTexture, initialData);
     const readbackBuffer = t.createBufferTracked({
       size: expectedData.byteLength,
       usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,

From 2a7c7f97218183cc23231f587c9177db08c60611 Mon Sep 17 00:00:00 2001
From: Kai Ninomiya <kainino@chromium.org>
Date: Wed, 8 Jan 2025 15:57:48 -0800
Subject: [PATCH 3/7] Update code that uses adapter.isCompatibilityMode (#4131)

* Update tests using .isCompatibilityMode to understand .featureLevel

And update all comments about removing `compatibilityMode: boolean`.

* address comments
---
 src/common/runtime/cmdline.ts                 |  2 +-
 src/common/runtime/helper/utils_worker.ts     |  2 +-
 src/common/runtime/server.ts                  |  2 +-
 src/common/runtime/standalone.ts              |  2 +-
 .../operation/adapter/requestDevice.spec.ts   | 35 +++++++++++++------
 src/webgpu/capability_info.ts                 | 15 +++++---
 6 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/common/runtime/cmdline.ts b/src/common/runtime/cmdline.ts
index 635c30eb1410..c071cd54aa88 100644
--- a/src/common/runtime/cmdline.ts
+++ b/src/common/runtime/cmdline.ts
@@ -123,7 +123,7 @@ for (let i = 0; i < sys.args.length; ++i) {
 let codeCoverage: CodeCoverageProvider | undefined = undefined;
 
 if (globalTestConfig.compatibility || globalTestConfig.forceFallbackAdapter) {
-  // MAINTENANCE_TODO: remove the cast once compatibilityMode is officially added
+  // MAINTENANCE_TODO: remove compatibilityMode (and the typecast) once no longer needed.
   setDefaultRequestAdapterOptions({
     compatibilityMode: globalTestConfig.compatibility,
     featureLevel: globalTestConfig.compatibility ? 'compatibility' : 'core',
diff --git a/src/common/runtime/helper/utils_worker.ts b/src/common/runtime/helper/utils_worker.ts
index 7054be317c13..4210db20f08e 100644
--- a/src/common/runtime/helper/utils_worker.ts
+++ b/src/common/runtime/helper/utils_worker.ts
@@ -27,7 +27,7 @@ export function setupWorkerEnvironment(ctsOptions: CTSOptions): Logger {
   if (powerPreference || compatibility) {
     setDefaultRequestAdapterOptions({
       ...(powerPreference && { powerPreference }),
-      // MAINTENANCE_TODO: Change this to whatever the option ends up being
+      // MAINTENANCE_TODO: remove compatibilityMode once no longer needed.
       ...(compatibility && { compatibilityMode: true, featureLevel: 'compatibility' }),
     });
   }
diff --git a/src/common/runtime/server.ts b/src/common/runtime/server.ts
index bb68b0413353..783d7244d4f6 100644
--- a/src/common/runtime/server.ts
+++ b/src/common/runtime/server.ts
@@ -122,7 +122,7 @@ for (let i = 0; i < sys.args.length; ++i) {
 let codeCoverage: CodeCoverageProvider | undefined = undefined;
 
 if (globalTestConfig.compatibility || globalTestConfig.forceFallbackAdapter) {
-  // MAINTENANCE_TODO: remove the cast once compatibilityMode is officially added
+  // MAINTENANCE_TODO: remove compatibilityMode (and the typecast) once no longer needed.
   setDefaultRequestAdapterOptions({
     compatibilityMode: globalTestConfig.compatibility,
     featureLevel: globalTestConfig.compatibility ? 'compatibility' : 'core',
diff --git a/src/common/runtime/standalone.ts b/src/common/runtime/standalone.ts
index d5b51b11c6f5..3f7333c1eb35 100644
--- a/src/common/runtime/standalone.ts
+++ b/src/common/runtime/standalone.ts
@@ -85,7 +85,7 @@ stopButtonElem.addEventListener('click', () => {
 if (powerPreference || compatibility || forceFallbackAdapter) {
   setDefaultRequestAdapterOptions({
     ...(powerPreference && { powerPreference }),
-    // MAINTENANCE_TODO: Change this to whatever the option ends up being
+    // MAINTENANCE_TODO: remove compatibilityMode once no longer needed.
     ...(compatibility && { compatibilityMode: true, featureLevel: 'compatibility' }),
     ...(forceFallbackAdapter && { forceFallbackAdapter: true }),
   });
diff --git a/src/webgpu/api/operation/adapter/requestDevice.spec.ts b/src/webgpu/api/operation/adapter/requestDevice.spec.ts
index 42701660386c..421fc991f402 100644
--- a/src/webgpu/api/operation/adapter/requestDevice.spec.ts
+++ b/src/webgpu/api/operation/adapter/requestDevice.spec.ts
@@ -491,7 +491,7 @@ g.test('always_returns_device')
 
     Note: This is a regression test for a Chrome bug crbug.com/349062459
     Checking that a requestDevice always return a device is checked in other tests above
-    but those tests have 'compatibilityMode: true' set for them by the API that getGPU
+    but those tests have 'featureLevel: "compatibility"' set for them by the API that getGPU
     returns when the test suite is run in compatibility mode.
 
     This test tries to force both compat and core separately so both code paths are
@@ -502,19 +502,34 @@ g.test('always_returns_device')
   .fn(async t => {
     const { compatibilityMode } = t.params;
     const gpu = getGPU(t.rec);
-    // MAINTENANCE_TODO: Remove this cast compatibilityMode is added.
-    const adapter = await gpu.requestAdapter({ compatibilityMode } as GPURequestAdapterOptions);
+    // MAINTENANCE_TODO: Remove compatibilityMode and the cast once compatibilityMode is no longer
+    // used (mainly in `setDefaultRequestAdapterOptions`).
+    const adapter = await gpu.requestAdapter({
+      compatibilityMode,
+      featureLevel: compatibilityMode ? 'compatibility' : 'core',
+    } as GPURequestAdapterOptions);
     if (adapter) {
+      const device = await t.requestDeviceTracked(adapter);
+      assert(device instanceof GPUDevice, 'requestDevice must return a device or throw');
+
       if (!compatibilityMode) {
-        // This check is to make sure something lower-level is not forcing compatibility mode
-        // MAINTENANCE_TODO: Remove this cast compatibilityMode is added.
+        // This check is to make sure something lower-level is not forcing compatibility mode.
+
+        // MAINTENANCE_TODO: Simplify this check (and typecast) once we standardize how to do this.
+        const adapterExtensions = adapter as unknown as {
+          isCompatibilityMode?: boolean;
+          featureLevel?: string;
+        };
         t.expect(
-          !(adapter as unknown as { isCompatibilityMode?: boolean }).isCompatibilityMode,
-          'must not be compatibility mode'
+          // Old version of Compat design.
+          !adapterExtensions.isCompatibilityMode &&
+            // Current version of Compat design, as of this writing.
+            adapterExtensions.featureLevel !== 'compatibility' &&
+            // An as-yet-unlanded proposed change to the Compat design, but for now it doesn't hurt
+            // to just check. Unlanded PR: https://github.com/gpuweb/gpuweb/pull/5036
+            !device.features.has('webgpu-core'),
+          'must not get a Compatibility adapter if not requested'
         );
       }
-      const device = await t.requestDeviceTracked(adapter);
-      t.expect(device instanceof GPUDevice, 'requestDevice must return a device or throw');
-      device.destroy();
     }
   });
diff --git a/src/webgpu/capability_info.ts b/src/webgpu/capability_info.ts
index 661812cd4beb..4da99bad66e8 100644
--- a/src/webgpu/capability_info.ts
+++ b/src/webgpu/capability_info.ts
@@ -801,12 +801,17 @@ export function getDefaultLimits(featureLevel: FeatureLevel) {
 }
 
 export function getDefaultLimitsForAdapter(adapter: GPUAdapter) {
-  // MAINTENANCE_TODO: Remove casts when GPUAdapter IDL has isCompatibilityMode.
-  return getDefaultLimits(
-    (adapter as unknown as { isCompatibilityMode: boolean }).isCompatibilityMode
+  // MAINTENANCE_TODO: Remove casts once we have a standardized way to do this
+  // (see https://github.com/gpuweb/gpuweb/pull/5037#issuecomment-2576110161).
+  const adapterExtensions = adapter as unknown as {
+    isCompatibilityMode?: boolean;
+    featureLevel?: string;
+  };
+  const featureLevel =
+    adapterExtensions.featureLevel === 'compatibility' || adapterExtensions.isCompatibilityMode
       ? 'compatibility'
-      : 'core'
-  );
+      : 'core';
+  return getDefaultLimits(featureLevel);
 }
 
 const kEachStage = [

From 077ffee360f156d7638bce8b3b762c196f4a0793 Mon Sep 17 00:00:00 2001
From: Greggman <github@greggman.com>
Date: Thu, 9 Jan 2025 11:24:13 +0900
Subject: [PATCH 4/7] Compat: Refactor fwidth/Fine/Coarse for 0 storage
 buffers. (#4128)

Modified so this test doesn't use storage buffers by having
it return values from a fragment shader as rgba32uint
---
 .../expression/call/builtin/derivatives.ts    |  11 +-
 .../expression/call/builtin/fwidth.ts         | 283 ++++++++++--------
 2 files changed, 161 insertions(+), 133 deletions(-)

diff --git a/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts b/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts
index b6c7d54669d3..40f0adf279f0 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts
@@ -33,12 +33,11 @@ export function runDerivativeTest(
   // We will populate a uniform buffer with these input values laid out sequentially:
   // [ case_0_input_1, case_0_input_0, case_1_input_1, case_1_input_0, ...]
   //
-  // The render pipeline will be launched once per pixel per pair of cases over
-  // a viewport size of (2, 2) with the viewport set to cover 1 pixel.
-  // Each 2x2 set of calls will will exercise two test cases. Each of these
-  // draw calls will use a different instance index, which is forwarded to the
-  // fragment shader. Each invocation returns the result which is stored in
-  // a rgba32uint texture.
+  // The render pipeline will be launched once per pair of cases over a viewport
+  // size of (2, 2). Each 2x2 set of calls will will exercise two test cases.
+  // Each of these draw calls will use a different instance index, which is
+  // forwarded to the fragment shader. Each invocation returns the result which
+  // is stored in a rgba32uint texture.
   //
   // Consider draw calls that test 4 cases (c_0, c_1, c_2, c_3).
   //
diff --git a/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts b/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts
index d87f6b06c2da..795df55ce97c 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts
@@ -1,7 +1,9 @@
+import { assert } from '../../../../../../common/util/util.js';
 import { GPUTest } from '../../../../../gpu_test.js';
 import { anyOf } from '../../../../../util/compare.js';
 import { Type, Value } from '../../../../../util/conversion.js';
 import { FPInterval } from '../../../../../util/floating_point.js';
+import { align } from '../../../../../util/math.js';
 import { Case } from '../../case.js';
 import { toComparator } from '../../expectation.js';
 
@@ -22,14 +24,11 @@ export function runFWidthTest(
 ) {
   ////////////////////////////////////////////////////////////////
   // The four input values for a given case are distributed to across the invocations in a quad.
-  // We will populate a storage buffer with these input values laid out sequentially:
+  // We will populate a uniform buffer with these input values laid out sequentially:
   // [ case0_input0, case0_input1, case0_input2, case0_input3, ...]
   //
-  // The render pipeline will be launched several times over a viewport size of (2, 2). Each draw
-  // call will execute a single quad (four fragment invocation), which will exercise one test case.
-  // Each of these draw calls will use a different instance index, which is forwarded to the
-  // fragment shader. Each invocation will determine its index into the storage buffer using its
-  // fragment position and the instance index for that draw call.
+  // The render pipeline a 512x2 texture. In the fragment shader, every 2x2 texels is one test case.
+  // The results are the output from the fragment shader.
   //
   // Consider two draw calls that test 2 cases (c0, c1).
   //
@@ -46,46 +45,56 @@ export function runFWidthTest(
   }
 
   // Determine the WGSL type to use in the shader, and the stride in bytes between values.
-  let valueStride = 4;
-  let wgslType = 'f32';
+  const valueStride = 16;
+  let conversionFromInput = 'input.x';
+  let conversionToOutput = `vec4f(v, 0, 0, 0)`;
   if (vectorize) {
-    wgslType = `vec${vectorize}f`;
-    valueStride = vectorize * 4;
-    if (vectorize === 3) {
-      valueStride = 16;
+    switch (vectorize) {
+      case 2:
+        conversionFromInput = 'input.xy';
+        conversionToOutput = 'vec4f(v, 0, 0)';
+        break;
+      case 3:
+        conversionFromInput = 'input.xyz';
+        conversionToOutput = 'vec4f(v, 0)';
+        break;
+      case 4:
+        conversionFromInput = 'input';
+        conversionToOutput = 'v';
+        break;
     }
   }
 
+  const kUniformBufferSize = 16384; // min supported by compat mode.
+  const kNumCasesPerUniformBuffer = kUniformBufferSize / 64;
+
   // Define a vertex shader that draws a triangle over the full viewport, and a fragment shader that
   // calls the fwidth builtin with a value loaded from that fragment's index into the storage
   // buffer (determined using the quad index and fragment position, as described above).
   const code = `
-struct CaseInfo {
-  @builtin(position) position: vec4f,
-  @location(0) @interpolate(flat, either) quad_idx: u32,
-}
-
 @vertex
-fn vert(@builtin(vertex_index) vertex_idx: u32,
-        @builtin(instance_index) instance_idx: u32) -> CaseInfo {
+fn vert(@builtin(vertex_index) vertex_idx: u32) -> @builtin(position) vec4f {
   const kVertices = array(
-    vec2f(-2, -2),
-    vec2f( 2, -2),
-    vec2f( 0,  2),
+    vec2f( 3, -1),
+    vec2f(-1,  3),
+    vec2f(-1, -1),
   );
-  return CaseInfo(vec4(kVertices[vertex_idx], 0, 1), instance_idx);
+  return vec4(kVertices[vertex_idx], 0, 1);
 }
 
-@group(0) @binding(0) var<storage, read> inputs : array<${wgslType}>;
-@group(0) @binding(1) var<storage, read_write> outputs : array<${wgslType}>;
+@group(0) @binding(0) var<uniform> inputs : array<vec4f, ${kNumCasesPerUniformBuffer * 4}>;
 
 @fragment
-fn frag(info : CaseInfo) {
-  let inv_idx = u32(info.position.x) + u32(info.position.y)*2;
-  let index = info.quad_idx*4 + inv_idx;
+fn frag(@builtin(position) position: vec4f) -> @location(0) vec4u {
+  let t = vec2u(position.xy);
+  let inv_idx = t.x % 2 + (t.y % 2) * 2;
+  let q = t / 2;
+  let quad_idx = q.y * 256 + q.x;
+  let index = quad_idx * 4 + inv_idx;
   let input = inputs[index];
   ${non_uniform_discard ? 'if inv_idx == 0 { discard; }' : ''}
-  outputs[index] = ${builtin}(input);
+  let v = ${builtin}(${conversionFromInput});
+  return bitcast<vec4u>(${conversionToOutput});
 }
 `;
 
@@ -94,116 +103,136 @@ fn frag(info : CaseInfo) {
   const pipeline = t.device.createRenderPipeline({
     layout: 'auto',
     vertex: { module },
-    fragment: { module, targets: [{ format: 'rgba8unorm', writeMask: 0 }] },
-  });
-
-  // Create storage buffers to hold the inputs and outputs.
-  const bufferSize = cases.length * 4 * valueStride;
-  const inputBuffer = t.createBufferTracked({
-    size: bufferSize,
-    usage: GPUBufferUsage.STORAGE,
-    mappedAtCreation: true,
-  });
-  const outputBuffer = t.createBufferTracked({
-    size: bufferSize,
-    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+    fragment: { module, targets: [{ format: 'rgba32uint' }] },
   });
 
-  // Populate the input storage buffer with case input values.
-  const valuesData = new Uint8Array(inputBuffer.getMappedRange());
-  for (let i = 0; i < cases.length / vectorWidth; i++) {
-    for (let v = 0; v < vectorWidth; v++) {
-      const index = i * vectorWidth + v;
-      if (index >= cases.length) {
-        break;
-      }
-      const inputs = cases[index].input as ReadonlyArray<Value>;
-      for (let x = 0; x < 4; x++) {
-        inputs[x].copyTo(valuesData, (i * 4 + x) * valueStride + v * 4);
-      }
-    }
-  }
-  inputBuffer.unmap();
-
-  // Create a bind group for the storage buffers.
-  const group = t.device.createBindGroup({
-    entries: [
-      { binding: 0, resource: { buffer: inputBuffer } },
-      { binding: 1, resource: { buffer: outputBuffer } },
-    ],
-    layout: pipeline.getBindGroupLayout(0),
-  });
-
-  // Create a texture to use as a color attachment.
-  // We only need this for launching the desired number of fragment invocations.
+  // Create a texture to use as a color attachment to receive the results;
+  const width = kNumCasesPerUniformBuffer * 2;
+  const height = 2;
+  // note: We could limit it to this size and increase height but kNumCasesPerUniformBuffer is limited to 256
+  // because we can't fit more into a single uniform buffer in compat.
+  assert(width < t.device.limits.maxTextureDimension2D);
   const colorAttachment = t.createTextureTracked({
-    size: { width: 2, height: 2 },
-    format: 'rgba8unorm',
-    usage: GPUTextureUsage.RENDER_ATTACHMENT,
+    size: [width, height],
+    format: 'rgba32uint',
+    usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
   });
+  const bytesPerRow = align(width * 16, 256);
 
-  // Submit the render pass to the device.
+  const results = [];
   const encoder = t.device.createCommandEncoder();
-  const pass = encoder.beginRenderPass({
-    colorAttachments: [
-      {
-        view: colorAttachment.createView(),
-        loadOp: 'clear',
-        storeOp: 'discard',
-      },
-    ],
-  });
-  pass.setPipeline(pipeline);
-  pass.setBindGroup(0, group);
-  for (let quad = 0; quad < cases.length / vectorWidth; quad++) {
-    pass.draw(3, 1, undefined, quad);
+  for (let c = 0; c < cases.length; c += kNumCasesPerUniformBuffer) {
+    // Create uniform buffer to hold the inputs.
+    const inputBuffer = t.createBufferTracked({
+      size: kUniformBufferSize,
+      usage: GPUBufferUsage.UNIFORM,
+      mappedAtCreation: true,
+    });
+    const valuesData = new Uint8Array(inputBuffer.getMappedRange());
+
+    // Populate the input uniform buffer with case input values.
+    for (let i = 0; i < kNumCasesPerUniformBuffer / vectorWidth; i++) {
+      for (let v = 0; v < vectorWidth; v++) {
+        const index = c + i * vectorWidth + v;
+        if (index >= cases.length) {
+          break;
+        }
+        const inputs = cases[index].input as ReadonlyArray<Value>;
+        for (let x = 0; x < 4; x++) {
+          inputs[x].copyTo(valuesData, (i * 4 + x) * valueStride + v * 4);
+        }
+      }
+    }
+    inputBuffer.unmap();
+
+    // Create a bind group for the input buffer.
+    const group = t.device.createBindGroup({
+      entries: [{ binding: 0, resource: { buffer: inputBuffer } }],
+      layout: pipeline.getBindGroupLayout(0),
+    });
+
+    // Submit the render pass to the device.
+    const pass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: colorAttachment.createView(),
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, group);
+    pass.draw(3);
+    pass.end();
+
+    // Create buffer to hold the outputs.
+    const outputBuffer = t.createBufferTracked({
+      size: bytesPerRow * height,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+    });
+    results.push(outputBuffer);
+
+    // Copy the texture to the output buffer
+    encoder.copyTextureToBuffer(
+      { texture: colorAttachment },
+      { buffer: outputBuffer, bytesPerRow },
+      [colorAttachment.width, colorAttachment.height]
+    );
   }
-  pass.end();
   t.queue.submit([encoder.finish()]);
 
-  // Check the outputs match the expected results.
-  t.expectGPUBufferValuesPassCheck(
-    outputBuffer,
-    (outputData: Uint8Array) => {
-      for (let i = 0; i < cases.length / vectorWidth; i++) {
-        for (let v = 0; v < vectorWidth; v++) {
-          const index = i * vectorWidth + v;
-          if (index >= cases.length) {
-            break;
-          }
-          const c = cases[index];
-
-          for (let x = 0; x < 4; x++) {
-            if (non_uniform_discard && x === 0) {
-              continue;
+  results.forEach((outputBuffer, groupNdx) => {
+    // Check the outputs match the expected results.
+    t.expectGPUBufferValuesPassCheck(
+      outputBuffer,
+      (outputData: Uint8Array) => {
+        const base = groupNdx * kNumCasesPerUniformBuffer;
+        const numCases = Math.min(kNumCasesPerUniformBuffer, cases.length - base);
+        const numQuads = numCases / vectorWidth;
+        for (let i = 0; i < numQuads; i++) {
+          for (let v = 0; v < vectorWidth; v++) {
+            const caseNdx = base + i * vectorWidth + v;
+            if (caseNdx >= cases.length) {
+              break;
             }
-
-            const index = (i * 4 + x) * valueStride + v * 4;
-            const result = Type.f32.read(outputData, index);
-
-            let expected = c.expected;
-            if (builtin.endsWith('Fine')) {
-              expected = toComparator((expected as FPInterval[])[x]);
-            } else {
-              expected = anyOf(...(expected as FPInterval[]));
-            }
-
-            const cmp = expected.compare(result);
-            if (!cmp.matched) {
-              return new Error(`
-    inputs: (${(c.input as Value[]).join(', ')})
-  expected: ${cmp.expected}
-
-  returned: ${result}`);
+            const c = cases[caseNdx];
+
+            for (let x = 0; x < 4; x++) {
+              if (non_uniform_discard && x === 0) {
+                continue;
+              }
+
+              const tx = x % 2;
+              const ty = (x / 2) | 0;
+              const index = ty * bytesPerRow + i * 32 + tx * 16 + v * 4;
+              const result = Type.f32.read(outputData, index);
+
+              let expected = c.expected;
+              if (builtin.endsWith('Fine')) {
+                expected = toComparator((expected as FPInterval[])[x]);
+              } else {
+                expected = anyOf(...(expected as FPInterval[]));
+              }
+
+              const cmp = expected.compare(result);
+              if (!cmp.matched) {
+                return new Error(`
+     caseNdx: ${caseNdx} v: ${v} x: ${x}
+      inputs: (${(c.input as Value[]).join(', ')})
+    expected: ${cmp.expected}
+
+    returned: ${result}`);
+              }
             }
           }
         }
+        return undefined;
+      },
+      {
+        type: Uint8Array,
+        typedLength: outputBuffer.size,
       }
-      return undefined;
-    },
-    {
-      type: Uint8Array,
-      typedLength: bufferSize,
-    }
-  );
+    );
+  });
 }

From b5532d4e983ce6b3a0baae9352457d14997b88f0 Mon Sep 17 00:00:00 2001
From: Jiawei Shao <jiawei.shao@intel.com>
Date: Thu, 9 Jan 2025 15:53:23 +0800
Subject: [PATCH 5/7] Add operation test on the creation of pipeline layout
 with null bind group layout (#4116)

* Test build checks

* Add tests on render and compute pipeline

* Small fix

* Also set bind group on null bind group layout

* Test empty bind group layouts

* Address reviewers' comments
---
 ...reated_with_null_bind_group_layout.spec.ts | 324 ++++++++++++++++++
 1 file changed, 324 insertions(+)
 create mode 100644 src/webgpu/api/operation/pipeline/pipeline_layout_created_with_null_bind_group_layout.spec.ts

diff --git a/src/webgpu/api/operation/pipeline/pipeline_layout_created_with_null_bind_group_layout.spec.ts b/src/webgpu/api/operation/pipeline/pipeline_layout_created_with_null_bind_group_layout.spec.ts
new file mode 100644
index 000000000000..10416e2b4d09
--- /dev/null
+++ b/src/webgpu/api/operation/pipeline/pipeline_layout_created_with_null_bind_group_layout.spec.ts
@@ -0,0 +1,324 @@
+export const description = `
+Tests for the creation of pipeline layouts with null bind group layouts.
+`;
+
+import { makeTestGroup } from '../../../../common/framework/test_group.js';
+import { GPUConst } from '../../../constants.js';
+import { GPUTest } from '../../../gpu_test.js';
+
+export const g = makeTestGroup(GPUTest);
+
+g.test('pipeline_layout_with_null_bind_group_layout,rendering')
+  .desc(
+    `
+Tests that using a render pipeline created with a pipeline layout that has null bind group layout
+works correctly.
+`
+  )
+  .params(u =>
+    u
+      .combine('emptyBindGroupLayoutType', ['Null', 'Undefined', 'Empty'] as const)
+      .combine('emptyBindGroupLayoutIndex', [0, 1, 2, 3] as const)
+  )
+  .fn(t => {
+    const { emptyBindGroupLayoutType, emptyBindGroupLayoutIndex } = t.params;
+
+    const colors = [
+      [0.2, 0, 0, 0.2],
+      [0, 0.2, 0, 0.2],
+      [0, 0, 0.2, 0.2],
+      [0.4, 0, 0, 0.2],
+    ] as const;
+    const outputColor = [0.0, 0.0, 0.0, 0.0];
+
+    let declarations = '';
+    let statement = 'return vec4(0.0, 0.0, 0.0, 0.0)';
+    const bindGroupLayouts: (GPUBindGroupLayout | null | undefined)[] = [];
+    const bindGroups: GPUBindGroup[] = [];
+    for (let bindGroupIndex = 0; bindGroupIndex < 4; ++bindGroupIndex) {
+      const bindGroupLayout = t.device.createBindGroupLayout({
+        entries: [
+          {
+            binding: 0,
+            visibility: GPUConst.ShaderStage.FRAGMENT,
+            buffer: {
+              type: 'uniform',
+              minBindingSize: 16,
+            },
+          },
+        ],
+      });
+
+      const color = colors[bindGroupIndex];
+      const buffer = t.makeBufferWithContents(new Float32Array(color), GPUBufferUsage.UNIFORM);
+
+      // Still create and set the bind group when the corresponding bind group layout in the
+      // pipeline is null. The output color should not be affected by the buffer in this bind group
+      const bindGroup = t.device.createBindGroup({
+        layout: bindGroupLayout,
+        entries: [
+          {
+            binding: 0,
+            resource: {
+              buffer,
+            },
+          },
+        ],
+      });
+      bindGroups.push(bindGroup);
+
+      // Set `null`, `undefined` or empty bind group layout in `bindGroupLayouts` which is used in
+      // the creation of pipeline layout
+      if (bindGroupIndex === emptyBindGroupLayoutIndex) {
+        switch (emptyBindGroupLayoutType) {
+          case 'Null':
+            bindGroupLayouts.push(null);
+            break;
+          case 'Undefined':
+            bindGroupLayouts.push(undefined);
+            break;
+          case 'Empty':
+            bindGroupLayouts.push(
+              t.device.createBindGroupLayout({
+                entries: [],
+              })
+            );
+            break;
+        }
+        continue;
+      }
+
+      // Set the uniform buffers used in the shader
+      bindGroupLayouts.push(bindGroupLayout);
+      declarations += `@group(${bindGroupIndex}) @binding(0) var<uniform> input${bindGroupIndex} : vec4f;\n`;
+      statement += ` + input${bindGroupIndex}`;
+
+      // Compute the expected output color
+      for (let i = 0; i < color.length; ++i) {
+        outputColor[i] += color[i];
+      }
+    }
+
+    const pipelineLayout = t.device.createPipelineLayout({
+      bindGroupLayouts,
+    });
+
+    const format = 'rgba8unorm';
+    const code = `
+    ${declarations}
+    @vertex
+    fn vert_main() -> @builtin(position) vec4f {
+        return vec4f(0.0, 0.0, 0.0, 1.0);
+    }
+    @fragment
+    fn frag_main() -> @location(0) vec4f {
+        ${statement};
+    }
+    `;
+    const shaderModule = t.device.createShaderModule({
+      code,
+    });
+    const renderPipeline = t.device.createRenderPipeline({
+      layout: pipelineLayout,
+      vertex: {
+        module: shaderModule,
+      },
+      fragment: {
+        module: shaderModule,
+        targets: [
+          {
+            format,
+          },
+        ],
+      },
+      primitive: {
+        topology: 'point-list',
+      },
+    });
+
+    const renderTarget = t.createTextureTracked({
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+      size: [1, 1, 1],
+      format,
+    });
+    const commandEncoder = t.device.createCommandEncoder();
+    const renderPassEncoder = commandEncoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: renderTarget.createView(),
+          loadOp: 'load',
+          storeOp: 'store',
+        },
+      ],
+    });
+    for (let i = 0; i < 4; ++i) {
+      renderPassEncoder.setBindGroup(i, bindGroups[i]);
+    }
+    renderPassEncoder.setPipeline(renderPipeline);
+    renderPassEncoder.draw(1);
+    renderPassEncoder.end();
+
+    t.queue.submit([commandEncoder.finish()]);
+
+    t.expectSingleColor(renderTarget, format, {
+      size: [1, 1, 1],
+      exp: { R: outputColor[0], G: outputColor[1], B: outputColor[2], A: outputColor[3] },
+    });
+  });
+
+g.test('pipeline_layout_with_null_bind_group_layout,compute')
+  .desc(
+    `
+Tests that using a compute pipeline created with a pipeline layout that has null bind group layout
+works correctly.
+`
+  )
+  .params(u =>
+    u
+      .combine('emptyBindGroupLayoutType', ['Null', 'Undefined', 'Empty'] as const)
+      .combine('emptyBindGroupLayoutIndex', [0, 1, 2, 3] as const)
+  )
+  .fn(t => {
+    const { emptyBindGroupLayoutType, emptyBindGroupLayoutIndex } = t.params;
+
+    let declarations = '';
+    let statement = 'output = 0u ';
+
+    const outputBuffer = t.createBufferTracked({
+      size: 4,
+      usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.STORAGE,
+    });
+    let expectedValue = 0;
+
+    const bindGroupLayouts: (GPUBindGroupLayout | null | undefined)[] = [];
+    const bindGroups: GPUBindGroup[] = [];
+    let outputDeclared = false;
+    for (let bindGroupIndex = 0; bindGroupIndex < 4; ++bindGroupIndex) {
+      const inputBuffer = t.makeBufferWithContents(
+        new Uint32Array([bindGroupIndex + 1]),
+        GPUBufferUsage.UNIFORM
+      );
+
+      const bindGroupLayoutEntries: GPUBindGroupLayoutEntry[] = [];
+      const bindGroupEntries: GPUBindGroupEntry[] = [];
+      bindGroupLayoutEntries.push({
+        binding: 0,
+        visibility: GPUConst.ShaderStage.COMPUTE,
+        buffer: {
+          type: 'uniform',
+          minBindingSize: 4,
+        },
+      });
+      bindGroupEntries.push({
+        binding: 0,
+        resource: {
+          buffer: inputBuffer,
+        },
+      });
+
+      // Set `null`, `undefined` or empty bind group layout in `bindGroupLayouts` which is used in
+      // the creation of pipeline layout
+      if (bindGroupIndex === emptyBindGroupLayoutIndex) {
+        switch (emptyBindGroupLayoutType) {
+          case 'Null':
+            bindGroupLayouts.push(null);
+            break;
+          case 'Undefined':
+            bindGroupLayouts.push(undefined);
+            break;
+          case 'Empty':
+            bindGroupLayouts.push(
+              t.device.createBindGroupLayout({
+                entries: [],
+              })
+            );
+            break;
+        }
+
+        // Still create and set the bind group when the corresponding bind group layout in the
+        // compute pipeline is null. The value in the output buffer should not be affected by the
+        // buffer in this bind group
+        const bindGroup = t.device.createBindGroup({
+          layout: t.device.createBindGroupLayout({
+            entries: bindGroupLayoutEntries,
+          }),
+          entries: bindGroupEntries,
+        });
+        bindGroups.push(bindGroup);
+        continue;
+      }
+
+      declarations += `@group(${bindGroupIndex}) @binding(0) var<uniform> input${bindGroupIndex} : u32;\n`;
+      statement += ` + input${bindGroupIndex}`;
+
+      // Set the output storage buffer
+      if (!outputDeclared) {
+        bindGroupLayoutEntries.push({
+          binding: 1,
+          visibility: GPUConst.ShaderStage.COMPUTE,
+          buffer: {
+            type: 'storage',
+            minBindingSize: 4,
+          },
+        });
+        bindGroupEntries.push({
+          binding: 1,
+          resource: {
+            buffer: outputBuffer,
+          },
+        });
+        declarations += `@group(${bindGroupIndex}) @binding(1) var<storage, read_write> output : u32;\n`;
+        outputDeclared = true;
+      }
+
+      // Set the input uniform buffers
+      const bindGroupLayout = t.device.createBindGroupLayout({
+        entries: bindGroupLayoutEntries,
+      });
+      bindGroupLayouts.push(bindGroupLayout);
+
+      const bindGroup = t.device.createBindGroup({
+        layout: bindGroupLayout,
+        entries: bindGroupEntries,
+      });
+      bindGroups.push(bindGroup);
+
+      // Compute the expected output value in the output storage buffer
+      expectedValue += bindGroupIndex + 1;
+    }
+
+    const pipelineLayout = t.device.createPipelineLayout({
+      bindGroupLayouts,
+    });
+
+    const code = `
+    ${declarations}
+    @compute @workgroup_size(1)
+    fn main() {
+      ${statement};
+    }
+    `;
+    const module = t.device.createShaderModule({
+      code,
+    });
+    const computePipeline = t.device.createComputePipeline({
+      layout: pipelineLayout,
+      compute: {
+        module,
+      },
+    });
+
+    const commandEncoder = t.device.createCommandEncoder();
+    const computePassEncoder = commandEncoder.beginComputePass();
+    for (let i = 0; i < bindGroups.length; ++i) {
+      computePassEncoder.setBindGroup(i, bindGroups[i]);
+    }
+    computePassEncoder.setPipeline(computePipeline);
+    computePassEncoder.dispatchWorkgroups(1);
+    computePassEncoder.end();
+
+    t.queue.submit([commandEncoder.finish()]);
+
+    const expectedValues = new Uint32Array([expectedValue]);
+    t.expectGPUBufferValuesEqual(outputBuffer, expectedValues);
+  });

From cb74c1676f6e0bf798dcc8dea3ccea101ac0a091 Mon Sep 17 00:00:00 2001
From: Greggman <github@greggman.com>
Date: Fri, 10 Jan 2025 03:16:53 +0900
Subject: [PATCH 6/7] Fix limits tests with new extra limits (#4136)

The code was not checking if the limits actually exist.
If they don't then the test would fail. Since these limits
have not been added to the spec, they're still being discussed,
they are not required and therefore the tests should still run
without them.
---
 .../validation/capability_checks/limits/limit_utils.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts
index 8ab31f04b9d7..3f72b90901e3 100644
--- a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts
+++ b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts
@@ -417,10 +417,12 @@ export class LimitTestsImpl extends GPUTestBase {
     if (extraLimits) {
       for (const [extraLimitStr, limitMode] of Object.entries(extraLimits)) {
         const extraLimit = extraLimitStr as GPUSupportedLimit;
-        requiredLimits[extraLimit] =
-          limitMode === 'defaultLimit'
-            ? getDefaultLimitForAdapter(adapter, extraLimit)
-            : (adapter.limits[extraLimit] as number);
+        if (adapter.limits[extraLimit] !== undefined) {
+          requiredLimits[extraLimit] =
+            limitMode === 'defaultLimit'
+              ? getDefaultLimitForAdapter(adapter, extraLimit)
+              : (adapter.limits[extraLimit] as number);
+        }
       }
     }
 

From 3fae862a029c250ec0b0d3e4bd05a1102937022d Mon Sep 17 00:00:00 2001
From: Greggman <github@greggman.com>
Date: Fri, 10 Jan 2025 06:22:38 +0900
Subject: [PATCH 7/7] Compat: refactor state_tracking test for 0 frag buffers.
 (#4112)

This is a first attempt. Feel free to push back and/or
give ideas.

The original tests use 2 read-only-storage buffers and
1 read-write storage buffer. Each has a single i32 in it
and generally they substract the first 2 from the 2nd.

Storage buffers in the fragment stage might not exist on
some compat devices so the question is how to work around
that and still test.

This solution is to add subcases, `storage` and `uniform`.
The `storage` case is unchanged. The compute pass case
will run in compat always. The render pass and render
bundle cases only run in compat if the device supports
storage buffers in the fragment stage.

The uniform cases use 2 uniform buffers and render to
a single pixel r32sint texture. They then copy that
texture to the `out` buffer that the original test was
checking. This path needs no storage buffers in the
fragment shader and so always runs.

This works but it's effectively only checking 2 bindings,
not 3. So, the question is, should I add 3rd buffer and
change the algo to out =  a - b - c etc.... so that we can
shuffle more bindings? Or is this good enough? Or should
I do something completely different.

Also note: the last test 'compatible_pipelines' is unchagned
and so only runs the comput pass unless the device supports
storage buffers in fragment shaders.

I didn't update it yet because for it to work requires
either (a) two render passes to render to 2 different
render targets. Or it needs some viewport settings to
render to 2 different pixels in the same target. Or something...,
all of which seem like the might require some big refactors.
In the `createEncoder` infra in gpu_test.ts or else they'd
just have to do their own thing entirely.

Maybe that change doesn't need to happen in this PR but
ideas are welcome.
---
 .../programmable/programmable_state_test.ts   | 144 +++++++++++---
 .../programmable/state_tracking.spec.ts       | 188 ++++++++++++++----
 src/webgpu/gpu_test.ts                        |   8 +-
 3 files changed, 275 insertions(+), 65 deletions(-)

diff --git a/src/webgpu/api/operation/command_buffer/programmable/programmable_state_test.ts b/src/webgpu/api/operation/command_buffer/programmable/programmable_state_test.ts
index 19cf91419c16..a8222807b7bd 100644
--- a/src/webgpu/api/operation/command_buffer/programmable/programmable_state_test.ts
+++ b/src/webgpu/api/operation/command_buffer/programmable/programmable_state_test.ts
@@ -1,5 +1,5 @@
 import { unreachable } from '../../../../../common/util/util.js';
-import { GPUTest } from '../../../../gpu_test.js';
+import { GPUTest, GPUTestBase } from '../../../../gpu_test.js';
 import { EncoderType } from '../../../../util/command_buffer_maker.js';
 
 interface BindGroupIndices {
@@ -8,38 +8,81 @@ interface BindGroupIndices {
   out: number;
 }
 
+type CreateEncoderType = ReturnType<
+  typeof GPUTestBase.prototype.createEncoder<'compute pass' | 'render pass' | 'render bundle'>
+>['encoder'];
+
 export class ProgrammableStateTest extends GPUTest {
   private commonBindGroupLayouts: Map<string, GPUBindGroupLayout> = new Map();
 
-  getBindGroupLayout(type: GPUBufferBindingType): GPUBindGroupLayout {
-    if (!this.commonBindGroupLayouts.has(type)) {
+  skipIfNeedsStorageBuffersInFragmentStageAndHaveNone(
+    type: GPUBufferBindingType,
+    encoderType: EncoderType
+  ) {
+    if (!this.isCompatibility) {
+      return;
+    }
+
+    const needsStorageBuffersInFragmentStage =
+      type === 'storage' && (encoderType === 'render bundle' || encoderType === 'render pass');
+
+    this.skipIf(
+      needsStorageBuffersInFragmentStage &&
+        !(this.device.limits.maxStorageBuffersInFragmentStage! >= 3),
+      `maxStorageBuffersInFragmentStage(${this.device.limits.maxStorageBuffersInFragmentStage}) < 3`
+    );
+  }
+
+  getBindGroupLayout(
+    type: GPUBufferBindingType,
+    visibility: GPUShaderStageFlags
+  ): GPUBindGroupLayout {
+    const id = `${type}:${visibility}`;
+    if (!this.commonBindGroupLayouts.has(id)) {
       this.commonBindGroupLayouts.set(
-        type,
+        id,
         this.device.createBindGroupLayout({
           entries: [
             {
               binding: 0,
-              visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT,
+              visibility,
               buffer: { type },
             },
           ],
         })
       );
     }
-    return this.commonBindGroupLayouts.get(type)!;
+    return this.commonBindGroupLayouts.get(id)!;
   }
 
-  getBindGroupLayouts(indices: BindGroupIndices): GPUBindGroupLayout[] {
+  getVisibilityForEncoderType(encoderType: EncoderType) {
+    return encoderType === 'compute pass' ? GPUShaderStage.COMPUTE : GPUShaderStage.FRAGMENT;
+  }
+
+  getBindGroupLayouts(
+    indices: BindGroupIndices,
+    type: GPUBufferBindingType,
+    encoderType: EncoderType
+  ): GPUBindGroupLayout[] {
     const bindGroupLayouts: GPUBindGroupLayout[] = [];
-    bindGroupLayouts[indices.a] = this.getBindGroupLayout('read-only-storage');
-    bindGroupLayouts[indices.b] = this.getBindGroupLayout('read-only-storage');
-    bindGroupLayouts[indices.out] = this.getBindGroupLayout('storage');
+    const inputType = type === 'storage' ? 'read-only-storage' : 'uniform';
+    const visibility = this.getVisibilityForEncoderType(encoderType);
+    bindGroupLayouts[indices.a] = this.getBindGroupLayout(inputType, visibility);
+    bindGroupLayouts[indices.b] = this.getBindGroupLayout(inputType, visibility);
+    if (type === 'storage' || encoderType === 'compute pass') {
+      bindGroupLayouts[indices.out] = this.getBindGroupLayout('storage', visibility);
+    }
     return bindGroupLayouts;
   }
 
-  createBindGroup(buffer: GPUBuffer, type: GPUBufferBindingType): GPUBindGroup {
+  createBindGroup(
+    buffer: GPUBuffer,
+    type: GPUBufferBindingType,
+    encoderType: EncoderType
+  ): GPUBindGroup {
+    const visibility = this.getVisibilityForEncoderType(encoderType);
     return this.device.createBindGroup({
-      layout: this.getBindGroupLayout(type),
+      layout: this.getBindGroupLayout(type, visibility),
       entries: [{ binding: 0, resource: { buffer } }],
     });
   }
@@ -57,6 +100,7 @@ export class ProgrammableStateTest extends GPUTest {
   createBindingStatePipeline<T extends EncoderType>(
     encoderType: T,
     groups: BindGroupIndices,
+    type: GPUBufferBindingType,
     algorithm: string = 'a.value - b.value'
   ): GPUComputePipeline | GPURenderPipeline {
     switch (encoderType) {
@@ -65,8 +109,8 @@ export class ProgrammableStateTest extends GPUTest {
             value : i32
           };
 
-          @group(${groups.a}) @binding(0) var<storage> a : Data;
-          @group(${groups.b}) @binding(0) var<storage> b : Data;
+          @group(${groups.a}) @binding(0) var<${type}> a : Data;
+          @group(${groups.b}) @binding(0) var<${type}> b : Data;
           @group(${groups.out}) @binding(0) var<storage, read_write> out : Data;
 
           @compute @workgroup_size(1) fn main() {
@@ -77,7 +121,7 @@ export class ProgrammableStateTest extends GPUTest {
 
         return this.device.createComputePipeline({
           layout: this.device.createPipelineLayout({
-            bindGroupLayouts: this.getBindGroupLayouts(groups),
+            bindGroupLayouts: this.getBindGroupLayouts(groups, type, encoderType),
           }),
           compute: {
             module: this.device.createShaderModule({
@@ -92,7 +136,7 @@ export class ProgrammableStateTest extends GPUTest {
         const wgslShaders = {
           vertex: `
             @vertex fn vert_main() -> @builtin(position) vec4<f32> {
-              return vec4<f32>(0.5, 0.5, 0.0, 1.0);
+              return vec4<f32>(0, 0, 0, 1);
             }
           `,
 
@@ -101,20 +145,23 @@ export class ProgrammableStateTest extends GPUTest {
               value : i32
             };
 
-            @group(${groups.a}) @binding(0) var<storage> a : Data;
-            @group(${groups.b}) @binding(0) var<storage> b : Data;
+            @group(${groups.a}) @binding(0) var<${type}> a : Data;
+            @group(${groups.b}) @binding(0) var<${type}> b : Data;
             @group(${groups.out}) @binding(0) var<storage, read_write> out : Data;
 
-            @fragment fn frag_main() -> @location(0) vec4<f32> {
+            @fragment fn frag_main_storage() -> @location(0) vec4<i32> {
               out.value = ${algorithm};
-              return vec4<f32>(1.0, 0.0, 0.0, 1.0);
+              return vec4<i32>(1, 0, 0, 1);
+            }
+            @fragment fn frag_main_uniform() -> @location(0) vec4<i32> {
+              return vec4<i32>(${algorithm});
             }
           `,
         };
 
         return this.device.createRenderPipeline({
           layout: this.device.createPipelineLayout({
-            bindGroupLayouts: this.getBindGroupLayouts(groups),
+            bindGroupLayouts: this.getBindGroupLayouts(groups, type, encoderType),
           }),
           vertex: {
             module: this.device.createShaderModule({
@@ -126,8 +173,8 @@ export class ProgrammableStateTest extends GPUTest {
             module: this.device.createShaderModule({
               code: wgslShaders.fragment,
             }),
-            entryPoint: 'frag_main',
-            targets: [{ format: 'rgba8unorm' }],
+            entryPoint: type === 'uniform' ? 'frag_main_uniform' : 'frag_main_storage',
+            targets: [{ format: 'r32sint' }],
           },
           primitive: { topology: 'point-list' },
         });
@@ -137,6 +184,57 @@ export class ProgrammableStateTest extends GPUTest {
     }
   }
 
+  createEncoderForStateTest(
+    type: GPUBufferBindingType,
+    out: GPUBuffer,
+    ...params: Parameters<typeof GPUTestBase.prototype.createEncoder>
+  ): {
+    encoder: CreateEncoderType;
+    validateFinishAndSubmit: (shouldBeValid: boolean, submitShouldSucceedIfValid: boolean) => void;
+  } {
+    const encoderType = params[0];
+    const renderTarget = this.createTextureTracked({
+      size: [1, 1],
+      format: 'r32sint',
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+    });
+
+    // Note: This nightmare of gibberish is trying the result of 2 hours of
+    // trying to get typescript to accept the code. Originally the code was
+    // effectively just
+    //
+    //  const { encoder, validateFinishAndSubmit } = this.createEncoder(...);
+    //  const fn = (b0, b1) => { validateFinishAndSubmit(b1, b1); if (...) { ... copyT2B ... } }
+    //  return { encoder: e__, validateFinishAndSubmit: fn };
+    //
+    // But TS didn't like it. I couldn't figure out why.
+    const encoderAndFinish = this.createEncoder(encoderType, {
+      attachmentInfo: { colorFormats: ['r32sint'] },
+      targets: [renderTarget.createView()],
+    });
+
+    const validateFinishAndSubmit = (
+      shouldBeValid: boolean,
+      submitShouldSucceedIfValid: boolean
+    ) => {
+      encoderAndFinish.validateFinishAndSubmit(shouldBeValid, submitShouldSucceedIfValid);
+
+      if (
+        type === 'uniform' &&
+        (encoderType === 'render pass' || encoderType === 'render bundle')
+      ) {
+        const encoder = this.device.createCommandEncoder();
+        encoder.copyTextureToBuffer({ texture: renderTarget }, { buffer: out }, [1, 1]);
+        this.device.queue.submit([encoder.finish()]);
+      }
+    };
+
+    return {
+      encoder: encoderAndFinish.encoder as CreateEncoderType,
+      validateFinishAndSubmit,
+    };
+  }
+
   setPipeline(pass: GPUBindingCommandsMixin, pipeline: GPUComputePipeline | GPURenderPipeline) {
     if (pass instanceof GPUComputePassEncoder) {
       pass.setPipeline(pipeline as GPUComputePipeline);
diff --git a/src/webgpu/api/operation/command_buffer/programmable/state_tracking.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/state_tracking.spec.ts
index fe8ef3d4374f..3dd8b9f5392f 100644
--- a/src/webgpu/api/operation/command_buffer/programmable/state_tracking.spec.ts
+++ b/src/webgpu/api/operation/command_buffer/programmable/state_tracking.spec.ts
@@ -5,13 +5,18 @@ times in different orders) for setBindGroup and setPipeline.
 
 import { makeTestGroup } from '../../../../../common/framework/test_group.js';
 import { GPUConst } from '../../../../constants.js';
+import { MaxLimitsTestMixin } from '../../../../gpu_test.js';
 import { kProgrammableEncoderTypes } from '../../../../util/command_buffer_maker.js';
 
 import { ProgrammableStateTest } from './programmable_state_test.js';
 
-export const g = makeTestGroup(ProgrammableStateTest);
+export const g = makeTestGroup(MaxLimitsTestMixin(ProgrammableStateTest));
 
-const kBufferUsage = GPUConst.BufferUsage.COPY_SRC | GPUConst.BufferUsage.STORAGE;
+const kBufferUsage =
+  GPUConst.BufferUsage.COPY_SRC |
+  GPUConst.BufferUsage.COPY_DST |
+  GPUConst.BufferUsage.STORAGE |
+  GPUConst.BufferUsage.UNIFORM;
 
 g.test('bind_group_indices')
   .desc(
@@ -24,6 +29,7 @@ g.test('bind_group_indices')
     u //
       .combine('encoderType', kProgrammableEncoderTypes)
       .beginSubcases()
+      .combine('type', ['storage', 'uniform'] as const)
       .combine('groupIndices', [
         { a: 0, b: 1, out: 2 },
         { a: 1, b: 2, out: 0 },
@@ -34,24 +40,40 @@ g.test('bind_group_indices')
       ])
   )
   .fn(t => {
-    const { encoderType, groupIndices } = t.params;
+    const { encoderType, groupIndices, type } = t.params;
+    t.skipIfNeedsStorageBuffersInFragmentStageAndHaveNone(type, encoderType);
 
-    const pipeline = t.createBindingStatePipeline(encoderType, groupIndices);
+    const pipeline = t.createBindingStatePipeline(
+      encoderType,
+      groupIndices,
+      type,
+      'a.value - b.value'
+    );
 
+    const inputType: GPUBufferBindingType = type === 'storage' ? 'read-only-storage' : 'uniform';
     const out = t.makeBufferWithContents(new Int32Array([0]), kBufferUsage);
     const bindGroups = {
       a: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([3]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
       b: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([2]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
-      out: t.createBindGroup(out, 'storage'),
+      out:
+        encoderType === 'compute pass' || type === 'storage'
+          ? t.createBindGroup(out, 'storage', encoderType)
+          : null,
     };
 
-    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType);
+    const { encoder, validateFinishAndSubmit } = t.createEncoderForStateTest(
+      type,
+      out,
+      encoderType
+    );
 
     t.setPipeline(encoder, pipeline);
     encoder.setBindGroup(groupIndices.a, bindGroups.a);
@@ -73,6 +95,7 @@ g.test('bind_group_order')
     u //
       .combine('encoderType', kProgrammableEncoderTypes)
       .beginSubcases()
+      .combine('type', ['storage', 'uniform'] as const)
       .combine('setOrder', [
         ['a', 'b', 'out'],
         ['b', 'out', 'a'],
@@ -83,25 +106,41 @@ g.test('bind_group_order')
       ] as const)
   )
   .fn(t => {
-    const { encoderType, setOrder } = t.params;
+    const { encoderType, setOrder, type } = t.params;
+    t.skipIfNeedsStorageBuffersInFragmentStageAndHaveNone(type, encoderType);
 
     const groupIndices = { a: 0, b: 1, out: 2 };
-    const pipeline = t.createBindingStatePipeline(encoderType, groupIndices);
+    const pipeline = t.createBindingStatePipeline(
+      encoderType,
+      groupIndices,
+      type,
+      'a.value - b.value'
+    );
 
     const out = t.makeBufferWithContents(new Int32Array([0]), kBufferUsage);
+    const inputType: GPUBufferBindingType = type === 'storage' ? 'read-only-storage' : 'uniform';
     const bindGroups = {
       a: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([3]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
       b: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([2]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
-      out: t.createBindGroup(out, 'storage'),
+      out:
+        encoderType === 'compute pass' || type === 'storage'
+          ? t.createBindGroup(out, 'storage', encoderType)
+          : null,
     };
 
-    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType);
+    const { encoder, validateFinishAndSubmit } = t.createEncoderForStateTest(
+      type,
+      out,
+      encoderType
+    );
     t.setPipeline(encoder, pipeline);
 
     for (const bindingName of setOrder) {
@@ -124,6 +163,7 @@ g.test('bind_group_before_pipeline')
     u //
       .combine('encoderType', kProgrammableEncoderTypes)
       .beginSubcases()
+      .combine('type', ['storage', 'uniform'] as const)
       .combineWithParams([
         { setBefore: ['a', 'b'], setAfter: ['out'] },
         { setBefore: ['a'], setAfter: ['b', 'out'] },
@@ -132,24 +172,41 @@ g.test('bind_group_before_pipeline')
       ] as const)
   )
   .fn(t => {
-    const { encoderType, setBefore, setAfter } = t.params;
+    const { encoderType, type, setBefore, setAfter } = t.params;
+    t.skipIfNeedsStorageBuffersInFragmentStageAndHaveNone(type, encoderType);
+
     const groupIndices = { a: 0, b: 1, out: 2 };
-    const pipeline = t.createBindingStatePipeline(encoderType, groupIndices);
+    const pipeline = t.createBindingStatePipeline(
+      encoderType,
+      groupIndices,
+      type,
+      'a.value - b.value'
+    );
 
     const out = t.makeBufferWithContents(new Int32Array([0]), kBufferUsage);
+    const inputType: GPUBufferBindingType = type === 'storage' ? 'read-only-storage' : 'uniform';
     const bindGroups = {
       a: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([3]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
       b: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([2]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
-      out: t.createBindGroup(out, 'storage'),
+      out:
+        encoderType === 'compute pass' || type === 'storage'
+          ? t.createBindGroup(out, 'storage', encoderType)
+          : null,
     };
 
-    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType);
+    const { encoder, validateFinishAndSubmit } = t.createEncoderForStateTest(
+      type,
+      out,
+      encoderType
+    );
 
     for (const bindingName of setBefore) {
       encoder.setBindGroup(groupIndices[bindingName], bindGroups[bindingName]);
@@ -176,21 +233,39 @@ g.test('one_bind_group_multiple_slots')
   .params(u =>
     u //
       .combine('encoderType', kProgrammableEncoderTypes)
+      .beginSubcases()
+      .combine('type', ['storage', 'uniform'] as const)
   )
   .fn(t => {
-    const { encoderType } = t.params;
-    const pipeline = t.createBindingStatePipeline(encoderType, { a: 0, b: 1, out: 2 });
+    const { encoderType, type } = t.params;
+    t.skipIfNeedsStorageBuffersInFragmentStageAndHaveNone(type, encoderType);
+
+    const pipeline = t.createBindingStatePipeline(
+      encoderType,
+      { a: 0, b: 1, out: 2 },
+      type,
+      'a.value - b.value'
+    );
 
     const out = t.makeBufferWithContents(new Int32Array([1]), kBufferUsage);
+    const inputType: GPUBufferBindingType = type === 'storage' ? 'read-only-storage' : 'uniform';
     const bindGroups = {
       ab: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([3]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
-      out: t.createBindGroup(out, 'storage'),
+      out:
+        encoderType === 'compute pass' || type === 'storage'
+          ? t.createBindGroup(out, 'storage', encoderType)
+          : null,
     };
 
-    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType);
+    const { encoder, validateFinishAndSubmit } = t.createEncoderForStateTest(
+      type,
+      out,
+      encoderType
+    );
     t.setPipeline(encoder, pipeline);
 
     encoder.setBindGroup(0, bindGroups.ab);
@@ -212,31 +287,54 @@ g.test('bind_group_multiple_sets')
   .params(u =>
     u //
       .combine('encoderType', kProgrammableEncoderTypes)
+      .beginSubcases()
+      .combine('type', ['storage', 'uniform'] as const)
   )
   .fn(t => {
-    const { encoderType } = t.params;
-    const pipeline = t.createBindingStatePipeline(encoderType, { a: 0, b: 1, out: 2 });
+    const { encoderType, type } = t.params;
+    t.skipIfNeedsStorageBuffersInFragmentStageAndHaveNone(type, encoderType);
+
+    const pipeline = t.createBindingStatePipeline(
+      encoderType,
+      { a: 0, b: 1, out: 2 },
+      type,
+      'a.value - b.value'
+    );
 
     const badOut = t.makeBufferWithContents(new Int32Array([-1]), kBufferUsage);
     const out = t.makeBufferWithContents(new Int32Array([0]), kBufferUsage);
+    const inputType: GPUBufferBindingType = type === 'storage' ? 'read-only-storage' : 'uniform';
     const bindGroups = {
       a: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([3]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
       b: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([2]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
       c: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([5]), kBufferUsage),
-        'read-only-storage'
+        inputType,
+        encoderType
       ),
-      badOut: t.createBindGroup(badOut, 'storage'),
-      out: t.createBindGroup(out, 'storage'),
+      badOut:
+        encoderType === 'compute pass' || type === 'storage'
+          ? t.createBindGroup(badOut, 'storage', encoderType)
+          : null,
+      out:
+        encoderType === 'compute pass' || type === 'storage'
+          ? t.createBindGroup(out, 'storage', encoderType)
+          : null,
     };
 
-    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType);
+    const { encoder, validateFinishAndSubmit } = t.createEncoderForStateTest(
+      type,
+      out,
+      encoderType
+    );
 
     encoder.setBindGroup(1, bindGroups.c);
 
@@ -265,10 +363,18 @@ g.test('compatible_pipelines')
   )
   .fn(t => {
     const { encoderType } = t.params;
-    const pipelineA = t.createBindingStatePipeline(encoderType, { a: 0, b: 1, out: 2 });
+    t.skipIfNeedsStorageBuffersInFragmentStageAndHaveNone('storage', encoderType);
+
+    const pipelineA = t.createBindingStatePipeline(
+      encoderType,
+      { a: 0, b: 1, out: 2 },
+      'storage',
+      'a.value - b.value'
+    );
     const pipelineB = t.createBindingStatePipeline(
       encoderType,
       { a: 0, b: 1, out: 2 },
+      'storage',
       'a.value + b.value'
     );
 
@@ -277,17 +383,21 @@ g.test('compatible_pipelines')
     const bindGroups = {
       a: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([3]), kBufferUsage),
-        'read-only-storage'
+        'read-only-storage',
+        encoderType
       ),
       b: t.createBindGroup(
         t.makeBufferWithContents(new Int32Array([2]), kBufferUsage),
-        'read-only-storage'
+        'read-only-storage',
+        encoderType
       ),
-      outA: t.createBindGroup(outA, 'storage'),
-      outB: t.createBindGroup(outB, 'storage'),
+      outA: t.createBindGroup(outA, 'storage', encoderType),
+      outB: t.createBindGroup(outB, 'storage', encoderType),
     };
 
-    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType);
+    const { encoder, validateFinishAndSubmit } = t.createEncoder(encoderType, {
+      attachmentInfo: { colorFormats: ['r32sint'] },
+    });
     encoder.setBindGroup(0, bindGroups.a);
     encoder.setBindGroup(1, bindGroups.b);
 
diff --git a/src/webgpu/gpu_test.ts b/src/webgpu/gpu_test.ts
index 1e7fac4a984e..9c63f5d15d2c 100644
--- a/src/webgpu/gpu_test.ts
+++ b/src/webgpu/gpu_test.ts
@@ -1131,9 +1131,11 @@ export class GPUTestBase extends Fixture<GPUTestSubcaseBatchState> {
     {
       attachmentInfo,
       occlusionQuerySet,
+      targets,
     }: {
       attachmentInfo?: GPURenderBundleEncoderDescriptor;
       occlusionQuerySet?: GPUQuerySet;
+      targets?: GPUTextureView[];
     } = {}
   ): CommandBufferMaker<T> {
     const fullAttachmentInfo = {
@@ -1155,7 +1157,7 @@ export class GPUTestBase extends Fixture<GPUTestSubcaseBatchState> {
       case 'render bundle': {
         const device = this.device;
         const rbEncoder = device.createRenderBundleEncoder(fullAttachmentInfo);
-        const pass = this.createEncoder('render pass', { attachmentInfo });
+        const pass = this.createEncoder('render pass', { attachmentInfo, targets });
 
         return new CommandBufferMaker(this, rbEncoder, () => {
           pass.encoder.executeBundles([rbEncoder.finish()]);
@@ -1205,10 +1207,10 @@ export class GPUTestBase extends Fixture<GPUTestSubcaseBatchState> {
           }
         }
         const passDesc: GPURenderPassDescriptor = {
-          colorAttachments: Array.from(fullAttachmentInfo.colorFormats, format =>
+          colorAttachments: Array.from(fullAttachmentInfo.colorFormats, (format, i) =>
             format
               ? {
-                  view: makeAttachmentView(format),
+                  view: targets ? targets[i] : makeAttachmentView(format),
                   clearValue: [0, 0, 0, 0],
                   loadOp: 'clear',
                   storeOp: 'store',