diff --git a/package-lock.json b/package-lock.json
index 4837e5c70485..19429df6e283 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -24,7 +24,7 @@
         "@types/w3c-image-capture": "^1.0.10",
         "@typescript-eslint/eslint-plugin": "^6.9.1",
         "@typescript-eslint/parser": "^6.9.1",
-        "@webgpu/types": "^0.1.43",
+        "@webgpu/types": "^0.1.46",
         "ansi-colors": "4.1.3",
         "babel-plugin-add-header-comment": "^1.0.3",
         "babel-plugin-const-enum": "^1.2.0",
@@ -1539,9 +1539,9 @@
       "dev": true
     },
     "node_modules/@webgpu/types": {
-      "version": "0.1.43",
-      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz",
-      "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==",
+      "version": "0.1.46",
+      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.46.tgz",
+      "integrity": "sha512-2iogO6Zh0pTbKLGZuuGWEmJpF/fTABGs7G9wXxpn7s24XSJchSUIiMqIJHURi5zsMZRRTuXrV/3GLOkmOFjq5w==",
       "dev": true
     },
     "node_modules/abbrev": {
@@ -10076,9 +10076,9 @@
       "dev": true
     },
     "@webgpu/types": {
-      "version": "0.1.43",
-      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz",
-      "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==",
+      "version": "0.1.46",
+      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.46.tgz",
+      "integrity": "sha512-2iogO6Zh0pTbKLGZuuGWEmJpF/fTABGs7G9wXxpn7s24XSJchSUIiMqIJHURi5zsMZRRTuXrV/3GLOkmOFjq5w==",
       "dev": true
     },
     "abbrev": {
diff --git a/package.json b/package.json
index 9d311579c314..c82fe0f2cba5 100644
--- a/package.json
+++ b/package.json
@@ -50,7 +50,7 @@
     "@types/w3c-image-capture": "^1.0.10",
     "@typescript-eslint/eslint-plugin": "^6.9.1",
     "@typescript-eslint/parser": "^6.9.1",
-    "@webgpu/types": "^0.1.43",
+    "@webgpu/types": "^0.1.46",
     "ansi-colors": "4.1.3",
     "babel-plugin-add-header-comment": "^1.0.3",
     "babel-plugin-const-enum": "^1.2.0",
diff --git a/src/common/internal/logging/test_case_recorder.ts b/src/common/internal/logging/test_case_recorder.ts
index 78f625269e3d..eb03f4ea96f7 100644
--- a/src/common/internal/logging/test_case_recorder.ts
+++ b/src/common/internal/logging/test_case_recorder.ts
@@ -44,7 +44,7 @@ export class TestCaseRecorder {
   private startTime = -1;
   private logs: LogMessageWithStack[] = [];
   private logLinesAtCurrentSeverity = 0;
-  private debugging = false;
+  public debugging = false;
 
   constructor(result: LiveTestCaseResult, debugging: boolean) {
     this.result = result;
diff --git a/src/common/internal/test_group.ts b/src/common/internal/test_group.ts
index e1d0cde12d5c..aaaec9fe3f3a 100644
--- a/src/common/internal/test_group.ts
+++ b/src/common/internal/test_group.ts
@@ -620,7 +620,7 @@ class RunCaseSpecific implements RunCase {
             const subcasePrefix = 'subcase: ' + stringifyPublicParams(subParams);
             const subRec = new Proxy(rec, {
               get: (target, k: keyof TestCaseRecorder) => {
-                const prop = TestCaseRecorder.prototype[k];
+                const prop = rec[k] ?? TestCaseRecorder.prototype[k];
                 if (typeof prop === 'function') {
                   testHeartbeatCallback();
                   return function (...args: Parameters<typeof prop>) {
diff --git a/src/common/runtime/standalone.ts b/src/common/runtime/standalone.ts
index 932c5668b587..0305031cc790 100644
--- a/src/common/runtime/standalone.ts
+++ b/src/common/runtime/standalone.ts
@@ -369,6 +369,9 @@ function makeSubtreeChildrenHTML(
   const runMySubtree = async () => {
     const results: SubtreeResult[] = [];
     for (const { runSubtree } of childFns) {
+      if (stopRequested) {
+        break;
+      }
       results.push(await runSubtree());
     }
     return mergeSubtreeResults(...results);
diff --git a/src/common/tools/dev_server.ts b/src/common/tools/dev_server.ts
index 1d1313e4f51c..8d78855974d6 100644
--- a/src/common/tools/dev_server.ts
+++ b/src/common/tools/dev_server.ts
@@ -106,10 +106,6 @@ const app = express();
 
 // Send Chrome Origin Trial tokens
 app.use((_req, res, next) => {
-  res.header('Origin-Trial', [
-    // Token for http://localhost:8080
-    'AvyDIV+RJoYs8fn3W6kIrBhWw0te0klraoz04mw/nPb8VTus3w5HCdy+vXqsSzomIH745CT6B5j1naHgWqt/tw8AAABJeyJvcmlnaW4iOiJodHRwOi8vbG9jYWxob3N0OjgwODAiLCJmZWF0dXJlIjoiV2ViR1BVIiwiZXhwaXJ5IjoxNjYzNzE4Mzk5fQ==',
-  ]);
   next();
 });
 
diff --git a/src/common/util/navigator_gpu.ts b/src/common/util/navigator_gpu.ts
index 6f3a423db39f..4e58797097ed 100644
--- a/src/common/util/navigator_gpu.ts
+++ b/src/common/util/navigator_gpu.ts
@@ -68,12 +68,11 @@ export function getGPU(recorder: TestCaseRecorder | null): GPU {
     ): Promise<GPUAdapter | null> {
       const promise = oldFn.call(this, { ...defaultRequestAdapterOptions, ...options });
       if (recorder) {
-        void promise.then(async adapter => {
+        void promise.then(adapter => {
           if (adapter) {
-            // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented.
-            const info = adapter.info || (await adapter.requestAdapterInfo());
-            const infoString = `Adapter: ${info.vendor} / ${info.architecture} / ${info.device}`;
-            recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo: info })));
+            const adapterInfo = adapter.info;
+            const infoString = `Adapter: ${adapterInfo.vendor} / ${adapterInfo.architecture} / ${adapterInfo.device}`;
+            recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo })));
           }
         });
       }
diff --git a/src/resources/cache/hashes.json b/src/resources/cache/hashes.json
index e0459422560f..bb92869fdf3e 100644
--- a/src/resources/cache/hashes.json
+++ b/src/resources/cache/hashes.json
@@ -1,112 +1,112 @@
 {
-  "webgpu/shader/execution/binary/af_addition.bin": "338b5b67",
-  "webgpu/shader/execution/binary/af_logical.bin": "3b2aceb8",
-  "webgpu/shader/execution/binary/af_division.bin": "a77dc4c0",
-  "webgpu/shader/execution/binary/af_matrix_addition.bin": "136a7fbb",
-  "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "90f2c731",
-  "webgpu/shader/execution/binary/af_multiplication.bin": "35ba40b9",
-  "webgpu/shader/execution/binary/af_remainder.bin": "41582f85",
-  "webgpu/shader/execution/binary/af_subtraction.bin": "a41420b2",
-  "webgpu/shader/execution/binary/f16_addition.bin": "ef10ca66",
-  "webgpu/shader/execution/binary/f16_logical.bin": "4bf24ca5",
-  "webgpu/shader/execution/binary/f16_division.bin": "f826b6ba",
-  "webgpu/shader/execution/binary/f16_matrix_addition.bin": "a910ddb0",
-  "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "9458671c",
-  "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "36be05d3",
-  "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "8aa6a88a",
-  "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "38282a11",
-  "webgpu/shader/execution/binary/f16_multiplication.bin": "62f91819",
-  "webgpu/shader/execution/binary/f16_remainder.bin": "f829bb65",
-  "webgpu/shader/execution/binary/f16_subtraction.bin": "82d4e231",
-  "webgpu/shader/execution/binary/f32_addition.bin": "9b0a0c50",
-  "webgpu/shader/execution/binary/f32_logical.bin": "b75af25a",
-  "webgpu/shader/execution/binary/f32_division.bin": "f6d7832f",
-  "webgpu/shader/execution/binary/f32_matrix_addition.bin": "3317c75b",
-  "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "c6f990c8",
-  "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "b091a702",
-  "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "2d12a16b",
-  "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "e1217524",
-  "webgpu/shader/execution/binary/f32_multiplication.bin": "19774fb3",
-  "webgpu/shader/execution/binary/f32_remainder.bin": "fd94bb9a",
-  "webgpu/shader/execution/binary/f32_subtraction.bin": "dba7cd7a",
-  "webgpu/shader/execution/binary/i32_arithmetic.bin": "e3b317e1",
-  "webgpu/shader/execution/binary/i32_comparison.bin": "63fa9be8",
-  "webgpu/shader/execution/binary/u32_arithmetic.bin": "e8b4008c",
-  "webgpu/shader/execution/binary/u32_comparison.bin": "d472fd61",
-  "webgpu/shader/execution/abs.bin": "631d932d",
-  "webgpu/shader/execution/acos.bin": "afcafcb1",
-  "webgpu/shader/execution/acosh.bin": "4b30eb95",
-  "webgpu/shader/execution/asin.bin": "c850c13d",
-  "webgpu/shader/execution/asinh.bin": "66a6acc0",
-  "webgpu/shader/execution/atan.bin": "2aabbb53",
-  "webgpu/shader/execution/atan2.bin": "82dd926a",
-  "webgpu/shader/execution/atanh.bin": "b98c937c",
-  "webgpu/shader/execution/bitcast.bin": "5daaee1b",
-  "webgpu/shader/execution/ceil.bin": "d0c32cf4",
-  "webgpu/shader/execution/clamp.bin": "4d1fc26a",
-  "webgpu/shader/execution/cos.bin": "dc837ae2",
-  "webgpu/shader/execution/cosh.bin": "d9e90580",
-  "webgpu/shader/execution/cross.bin": "ce7979f",
-  "webgpu/shader/execution/degrees.bin": "1436a196",
-  "webgpu/shader/execution/determinant.bin": "f36f1fa1",
-  "webgpu/shader/execution/distance.bin": "5103f8bd",
-  "webgpu/shader/execution/dot.bin": "4514172c",
-  "webgpu/shader/execution/exp.bin": "f41150bd",
-  "webgpu/shader/execution/exp2.bin": "19c494e",
-  "webgpu/shader/execution/faceForward.bin": "27b6e4a7",
-  "webgpu/shader/execution/floor.bin": "5bb5098b",
-  "webgpu/shader/execution/fma.bin": "daace9a4",
-  "webgpu/shader/execution/fract.bin": "be5f0334",
-  "webgpu/shader/execution/frexp.bin": "c9efaf7c",
-  "webgpu/shader/execution/inverseSqrt.bin": "8a50b907",
-  "webgpu/shader/execution/ldexp.bin": "cb4cea21",
-  "webgpu/shader/execution/length.bin": "a1b9fbeb",
-  "webgpu/shader/execution/log.bin": "9f2eb7c3",
-  "webgpu/shader/execution/log2.bin": "9ee7d861",
-  "webgpu/shader/execution/max.bin": "11e4608e",
-  "webgpu/shader/execution/min.bin": "7a084c44",
-  "webgpu/shader/execution/mix.bin": "7b892a4f",
-  "webgpu/shader/execution/modf.bin": "b3bf26d7",
-  "webgpu/shader/execution/normalize.bin": "18eba01d",
-  "webgpu/shader/execution/pack2x16float.bin": "82df446e",
-  "webgpu/shader/execution/pow.bin": "d3a05344",
-  "webgpu/shader/execution/quantizeToF16.bin": "7793770e",
-  "webgpu/shader/execution/radians.bin": "582c1f6b",
-  "webgpu/shader/execution/reflect.bin": "9161d6e5",
-  "webgpu/shader/execution/refract.bin": "817b59aa",
-  "webgpu/shader/execution/round.bin": "cb881aa2",
-  "webgpu/shader/execution/saturate.bin": "3716605e",
-  "webgpu/shader/execution/sign.bin": "549ac92f",
-  "webgpu/shader/execution/sin.bin": "5ec5bcb7",
-  "webgpu/shader/execution/sinh.bin": "62f6b736",
-  "webgpu/shader/execution/smoothstep.bin": "aa97768",
-  "webgpu/shader/execution/sqrt.bin": "d0a134ce",
-  "webgpu/shader/execution/step.bin": "b8035bb9",
-  "webgpu/shader/execution/tan.bin": "b34366cd",
-  "webgpu/shader/execution/tanh.bin": "8f5edddc",
-  "webgpu/shader/execution/transpose.bin": "1aa2de65",
-  "webgpu/shader/execution/trunc.bin": "cf43e3f7",
-  "webgpu/shader/execution/unpack2x16float.bin": "57ea7c02",
-  "webgpu/shader/execution/unpack2x16snorm.bin": "17fd3f86",
-  "webgpu/shader/execution/unpack2x16unorm.bin": "fc68bc4b",
-  "webgpu/shader/execution/unpack4x8snorm.bin": "fef504c1",
-  "webgpu/shader/execution/unpack4x8unorm.bin": "e8d8de93",
-  "webgpu/shader/execution/unary/af_arithmetic.bin": "14c0612a",
-  "webgpu/shader/execution/unary/af_assignment.bin": "3ad4afc",
-  "webgpu/shader/execution/unary/bool_conversion.bin": "15f7f3fb",
-  "webgpu/shader/execution/unary/f16_arithmetic.bin": "4a20db6d",
-  "webgpu/shader/execution/unary/f16_conversion.bin": "31f72f5a",
-  "webgpu/shader/execution/unary/f32_arithmetic.bin": "f1c311cb",
-  "webgpu/shader/execution/unary/f32_conversion.bin": "7539cdb3",
-  "webgpu/shader/execution/unary/i32_arithmetic.bin": "de945eec",
-  "webgpu/shader/execution/unary/i32_conversion.bin": "1728a03e",
-  "webgpu/shader/execution/unary/u32_conversion.bin": "9e6ca0ce",
-  "webgpu/shader/execution/unary/ai_assignment.bin": "1fd685a2",
-  "webgpu/shader/execution/binary/ai_arithmetic.bin": "90e651f4",
-  "webgpu/shader/execution/unary/ai_arithmetic.bin": "ba31d178",
-  "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "bc8b52ef",
-  "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "54edf6a2",
-  "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "43b036b1",
-  "webgpu/shader/execution/derivatives.bin": "65c15fc3",
-  "webgpu/shader/execution/fwidth.bin": "cc91c875"
+  "webgpu/shader/execution/binary/af_addition.bin": "82c9422d",
+  "webgpu/shader/execution/binary/af_logical.bin": "fe21109a",
+  "webgpu/shader/execution/binary/af_division.bin": "606ed00d",
+  "webgpu/shader/execution/binary/af_matrix_addition.bin": "731d24fb",
+  "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "ada2bd52",
+  "webgpu/shader/execution/binary/af_multiplication.bin": "cc2892a3",
+  "webgpu/shader/execution/binary/af_remainder.bin": "6ac1fa",
+  "webgpu/shader/execution/binary/af_subtraction.bin": "f6e12b22",
+  "webgpu/shader/execution/binary/f16_addition.bin": "4b1f652f",
+  "webgpu/shader/execution/binary/f16_logical.bin": "8c8f6ced",
+  "webgpu/shader/execution/binary/f16_division.bin": "79519150",
+  "webgpu/shader/execution/binary/f16_matrix_addition.bin": "1aca77a1",
+  "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "5416bc07",
+  "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "3c3b8d39",
+  "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "8f4be1ef",
+  "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "a3a2c8fd",
+  "webgpu/shader/execution/binary/f16_multiplication.bin": "d6602d76",
+  "webgpu/shader/execution/binary/f16_remainder.bin": "693a0ace",
+  "webgpu/shader/execution/binary/f16_subtraction.bin": "81e29c16",
+  "webgpu/shader/execution/binary/f32_addition.bin": "64946a10",
+  "webgpu/shader/execution/binary/f32_logical.bin": "69c18e28",
+  "webgpu/shader/execution/binary/f32_division.bin": "3d6326d2",
+  "webgpu/shader/execution/binary/f32_matrix_addition.bin": "1b9a4a03",
+  "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "7339e7a4",
+  "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "428a8238",
+  "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "9651223a",
+  "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "3f9688e9",
+  "webgpu/shader/execution/binary/f32_multiplication.bin": "9291e0f2",
+  "webgpu/shader/execution/binary/f32_remainder.bin": "719f1d40",
+  "webgpu/shader/execution/binary/f32_subtraction.bin": "59b6582e",
+  "webgpu/shader/execution/binary/i32_arithmetic.bin": "c81cf318",
+  "webgpu/shader/execution/binary/i32_comparison.bin": "21e05bd7",
+  "webgpu/shader/execution/binary/u32_arithmetic.bin": "eb905f8f",
+  "webgpu/shader/execution/binary/u32_comparison.bin": "665ed925",
+  "webgpu/shader/execution/abs.bin": "ec493974",
+  "webgpu/shader/execution/acos.bin": "3806122f",
+  "webgpu/shader/execution/acosh.bin": "bef89cc8",
+  "webgpu/shader/execution/asin.bin": "2e9a0a5b",
+  "webgpu/shader/execution/asinh.bin": "e5e056b8",
+  "webgpu/shader/execution/atan.bin": "d89481bf",
+  "webgpu/shader/execution/atan2.bin": "5320957a",
+  "webgpu/shader/execution/atanh.bin": "46226444",
+  "webgpu/shader/execution/bitcast.bin": "3c797668",
+  "webgpu/shader/execution/ceil.bin": "3c12c9a8",
+  "webgpu/shader/execution/clamp.bin": "dee74684",
+  "webgpu/shader/execution/cos.bin": "425c0bf3",
+  "webgpu/shader/execution/cosh.bin": "1664b602",
+  "webgpu/shader/execution/cross.bin": "1a4286f4",
+  "webgpu/shader/execution/degrees.bin": "50d340d8",
+  "webgpu/shader/execution/determinant.bin": "bb023e5",
+  "webgpu/shader/execution/distance.bin": "f881e9c4",
+  "webgpu/shader/execution/dot.bin": "570fea29",
+  "webgpu/shader/execution/exp.bin": "76843e6f",
+  "webgpu/shader/execution/exp2.bin": "75dfc3ba",
+  "webgpu/shader/execution/faceForward.bin": "6f5f6a3b",
+  "webgpu/shader/execution/floor.bin": "388bd73",
+  "webgpu/shader/execution/fma.bin": "7fe4d24b",
+  "webgpu/shader/execution/fract.bin": "f614e69f",
+  "webgpu/shader/execution/frexp.bin": "3c21ac64",
+  "webgpu/shader/execution/inverseSqrt.bin": "c41a3b91",
+  "webgpu/shader/execution/ldexp.bin": "82715024",
+  "webgpu/shader/execution/length.bin": "8b8e6a96",
+  "webgpu/shader/execution/log.bin": "febf86a5",
+  "webgpu/shader/execution/log2.bin": "8961d4f8",
+  "webgpu/shader/execution/max.bin": "10aee992",
+  "webgpu/shader/execution/min.bin": "aebcc063",
+  "webgpu/shader/execution/mix.bin": "968355d6",
+  "webgpu/shader/execution/modf.bin": "2ed2e487",
+  "webgpu/shader/execution/normalize.bin": "6e7113f9",
+  "webgpu/shader/execution/pack2x16float.bin": "33bb2862",
+  "webgpu/shader/execution/pow.bin": "83285164",
+  "webgpu/shader/execution/quantizeToF16.bin": "22eb970e",
+  "webgpu/shader/execution/radians.bin": "2653234f",
+  "webgpu/shader/execution/reflect.bin": "a3ee7789",
+  "webgpu/shader/execution/refract.bin": "77a43445",
+  "webgpu/shader/execution/round.bin": "421742d7",
+  "webgpu/shader/execution/saturate.bin": "2382307d",
+  "webgpu/shader/execution/sign.bin": "af1a4dfa",
+  "webgpu/shader/execution/sin.bin": "9ebf5419",
+  "webgpu/shader/execution/sinh.bin": "7ec22790",
+  "webgpu/shader/execution/smoothstep.bin": "9f12610c",
+  "webgpu/shader/execution/sqrt.bin": "11c3e405",
+  "webgpu/shader/execution/step.bin": "9363eaf0",
+  "webgpu/shader/execution/tan.bin": "adfe8d47",
+  "webgpu/shader/execution/tanh.bin": "76f2df9",
+  "webgpu/shader/execution/transpose.bin": "1c664829",
+  "webgpu/shader/execution/trunc.bin": "599ddfaf",
+  "webgpu/shader/execution/unpack2x16float.bin": "4995ab2c",
+  "webgpu/shader/execution/unpack2x16snorm.bin": "4d7e0eb5",
+  "webgpu/shader/execution/unpack2x16unorm.bin": "5bd397da",
+  "webgpu/shader/execution/unpack4x8snorm.bin": "9a4d38cb",
+  "webgpu/shader/execution/unpack4x8unorm.bin": "66308de3",
+  "webgpu/shader/execution/unary/af_arithmetic.bin": "c6efec3",
+  "webgpu/shader/execution/unary/af_assignment.bin": "92d5b3da",
+  "webgpu/shader/execution/unary/bool_conversion.bin": "1dd65f27",
+  "webgpu/shader/execution/unary/f16_arithmetic.bin": "d3e48584",
+  "webgpu/shader/execution/unary/f16_conversion.bin": "caf76d89",
+  "webgpu/shader/execution/unary/f32_arithmetic.bin": "20e7caca",
+  "webgpu/shader/execution/unary/f32_conversion.bin": "a1d49d40",
+  "webgpu/shader/execution/unary/i32_arithmetic.bin": "e83d060d",
+  "webgpu/shader/execution/unary/i32_conversion.bin": "25421e80",
+  "webgpu/shader/execution/unary/u32_conversion.bin": "bd8f17ae",
+  "webgpu/shader/execution/unary/ai_assignment.bin": "7be15439",
+  "webgpu/shader/execution/binary/ai_arithmetic.bin": "ccff3bc2",
+  "webgpu/shader/execution/unary/ai_arithmetic.bin": "840cb623",
+  "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "50719535",
+  "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "758077e0",
+  "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "32ebd17e",
+  "webgpu/shader/execution/derivatives.bin": "381b6836",
+  "webgpu/shader/execution/fwidth.bin": "4fda6558"
 }
\ No newline at end of file
diff --git a/src/resources/cache/webgpu/shader/execution/abs.bin b/src/resources/cache/webgpu/shader/execution/abs.bin
index 4cba9b72dff4..373fef8f3b32 100644
Binary files a/src/resources/cache/webgpu/shader/execution/abs.bin and b/src/resources/cache/webgpu/shader/execution/abs.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/acos.bin b/src/resources/cache/webgpu/shader/execution/acos.bin
index 2ecaaa389a4e..5e311531fef4 100644
Binary files a/src/resources/cache/webgpu/shader/execution/acos.bin and b/src/resources/cache/webgpu/shader/execution/acos.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/acosh.bin b/src/resources/cache/webgpu/shader/execution/acosh.bin
index d48659f3c325..82a3857ebdc6 100644
Binary files a/src/resources/cache/webgpu/shader/execution/acosh.bin and b/src/resources/cache/webgpu/shader/execution/acosh.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/asin.bin b/src/resources/cache/webgpu/shader/execution/asin.bin
index b199953eaf4b..388de445c06b 100644
Binary files a/src/resources/cache/webgpu/shader/execution/asin.bin and b/src/resources/cache/webgpu/shader/execution/asin.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/asinh.bin b/src/resources/cache/webgpu/shader/execution/asinh.bin
index b370c53b0179..120654f685c2 100644
Binary files a/src/resources/cache/webgpu/shader/execution/asinh.bin and b/src/resources/cache/webgpu/shader/execution/asinh.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/atan.bin b/src/resources/cache/webgpu/shader/execution/atan.bin
index 6ab0ba106a9e..e81af87e15e0 100644
Binary files a/src/resources/cache/webgpu/shader/execution/atan.bin and b/src/resources/cache/webgpu/shader/execution/atan.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/atanh.bin b/src/resources/cache/webgpu/shader/execution/atanh.bin
index e6a190b35df5..a7fee794094d 100644
Binary files a/src/resources/cache/webgpu/shader/execution/atanh.bin and b/src/resources/cache/webgpu/shader/execution/atanh.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/bitcast.bin b/src/resources/cache/webgpu/shader/execution/bitcast.bin
index ead299d5e78f..e743a092553e 100644
Binary files a/src/resources/cache/webgpu/shader/execution/bitcast.bin and b/src/resources/cache/webgpu/shader/execution/bitcast.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/ceil.bin b/src/resources/cache/webgpu/shader/execution/ceil.bin
index 9b93ed416f64..02cf23324cdf 100644
Binary files a/src/resources/cache/webgpu/shader/execution/ceil.bin and b/src/resources/cache/webgpu/shader/execution/ceil.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/cos.bin b/src/resources/cache/webgpu/shader/execution/cos.bin
index 4e34eff3f1b1..a5d8573c6257 100644
Binary files a/src/resources/cache/webgpu/shader/execution/cos.bin and b/src/resources/cache/webgpu/shader/execution/cos.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/cosh.bin b/src/resources/cache/webgpu/shader/execution/cosh.bin
index 5b30d2786c5e..25e8750cc7fc 100644
Binary files a/src/resources/cache/webgpu/shader/execution/cosh.bin and b/src/resources/cache/webgpu/shader/execution/cosh.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/degrees.bin b/src/resources/cache/webgpu/shader/execution/degrees.bin
index 662558d78aca..eb514cb48b4d 100644
Binary files a/src/resources/cache/webgpu/shader/execution/degrees.bin and b/src/resources/cache/webgpu/shader/execution/degrees.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/distance.bin b/src/resources/cache/webgpu/shader/execution/distance.bin
index 23a4756a69eb..06d0d9a8fc2a 100644
Binary files a/src/resources/cache/webgpu/shader/execution/distance.bin and b/src/resources/cache/webgpu/shader/execution/distance.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/floor.bin b/src/resources/cache/webgpu/shader/execution/floor.bin
index b5341907f8ef..a199d0db9ff0 100644
Binary files a/src/resources/cache/webgpu/shader/execution/floor.bin and b/src/resources/cache/webgpu/shader/execution/floor.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/fract.bin b/src/resources/cache/webgpu/shader/execution/fract.bin
index 7f09e8f60b23..bb80e873625f 100644
Binary files a/src/resources/cache/webgpu/shader/execution/fract.bin and b/src/resources/cache/webgpu/shader/execution/fract.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/frexp.bin b/src/resources/cache/webgpu/shader/execution/frexp.bin
index 6811dfa29507..8f87d16a9c95 100644
Binary files a/src/resources/cache/webgpu/shader/execution/frexp.bin and b/src/resources/cache/webgpu/shader/execution/frexp.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/length.bin b/src/resources/cache/webgpu/shader/execution/length.bin
index 3644d9b683ac..db42153edff7 100644
Binary files a/src/resources/cache/webgpu/shader/execution/length.bin and b/src/resources/cache/webgpu/shader/execution/length.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/log.bin b/src/resources/cache/webgpu/shader/execution/log.bin
index ba591faad8a0..a5e62f8e02d8 100644
Binary files a/src/resources/cache/webgpu/shader/execution/log.bin and b/src/resources/cache/webgpu/shader/execution/log.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/log2.bin b/src/resources/cache/webgpu/shader/execution/log2.bin
index 00641ce119cf..f19d77f41097 100644
Binary files a/src/resources/cache/webgpu/shader/execution/log2.bin and b/src/resources/cache/webgpu/shader/execution/log2.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/modf.bin b/src/resources/cache/webgpu/shader/execution/modf.bin
index 363cc161fd72..74259a23a6f8 100644
Binary files a/src/resources/cache/webgpu/shader/execution/modf.bin and b/src/resources/cache/webgpu/shader/execution/modf.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin
index e95227d36e50..a7b99a0a6cfd 100644
Binary files a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin and b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/pow.bin b/src/resources/cache/webgpu/shader/execution/pow.bin
index 4f5faf3293fa..f66ec5ca2fbe 100644
Binary files a/src/resources/cache/webgpu/shader/execution/pow.bin and b/src/resources/cache/webgpu/shader/execution/pow.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin
index 9e4308d5cd30..d6d75befc06b 100644
Binary files a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin and b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/radians.bin b/src/resources/cache/webgpu/shader/execution/radians.bin
index f5285d108778..731e6be24ed5 100644
Binary files a/src/resources/cache/webgpu/shader/execution/radians.bin and b/src/resources/cache/webgpu/shader/execution/radians.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/round.bin b/src/resources/cache/webgpu/shader/execution/round.bin
index c3b30b68f0a1..5ccab9e661c4 100644
Binary files a/src/resources/cache/webgpu/shader/execution/round.bin and b/src/resources/cache/webgpu/shader/execution/round.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/saturate.bin b/src/resources/cache/webgpu/shader/execution/saturate.bin
index 2e1eb821a9e7..e7402f25af73 100644
Binary files a/src/resources/cache/webgpu/shader/execution/saturate.bin and b/src/resources/cache/webgpu/shader/execution/saturate.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/sign.bin b/src/resources/cache/webgpu/shader/execution/sign.bin
index 033f2e8158f6..576019c008ee 100644
Binary files a/src/resources/cache/webgpu/shader/execution/sign.bin and b/src/resources/cache/webgpu/shader/execution/sign.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/sin.bin b/src/resources/cache/webgpu/shader/execution/sin.bin
index a2ca632008ff..bdbbfe2bd539 100644
Binary files a/src/resources/cache/webgpu/shader/execution/sin.bin and b/src/resources/cache/webgpu/shader/execution/sin.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/sinh.bin b/src/resources/cache/webgpu/shader/execution/sinh.bin
index 1176cd472bf2..b4b051a226ae 100644
Binary files a/src/resources/cache/webgpu/shader/execution/sinh.bin and b/src/resources/cache/webgpu/shader/execution/sinh.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/sqrt.bin b/src/resources/cache/webgpu/shader/execution/sqrt.bin
index 6dd8088c0898..64a7db70d004 100644
Binary files a/src/resources/cache/webgpu/shader/execution/sqrt.bin and b/src/resources/cache/webgpu/shader/execution/sqrt.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/tan.bin b/src/resources/cache/webgpu/shader/execution/tan.bin
index 572bee4df2a5..5af3e740d213 100644
Binary files a/src/resources/cache/webgpu/shader/execution/tan.bin and b/src/resources/cache/webgpu/shader/execution/tan.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/tanh.bin b/src/resources/cache/webgpu/shader/execution/tanh.bin
index a13028b165f0..9687ff00235c 100644
Binary files a/src/resources/cache/webgpu/shader/execution/tanh.bin and b/src/resources/cache/webgpu/shader/execution/tanh.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/trunc.bin b/src/resources/cache/webgpu/shader/execution/trunc.bin
index ba81e2ada427..e18bb52ed981 100644
Binary files a/src/resources/cache/webgpu/shader/execution/trunc.bin and b/src/resources/cache/webgpu/shader/execution/trunc.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin
index 98a90ea45b9a..f28c275092f1 100644
Binary files a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin
index 14299da76670..c47b3d0afcb0 100644
Binary files a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin
index ebc60029fa60..6e93bec14f76 100644
Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin
index 66b2bc73f889..55e1f5ed945f 100644
Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin
index 04841df60785..49969e9221ad 100644
Binary files a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin differ
diff --git a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin
index 277ffc4d76b7..8dfc4e268561 100644
Binary files a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin differ
diff --git a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts
index 3382dabc3720..c0613087a96a 100644
--- a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts
+++ b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts
@@ -3,7 +3,6 @@ ShaderModule CompilationInfo tests.
 `;
 
 import { makeTestGroup } from '../../../../common/framework/test_group.js';
-import { keysOf } from '../../../../common/util/data_tables.js';
 import { assert } from '../../../../common/util/util.js';
 import { GPUTest } from '../../../gpu_test.js';
 
@@ -79,66 +78,24 @@ const kInvalidShaderSources = [
 
 const kAllShaderSources = [...kValidShaderSources, ...kInvalidShaderSources];
 
-// This is the source the sourcemap refers to.
-const kOriginalSource = new Array(20)
-  .fill(0)
-  .map((_, i) => `original line ${i}`)
-  .join('\n');
-
-const kSourceMaps: { [name: string]: undefined | object } = {
-  none: undefined,
-  empty: {},
-  // A valid source map. It maps `unknown` on lines 4 and line 5 to
-  // `wasUnknown` from lines 20, 21 respectively
-  valid: {
-    version: 3,
-    sources: ['myCode'],
-    sourcesContent: [kOriginalSource],
-    names: ['myMain', 'wasUnknown'],
-    mappings: ';kBAYkCA,OACd;SAElB;gBAKOC;gBACAA',
-  },
-  // not a valid sourcemap
-  invalid: {
-    version: -123,
-    notAnything: {},
-  },
-  // The correct format but this data is for lines 11,12 even
-  // though the source only has 5 or 6 lines
-  nonMatching: {
-    version: 3,
-    sources: ['myCode'],
-    sourcesContent: [kOriginalSource],
-    names: ['myMain'],
-    mappings: ';;;;;;;;;;kBAYkCA,OACd;SAElB',
-  },
-};
-const kSourceMapsKeys = keysOf(kSourceMaps);
-
 g.test('getCompilationInfo_returns')
   .desc(
     `
     Test that getCompilationInfo() can be called on any ShaderModule.
 
-    Note: sourcemaps are not used in the WebGPU API. We are only testing that
-    browser that happen to use them don't fail or crash if the sourcemap is
-    bad or invalid.
-
     - Test for both valid and invalid shader modules.
     - Test for shader modules containing only ASCII and those containing unicode characters.
     - Test that the compilation info for valid shader modules contains no errors.
     - Test that the compilation info for invalid shader modules contains at least one error.`
   )
-  .params(u =>
-    u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys)
-  )
+  .params(u => u.combineWithParams(kAllShaderSources))
   .fn(async t => {
-    const { _code, valid, sourceMapName } = t.params;
+    const { _code, valid } = t.params;
 
     const shaderModule = t.expectGPUError(
       'validation',
       () => {
-        const sourceMap = kSourceMaps[sourceMapName];
-        return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) });
+        return t.device.createShaderModule({ code: _code });
       },
       !valid
     );
@@ -171,25 +128,15 @@ g.test('line_number_and_position')
     Test that line numbers reported by compilationInfo either point at an appropriate line and
     position or at 0:0, indicating an unknown position.
 
-    Note: sourcemaps are not used in the WebGPU API. We are only testing that
-    browser that happen to use them don't fail or crash if the sourcemap is
-    bad or invalid.
-
     - Test for invalid shader modules containing containing at least one error.
     - Test for shader modules containing only ASCII and those containing unicode characters.`
   )
-  .params(u =>
-    u
-      .combineWithParams(kInvalidShaderSources)
-      .beginSubcases()
-      .combine('sourceMapName', kSourceMapsKeys)
-  )
+  .params(u => u.combineWithParams(kInvalidShaderSources))
   .fn(async t => {
-    const { _code, _errorLine, _errorLinePos, sourceMapName } = t.params;
+    const { _code, _errorLine, _errorLinePos } = t.params;
 
     const shaderModule = t.expectGPUError('validation', () => {
-      const sourceMap = kSourceMaps[sourceMapName];
-      return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) });
+      return t.device.createShaderModule({ code: _code });
     });
 
     const info = await shaderModule.getCompilationInfo();
@@ -232,24 +179,17 @@ g.test('offset_and_length')
   .desc(
     `Test that message offsets and lengths are valid and align with any reported lineNum and linePos.
 
-     Note: sourcemaps are not used in the WebGPU API. We are only testing that
-     browser that happen to use them don't fail or crash if the sourcemap is
-     bad or invalid.
-
     - Test for valid and invalid shader modules.
     - Test for shader modules containing only ASCII and those containing unicode characters.`
   )
-  .params(u =>
-    u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys)
-  )
+  .params(u => u.combineWithParams(kAllShaderSources))
   .fn(async t => {
-    const { _code, valid, sourceMapName } = t.params;
+    const { _code, valid } = t.params;
 
     const shaderModule = t.expectGPUError(
       'validation',
       () => {
-        const sourceMap = kSourceMaps[sourceMapName];
-        return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) });
+        return t.device.createShaderModule({ code: _code });
       },
       !valid
     );
diff --git a/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts
new file mode 100644
index 000000000000..02ffd9e99db6
--- /dev/null
+++ b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts
@@ -0,0 +1,161 @@
+import { range } from '../../../../../common/util/util.js';
+import { align } from '../../../../util/math.js';
+import { kMaximumLimitBaseParams, makeLimitTestGroup } from '../limits/limit_utils.js';
+
+function getPipelineDescriptorWithClipDistances(
+  device: GPUDevice,
+  interStageShaderVariables: number,
+  pointList: boolean,
+  clipDistances: number,
+  startLocation: number = 0
+): GPURenderPipelineDescriptor {
+  const vertexOutputVariables =
+    interStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4;
+  const maxVertexOutputVariables =
+    device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4;
+
+  const varyings = `
+      ${range(
+        vertexOutputVariables,
+        i => `@location(${i + startLocation}) v4_${i + startLocation}: vec4f,`
+      ).join('\n')}
+  `;
+
+  const code = `
+    // test value                        : ${interStageShaderVariables}
+    // maxInterStageShaderVariables     : ${device.limits.maxInterStageShaderVariables}
+    // num variables in vertex shader : ${vertexOutputVariables}${
+      pointList ? ' + point-list' : ''
+    }${
+      clipDistances > 0
+        ? ` + ${align(clipDistances, 4) / 4} (clip_distances[${clipDistances}])`
+        : ''
+    }
+    // maxInterStageVariables:           : ${maxVertexOutputVariables}
+    // num used inter stage variables    : ${vertexOutputVariables}
+    // vertex output start location      : ${startLocation}
+
+    enable clip_distances;
+
+    struct VSOut {
+      @builtin(position) p: vec4f,
+      ${varyings}
+      ${
+        clipDistances > 0
+          ? `@builtin(clip_distances) clipDistances: array<f32, ${clipDistances}>,`
+          : ''
+      }
+    }
+    struct FSIn {
+      ${varyings}
+    }
+    struct FSOut {
+      @location(0) color: vec4f,
+    }
+    @vertex fn vs() -> VSOut {
+      var o: VSOut;
+      o.p = vec4f(0);
+      return o;
+    }
+    @fragment fn fs(i: FSIn) -> FSOut {
+      var o: FSOut;
+      o.color = vec4f(0);
+      return o;
+    }
+  `;
+  const module = device.createShaderModule({ code });
+  const pipelineDescriptor: GPURenderPipelineDescriptor = {
+    layout: 'auto',
+    primitive: {
+      topology: pointList ? 'point-list' : 'triangle-list',
+    },
+    vertex: {
+      module,
+    },
+    fragment: {
+      module,
+      targets: [
+        {
+          format: 'rgba8unorm',
+        },
+      ],
+    },
+  };
+  return pipelineDescriptor;
+}
+
+const limit = 'maxInterStageShaderVariables';
+export const { g, description } = makeLimitTestGroup(limit);
+
+g.test('createRenderPipeline,at_over')
+  .desc(`Test using at and over ${limit} limit with clip_distances in createRenderPipeline(Async)`)
+  .params(
+    kMaximumLimitBaseParams
+      .combine('async', [false, true])
+      .combine('pointList', [false, true])
+      .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8])
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('clip-distances');
+  })
+  .fn(async t => {
+    const { limitTest, testValueName, async, pointList, clipDistances } = t.params;
+    await t.testDeviceWithRequestedMaximumLimits(
+      limitTest,
+      testValueName,
+      async ({ device, testValue, shouldError }) => {
+        const pipelineDescriptor = getPipelineDescriptorWithClipDistances(
+          device,
+          testValue,
+          pointList,
+          clipDistances
+        );
+
+        await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError);
+      },
+      undefined,
+      ['clip-distances']
+    );
+  });
+
+g.test('createRenderPipeline,max_vertex_output_location')
+  .desc(`Test using clip_distances will limit the maximum value of vertex output location`)
+  .params(u =>
+    u
+      .combine('pointList', [false, true])
+      .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8])
+      .combine('startLocation', [0, 1, 2])
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('clip-distances');
+  })
+  .fn(async t => {
+    const { pointList, clipDistances, startLocation } = t.params;
+
+    const maxInterStageShaderVariables = t.adapter.limits.maxInterStageShaderVariables;
+    const deviceInTest = await t.requestDeviceTracked(t.adapter, {
+      requiredFeatures: ['clip-distances'],
+      requiredLimits: {
+        maxInterStageShaderVariables: t.adapter.limits.maxInterStageShaderVariables,
+      },
+    });
+    const pipelineDescriptor = getPipelineDescriptorWithClipDistances(
+      deviceInTest,
+      maxInterStageShaderVariables,
+      pointList,
+      clipDistances,
+      startLocation
+    );
+    const vertexOutputVariables =
+      maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4;
+    const maxLocationInTest = startLocation + vertexOutputVariables - 1;
+    const maxAllowedLocation = maxInterStageShaderVariables - 1 - align(clipDistances, 4) / 4;
+    const shouldError = maxLocationInTest > maxAllowedLocation;
+
+    deviceInTest.pushErrorScope('validation');
+    deviceInTest.createRenderPipeline(pipelineDescriptor);
+    const error = await deviceInTest.popErrorScope();
+    t.expect(!!error === shouldError, `${error?.message || 'no error when one was expected'}`);
+
+    deviceInTest.destroy();
+  });
diff --git a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts
index ea44b11c9148..14f1642cea9f 100644
--- a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts
+++ b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts
@@ -535,11 +535,16 @@ export class LimitTestsImpl extends GPUTestBase {
     limitTest: MaximumLimitValueTest,
     testValueName: MaximumTestValue,
     fn: (inputs: MaximumLimitTestInputs) => void | Promise<void>,
-    extraLimits?: LimitsRequest
+    extraLimits?: LimitsRequest,
+    extraFeatures: GPUFeatureName[] = []
   ) {
     assert(!this._device);
 
-    const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit(limitTest, extraLimits);
+    const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit(
+      limitTest,
+      extraLimits,
+      extraFeatures
+    );
     // If we request over the limit requestDevice will throw
     if (!deviceAndLimits) {
       return;
diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts
deleted file mode 100644
index 1963d9f28c6c..000000000000
--- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts
+++ /dev/null
@@ -1,153 +0,0 @@
-import { range } from '../../../../../common/util/util.js';
-
-import { kMaximumLimitBaseParams, LimitsRequest, makeLimitTestGroup } from './limit_utils.js';
-
-function getPipelineDescriptor(
-  device: GPUDevice,
-  testValue: number,
-  pointList: boolean,
-  frontFacing: boolean,
-  sampleIndex: boolean,
-  sampleMaskIn: boolean,
-  sampleMaskOut: boolean
-): { pipelineDescriptor: GPURenderPipelineDescriptor; code: string } {
-  const success = testValue <= device.limits.maxInterStageShaderComponents;
-
-  const maxVertexOutputComponents =
-    device.limits.maxInterStageShaderComponents - (pointList ? 1 : 0);
-  const maxFragmentInputComponents =
-    device.limits.maxInterStageShaderComponents -
-    (frontFacing ? 1 : 0) -
-    (sampleIndex ? 1 : 0) -
-    (sampleMaskIn ? 1 : 0);
-  const maxOutputComponents = Math.min(maxVertexOutputComponents, maxFragmentInputComponents);
-  const maxInterStageVariables = Math.floor(maxOutputComponents / 4);
-  const maxUserDefinedVertexComponents = Math.floor(maxVertexOutputComponents / 4) * 4;
-  const maxUserDefinedFragmentComponents = Math.floor(maxFragmentInputComponents / 4) * 4;
-
-  const numInterStageVariables = success ? maxInterStageVariables : maxInterStageVariables + 1;
-  const numUserDefinedComponents = numInterStageVariables * 4;
-
-  const varyings = `
-      ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')}
-  `;
-
-  const code = `
-    // test value                        : ${testValue}
-    // maxInterStageShaderComponents     : ${device.limits.maxInterStageShaderComponents}
-    // num components in vertex shader   : ${numUserDefinedComponents}${
-      pointList ? ' + point-list' : ''
-    }
-    // num components in fragment shader : ${numUserDefinedComponents}${
-      frontFacing ? ' + front-facing' : ''
-    }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''}
-    // maxUserDefinedVertexShaderOutputComponents   : ${maxUserDefinedVertexComponents}
-    // maxUserDefinedFragmentShaderInputComponents  : ${maxUserDefinedFragmentComponents}
-    // maxInterStageVariables:           : ${maxInterStageVariables}
-    // num used inter stage variables    : ${numInterStageVariables}
-
-    struct VSOut {
-      @builtin(position) p: vec4f,
-      ${varyings}
-    }
-    struct FSIn {
-      ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''}
-      ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''}
-      ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''}
-      ${varyings}
-    }
-    struct FSOut {
-      @location(0) color: vec4f,
-      ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''}
-    }
-    @vertex fn vs() -> VSOut {
-      var o: VSOut;
-      o.p = vec4f(0);
-      return o;
-    }
-    @fragment fn fs(i: FSIn) -> FSOut {
-      var o: FSOut;
-      o.color = vec4f(0);
-      return o;
-    }
-  `;
-  const module = device.createShaderModule({ code });
-  const pipelineDescriptor: GPURenderPipelineDescriptor = {
-    layout: 'auto',
-    primitive: {
-      topology: pointList ? 'point-list' : 'triangle-list',
-    },
-    vertex: {
-      module,
-      entryPoint: 'vs',
-    },
-    fragment: {
-      module,
-      entryPoint: 'fs',
-      targets: [
-        {
-          format: 'rgba8unorm',
-        },
-      ],
-    },
-  };
-  return { pipelineDescriptor, code };
-}
-
-const limit = 'maxInterStageShaderComponents';
-export const { g, description } = makeLimitTestGroup(limit);
-
-g.test('createRenderPipeline,at_over')
-  .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`)
-  .params(
-    kMaximumLimitBaseParams
-      .combine('async', [false, true])
-      .combine('pointList', [false, true])
-      .combine('frontFacing', [false, true])
-      .combine('sampleIndex', [false, true])
-      .combine('sampleMaskIn', [false, true])
-      .combine('sampleMaskOut', [false, true])
-  )
-  .beforeAllSubcases(t => {
-    if (t.isCompatibility) {
-      t.skipIf(
-        t.params.sampleMaskIn || t.params.sampleMaskOut,
-        'sample_mask not supported in compatibility mode'
-      );
-      t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode');
-    }
-  })
-  .fn(async t => {
-    const {
-      limitTest,
-      testValueName,
-      async,
-      pointList,
-      frontFacing,
-      sampleIndex,
-      sampleMaskIn,
-      sampleMaskOut,
-    } = t.params;
-    // Request the largest value of maxInterStageShaderVariables to allow the test using as many
-    // inter-stage shader components as possible without being limited by
-    // maxInterStageShaderVariables.
-    const extraLimits: LimitsRequest = { maxInterStageShaderVariables: 'adapterLimit' };
-    await t.testDeviceWithRequestedMaximumLimits(
-      limitTest,
-      testValueName,
-      async ({ device, testValue, shouldError }) => {
-        const { pipelineDescriptor, code } = getPipelineDescriptor(
-          device,
-          testValue,
-          pointList,
-          frontFacing,
-          sampleIndex,
-          sampleMaskIn,
-          sampleMaskOut
-        );
-
-        await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError, code);
-      },
-      extraLimits
-    );
-  });
diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts
index e54b7f7df178..5298e8c21587 100644
--- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts
+++ b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts
@@ -1,26 +1,86 @@
+import { range } from '../../../../../common/util/util.js';
+
 import { kMaximumLimitBaseParams, makeLimitTestGroup } from './limit_utils.js';
 
-function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderPipelineDescriptor {
+function getPipelineDescriptor(
+  device: GPUDevice,
+  testValue: number,
+  pointList: boolean,
+  frontFacing: boolean,
+  sampleIndex: boolean,
+  sampleMaskIn: boolean,
+  sampleMaskOut: boolean
+): GPURenderPipelineDescriptor {
+  const vertexOutputVariables = testValue - (pointList ? 1 : 0);
+  const fragmentInputVariables = testValue - (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0);
+  const numInterStageVariables = Math.min(vertexOutputVariables, fragmentInputVariables);
+
+  const maxVertexOutputVariables = device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0);
+  const maxFragmentInputVariables =
+    device.limits.maxInterStageShaderVariables -
+    (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0);
+  const maxInterStageVariables = Math.min(maxVertexOutputVariables, maxFragmentInputVariables);
+
+  const varyings = `
+      ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')}
+  `;
+
   const code = `
+    // test value                        : ${testValue}
+    // maxInterStageShaderVariables     : ${device.limits.maxInterStageShaderVariables}
+    // num variables in vertex shader : ${vertexOutputVariables}${pointList ? ' + point-list' : ''}
+    // num variables in fragment shader : ${fragmentInputVariables}${
+      frontFacing ? ' + front-facing' : ''
+    }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''}
+    // maxInterStageVariables:           : ${maxInterStageVariables}
+    // num used inter stage variables    : ${numInterStageVariables}
+
     struct VSOut {
       @builtin(position) p: vec4f,
-      @location(${testValue}) v: f32,
+      ${varyings}
+    }
+    struct FSIn {
+      ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''}
+      ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''}
+      ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''}
+      ${varyings}
+    }
+    struct FSOut {
+      @location(0) color: vec4f,
+      ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''}
     }
     @vertex fn vs() -> VSOut {
       var o: VSOut;
       o.p = vec4f(0);
-      o.v = 1.0;
+      return o;
+    }
+    @fragment fn fs(i: FSIn) -> FSOut {
+      var o: FSOut;
+      o.color = vec4f(0);
       return o;
     }
   `;
   const module = device.createShaderModule({ code });
-  return {
+  const pipelineDescriptor: GPURenderPipelineDescriptor = {
     layout: 'auto',
+    primitive: {
+      topology: pointList ? 'point-list' : 'triangle-list',
+    },
     vertex: {
       module,
       entryPoint: 'vs',
     },
+    fragment: {
+      module,
+      entryPoint: 'fs',
+      targets: [
+        {
+          format: 'rgba8unorm',
+        },
+      ],
+    },
   };
+  return pipelineDescriptor;
 }
 
 const limit = 'maxInterStageShaderVariables';
@@ -28,15 +88,48 @@ export const { g, description } = makeLimitTestGroup(limit);
 
 g.test('createRenderPipeline,at_over')
   .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`)
-  .params(kMaximumLimitBaseParams.combine('async', [false, true]))
+  .params(
+    kMaximumLimitBaseParams
+      .combine('async', [false, true])
+      .combine('pointList', [false, true])
+      .combine('frontFacing', [false, true])
+      .combine('sampleIndex', [false, true])
+      .combine('sampleMaskIn', [false, true])
+      .combine('sampleMaskOut', [false, true])
+  )
+  .beforeAllSubcases(t => {
+    if (t.isCompatibility) {
+      t.skipIf(
+        t.params.sampleMaskIn || t.params.sampleMaskOut,
+        'sample_mask not supported in compatibility mode'
+      );
+      t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode');
+    }
+  })
   .fn(async t => {
-    const { limitTest, testValueName, async } = t.params;
+    const {
+      limitTest,
+      testValueName,
+      async,
+      pointList,
+      frontFacing,
+      sampleIndex,
+      sampleMaskIn,
+      sampleMaskOut,
+    } = t.params;
     await t.testDeviceWithRequestedMaximumLimits(
       limitTest,
       testValueName,
       async ({ device, testValue, shouldError }) => {
-        const lastIndex = testValue - 1;
-        const pipelineDescriptor = getPipelineDescriptor(device, lastIndex);
+        const pipelineDescriptor = getPipelineDescriptor(
+          device,
+          testValue,
+          pointList,
+          frontFacing,
+          sampleIndex,
+          sampleMaskIn,
+          sampleMaskOut
+        );
 
         await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError);
       }
diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts
index 9e5aaa144bfa..b37cc9230931 100644
--- a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts
+++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts
@@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, lastIndex: number): GPURenderP
         },
       ],
     },
+    depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
   };
 }
 
diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts
index 0af5724f2a2a..be9c7ffd7f7b 100644
--- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts
+++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts
@@ -32,6 +32,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP
         },
       ],
     },
+    depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
   };
 }
 
diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts
index 9a4108cb0c08..02701de0d1e9 100644
--- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts
+++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts
@@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP
       module,
       buffers,
     },
+    depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
   };
 }
 
diff --git a/src/webgpu/api/validation/layout_shader_compat.spec.ts b/src/webgpu/api/validation/layout_shader_compat.spec.ts
index 2b5e609c55d6..5ee16510c77a 100644
--- a/src/webgpu/api/validation/layout_shader_compat.spec.ts
+++ b/src/webgpu/api/validation/layout_shader_compat.spec.ts
@@ -253,6 +253,7 @@ g.test('pipeline_layout_shader_exact_match')
               code: vertexShader,
             }),
           },
+          depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
         });
         break;
       }
diff --git a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts
index 1a8dec37464f..a3af6d675ac3 100644
--- a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts
+++ b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts
@@ -273,39 +273,29 @@ g.test('max_shader_variable_location')
     t.doCreateRenderPipelineTest(isAsync, location < maxInterStageShaderVariables, descriptor);
   });
 
-g.test('max_components_count,output')
+g.test('max_variables_count,output')
   .desc(
-    `Tests that validation should fail when scalar components of all user-defined outputs > max vertex shader output components.`
+    `Tests that validation should fail when all user-defined outputs > max vertex shader output
+    variables.`
   )
   .params(u =>
     u.combine('isAsync', [false, true]).combineWithParams([
-      // Number of user-defined output scalar components in test shader =
-      //     Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4.
-      { numScalarDelta: 0, topology: 'triangle-list', _success: true },
-      { numScalarDelta: 1, topology: 'triangle-list', _success: false },
-      { numScalarDelta: 0, topology: 'point-list', _success: false },
-      { numScalarDelta: -1, topology: 'point-list', _success: false },
-      { numScalarDelta: -3, topology: 'point-list', _success: false },
-      { numScalarDelta: -4, topology: 'point-list', _success: true },
+      // Number of user-defined output variables in test shader =
+      //     device.limits.maxInterStageShaderVariables + numVariablesDelta
+      { numVariablesDelta: 0, topology: 'triangle-list', _success: true },
+      { numVariablesDelta: 1, topology: 'triangle-list', _success: false },
+      { numVariablesDelta: 0, topology: 'point-list', _success: false },
+      { numVariablesDelta: -1, topology: 'point-list', _success: true },
     ] as const)
   )
   .fn(t => {
-    const { isAsync, numScalarDelta, topology, _success } = t.params;
+    const { isAsync, numVariablesDelta, topology, _success } = t.params;
 
-    const numScalarComponents = t.device.limits.maxInterStageShaderComponents + numScalarDelta;
-
-    const numVec4 = Math.floor(numScalarComponents / 4);
-    const numTrailingScalars = numScalarComponents % 4;
+    const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta;
 
     const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4<f32>`);
     const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4<f32>`);
 
-    if (numTrailingScalars > 0) {
-      const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}<f32>`;
-      outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`);
-      inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`);
-    }
-
     const descriptor = t.getDescriptorWithStates(
       t.getVertexStateWithOutputs(outputs),
       t.getFragmentStateWithInputs(inputs)
@@ -315,42 +305,32 @@ g.test('max_components_count,output')
     t.doCreateRenderPipelineTest(isAsync, _success, descriptor);
   });
 
-g.test('max_components_count,input')
+g.test('max_variables_count,input')
   .desc(
-    `Tests that validation should fail when scalar components of all user-defined inputs > max vertex shader output components.`
+    `Tests that validation should fail when all user-defined inputs > max vertex shader output
+    variables.`
   )
   .params(u =>
     u.combine('isAsync', [false, true]).combineWithParams([
-      // Number of user-defined input scalar components in test shader =
-      //     Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4.
-      { numScalarDelta: 0, useExtraBuiltinInputs: false },
-      { numScalarDelta: 1, useExtraBuiltinInputs: false },
-      { numScalarDelta: 0, useExtraBuiltinInputs: true },
-      { numScalarDelta: -3, useExtraBuiltinInputs: true },
-      { numScalarDelta: -4, useExtraBuiltinInputs: true },
+      // Number of user-defined input variables in test shader =
+      //     device.limits.maxInterStageShaderVariables + numVariablesDelta
+      { numVariablesDelta: 0, useExtraBuiltinInputs: false },
+      { numVariablesDelta: 1, useExtraBuiltinInputs: false },
+      { numVariablesDelta: 0, useExtraBuiltinInputs: true },
+      { numVariablesDelta: -1, useExtraBuiltinInputs: true },
     ] as const)
   )
   .fn(t => {
-    const { isAsync, numScalarDelta, useExtraBuiltinInputs } = t.params;
+    const { isAsync, numVariablesDelta, useExtraBuiltinInputs } = t.params;
 
-    const numScalarComponents =
-      Math.floor((t.device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4;
-    const numExtraComponents = useExtraBuiltinInputs ? (t.isCompatibility ? 2 : 3) : 0;
-    const numUsedComponents = numScalarComponents + numExtraComponents;
-    const success = numUsedComponents <= t.device.limits.maxInterStageShaderComponents;
-
-    const numVec4 = Math.floor(numScalarComponents / 4);
-    const numTrailingScalars = numScalarComponents % 4;
+    const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta;
+    const numExtraVariables = useExtraBuiltinInputs ? 1 : 0;
+    const numUsedVariables = numVec4 + numExtraVariables;
+    const success = numUsedVariables <= t.device.limits.maxInterStageShaderVariables;
 
     const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4<f32>`);
     const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4<f32>`);
 
-    if (numTrailingScalars > 0) {
-      const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}<f32>`;
-      outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`);
-      inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`);
-    }
-
     if (useExtraBuiltinInputs) {
       inputs.push('@builtin(front_facing) front_facing_in: bool');
       if (!t.isCompatibility) {
diff --git a/src/webgpu/api/validation/render_pipeline/misc.spec.ts b/src/webgpu/api/validation/render_pipeline/misc.spec.ts
index 861eb4d24c7f..d10c7ca99985 100644
--- a/src/webgpu/api/validation/render_pipeline/misc.spec.ts
+++ b/src/webgpu/api/validation/render_pipeline/misc.spec.ts
@@ -36,7 +36,7 @@ g.test('no_attachment')
 g.test('vertex_state_only')
   .desc(
     `Tests creating vertex-state-only render pipeline. A vertex-only render pipeline has no fragment
-state (and thus has no color state), and can be created with or without depth stencil state.`
+state (and thus has no color state), and must have a depth-stencil state as an attachment is required.`
   )
   .params(u =>
     u
@@ -76,7 +76,7 @@ state (and thus has no color state), and can be created with or without depth st
       targets: hasColor ? [{ format: 'rgba8unorm' }] : [],
     });
 
-    t.doCreateRenderPipelineTest(isAsync, true, descriptor);
+    t.doCreateRenderPipelineTest(isAsync, depthStencilState !== undefined, descriptor);
   });
 
 g.test('pipeline_layout,device_mismatch')
diff --git a/src/webgpu/api/validation/shader_module/entry_point.spec.ts b/src/webgpu/api/validation/shader_module/entry_point.spec.ts
index c956dc302144..67dbef1851e1 100644
--- a/src/webgpu/api/validation/shader_module/entry_point.spec.ts
+++ b/src/webgpu/api/validation/shader_module/entry_point.spec.ts
@@ -128,6 +128,7 @@ and check that the APIs only accept matching entryPoint.
         module: t.device.createShaderModule({ code }),
         entryPoint,
       },
+      depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
     };
     let _success = true;
     if (shaderModuleStage !== 'vertex') {
@@ -258,6 +259,7 @@ an undefined entryPoint is valid if there's an extra shader stage.
         }),
         entryPoint: undefined,
       },
+      depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
     };
 
     const success = extraShaderModuleStage !== 'vertex';
diff --git a/src/webgpu/capability_info.ts b/src/webgpu/capability_info.ts
index 24a103dfb4b1..62ac621dc66e 100644
--- a/src/webgpu/capability_info.ts
+++ b/src/webgpu/capability_info.ts
@@ -730,7 +730,6 @@ const [kLimitInfoKeys, kLimitInfoDefaults, kLimitInfoData] =
   'maxBufferSize':                             [           , 268435456,       268435456, kMaxUnsignedLongLongValue],
   'maxVertexAttributes':                       [           ,        16,              16,                          ],
   'maxVertexBufferArrayStride':                [           ,      2048,            2048,                          ],
-  'maxInterStageShaderComponents':             [           ,        64,              60,                          ],
   'maxInterStageShaderVariables':              [           ,        16,              15,                          ],
 
   'maxColorAttachments':                       [           ,         8,               4,                          ],
@@ -818,19 +817,21 @@ export const kFeatureNameInfo: {
   readonly [k in GPUFeatureName]: {};
 } =
   /* prettier-ignore */ {
-  'bgra8unorm-storage':       {},
-  'depth-clip-control':       {},
-  'depth32float-stencil8':    {},
-  'texture-compression-bc':   {},
-  'texture-compression-etc2': {},
-  'texture-compression-astc': {},
-  'timestamp-query':          {},
-  'indirect-first-instance':  {},
-  'shader-f16':               {},
-  'rg11b10ufloat-renderable': {},
-  'float32-filterable':       {},
-  'clip-distances':           {},
-  'dual-source-blending':     {},
+  'bgra8unorm-storage':                 {},
+  'depth-clip-control':                 {},
+  'depth32float-stencil8':              {},
+  'texture-compression-bc':             {},
+  'texture-compression-bc-sliced-3d':   {},
+  'texture-compression-etc2':           {},
+  'texture-compression-astc':           {},
+  'texture-compression-astc-sliced-3d': {},
+  'timestamp-query':                    {},
+  'indirect-first-instance':            {},
+  'shader-f16':                         {},
+  'rg11b10ufloat-renderable':           {},
+  'float32-filterable':                 {},
+  'clip-distances':                     {},
+  'dual-source-blending':               {},
 };
 /** List of all GPUFeatureName values. */
 export const kFeatureNames = keysOf(kFeatureNameInfo);
diff --git a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts
index 805203870903..a74e8900904c 100644
--- a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts
+++ b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts
@@ -232,6 +232,7 @@ g.test('unsupportedStorageTextureFormats,renderPipeline')
     t.doCreateRenderPipelineTest(async, isValid, {
       layout: 'auto',
       vertex: { module, entryPoint },
+      depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
     });
   });
 
@@ -267,7 +268,7 @@ g.test('textureLoad_with_depth_textures,computePipeline')
       `,
     });
 
-    const isValid = !t.isCompatibility;
+    const isValid = !t.isCompatibility || entryPoint === 'csWithoutDepthUsage';
     t.doCreateComputePipelineTest(async, isValid, {
       layout: 'auto',
       compute: { module, entryPoint },
@@ -301,9 +302,10 @@ g.test('textureLoad_with_depth_textures,renderPipeline')
       `,
     });
 
-    const isValid = !t.isCompatibility;
+    const isValid = !t.isCompatibility || entryPoint === 'vsWithoutDepthUsage';
     t.doCreateRenderPipelineTest(async, isValid, {
       layout: 'auto',
       vertex: { module, entryPoint },
+      depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' },
     });
   });
diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json
index 85fe0bdc6a8b..b17a8c772873 100644
--- a/src/webgpu/listing_meta.json
+++ b/src/webgpu/listing_meta.json
@@ -283,6 +283,7 @@
   "webgpu:api,validation,capability_checks,features,texture_formats:texture_descriptor:*": { "subcaseMS": 3.830 },
   "webgpu:api,validation,capability_checks,features,texture_formats:texture_descriptor_view_formats:*": { "subcaseMS": 5.734 },
   "webgpu:api,validation,capability_checks,features,texture_formats:texture_view_descriptor:*": { "subcaseMS": 4.113 },
+  "webgpu:api,validation,capability_checks,features,clip_distances:createRenderPipeline,at_over:*": { "subcaseMS": 13.7 },
   "webgpu:api,validation,capability_checks,limits,maxBindGroups:createPipeline,at_over:*": { "subcaseMS": 10.990 },
   "webgpu:api,validation,capability_checks,limits,maxBindGroups:createPipelineLayout,at_over:*": { "subcaseMS": 9.310 },
   "webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,at_over:*": { "subcaseMS": 9.984 },
@@ -313,7 +314,6 @@
   "webgpu:api,validation,capability_checks,limits,maxComputeWorkgroupsPerDimension:validate:*": { "subcaseMS": 138.900 },
   "webgpu:api,validation,capability_checks,limits,maxDynamicStorageBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 15.680 },
   "webgpu:api,validation,capability_checks,limits,maxDynamicUniformBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 10.268 },
-  "webgpu:api,validation,capability_checks,limits,maxInterStageShaderComponents:createRenderPipeline,at_over:*": { "subcaseMS": 12.916 },
   "webgpu:api,validation,capability_checks,limits,maxInterStageShaderVariables:createRenderPipeline,at_over:*": { "subcaseMS": 13.700 },
   "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createBindGroupLayout,at_over:*": { "subcaseMS": 47.857 },
   "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createPipeline,at_over:*": { "subcaseMS": 45.611 },
@@ -728,9 +728,9 @@
   "webgpu:api,validation,render_pipeline,inter_stage:location,mismatch:*": { "subcaseMS": 7.280 },
   "webgpu:api,validation,render_pipeline,inter_stage:location,subset:*": { "subcaseMS": 1.250 },
   "webgpu:api,validation,render_pipeline,inter_stage:location,superset:*": { "subcaseMS": 0.901 },
-  "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,input:*": { "subcaseMS": 6.560 },
-  "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,output:*": { "subcaseMS": 8.426 },
   "webgpu:api,validation,render_pipeline,inter_stage:max_shader_variable_location:*": { "subcaseMS": 11.050 },
+  "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,input:*": { "subcaseMS": 6.560 },
+  "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,output:*": { "subcaseMS": 8.426 },
   "webgpu:api,validation,render_pipeline,inter_stage:type:*": { "subcaseMS": 6.170 },
   "webgpu:api,validation,render_pipeline,misc:basic:*": { "subcaseMS": 0.901 },
   "webgpu:api,validation,render_pipeline,misc:external_texture:*": { "subcaseMS": 35.189 },
@@ -862,7 +862,6 @@
   "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,render_pass,used:*": { "subcaseMS": 0.000 },
   "webgpu:compat,api,validation,render_pipeline,depth_stencil_state:depthBiasClamp:*": { "subcaseMS": 1.604 },
   "webgpu:compat,api,validation,render_pipeline,fragment_state:colorState:*": { "subcaseMS": 32.604 },
-  "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:interpolate:*": { "subcaseMS": 3.488 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_index:*": { "subcaseMS": 0.487 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_mask:*": { "subcaseMS": 0.408 },
@@ -870,6 +869,7 @@
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:textureLoad_with_depth_textures,renderPipeline:*": { "subcaseMS": 1.259 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,computePipeline:*": { "subcaseMS": 1.206 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,renderPipeline:*": { "subcaseMS": 1.206 },
+  "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 },
   "webgpu:compat,api,validation,texture,createTexture:depthOrArrayLayers_incompatible_with_textureBindingViewDimension:*": { "subcaseMS": 12.712 },
   "webgpu:compat,api,validation,texture,createTexture:format_reinterpretation:*": { "subcaseMS": 7.012 },
   "webgpu:compat,api,validation,texture,createTexture:invalidTextureBindingViewDimension:*": { "subcaseMS": 6.022 },
@@ -1525,15 +1525,30 @@
   "webgpu:shader,execution,expression,call,builtin,step:f32:*": { "subcaseMS": 291.363 },
   "webgpu:shader,execution,expression,call,builtin,storageBarrier:barrier:*": { "subcaseMS": 0.801 },
   "webgpu:shader,execution,expression,call,builtin,storageBarrier:stage:*": { "subcaseMS": 2.402 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAdd:compute,split:*": { "subcaseMS": 2853.671 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,all_active:*": { "subcaseMS": 0.172 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,split:*": { "subcaseMS": 0.327 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,all_active:*": { "subcaseMS": 0.227 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,split:*": { "subcaseMS": 0.309 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 },
-  "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 },
   "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupMul:data_types:*": { "subcaseMS": 11861.865 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupMul:fp_accuracy:*": { "subcaseMS": 35606.717 },
+  "webgpu:shader,execution,expression,call,builtin,subgroupMul:fragment:*": { "subcaseMS": 0.263 },
   "webgpu:shader,execution,expression,call,builtin,tan:abstract_float:*": { "subcaseMS": 17043.428 },
   "webgpu:shader,execution,expression,call,builtin,tan:f16:*": { "subcaseMS": 116.157 },
   "webgpu:shader,execution,expression,call,builtin,tan:f32:*": { "subcaseMS": 13.532 },
@@ -1554,8 +1569,8 @@
   "webgpu:shader,execution,expression,call,builtin,textureGather:sampled_array_3d_coords:*": { "subcaseMS": 60.700 },
   "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_2d_coords:*": { "subcaseMS": 291.301 },
   "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_3d_coords:*": { "subcaseMS": 191.101 },
-  "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_2d_coords:*": { "subcaseMS": 57.600 },
-  "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_3d_coords:*": { "subcaseMS": 10.101 },
+  "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_2d_coords:*": { "subcaseMS": 57.600 },
+  "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_3d_coords:*": { "subcaseMS": 10.101 },
   "webgpu:shader,execution,expression,call,builtin,textureLoad:arrayed:*": { "subcaseMS": 30.501 },
   "webgpu:shader,execution,expression,call,builtin,textureLoad:depth:*": { "subcaseMS": 3.200 },
   "webgpu:shader,execution,expression,call,builtin,textureLoad:external:*": { "subcaseMS": 1.401 },
@@ -2005,6 +2020,11 @@
   "webgpu:shader,validation,expression,binary,div_rem:scalar_vector:*": { "subcaseMS": 743.721 },
   "webgpu:shader,validation,expression,binary,div_rem:scalar_vector_out_of_range:*": { "subcaseMS": 650.727 },
   "webgpu:shader,validation,expression,binary,parse:all:*": { "subcaseMS": 527.287 },
+  "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_array_count_on_rhs:*": { "subcaseMS": 4.309 },
+  "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_const:*": { "subcaseMS": 4.341 },
+  "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_override:*": { "subcaseMS": 27.490 },
+  "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_types:*": { "subcaseMS": 13.409 },
+  "webgpu:shader,validation,expression,binary,short_circuiting_and_or:scalar_vector:*": { "subcaseMS": 397.769 },
   "webgpu:shader,validation,expression,call,builtin,abs:parameters:*": { "subcaseMS": 10.133 },
   "webgpu:shader,validation,expression,call,builtin,abs:values:*": { "subcaseMS": 0.391 },
   "webgpu:shader,validation,expression,call,builtin,acos:integer_argument:*": { "subcaseMS": 1.512 },
@@ -2226,6 +2246,22 @@
   "webgpu:shader,validation,expression,call,builtin,pow:invalid_argument:*": { "subcaseMS": 1.000 },
   "webgpu:shader,validation,expression,call,builtin,pow:must_use:*": { "subcaseMS": 1.000 },
   "webgpu:shader,validation,expression,call,builtin,pow:values:*": { "subcaseMS": 1.000 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:data_type:*": { "subcaseMS": 39.783 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:early_eval:*": { "subcaseMS": 63.825 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_constness:*": { "subcaseMS": 15.347 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_type:*": { "subcaseMS": 26.268 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:must_use:*": { "subcaseMS": 41.658 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups:*": { "subcaseMS": 42.565 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups_f16:*": { "subcaseMS": 44.998 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:return_type:*": { "subcaseMS": 363.607 },
+  "webgpu:shader,validation,expression,call,builtin,quadBroadcast:stage:*": { "subcaseMS": 3.050 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:data_type:*": { "subcaseMS": 89.379 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:early_eval:*": { "subcaseMS": 108.243 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:must_use:*": { "subcaseMS": 5.557 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups:*": { "subcaseMS": 113.624 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups_f16:*": { "subcaseMS": 12.712 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:return_type:*": { "subcaseMS": 1424.551 },
+  "webgpu:shader,validation,expression,call,builtin,quadSwap:stage:*": { "subcaseMS": 7.664 },
   "webgpu:shader,validation,expression,call,builtin,quantizeToF16:args:*": { "subcaseMS": 1.000 },
   "webgpu:shader,validation,expression,call,builtin,quantizeToF16:must_use:*": { "subcaseMS": 1.000 },
   "webgpu:shader,validation,expression,call,builtin,quantizeToF16:values:*": { "subcaseMS": 1.000 },
@@ -2278,16 +2314,73 @@
   "webgpu:shader,validation,expression,call,builtin,step:args:*": { "subcaseMS": 1.000 },
   "webgpu:shader,validation,expression,call,builtin,step:must_use:*": { "subcaseMS": 1.000 },
   "webgpu:shader,validation,expression,call,builtin,step:values:*": { "subcaseMS": 1.000 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAdd:data_type:*": { "subcaseMS": 32.897 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAdd:early_eval:*": { "subcaseMS": 101.800 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAdd:invalid_types:*": { "subcaseMS": 95.889 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAdd:must_use:*": { "subcaseMS": 62.933 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAdd:return_type:*": { "subcaseMS": 363.546 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAdd:stage:*": { "subcaseMS": 3.536 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:data_type:*": { "subcaseMS": 57.943 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:early_eval:*": { "subcaseMS": 173.714 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:must_use:*": { "subcaseMS": 4.592 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:requires_subgroups:*": { "subcaseMS": 73.866 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:return_type:*": { "subcaseMS": 39.388 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:stage:*": { "subcaseMS": 6.862 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBallot:data_type:*": { "subcaseMS": 115.557 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBallot:early_eval:*": { "subcaseMS": 52.992 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBallot:must_use:*": { "subcaseMS": 39.441 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBallot:requires_subgroups:*": { "subcaseMS": 36.819 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBallot:return_type:*": { "subcaseMS": 22.381 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBallot:stage:*": { "subcaseMS": 3.712 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:data_type:*": { "subcaseMS": 94.072 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:early_eval:*": { "subcaseMS": 569.598 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:must_use:*": { "subcaseMS": 6.172 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:requires_subgroups:*": { "subcaseMS": 108.478 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:return_type:*": { "subcaseMS": 1430.736 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:stage:*": { "subcaseMS": 11.858 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:data_type:*": { "subcaseMS": 97.991 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:early_eval:*": { "subcaseMS": 1.254 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_constness:*": { "subcaseMS": 7.026 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_type:*": { "subcaseMS": 24.703 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:must_use:*": { "subcaseMS": 232.030 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups:*": { "subcaseMS": 47.231 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups_f16:*": { "subcaseMS": 38.503 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:return_type:*": { "subcaseMS": 496.031 },
   "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:stage:*": { "subcaseMS": 3.715 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:data_type:*": { "subcaseMS": 32.168 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:early_eval:*": { "subcaseMS": 57.922 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:must_use:*": { "subcaseMS": 36.296 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups:*": { "subcaseMS": 42.522 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups_f16:*": { "subcaseMS": 47.111 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:return_type:*": { "subcaseMS": 402.558 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:stage:*": { "subcaseMS": 2.869 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupElect:data_type:*": { "subcaseMS": 72.441 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupElect:early_eval:*": { "subcaseMS": 56.115 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupElect:must_use:*": { "subcaseMS": 32.820 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupElect:requires_subgroups:*": { "subcaseMS": 35.595 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupElect:return_type:*": { "subcaseMS": 22.712 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupElect:stage:*": { "subcaseMS": 3.790 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:data_type:*": { "subcaseMS": 64.143 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:early_eval:*": { "subcaseMS": 551.671 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:must_use:*": { "subcaseMS": 4.403 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups:*": { "subcaseMS": 87.208 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups_f16:*": { "subcaseMS": 25.190 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:return_type:*": { "subcaseMS": 911.454 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:stage:*": { "subcaseMS": 6.395 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMul:data_type:*": { "subcaseMS": 45.396 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMul:early_eval:*": { "subcaseMS": 56.571 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMul:invalid_types:*": { "subcaseMS": 91.040 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMul:must_use:*": { "subcaseMS": 39.041 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMul:return_type:*": { "subcaseMS": 549.172 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupMul:stage:*": { "subcaseMS": 4.489 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:data_type:*": { "subcaseMS": 115.093 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:early_eval:*": { "subcaseMS": 110.489 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:must_use:*": { "subcaseMS": 7.628 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:param2_type:*": { "subcaseMS": 88.305 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups:*": { "subcaseMS": 102.779 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups_f16:*": { "subcaseMS": 13.121 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:return_type:*": { "subcaseMS": 1930.309 },
+  "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:stage:*": { "subcaseMS": 9.527 },
   "webgpu:shader,validation,expression,call,builtin,tan:args:*": { "subcaseMS": 43.560 },
   "webgpu:shader,validation,expression,call,builtin,tan:must_use:*": { "subcaseMS": 5.401 },
   "webgpu:shader,validation,expression,call,builtin,tan:values:*": { "subcaseMS": 0.350 },
diff --git a/src/webgpu/print_environment.spec.ts b/src/webgpu/print_environment.spec.ts
index 9790c770cefa..f3ca67d3a2c0 100644
--- a/src/webgpu/print_environment.spec.ts
+++ b/src/webgpu/print_environment.spec.ts
@@ -35,9 +35,7 @@ NOTE: If your test runtime elides logs when tests pass, you won't see the prints
 in the logs. On non-WPT runtimes, it will also print to the console with console.log.
 WPT disallows console.log and doesn't support logs on passing tests, so this does nothing on WPT.`
   )
-  .fn(async t => {
-    // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented.
-    const adapterInfo = t.adapter.info || (await t.adapter.requestAdapterInfo());
+  .fn(t => {
     const isCompatibilityMode = (t.adapter as unknown as { isCompatibilityMode?: boolean })
       .isCompatibilityMode;
 
@@ -51,7 +49,7 @@ WPT disallows console.log and doesn't support logs on passing tests, so this doe
         adapter: {
           isFallbackAdapter: t.adapter.isFallbackAdapter,
           isCompatibilityMode,
-          info: adapterInfo,
+          info: t.adapter.info,
           features: Array.from(t.adapter.features),
           limits: t.adapter.limits,
         },
diff --git a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts
index 42d8d09ff569..f65bb951bf25 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts
@@ -7,11 +7,16 @@ T is S or vecN<S>
 Returns the smooth Hermite interpolation between 0 and 1.
 Component-wise when T is a vector.
 For scalar T, the result is t * t * (3.0 - 2.0 * t), where t = clamp((x - low) / (high - low), 0.0, 1.0).
+
+If low >= high:
+* It is a shader-creation error if low and high are const-expressions.
+* It is a pipeline-creation error if low and high are override-expressions.
 `;
 
 import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
 import { GPUTest } from '../../../../../gpu_test.js';
-import { Type } from '../../../../../util/conversion.js';
+import { ScalarValue, Type, Value } from '../../../../../util/conversion.js';
+import { Case } from '../../case.js';
 import { allInputSources, onlyConstInputSource, run } from '../../expression.js';
 
 import { abstractFloatBuiltin, builtin } from './builtin.js';
@@ -19,6 +24,13 @@ import { d } from './smoothstep.cache.js';
 
 export const g = makeTestGroup(GPUTest);
 
+// Returns true if `c` is valid for const evaluation of smoothstep: requires low < high.
+function validForConst(c: Case): boolean {
+  const low = (c.input as Value[])[0] as ScalarValue;
+  const high = (c.input as Value[])[1] as ScalarValue;
+  return low.value < high.value;
+}
+
 g.test('abstract_float')
   .specURL('https://www.w3.org/TR/WGSL/#float-builtin-functions')
   .desc(`abstract float tests`)
@@ -28,7 +40,7 @@ g.test('abstract_float')
       .combine('vectorize', [undefined, 2, 3, 4] as const)
   )
   .fn(async t => {
-    const cases = await d.get('abstract_const');
+    const cases = (await d.get('abstract_const')).filter(c => validForConst(c));
     await run(
       t,
       abstractFloatBuiltin('smoothstep'),
@@ -47,7 +59,15 @@ g.test('f32')
   )
   .fn(async t => {
     const cases = await d.get(t.params.inputSource === 'const' ? 'f32_const' : 'f32_non_const');
-    await run(t, builtin('smoothstep'), [Type.f32, Type.f32, Type.f32], Type.f32, t.params, cases);
+    const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c));
+    await run(
+      t,
+      builtin('smoothstep'),
+      [Type.f32, Type.f32, Type.f32],
+      Type.f32,
+      t.params,
+      validCases
+    );
   });
 
 g.test('f16')
@@ -61,5 +81,13 @@ g.test('f16')
   })
   .fn(async t => {
     const cases = await d.get(t.params.inputSource === 'const' ? 'f16_const' : 'f16_non_const');
-    await run(t, builtin('smoothstep'), [Type.f16, Type.f16, Type.f16], Type.f16, t.params, cases);
+    const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c));
+    await run(
+      t,
+      builtin('smoothstep'),
+      [Type.f16, Type.f16, Type.f16],
+      Type.f16,
+      t.params,
+      validCases
+    );
   });
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts
new file mode 100644
index 000000000000..04792b2d98c1
--- /dev/null
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts
@@ -0,0 +1,364 @@
+export const description = `
+Execution tests for subgroupAdd, subgroupExclusiveAdd, and subgroupInclusiveAdd
+
+Note: There is a lack of portability for non-uniform execution so these tests
+restrict themselves to uniform control flow.
+Note: There is no guaranteed mapping between subgroup_invocation_id and
+local_invocation_index. Tests should avoid assuming there is.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { iterRange } from '../../../../../../common/util/util.js';
+import { GPUTest } from '../../../../../gpu_test.js';
+import {
+  kConcreteNumericScalarsAndVectors,
+  Type,
+  VectorType,
+  numberToFloatBits,
+  floatBitsToNumber,
+  kFloat32Format,
+  kFloat16Format,
+  scalarTypeOf,
+} from '../../../../../util/conversion.js';
+import { FP } from '../../../../../util/floating_point.js';
+
+import {
+  kNumCases,
+  kStride,
+  kWGSizes,
+  kPredicateCases,
+  runAccuracyTest,
+  runComputeTest,
+} from './subgroup_util.js';
+
+export const g = makeTestGroup(GPUTest);
+
+const kIdentity = 0;
+
+const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors);
+
+const kOperations = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const;
+
+g.test('fp_accuracy')
+  .desc(
+    `Tests the accuracy of floating-point addition.
+
+The order of operations is implementation defined, most threads are filled with
+the identity value and two receive random values.
+Subgroup sizes are not known ahead of time so some cases may not perform any
+interesting operations. The test biases towards checking subgroup sizes under 64.
+These tests only check two values in order to reuse more of the existing infrastructure
+and limit the number of permutations needed to calculate the final result.`
+  )
+  .params(u =>
+    u
+      .combine('case', [...iterRange(kNumCases, x => x)])
+      .combine('type', ['f32', 'f16'] as const)
+      .combine('wgSize', [
+        [kStride, 1, 1],
+        [kStride / 2, 2, 1],
+      ] as const)
+  )
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (t.params.type === 'f16') {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(async t => {
+    await runAccuracyTest(
+      t,
+      t.params.case,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      'subgroupAdd',
+      t.params.type,
+      kIdentity,
+      t.params.type === 'f16' ? FP.f16.additionInterval : FP.f32.additionInterval
+    );
+  });
+
+/**
+ * Checks subgroup additions against values derived from the recorded metadata.
+ *
+ * Expected results, assuming every input element equals expectedfillValue:
+ * - subgroupAdd: recorded subgroup size times the fill value
+ * - subgroupExclusiveAdd: recorded subgroup invocation id times the fill value
+ * - subgroupInclusiveAdd: the subgroupExclusiveAdd expectation plus the fill value
+ * @param metadata An array containing actual subgroup size per invocation followed by
+ *                 subgroup invocation id per invocation
+ * @param output Raw u32 words of results: f32 is decoded from bits, f16 is packed two per word
+ * @param type The data type; a vector has type.width consecutive components per invocation
+ * @param operation Type of addition
+ * @param expectedfillValue The original value used to fill the test array
+ */
+function checkAddition(
+  metadata: Uint32Array,
+  output: Uint32Array,
+  type: Type,
+  operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd',
+  expectedfillValue: number
+): undefined | Error {
+  let numEles = 1;
+  if (type instanceof VectorType) {
+    numEles = type.width;
+  }
+  const scalarTy = scalarTypeOf(type);
+  const expectedOffset = operation === 'subgroupAdd' ? 0 : metadata.length / 2;
+  for (let i = 0; i < metadata.length / 2; i++) {
+    let expected = metadata[i + expectedOffset] * expectedfillValue;
+    if (operation === 'subgroupInclusiveAdd') {
+      expected += expectedfillValue;
+    }
+
+    for (let j = 0; j < numEles; j++) {
+      let idx = i * numEles + j;
+      const isOdd = idx & 0x1;
+      if (scalarTy === Type.f16) {
+        idx = Math.floor(idx / 2);
+      }
+      let val = output[idx];
+      if (scalarTy === Type.f32) {
+        val = floatBitsToNumber(val, kFloat32Format);
+      } else if (scalarTy === Type.f16) {
+        if (isOdd) {
+          val = val >> 16;
+        }
+        val = floatBitsToNumber(val & 0xffff, kFloat16Format);
+      }
+      if (expected !== val) {
+        return new Error(`Invocation ${i}, component ${j}: incorrect result
+- expected: ${expected}
+-      got: ${val}`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+g.test('data_types')
+  .desc(
+    `Tests subgroup addition for valid data types
+
+Tests a simple addition of all 1 values.
+Reductions expect result to be equal to actual subgroup size.
+Exclusive scans expect result to be equal to subgroup invocation id.
+
+TODO: support vec3 types.
+  `
+  )
+  .params(u =>
+    u
+      .combine('type', keysOf(kDataTypes))
+      .filter(t => {
+        const type = kDataTypes[t.type];
+        if (type instanceof VectorType) {
+          return type.width !== 3;
+        }
+        return true;
+      })
+      .beginSubcases()
+      .combine('wgSize', kWGSizes)
+      .combine('operation', kOperations)
+  )
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    const type = kDataTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(async t => {
+    const type = kDataTypes[t.params.type];
+    let numEles = 1;
+    if (type instanceof VectorType) {
+      numEles = type.width;
+    }
+    const scalarType = scalarTypeOf(type);
+    let enables = 'enable subgroups;\n';
+    if (type.requiresF16()) {
+      enables += 'enable f16;\nenable subgroups_f16;\n';
+    }
+
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+${enables}
+
+@group(0) @binding(0)
+var<storage> inputs : array<${type.toString()}>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<${type.toString()}>;
+
+struct Metadata {
+  subgroup_size : array<u32, ${wgThreads}>,
+  subgroup_invocation_id : array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+) {
+  // Record the actual subgroup size for this invocation.
+  // Note: subgroup_size builtin is always a power-of-2 and might be larger
+  // if the subgroup is not full.
+  let ballot = subgroupBallot(true);
+  var size = countOneBits(ballot.x);
+  size += countOneBits(ballot.y);
+  size += countOneBits(ballot.z);
+  size += countOneBits(ballot.w);
+  metadata.subgroup_size[lid] = size;
+
+  // Record subgroup invocation id for this invocation.
+  metadata.subgroup_invocation_id[lid] = id;
+
+  outputs[lid] = ${t.params.operation}(inputs[lid]);
+}`;
+    const expectedFillValue = 1;
+    let fillValue = expectedFillValue;
+    let numUints = wgThreads * numEles;
+    if (scalarType === Type.f32) {
+      fillValue = numberToFloatBits(1, kFloat32Format);
+    } else if (scalarType === Type.f16) {
+      const f16 = numberToFloatBits(1, kFloat16Format);
+      fillValue = f16 | (f16 << 16);
+      numUints = Math.ceil(numUints / 2);
+    }
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      numUints,
+      new Uint32Array([...iterRange(numUints, x => fillValue)]),
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkAddition(metadata, output, type, t.params.operation, expectedFillValue);
+      }
+    );
+  });
+
+g.test('fragment').unimplemented();
+
+/**
+ * Performs correctness checking for predicated additions
+ *
+ * Assumes the shader performs a predicated subgroup addition with the
+ * subgroup_invocation_id as the data. Filtered-out invocations are expected to hold 999.
+ *
+ * @param metadata Subgroup sizes at [0, output.length), then subgroup invocation ids after them
+ * @param output An array containing the output results (one uint per invocation)
+ * @param operation The type of addition
+ * @param filter A functor that mirrors the predication in the shader
+ */
+function checkPredicatedAddition(
+  metadata: Uint32Array,
+  output: Uint32Array,
+  operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd',
+  filter: (id: number, size: number) => boolean
+): Error | undefined {
+  for (let i = 0; i < output.length; i++) {
+    const size = metadata[i];
+    const id = metadata[output.length + i];
+    let expected = 0;
+    if (filter(id, size)) {
+      const bound =
+        operation === 'subgroupInclusiveAdd' ? id + 1 : operation === 'subgroupAdd' ? size : id;
+      for (let j = 0; j < bound; j++) {
+        if (filter(j, size)) {
+          expected += j;
+        }
+      }
+    } else {
+      expected = 999;
+    }
+    if (expected !== output[i]) {
+      return new Error(`Invocation ${i}: incorrect result
+- expected: ${expected}
+-      got: ${output[i]}`);
+    }
+  }
+  return undefined;
+}
+
+g.test('compute,split')
+  .desc('Tests that only active invocations contribute to the operation')
+  .params(u =>
+    u
+      .combine('case', keysOf(kPredicateCases))
+      .beginSubcases()
+      .combine('operation', kOperations)
+      .combine('wgSize', kWGSizes)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const testcase = kPredicateCases[t.params.case];
+    const outputUintsPerElement = 1;
+    const inputData = new Uint32Array([0]); // no input data
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage> input : array<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<u32>;
+
+struct Metadata {
+  subgroup_size : array<u32, ${wgThreads}>,
+  subgroup_invocation_id : array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+) {
+  _ = input[0];
+
+  // Record the actual subgroup size for this invocation.
+  // Note: subgroup_size builtin is always a power-of-2 and might be larger
+  // if the subgroup is not full.
+  let ballot = subgroupBallot(true);
+  var subgroupSize = countOneBits(ballot.x);
+  subgroupSize += countOneBits(ballot.y);
+  subgroupSize += countOneBits(ballot.z);
+  subgroupSize += countOneBits(ballot.w);
+  metadata.subgroup_size[lid] = subgroupSize;
+
+  // Record subgroup invocation id for this invocation.
+  metadata.subgroup_invocation_id[lid] = id;
+
+  if ${testcase.cond} {
+    outputs[lid] = ${t.params.operation}(id);
+  } else {
+    return;
+  }
+}`;
+
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      outputUintsPerElement,
+      inputData,
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkPredicatedAddition(metadata, output, t.params.operation, testcase.filter);
+      }
+    );
+  });
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts
new file mode 100644
index 000000000000..0aa461c4a578
--- /dev/null
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts
@@ -0,0 +1,390 @@
+export const description = `
+Execution tests for subgroupAll.
+
+Note: There is a lack of portability for non-uniform execution so these tests
+restrict themselves to uniform control flow.
+Note: There is no guaranteed mapping between subgroup_invocation_id and
+local_invocation_index. Tests should avoid assuming there is.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf } from '../../../../../../common/util/data_tables.js';
+import { iterRange } from '../../../../../../common/util/util.js';
+import { kTextureFormatInfo } from '../../../../../format_info.js';
+import { align } from '../../../../../util/math.js';
+import { PRNG } from '../../../../../util/prng.js';
+
+import {
+  kWGSizes,
+  kPredicateCases,
+  SubgroupTest,
+  kDataSentinel,
+  kFramebufferSizes,
+  runComputeTest,
+  runFragmentTest,
+} from './subgroup_util.js';
+
+export const g = makeTestGroup(SubgroupTest);
+
+const kNumCases = 15;
+
+/**
+ * Generate input data for testing.
+ *
+ * Data is generated in the following categories:
+ * Seed 0 generates all 0 data
+ * Seed 1 generates all 1 data
+ * Seeds 2-9 generate 1s with a 0 at one PRNG-chosen offset in each run of min(num, 32) elements
+ * Seeds 10+ generate all random data (prng.uniformInt(2) per element)
+ * @param seed The seed for the PRNG; it also selects the data category above
+ * @param num The number of data items to generate
+ */
+function generateInputData(seed: number, num: number): Uint32Array {
+  const prng = new PRNG(seed);
+
+  const bound = Math.min(num, 32);
+  const index = prng.uniformInt(bound);
+
+  return new Uint32Array([
+    ...iterRange(num, x => {
+      if (seed === 0) {
+        return 0;
+      } else if (seed === 1) {
+        return 1;
+      } else if (seed < 10) {
+        const bounded = x % bound;
+        return bounded === index ? 0 : 1;
+      }
+      return prng.uniformInt(2);
+    }),
+  ]);
+}
+
+/**
+ * Checks the result of a subgroupAll operation
+ *
+ * Since subgroup size depends on the pipeline compile, we calculate the expected
+ * results after execution. The shader generates a subgroup id and records it for
+ * each invocation. The check first calculates the expected result for each subgroup
+ * and then compares to the actual result for each invocation. The filter functor
+ * ensures only the correct invocations contribute to the calculation.
+ * @param metadata An array of uints:
+ *                 * first half containing subgroup sizes (from builtin value)
+ *                 * second half subgroup invocation id
+ * @param output An array of uints containing:
+ *               * first half is the outputs of subgroupAll
+ *               * second half is a generated subgroup id
+ * @param numInvs Number of invocations executed
+ * @param input The input data (one value per invocation, indexed like the first half of output)
+ * @param filter A functor to filter active invocations
+ */
+function checkAll(
+  metadata: Uint32Array, // supplies the size and id passed to filter
+  output: Uint32Array,
+  numInvs: number,
+  input: Uint32Array,
+  filter: (id: number, size: number) => boolean
+): Error | undefined {
+  // First, generate expected results.
+  const expected = new Map<number, number>();
+  for (let inv = 0; inv < numInvs; inv++) {
+    const size = metadata[inv];
+    const id = metadata[inv + numInvs];
+    if (!filter(id, size)) {
+      continue;
+    }
+    const subgroup_id = output[numInvs + inv];
+    let v = expected.get(subgroup_id) ?? 1;
+    v &= input[inv];
+    expected.set(subgroup_id, v);
+  }
+
+  // Second, check against actual results.
+  for (let inv = 0; inv < numInvs; inv++) {
+    const size = metadata[inv];
+    const id = metadata[inv + numInvs];
+    const res = output[inv];
+    if (filter(id, size)) {
+      const subgroup_id = output[numInvs + inv];
+      const expected_v = expected.get(subgroup_id) ?? 0;
+      if (expected_v !== res) {
+        return new Error(`Invocation ${inv}:
+- expected: ${expected_v}
+-      got: ${res}`);
+      }
+    } else {
+      if (res !== kDataSentinel) {
+        return new Error(`Invocation ${inv} unexpected write:
+- subgroup invocation id: ${id}
+-          subgroup size: ${size}`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+g.test('compute,all_active')
+  .desc(`Test compute subgroupAll`)
+  .params(u =>
+    u
+      .combine('wgSize', kWGSizes)
+      .beginSubcases()
+      .combine('case', [...iterRange(kNumCases, x => x)])
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage> inputs : array<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<u32>;
+
+struct Metadata {
+  subgroup_size: array<u32, ${wgThreads}>,
+  subgroup_invocation_id: array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+  @builtin(subgroup_size) subgroupSize : u32,
+) {
+  metadata.subgroup_size[lid] = subgroupSize;
+
+  metadata.subgroup_invocation_id[lid] = id;
+
+  // Record a representative subgroup id.
+  outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid);
+
+  let res = select(0u, 1u, subgroupAll(bool(inputs[lid])));
+  outputs[lid] = res;
+}`;
+
+    const inputData = generateInputData(t.params.case, wgThreads);
+
+    const uintsPerOutput = 2;
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      uintsPerOutput,
+      inputData,
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => {
+          return true;
+        });
+      }
+    );
+  });
+
+g.test('compute,split')
+  .desc('Test that only active invocation participate')
+  .params(u =>
+    u
+      .combine('predicate', keysOf(kPredicateCases))
+      .beginSubcases()
+      .combine('wgSize', kWGSizes)
+      .combine('case', [...iterRange(kNumCases, x => x)])
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const testcase = kPredicateCases[t.params.predicate];
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage> inputs : array<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<u32>;
+
+struct Metadata {
+  subgroup_size : array<u32, ${wgThreads}>,
+  subgroup_invocation_id : array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+  @builtin(subgroup_size) subgroupSize : u32,
+) {
+  metadata.subgroup_size[lid] = subgroupSize;
+
+  // Record subgroup invocation id for this invocation.
+  metadata.subgroup_invocation_id[lid] = id;
+
+  // Record a generated subgroup id.
+  outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid);
+
+  if ${testcase.cond} {
+    outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid])));
+  } else {
+    return;
+  }
+}`;
+
+    const inputData = generateInputData(t.params.case, wgThreads);
+
+    const uintsPerOutput = 2;
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      uintsPerOutput,
+      inputData,
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkAll(metadata, output, wgThreads, inputData, testcase.filter);
+      }
+    );
+  });
+
+/**
+ * Checks subgroupAll results from a fragment shader.
+ *
+ * @param data Framebuffer output
+ *             * component 0 is result
+ *             * component 1 is generated subgroup id
+ * @param input An array of input data
+ * @param format The framebuffer format
+ * @param width Framebuffer width
+ * @param height Framebuffer height
+ */
+function checkFragmentAll(
+  data: Uint32Array,
+  input: Uint32Array,
+  format: GPUTextureFormat,
+  width: number,
+  height: number
+): Error | undefined {
+  const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
+  const blocksPerRow = width / blockWidth;
+  // 256 minimum comes from image copy requirements.
+  const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
+  const uintsPerRow = bytesPerRow / 4;
+  const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4;
+
+  // Iteration skips last row and column to avoid helper invocations because it is not
+  // guaranteed whether or not they participate in the subgroup operation.
+  const expected = new Map<number, number>();
+  for (let row = 0; row < height - 1; row++) {
+    for (let col = 0; col < width - 1; col++) {
+      const offset = uintsPerRow * row + col * uintsPerTexel;
+      const subgroup_id = data[offset + 1];
+
+      if (subgroup_id === 0) {
+        return new Error(`Internal error: helper invocation at (${col}, ${row})`);
+      }
+
+      let v = expected.get(subgroup_id) ?? 1;
+      // Input holds one value per pixel, indexed by linearized (row-major) position.
+      v &= input[row * width + col];
+      expected.set(subgroup_id, v);
+    }
+  }
+
+  for (let row = 0; row < height - 1; row++) {
+    for (let col = 0; col < width - 1; col++) {
+      const offset = uintsPerRow * row + col * uintsPerTexel;
+      const res = data[offset];
+      const subgroup_id = data[offset + 1];
+
+      if (subgroup_id === 0) {
+        // Inactive in the fragment.
+        continue;
+      }
+
+      const expected_v = expected.get(subgroup_id) ?? 0;
+      if (expected_v !== res) {
+        return new Error(`Row ${row}, col ${col}: incorrect results:
+- expected: ${expected_v}
+-      got: ${res}`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+g.test('fragment,all_active')
+  .desc('Tests subgroupAll in fragment shaders')
+  .params(u =>
+    u
+      .combine('size', kFramebufferSizes)
+      .beginSubcases()
+      .combine('case', [...iterRange(kNumCases, x => x)])
+      .combineWithParams([{ format: 'rg32uint' }] as const)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const numInputs = t.params.size[0] * t.params.size[1];
+    const inputData = generateInputData(t.params.case, numInputs);
+
+    const fsShader = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage, read_write> inputs : array<u32>;
+
+@fragment
+fn main(
+  @builtin(position) pos : vec4f,
+) -> @location(0) vec2u {
+  // Generate a subgroup id based on linearized position, but avoid 0.
+  let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]};
+  var subgroup_id = linear + 1;
+  subgroup_id = subgroupBroadcastFirst(subgroup_id);
+
+  // Filter out possible helper invocations.
+  let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1);
+  let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
+  let in_range = x_in_range && y_in_range;
+  let input = select(1u, inputs[linear], in_range);
+
+  let res = select(0u, 1u, subgroupAll(bool(input)));
+  return vec2u(res, subgroup_id);
+}`;
+
+    await runFragmentTest(
+      t,
+      t.params.format,
+      fsShader,
+      t.params.size[0],
+      t.params.size[1],
+      inputData,
+      (data: Uint32Array) => {
+        return checkFragmentAll(
+          data,
+          inputData,
+          t.params.format,
+          t.params.size[0],
+          t.params.size[1]
+        );
+      }
+    );
+  });
+
+// Using subgroup operations in control flow within fragment shaders
+// quickly leads to unportable behavior.
+g.test('fragment,split').unimplemented();
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts
new file mode 100644
index 000000000000..5d5b9de11420
--- /dev/null
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts
@@ -0,0 +1,390 @@
+export const description = `
+Execution tests for subgroupAny.
+
+Note: There is a lack of portability for non-uniform execution so these tests
+restrict themselves to uniform control flow.
+Note: There is no guaranteed mapping between subgroup_invocation_id and
+local_invocation_index. Tests should avoid assuming there is.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf } from '../../../../../../common/util/data_tables.js';
+import { iterRange } from '../../../../../../common/util/util.js';
+import { kTextureFormatInfo } from '../../../../../format_info.js';
+import { align } from '../../../../../util/math.js';
+import { PRNG } from '../../../../../util/prng.js';
+
+import {
+  kWGSizes,
+  kPredicateCases,
+  SubgroupTest,
+  kDataSentinel,
+  runComputeTest,
+  runFragmentTest,
+  kFramebufferSizes,
+} from './subgroup_util.js';
+
+export const g = makeTestGroup(SubgroupTest);
+
+const kNumCases = 15;
+
+/**
+ * Generate input data for testing.
+ *
+ * Data is generated in the following categories:
+ * Seed 0 generates all 0 data
+ * Seed 1 generates all 1 data
+ * Seeds 2-9 generate 0s with a 1 at one PRNG-chosen offset in each run of min(num, 32) elements
+ * Seeds 10+ generate all random data (prng.uniformInt(2) per element)
+ * @param seed The seed for the PRNG; it also selects the data category above
+ * @param num The number of data items to generate
+ */
+function generateInputData(seed: number, num: number): Uint32Array {
+  const prng = new PRNG(seed);
+
+  const bound = Math.min(num, 32);
+  const index = prng.uniformInt(bound);
+
+  return new Uint32Array([
+    ...iterRange(num, x => {
+      if (seed === 0) {
+        return 0;
+      } else if (seed === 1) {
+        return 1;
+      } else if (seed < 10) {
+        const bounded = x % bound;
+        return bounded === index ? 1 : 0;
+      }
+      return prng.uniformInt(2);
+    }),
+  ]);
+}
+
+/**
+ * Checks the result of a subgroupAny operation
+ *
+ * Since subgroup size depends on the pipeline compile, we calculate the expected
+ * results after execution. The shader generates a subgroup id and records it for
+ * each invocation. The check first calculates the expected result for each subgroup
+ * and then compares to the actual result for each invocation. The filter functor
+ * ensures only the correct invocations contribute to the calculation.
+ * @param metadata An array of uints:
+ *                 * first half containing subgroup sizes (from builtin value)
+ *                 * second half subgroup invocation id
+ * @param output An array of uints containing:
+ *               * first half is the outputs of subgroupAny
+ *               * second half is a generated subgroup id
+ * @param numInvs Number of invocations executed
+ * @param input The input data (one value per invocation, indexed like the first half of output)
+ * @param filter A functor to filter active invocations
+ */
+function checkAny(
+  metadata: Uint32Array, // supplies the size and id passed to filter
+  output: Uint32Array,
+  numInvs: number,
+  input: Uint32Array,
+  filter: (id: number, size: number) => boolean
+): Error | undefined {
+  // First, generate expected results.
+  const expected = new Map<number, number>();
+  for (let inv = 0; inv < numInvs; inv++) {
+    const size = metadata[inv];
+    const id = metadata[inv + numInvs];
+    if (!filter(id, size)) {
+      continue;
+    }
+    const subgroup_id = output[numInvs + inv];
+    let v = expected.get(subgroup_id) ?? 0;
+    v |= input[inv];
+    expected.set(subgroup_id, v);
+  }
+
+  // Second, check against actual results.
+  for (let inv = 0; inv < numInvs; inv++) {
+    const size = metadata[inv];
+    const id = metadata[inv + numInvs];
+    const res = output[inv];
+    if (filter(id, size)) {
+      const subgroup_id = output[numInvs + inv];
+      const expected_v = expected.get(subgroup_id) ?? 0;
+      if (expected_v !== res) {
+        return new Error(`Invocation ${inv}:
+- expected: ${expected_v}
+-      got: ${res}`);
+      }
+    } else {
+      if (res !== kDataSentinel) {
+        return new Error(`Invocation ${inv} unexpected write:
+- subgroup invocation id: ${id}
+-          subgroup size: ${size}`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+g.test('compute,all_active')
+  .desc(`Test compute subgroupAny`)
+  .params(u =>
+    u
+      .combine('wgSize', kWGSizes)
+      .beginSubcases()
+      .combine('case', [...iterRange(kNumCases, x => x)])
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage> inputs : array<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<u32>;
+
+struct Metadata {
+  subgroup_size: array<u32, ${wgThreads}>,
+  subgroup_invocation_id: array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+  @builtin(subgroup_size) subgroupSize : u32,
+) {
+  metadata.subgroup_size[lid] = subgroupSize;
+
+  metadata.subgroup_invocation_id[lid] = id;
+
+  // Record a representative subgroup id.
+  outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid);
+
+  let res = select(0u, 1u, subgroupAny(bool(inputs[lid])));
+  outputs[lid] = res;
+}`;
+
+    const inputData = generateInputData(t.params.case, wgThreads);
+
+    const uintsPerOutput = 2;
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      uintsPerOutput,
+      inputData,
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkAny(metadata, output, wgThreads, inputData, (id: number, size: number) => {
+          return true;
+        });
+      }
+    );
+  });
+
+g.test('compute,split')
+  .desc('Test that only active invocation participate')
+  .params(u =>
+    u
+      .combine('predicate', keysOf(kPredicateCases))
+      .beginSubcases()
+      .combine('wgSize', kWGSizes)
+      .combine('case', [...iterRange(kNumCases, x => x)])
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const testcase = kPredicateCases[t.params.predicate];
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage> inputs : array<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<u32>;
+
+struct Metadata {
+  subgroup_size : array<u32, ${wgThreads}>,
+  subgroup_invocation_id : array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+  @builtin(subgroup_size) subgroupSize : u32,
+) {
+  metadata.subgroup_size[lid] = subgroupSize;
+
+  // Record subgroup invocation id for this invocation.
+  metadata.subgroup_invocation_id[lid] = id;
+
+  // Record a generated subgroup id.
+  outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid);
+
+  if ${testcase.cond} {
+    outputs[lid] = select(0u, 1u, subgroupAny(bool(inputs[lid])));
+  } else {
+    return;
+  }
+}`;
+
+    const inputData = generateInputData(t.params.case, wgThreads);
+
+    const uintsPerOutput = 2;
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      uintsPerOutput,
+      inputData,
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkAny(metadata, output, wgThreads, inputData, testcase.filter);
+      }
+    );
+  });
+
+/**
+ * Checks subgroupAny results from a fragment shader.
+ *
+ * @param data Framebuffer output
+ *             * component 0 is result
+ *             * component 1 is generated subgroup id
+ * @param input An array of input data, one value per framebuffer texel in row-major order
+ * @param format The framebuffer format
+ * @param width Framebuffer width
+ * @param height Framebuffer height
+ */
+function checkFragmentAny(
+  data: Uint32Array,
+  input: Uint32Array,
+  format: GPUTextureFormat,
+  width: number,
+  height: number
+): Error | undefined {
+  const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
+  const blocksPerRow = width / blockWidth;
+  // 256 minimum comes from image copy requirements.
+  const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
+  const uintsPerRow = bytesPerRow / 4;
+  const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4;
+
+  // Iteration skips last row and column to avoid helper invocations because it is not
+  // guaranteed whether or not they participate in the subgroup operation.
+  const expected = new Map<number, number>();
+  for (let row = 0; row < height - 1; row++) {
+    for (let col = 0; col < width - 1; col++) {
+      const offset = uintsPerRow * row + col * uintsPerTexel;
+      const subgroup_id = data[offset + 1];
+
+      if (subgroup_id === 0) {
+        return new Error(`Internal error: helper invocation at (${col}, ${row})`);
+      }
+
+      let v = expected.get(subgroup_id) ?? 0;
+      // OR together the inputs of every checked texel that reported this subgroup id.
+      v |= input[row * width + col];
+      expected.set(subgroup_id, v);
+    }
+  }
+
+  for (let row = 0; row < height - 1; row++) {
+    for (let col = 0; col < width - 1; col++) {
+      const offset = uintsPerRow * row + col * uintsPerTexel;
+      const res = data[offset];
+      const subgroup_id = data[offset + 1];
+
+      if (subgroup_id === 0) {
+        // Inactive in the fragment.
+        continue;
+      }
+
+      const expected_v = expected.get(subgroup_id) ?? 0;
+      if (expected_v !== res) {
+        return new Error(`Row ${row}, col ${col}: incorrect results:
+- expected: ${expected_v}
+-      got: ${res}`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+g.test('fragment,all_active')
+  .desc('Tests subgroupAny in fragment shaders')
+  .params(u =>
+    u
+      .combine('size', kFramebufferSizes)
+      .beginSubcases()
+      .combine('case', [...iterRange(kNumCases, x => x)])
+      .combineWithParams([{ format: 'rg32uint' }] as const)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const numInputs = t.params.size[0] * t.params.size[1];
+    const inputData = generateInputData(t.params.case, numInputs);
+
+    const fsShader = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage, read_write> inputs : array<u32>;
+
+@fragment
+fn main(
+  @builtin(position) pos : vec4f,
+) -> @location(0) vec2u {
+  // Generate a subgroup id based on linearized position, but avoid 0.
+  let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]};
+  var subgroup_id = linear + 1;
+  subgroup_id = subgroupBroadcastFirst(subgroup_id);
+
+  // Filter out possible helper invocations.
+  let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1);
+  let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
+  let in_range = x_in_range && y_in_range;
+  let input = select(0u, inputs[linear], in_range);
+
+  let res = select(0u, 1u, subgroupAny(bool(input)));
+  return vec2u(res, subgroup_id);
+}`;
+
+    await runFragmentTest(
+      t,
+      t.params.format,
+      fsShader,
+      t.params.size[0],
+      t.params.size[1],
+      inputData,
+      (data: Uint32Array) => {
+        return checkFragmentAny(
+          data,
+          inputData,
+          t.params.format,
+          t.params.size[0],
+          t.params.size[1]
+        );
+      }
+    );
+  });
+
+// Using subgroup operations in control flow with fragment shaders
+// quickly leads to unportable behavior.
+g.test('fragment,split').unimplemented();
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts
index b2fa9e46ec7a..75fe27e8cb5d 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts
@@ -318,6 +318,4 @@ fn main(@builtin(subgroup_invocation_id) id : u32,
     t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expect));
   });
 
-g.test('dynamically_uniform_id').unimplemented();
-
 g.test('fragment').unimplemented();
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts
new file mode 100644
index 000000000000..d45c023cd17a
--- /dev/null
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts
@@ -0,0 +1,387 @@
+export const description = `
+Execution tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul
+
+Note: There is a lack of portability for non-uniform execution so these tests
+restrict themselves to uniform control flow.
+Note: There is no guaranteed mapping between subgroup_invocation_id and
+local_invocation_index. Tests should avoid assuming there is.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { iterRange } from '../../../../../../common/util/util.js';
+import { GPUTest } from '../../../../../gpu_test.js';
+import {
+  kConcreteNumericScalarsAndVectors,
+  Type,
+  VectorType,
+  numberToFloatBits,
+  floatBitsToNumber,
+  kFloat32Format,
+  kFloat16Format,
+  scalarTypeOf,
+} from '../../../../../util/conversion.js';
+import { FP } from '../../../../../util/floating_point.js';
+
+import {
+  kNumCases,
+  kStride,
+  kWGSizes,
+  kPredicateCases,
+  runAccuracyTest,
+  runComputeTest,
+} from './subgroup_util.js';
+
+export const g = makeTestGroup(GPUTest);
+
+const kIdentity = 1;
+
+const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors);
+
+const kOperations = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const;
+
+g.test('fp_accuracy')
+  .desc(
+    `Tests the accuracy of floating-point multiplication.
+
+The order of operations is implementation defined, most threads are filled with
+the identity value and two receive random values.
+Subgroup sizes are not known ahead of time so some cases may not perform any
+interesting operations. The test biases towards checking subgroup sizes under 64.
+These tests only check two values in order to reuse more of the existing infrastructure
+and limit the number of permutations needed to calculate the final result.`
+  )
+  .params(u =>
+    u
+      .combine('case', [...iterRange(kNumCases, x => x)])
+      .combine('type', ['f32', 'f16'] as const)
+      .combine('wgSize', [
+        [kStride, 1, 1],
+        [kStride / 2, 2, 1],
+      ] as const)
+  )
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (t.params.type === 'f16') {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(async t => {
+    await runAccuracyTest(
+      t,
+      t.params.case,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      'subgroupMul',
+      t.params.type,
+      kIdentity,
+      t.params.type === 'f16' ? FP.f16.multiplicationInterval : FP.f32.multiplicationInterval
+    );
+  });
+
+/**
+ * Checks subgroup multiplications.
+ *
+ * Expected results:
+ * - subgroupMul: each invocation's result should equal 2 raised to the actual subgroup size
+ * - subgroupExclusiveMul: each invocation's result should equal 2 raised to its subgroup invocation id
+ * - subgroupInclusiveMul: each invocation's result should equal its subgroupExclusiveMul result multiplied by the fill value
+ * @param metadata An array containing actual subgroup size per invocation followed by
+ *                 subgroup invocation id per invocation
+ * @param output An array of multiplications
+ * @param type The data type
+ * @param operation Type of multiplication
+ * @param expectedfillValue The original value used to fill the test array
+ */
+function checkMultiplication(
+  metadata: Uint32Array,
+  output: Uint32Array,
+  type: Type,
+  operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul',
+  expectedfillValue: number
+): undefined | Error {
+  let numEles = 1;
+  if (type instanceof VectorType) {
+    numEles = type.width;
+  }
+  const scalarTy = scalarTypeOf(type);
+  const expectedOffset = operation === 'subgroupMul' ? 0 : metadata.length / 2;
+  for (let i = 0; i < metadata.length / 2; i++) {
+    let expected = Math.pow(2, metadata[i + expectedOffset]);
+    if (operation === 'subgroupInclusiveMul') {
+      expected *= expectedfillValue;
+    }
+    for (let j = 0; j < numEles; j++) {
+      let idx = i * numEles + j;
+      const isOdd = idx & 0x1;
+      if (scalarTy === Type.f16) {
+        idx = Math.floor(idx / 2);
+      }
+      let val = output[idx];
+      if (scalarTy === Type.f32) {
+        val = floatBitsToNumber(val, kFloat32Format);
+      } else if (scalarTy === Type.f16) {
+        if (isOdd) {
+          val = val >> 16;
+        }
+        val = floatBitsToNumber(val & 0xffff, kFloat16Format);
+      }
+      if (expected !== val) {
+        return new Error(`Invocation ${i}, component ${j}: incorrect result
+- expected: ${expected}
+-      got: ${val}`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+g.test('data_types')
+  .desc(
+    `Tests subgroup multiplication for valid data types
+
+Tests a simple multiplication of all 2 values.
+Reductions expect result to be equal to actual subgroup size.
+Exclusice scans expect result to be equal subgroup invocation id.
+
+TODO: support vec3 types.
+  `
+  )
+  .params(u =>
+    u
+      .combine('type', keysOf(kDataTypes))
+      .filter(t => {
+        const type = kDataTypes[t.type];
+        if (type instanceof VectorType) {
+          return type.width !== 3;
+        }
+        return true;
+      })
+      .beginSubcases()
+      // Workgroup sizes are kept < 16 to avoid overflows.
+      // Other tests cover that the full subgroup will contribute.
+      .combine('wgSize', [
+        [4, 1, 1],
+        [8, 1, 1],
+        [1, 4, 1],
+        [1, 8, 1],
+        [1, 1, 4],
+        [1, 1, 8],
+        [2, 2, 2],
+        [4, 2, 1],
+        [4, 1, 2],
+        [2, 4, 1],
+        [2, 1, 4],
+        [1, 4, 2],
+        [1, 2, 4],
+        [3, 3, 1],
+        [3, 1, 3],
+        [1, 3, 3],
+      ] as const)
+      .combine('operation', kOperations)
+  )
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    const type = kDataTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(async t => {
+    const type = kDataTypes[t.params.type];
+    let numEles = 1;
+    if (type instanceof VectorType) {
+      numEles = type.width;
+    }
+    const scalarType = scalarTypeOf(type);
+    let enables = 'enable subgroups;\n';
+    if (type.requiresF16()) {
+      enables += 'enable f16;\nenable subgroups_f16;\n';
+    }
+
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+${enables}
+
+@group(0) @binding(0)
+var<storage> inputs : array<${type.toString()}>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<${type.toString()}>;
+
+struct Metadata {
+  subgroup_size : array<u32, ${wgThreads}>,
+  subgroup_invocation_id : array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+) {
+  // Record the actual subgroup size for this invocation.
+  // Note: subgroup_size builtin is always a power-of-2 and might be larger
+  // if the subgroup is not full.
+  let ballot = subgroupBallot(true);
+  var size = countOneBits(ballot.x);
+  size += countOneBits(ballot.y);
+  size += countOneBits(ballot.z);
+  size += countOneBits(ballot.w);
+  metadata.subgroup_size[lid] = size;
+
+  // Record subgroup invocation id for this invocation.
+  metadata.subgroup_invocation_id[lid] = id;
+
+  outputs[lid] = ${t.params.operation}(inputs[lid]);
+}`;
+
+    const expectedfillValue = 2;
+    let fillValue = expectedfillValue;
+    let numUints = wgThreads * numEles;
+    if (scalarType === Type.f32) {
+      fillValue = numberToFloatBits(fillValue, kFloat32Format);
+    } else if (scalarType === Type.f16) {
+      const f16 = numberToFloatBits(fillValue, kFloat16Format);
+      fillValue = f16 | (f16 << 16);
+      numUints = Math.ceil(numUints / 2);
+    }
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      numUints,
+      new Uint32Array([...iterRange(numUints, x => fillValue)]),
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkMultiplication(metadata, output, type, t.params.operation, expectedfillValue);
+      }
+    );
+  });
+
+g.test('fragment').unimplemented();
+
+/**
+ * Performs correctness checking for predicated multiplications
+ *
+ * Assumes the shader performs a predicated subgroup multiplication with
+ * (subgroup_invocation_id % 4) + 1 as the data; products wrap modulo 2^32 like WGSL u32.
+ *
+ * @param metadata An array containing subgroup sizes and subgroup invocation ids
+ * @param output An array containing the output results
+ * @param operation The type of multiplication
+ * @param filter A functor that mirrors the predication in the shader
+ */
+function checkPredicatedMultiplication(
+  metadata: Uint32Array,
+  output: Uint32Array,
+  operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul',
+  filter: (id: number, size: number) => boolean
+): Error | undefined {
+  for (let i = 0; i < output.length; i++) {
+    const size = metadata[i];
+    const id = metadata[output.length + i];
+    let expected = 1;
+    if (filter(id, size)) {
+      // This function replicates the behavior in the shader.
+      const valueModFun = function (id: number) {
+        return (id % 4) + 1;
+      };
+      const bound =
+        operation === 'subgroupInclusiveMul' ? id + 1 : operation === 'subgroupMul' ? size : id;
+      for (let j = 0; j < bound; j++) {
+        if (filter(j, size)) {
+          expected = Math.imul(expected, valueModFun(j)) >>> 0; // u32 wraparound, as in WGSL
+        }
+      }
+    } else {
+      expected = 999; // kDataSentinel: inactive invocations leave the initial fill untouched
+    }
+    if (expected !== output[i]) {
+      return new Error(`Invocation ${i}: incorrect result
+- expected: ${expected}
+-      got: ${output[i]}`);
+    }
+  }
+  return undefined;
+}
+
+g.test('compute,split')
+  .desc('Tests that only active invocations contribute to the operation')
+  .params(u =>
+    u
+      .combine('case', keysOf(kPredicateCases))
+      .beginSubcases()
+      .combine('operation', kOperations)
+      .combine('wgSize', kWGSizes)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const testcase = kPredicateCases[t.params.case];
+    const outputUintsPerElement = 1;
+    const inputData = new Uint32Array([0]); // no input data
+    const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];
+
+    const wgsl = `
+enable subgroups;
+
+@group(0) @binding(0)
+var<storage> input : array<u32>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<u32>;
+
+struct Metadata {
+  subgroup_size : array<u32, ${wgThreads}>,
+  subgroup_invocation_id : array<u32, ${wgThreads}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+  @builtin(subgroup_invocation_id) id : u32,
+) {
+  _ = input[0];
+
+  // Record the actual subgroup size for this invocation.
+  // Note: subgroup_size builtin is always a power-of-2 and might be larger
+  // if the subgroup is not full.
+  let ballot = subgroupBallot(true);
+  var subgroupSize = countOneBits(ballot.x);
+  subgroupSize += countOneBits(ballot.y);
+  subgroupSize += countOneBits(ballot.z);
+  subgroupSize += countOneBits(ballot.w);
+  metadata.subgroup_size[lid] = subgroupSize;
+
+  // Record subgroup invocation id for this invocation.
+  metadata.subgroup_invocation_id[lid] = id;
+
+  if ${testcase.cond} {
+    outputs[lid] = ${t.params.operation}((id % 4) + 1);
+  } else {
+    return;
+  }
+}`;
+
+    await runComputeTest(
+      t,
+      wgsl,
+      [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
+      outputUintsPerElement,
+      inputData,
+      (metadata: Uint32Array, output: Uint32Array) => {
+        return checkPredicatedMultiplication(metadata, output, t.params.operation, testcase.filter);
+      }
+    );
+  });
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts
new file mode 100644
index 000000000000..9d147de1968b
--- /dev/null
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts
@@ -0,0 +1,554 @@
+import { assert, iterRange } from '../../../../../../common/util/util.js';
+import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js';
+import { kTextureFormatInfo } from '../../../../../format_info.js';
+import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js';
+import { FPInterval } from '../../../../../util/floating_point.js';
+import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js';
+import { PRNG } from '../../../../../util/prng.js';
+
+export class SubgroupTest extends TextureTestMixin(GPUTest) {}
+
+export const kNumCases = 1000;
+export const kStride = 128;
+
+export const kWGSizes = [
+  [4, 1, 1],
+  [8, 1, 1],
+  [16, 1, 1],
+  [32, 1, 1],
+  [64, 1, 1],
+  [128, 1, 1],
+  [256, 1, 1],
+  [1, 4, 1],
+  [1, 8, 1],
+  [1, 16, 1],
+  [1, 32, 1],
+  [1, 64, 1],
+  [1, 128, 1],
+  [1, 256, 1],
+  [1, 1, 4],
+  [1, 1, 8],
+  [1, 1, 16],
+  [1, 1, 32],
+  [1, 1, 64],
+  [3, 3, 3],
+  [4, 4, 4],
+  [16, 16, 1],
+  [16, 1, 16],
+  [1, 16, 16],
+  [15, 3, 3],
+  [3, 15, 3],
+  [3, 3, 15],
+] as const;
+
+export const kPredicateCases = {
+  every_even: {
+    cond: `id % 2 == 0`,
+    filter: (id: number, size: number) => {
+      return id % 2 === 0;
+    },
+  },
+  every_odd: {
+    cond: `id % 2 == 1`,
+    filter: (id: number, size: number) => {
+      return id % 2 === 1;
+    },
+  },
+  lower_half: {
+    cond: `id < subgroupSize / 2`,
+    filter: (id: number, size: number) => {
+      return id < Math.floor(size / 2);
+    },
+  },
+  upper_half: {
+    cond: `id >= subgroupSize / 2`,
+    filter: (id: number, size: number) => {
+      return id >= Math.floor(size / 2);
+    },
+  },
+  first_two: {
+    cond: `id == 0 || id == 1`,
+    filter: (id: number) => {
+      return id === 0 || id === 1;
+    },
+  },
+};
+
+/**
+ * Check the accuracy of the reduction operation.
+ *
+ * @param metadata An array containing subgroup ids for each invocation
+ * @param output An array containing the results of the reduction for each invocation
+ * @param indices An array of two values containing the indices of the interesting values in the input
+ * @param values An array of two values containing the interesting values in the input
+ * @param identity The identity for the operation
+ * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation
+ */
+function checkAccuracy(
+  metadata: Uint32Array,
+  output: Float32Array | Float16Array,
+  indices: number[],
+  values: number[],
+  identity: number,
+  intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval
+): undefined | Error {
+  const subgroupIdIdx1 = metadata[indices[0]];
+  const subgroupIdIdx2 = metadata[indices[1]];
+  for (let i = 0; i < output.length; i++) {
+    const subgroupId = metadata[i];
+
+    const v1 = subgroupId === subgroupIdIdx1 ? values[0] : identity;
+    const v2 = subgroupId === subgroupIdIdx2 ? values[1] : identity;
+    const interval = intervalGen(v1, v2);
+    if (!interval.contains(output[i])) {
+      return new Error(`Invocation ${i}, subgroup id ${subgroupId}: incorrect result
+- interval: ${interval.toString()}
+- output: ${output[i]}`);
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Run a floating-point accuracy subgroup test.
+ *
+ * @param t The base test
+ * @param seed A seed for the PRNG
+ * @param wgSize An array for the workgroup size
+ * @param operation The subgroup operation
+ * @param type The type (f16 or f32)
+ * @param identity The identity for the operation
+ * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation
+ */
+export async function runAccuracyTest(
+  t: GPUTest,
+  seed: number,
+  wgSize: number[],
+  operation: string,
+  type: 'f16' | 'f32',
+  identity: number,
+  intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval
+) {
+  assert(seed < kNumCases);
+  const prng = new PRNG(seed);
+
+  // Compatibility mode has lower workgroup limits.
+  const wgThreads = wgSize[0] * wgSize[1] * wgSize[2];
+  const {
+    maxComputeInvocationsPerWorkgroup,
+    maxComputeWorkgroupSizeX,
+    maxComputeWorkgroupSizeY,
+    maxComputeWorkgroupSizeZ,
+  } = t.device.limits;
+  t.skipIf(
+    maxComputeInvocationsPerWorkgroup < wgThreads ||
+      maxComputeWorkgroupSizeX < wgSize[0] ||
+      maxComputeWorkgroupSizeY < wgSize[1] ||
+      maxComputeWorkgroupSizeZ < wgSize[2],
+    'Workgroup size too large'
+  );
+
+  // Bias half the cases to lower indices since most subgroup sizes are <= 64.
+  let indexLimit = kStride;
+  if (seed < kNumCases / 4) {
+    indexLimit = 16;
+  } else if (seed < kNumCases / 2) {
+    indexLimit = 64;
+  }
+
+  // Ensure two distinct indices are picked.
+  const idx1 = prng.uniformInt(indexLimit);
+  let idx2 = prng.uniformInt(indexLimit - 1);
+  if (idx1 === idx2) {
+    idx2++;
+  }
+  assert(idx2 < indexLimit);
+
+  // Select two random values.
+  const range = type === 'f16' ? sparseScalarF16Range() : sparseScalarF32Range();
+  const numVals = range.length;
+  const val1 = range[prng.uniformInt(numVals)];
+  const val2 = range[prng.uniformInt(numVals)];
+
+  const extraEnables = type === 'f16' ? `enable f16;\nenable subgroups_f16;` : ``;
+  const wgsl = `
+enable subgroups;
+${extraEnables}
+
+@group(0) @binding(0)
+var<storage> inputs : array<${type}>;
+
+@group(0) @binding(1)
+var<storage, read_write> outputs : array<${type}>;
+
+struct Metadata {
+  subgroup_id : array<u32, ${kStride}>,
+}
+
+@group(0) @binding(2)
+var<storage, read_write> metadata : Metadata;
+
+@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]})
+fn main(
+  @builtin(local_invocation_index) lid : u32,
+) {
+  metadata.subgroup_id[lid] = subgroupBroadcast(lid, 0);
+  outputs[lid] = ${operation}(inputs[lid]);
+}`;
+
+  const inputData =
+    type === 'f16'
+      ? new Float16Array([
+          ...iterRange(kStride, x => {
+            if (x === idx1) return val1;
+            if (x === idx2) return val2;
+            return identity;
+          }),
+        ])
+      : new Float32Array([
+          ...iterRange(kStride, x => {
+            if (x === idx1) return val1;
+            if (x === idx2) return val2;
+            return identity;
+          }),
+        ]);
+
+  const inputBuffer = t.makeBufferWithContents(
+    inputData,
+    GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  );
+  t.trackForCleanup(inputBuffer);
+
+  const outputBuffer = t.makeBufferWithContents(
+    new Float32Array([...iterRange(kStride, x => 0)]),
+    GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  );
+  t.trackForCleanup(outputBuffer);
+
+  const numMetadata = kStride;
+  const metadataBuffer = t.makeBufferWithContents(
+    new Uint32Array([...iterRange(numMetadata, x => 0)]),
+    GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  );
+
+  const pipeline = t.device.createComputePipeline({
+    layout: 'auto',
+    compute: {
+      module: t.device.createShaderModule({
+        code: wgsl,
+      }),
+      entryPoint: 'main',
+    },
+  });
+  const bg = t.device.createBindGroup({
+    layout: pipeline.getBindGroupLayout(0),
+    entries: [
+      {
+        binding: 0,
+        resource: {
+          buffer: inputBuffer,
+        },
+      },
+      {
+        binding: 1,
+        resource: {
+          buffer: outputBuffer,
+        },
+      },
+      {
+        binding: 2,
+        resource: {
+          buffer: metadataBuffer,
+        },
+      },
+    ],
+  });
+
+  const encoder = t.device.createCommandEncoder();
+  const pass = encoder.beginComputePass();
+  pass.setPipeline(pipeline);
+  pass.setBindGroup(0, bg);
+  pass.dispatchWorkgroups(1, 1, 1);
+  pass.end();
+  t.queue.submit([encoder.finish()]);
+
+  const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, {
+    srcByteOffset: 0,
+    type: Uint32Array,
+    typedLength: numMetadata,
+    method: 'copy',
+  });
+  const metadata = metadataReadback.data;
+
+  let output: Float16Array | Float32Array;
+  if (type === 'f16') {
+    const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, {
+      srcByteOffset: 0,
+      type: Float16Array,
+      typedLength: kStride,
+      method: 'copy',
+    });
+    output = outputReadback.data;
+  } else {
+    const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, {
+      srcByteOffset: 0,
+      type: Float32Array,
+      typedLength: kStride,
+      method: 'copy',
+    });
+    output = outputReadback.data;
+  }
+
+  t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen));
+}
+
+export const kDataSentinel = 999;
+
+/**
+ * Runs compute shader subgroup test
+ *
+ * The test makes the following assumptions (wgThreads = product of the wgSize components):
+ * * group(0) binding(0) is a storage buffer for input data
+ * * group(0) binding(1) is an output storage buffer for outputUintsPerElement * wgThreads uints
+ * * group(0) binding(2) is an output storage buffer for 2 * wgThreads uints
+ *
+ * @param t The base test
+ * @param wgsl The shader code
+ * @param outputUintsPerElement number of uints output per invocation
+ * @param inputData the input data
+ * @param checkFunction a functor that takes the output storage buffer data to check result validity
+ */
+export async function runComputeTest(
+  t: GPUTest,
+  wgsl: string,
+  wgSize: number[],
+  outputUintsPerElement: number,
+  inputData: Uint32Array,
+  checkFunction: (metadata: Uint32Array, output: Uint32Array) => Error | undefined
+) {
+  // Compatibility mode has lower workgroup limits.
+  const wgThreads = wgSize[0] * wgSize[1] * wgSize[2];
+  const {
+    maxComputeInvocationsPerWorkgroup,
+    maxComputeWorkgroupSizeX,
+    maxComputeWorkgroupSizeY,
+    maxComputeWorkgroupSizeZ,
+  } = t.device.limits;
+  t.skipIf(
+    maxComputeInvocationsPerWorkgroup < wgThreads ||
+      maxComputeWorkgroupSizeX < wgSize[0] ||
+      maxComputeWorkgroupSizeY < wgSize[1] ||
+      maxComputeWorkgroupSizeZ < wgSize[2],
+    'Workgroup size too large'
+  );
+
+  const inputBuffer = t.makeBufferWithContents(
+    inputData,
+    GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  );
+  t.trackForCleanup(inputBuffer);
+
+  const outputUints = outputUintsPerElement * wgThreads;
+  const outputBuffer = t.makeBufferWithContents(
+    new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]),
+    GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  );
+  t.trackForCleanup(outputBuffer);
+
+  const numMetadata = 2 * wgThreads;
+  const metadataBuffer = t.makeBufferWithContents(
+    new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]),
+    GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
+  );
+
+  const pipeline = t.device.createComputePipeline({
+    layout: 'auto',
+    compute: {
+      module: t.device.createShaderModule({
+        code: wgsl,
+      }),
+    },
+  });
+  const bg = t.device.createBindGroup({
+    layout: pipeline.getBindGroupLayout(0),
+    entries: [
+      {
+        binding: 0,
+        resource: {
+          buffer: inputBuffer,
+        },
+      },
+      {
+        binding: 1,
+        resource: {
+          buffer: outputBuffer,
+        },
+      },
+      {
+        binding: 2,
+        resource: {
+          buffer: metadataBuffer,
+        },
+      },
+    ],
+  });
+
+  const encoder = t.device.createCommandEncoder();
+  const pass = encoder.beginComputePass();
+  pass.setPipeline(pipeline);
+  pass.setBindGroup(0, bg);
+  pass.dispatchWorkgroups(1, 1, 1);
+  pass.end();
+  t.queue.submit([encoder.finish()]);
+
+  const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, {
+    srcByteOffset: 0,
+    type: Uint32Array,
+    typedLength: numMetadata,
+    method: 'copy',
+  });
+  const metadata = metadataReadback.data;
+
+  const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, {
+    srcByteOffset: 0,
+    type: Uint32Array,
+    typedLength: outputUints,
+    method: 'copy',
+  });
+  const output = outputReadback.data;
+
+  t.expectOK(checkFunction(metadata, output));
+}
+
+// Minimum size is [3, 3].
+export const kFramebufferSizes = [
+  [15, 15],
+  [16, 16],
+  [17, 17],
+  [19, 13],
+  [13, 10],
+  [111, 3],
+  [3, 111],
+  [35, 3],
+  [3, 35],
+  [53, 13],
+  [13, 53],
+  [3, 3],
+] as const;
+
+/**
+ * Runs a subgroup builtin test for fragment shaders
+ *
+ * This test draws a full screen triangle.
+ * Tests should avoid checking the last row or column to avoid helper
+ * invocations. Underlying APIs do not consistently guarantee whether
+ * helper invocations participate in subgroup operations.
+ * @param t The base test
+ * @param format The framebuffer format
+ * @param fsShader The fragment shader with the following interface:
+ *                 Location 0 output is framebuffer with format
+ *                 Group 0 binding 0 is input data
+ * @param width The framebuffer width
+ * @param height The framebuffer height
+ * @param inputData The input data
+ * @param checker A functor to check the framebuffer values
+ */
+export async function runFragmentTest(
+  t: SubgroupTest,
+  format: GPUTextureFormat,
+  fsShader: string,
+  width: number,
+  height: number,
+  inputData: Uint32Array | Float32Array | Float16Array,
+  checker: (data: Uint32Array) => Error | undefined
+) {
+  const vsShader = `
+@vertex
+fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f {
+  const vertices = array(
+    vec2(-2, 4), vec2(-2, -4), vec2(2, 0),
+  );
+  return vec4f(vec2f(vertices[index]), 0, 1);
+}`;
+
+  assert(width >= 3, 'Minimum width is 3');
+  assert(height >= 3, 'Minimum height is 3');
+  const pipeline = t.device.createRenderPipeline({
+    layout: 'auto',
+    vertex: {
+      module: t.device.createShaderModule({ code: vsShader }),
+    },
+    fragment: {
+      module: t.device.createShaderModule({ code: fsShader }),
+      targets: [{ format }],
+    },
+    primitive: {
+      topology: 'triangle-list',
+    },
+  });
+
+  const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
+  assert(bytesPerBlock !== undefined);
+
+  const blocksPerRow = width / blockWidth;
+  const blocksPerColumn = height / blockHeight;
+  // 256 minimum arises from image copy requirements.
+  const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
+  const byteLength = bytesPerRow * blocksPerColumn;
+  const uintLength = byteLength / 4;
+
+  const buffer = t.makeBufferWithContents(
+    inputData,
+    GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
+  );
+
+  const bg = t.device.createBindGroup({
+    layout: pipeline.getBindGroupLayout(0),
+    entries: [
+      {
+        binding: 0,
+        resource: {
+          buffer,
+        },
+      },
+    ],
+  });
+
+  const framebuffer = t.createTextureTracked({
+    size: [width, height],
+    usage:
+      GPUTextureUsage.COPY_SRC |
+      GPUTextureUsage.COPY_DST |
+      GPUTextureUsage.RENDER_ATTACHMENT |
+      GPUTextureUsage.TEXTURE_BINDING,
+    format,
+  });
+
+  const encoder = t.device.createCommandEncoder();
+  const pass = encoder.beginRenderPass({
+    colorAttachments: [
+      {
+        view: framebuffer.createView(),
+        loadOp: 'clear',
+        storeOp: 'store',
+      },
+    ],
+  });
+  pass.setPipeline(pipeline);
+  pass.setBindGroup(0, bg);
+  pass.draw(3);
+  pass.end();
+  t.queue.submit([encoder.finish()]);
+
+  const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0);
+  const readback = await t.readGPUBufferRangeTyped(copyBuffer, {
+    srcByteOffset: 0,
+    type: Uint32Array,
+    typedLength: uintLength,
+    method: 'copy',
+  });
+  const data: Uint32Array = readback.data;
+
+  t.expectOK(checker(data));
+}
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts
index 40b331efaba9..fddb65e1420f 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts
@@ -1,6 +1,8 @@
 export const description = `
 Execution tests for the 'textureGather' builtin function
 
+- TODO: Test un-encodable formats.
+
 A texture gather operation reads from a 2D, 2D array, cube, or cube array texture, computing a four-component vector as follows:
  * Find the four texels that would be used in a sampling operation with linear filtering, from mip level 0:
    - Use the specified coordinate, array index (when present), and offset (when present).
@@ -23,11 +25,35 @@ A texture gather operation reads from a 2D, 2D array, cube, or cube array textur
 `;
 
 import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
-import { GPUTest } from '../../../../../gpu_test.js';
+import {
+  isDepthTextureFormat,
+  isEncodableTextureFormat,
+  kCompressedTextureFormats,
+  kDepthStencilFormats,
+  kEncodableTextureFormats,
+} from '../../../../../format_info.js';
+
+import {
+  appendComponentTypeForFormatToTextureType,
+  checkCallResults,
+  chooseTextureSize,
+  createTextureWithRandomDataAndGetTexels,
+  doTextureCalls,
+  generateSamplePointsCube,
+  generateTextureBuiltinInputs2D,
+  isFillable,
+  kCubeSamplePointMethods,
+  kSamplePointMethods,
+  skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice,
+  TextureCall,
+  vec2,
+  vec3,
+  WGSLTextureSampleTest,
+} from './texture_utils.js';
 
-import { generateCoordBoundaries, generateOffsets } from './utils.js';
+const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const;
 
-export const g = makeTestGroup(GPUTest);
+export const g = makeTestGroup(WGSLTextureSampleTest);
 
 g.test('sampled_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
@@ -55,22 +81,78 @@ Parameters:
       Values outside of this range will result in a shader-creation error.
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('T', ['f32-only', 'i32', 'u32'] as const)
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
+      .combine('format', kTestableColorFormats)
+      .filter(t => isFillable(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
       .combine('C', ['i32', 'u32'] as const)
-      .combine('C_value', [-1, 0, 1, 2, 3, 4] as const)
-      .combine('coords', generateCoordBoundaries(2))
-      .combine('offset', generateOffsets(2))
+      .combine('samplePoints', kSamplePointMethods)
+      .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('offset', [false, true] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureFormatNotSupported(t.params.format);
+    skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format);
+  })
+  .fn(async t => { // make texture + sampler, generate gather calls, run them, check results
+    const { format, C, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params;
+
+    // We want at least 4 blocks or something wide enough for 3 mip levels.
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format });
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      size: { width, height },
+      mipLevelCount: 3,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU,
+      addressModeV,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec2>[] = generateTextureBuiltinInputs2D(50, {
+      method: samplePoints,
+      textureBuiltin: 'textureGather',
+      sampler,
+      descriptor,
+      offset,
+      component: true, // so each generated input includes a component (used below)
+      hashInputs: [format, C, samplePoints, addressModeU, addressModeV, minFilter, offset],
+    }).map(({ coords, component, offset }) => {
+      return {
+        builtin: 'textureGather',
+        coordType: 'f',
+        coords,
+        component,
+        componentType: C === 'i32' ? 'i' : 'u', // type of the component arg, per the C param
+        offset,
+      };
+    });
+    const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format);
+    const viewDescriptor = {}; // no view overrides
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('sampled_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
   .desc(
     `
-C: i32, u32
 T: i32, u32, f32
 
 fn textureGather(component: C, t: texture_cube<T>, s: sampler, coords: vec3<f32>) -> vec4<T>
@@ -85,15 +167,75 @@ Parameters:
  * coords: The texture coordinates
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('T', ['f32-only', 'i32', 'u32'] as const)
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
+      .combine('format', kTestableColorFormats)
+      .filter(t => isFillable(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
       .combine('C', ['i32', 'u32'] as const)
-      .combine('C_value', [-1, 0, 1, 2, 3, 4] as const)
-      .combine('coords', generateCoordBoundaries(3))
+      .combine('samplePoints', kCubeSamplePointMethods)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureFormatNotSupported(t.params.format);
+    skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format);
+  })
+  .fn(async t => { // make cube texture + sampler, generate gather calls, run them, check results
+    const { format, C, samplePoints, addressMode, minFilter } = t.params;
+
+    const viewDimension: GPUTextureViewDimension = 'cube';
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+    const depthOrArrayLayers = 6; // a cube view spans 6 layers, one per face
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), // compat only
+      size: { width, height, depthOrArrayLayers },
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+      mipLevelCount: 3,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode, // one address mode is applied to all three axes
+      addressModeV: addressMode,
+      addressModeW: addressMode,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec3>[] = generateSamplePointsCube(50, {
+      method: samplePoints,
+      sampler,
+      descriptor,
+      component: true, // so each generated input includes a component (used below)
+      textureBuiltin: 'textureGather',
+      hashInputs: [format, C, samplePoints, addressMode, minFilter],
+    }).map(({ coords, component }) => {
+      return {
+        builtin: 'textureGather',
+        component,
+        componentType: C === 'i32' ? 'i' : 'u', // type of the component arg, per the C param
+        coordType: 'f',
+        coords,
+      };
+    });
+    const viewDescriptor = {
+      dimension: viewDimension,
+    };
+    const textureType = appendComponentTypeForFormatToTextureType('texture_cube', format);
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('sampled_array_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
@@ -122,17 +264,79 @@ Parameters:
       Values outside of this range will result in a shader-creation error.
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('T', ['f32-only', 'i32', 'u32'] as const)
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
+      .combine('format', kTestableColorFormats)
+      .filter(t => isFillable(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kSamplePointMethods)
       .combine('C', ['i32', 'u32'] as const)
-      .combine('C_value', [-1, 0, 1, 2, 3, 4] as const)
-      .combine('coords', generateCoordBoundaries(2))
-      /* array_index not param'd as out-of-bounds is implementation specific */
-      .combine('offset', generateOffsets(2))
+      .combine('A', ['i32', 'u32'] as const)
+      .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('offset', [false, true] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureFormatNotSupported(t.params.format);
+    skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format);
+  })
+  .fn(async t => { // make array texture + sampler, generate gather calls, run them, check results
+    const { format, samplePoints, C, A, addressModeU, addressModeV, minFilter, offset } = t.params;
+
+    // We want at least 4 blocks or something wide enough for 3 mip levels.
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format });
+    const depthOrArrayLayers = 4; // number of array layers in the test texture
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      size: { width, height, depthOrArrayLayers },
+      mipLevelCount: 3,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU,
+      addressModeV,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec2>[] = generateTextureBuiltinInputs2D(50, {
+      method: samplePoints,
+      textureBuiltin: 'textureGather',
+      sampler,
+      descriptor,
+      arrayIndex: { num: texture.depthOrArrayLayers, type: A }, // one index per array layer
+      offset,
+      component: true, // so each generated input includes a component (used below)
+      hashInputs: [format, samplePoints, C, A, addressModeU, addressModeV, minFilter, offset],
+    }).map(({ coords, component, arrayIndex, offset }) => {
+      return {
+        builtin: 'textureGather',
+        component,
+        componentType: C === 'i32' ? 'i' : 'u', // type of the component arg, per the C param
+        coordType: 'f',
+        coords,
+        arrayIndex,
+        arrayIndexType: A === 'i32' ? 'i' : 'u', // type of the array_index arg, per the A param
+        offset,
+      };
+    });
+    const textureType = appendComponentTypeForFormatToTextureType('texture_2d_array', format);
+    const viewDescriptor = {}; // no view overrides
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('sampled_array_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
@@ -140,8 +344,9 @@ g.test('sampled_array_3d_coords')
     `
 C: i32, u32
 T: i32, u32, f32
+A: i32, u32
 
-fn textureGather(component: C, t: texture_cube_array<T>, s: sampler, coords: vec3<f32>, array_index: C) -> vec4<T>
+fn textureGather(component: C, t: texture_cube_array<T>, s: sampler, coords: vec3<f32>, array_index: A) -> vec4<T>
 
 Parameters:
  * component:
@@ -154,17 +359,79 @@ Parameters:
  * array_index: The 0-based texture array index
 `
   )
-  .paramsSubcasesOnly(
-    u =>
-      u
-        .combine('T', ['f32-only', 'i32', 'u32'] as const)
-        .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-        .combine('C', ['i32', 'u32'] as const)
-        .combine('C_value', [-1, 0, 1, 2, 3, 4] as const)
-        .combine('coords', generateCoordBoundaries(3))
-    /* array_index not param'd as out-of-bounds is implementation specific */
+  .params(u =>
+    u
+      .combine('format', kTestableColorFormats)
+      .filter(t => isFillable(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kCubeSamplePointMethods)
+      .combine('C', ['i32', 'u32'] as const)
+      .combine('A', ['i32', 'u32'] as const)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureFormatNotSupported(t.params.format);
+    t.skipIfTextureViewDimensionNotSupported('cube-array');
+    skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format);
+  })
+  .fn(async t => { // make cube-array texture + sampler, generate calls, run them, check results
+    const { format, C, A, samplePoints, addressMode, minFilter } = t.params;
+
+    const viewDimension: GPUTextureViewDimension = 'cube-array';
+    const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), // compat only
+      size,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+      mipLevelCount: 3,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode, // one address mode is applied to all three axes
+      addressModeV: addressMode,
+      addressModeW: addressMode,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec3>[] = generateSamplePointsCube(50, {
+      method: samplePoints,
+      sampler,
+      descriptor,
+      component: true, // so each generated input includes a component (used below)
+      textureBuiltin: 'textureGather',
+      arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, // 6 layers per cube
+      hashInputs: [format, C, A, samplePoints, addressMode, minFilter],
+    }).map(({ coords, component, arrayIndex }) => {
+      return {
+        builtin: 'textureGather',
+        component,
+        componentType: C === 'i32' ? 'i' : 'u', // type of the component arg, per the C param
+        arrayIndex,
+        arrayIndexType: A === 'i32' ? 'i' : 'u', // type of the array_index arg, per the A param
+        coordType: 'f',
+        coords,
+      };
+    });
+    const viewDescriptor = {
+      dimension: viewDimension,
+    };
+    const textureType = appendComponentTypeForFormatToTextureType('texture_cube_array', format);
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('depth_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
@@ -185,13 +452,68 @@ Parameters:
       Values outside of this range will result in a shader-creation error.
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('coords', generateCoordBoundaries(2))
-      .combine('offset', generateOffsets(2))
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kSamplePointMethods)
+      .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('offset', [false, true] as const)
   )
-  .unimplemented();
+  .fn(async t => { // make depth texture + sampler, generate gather calls, run them, check results
+    const { format, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params;
+
+    // We want at least 4 blocks or something wide enough for 3 mip levels.
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format });
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      size: { width, height },
+      mipLevelCount: 3,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU,
+      addressModeV,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec2>[] = generateTextureBuiltinInputs2D(50, {
+      method: samplePoints,
+      textureBuiltin: 'textureGather',
+      sampler,
+      descriptor,
+      offset,
+      hashInputs: [format, samplePoints, addressModeU, addressModeV, minFilter, offset],
+    }).map(({ coords, offset }) => {
+      return {
+        builtin: 'textureGather', // depth overloads take no component argument
+        coordType: 'f',
+        coords,
+        offset,
+      };
+    });
+    const textureType = 'texture_depth_2d';
+    const viewDescriptor = {}; // no view overrides
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('depth_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
@@ -205,21 +527,79 @@ Parameters:
  * coords: The texture coordinates
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('coords', generateCoordBoundaries(3))
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kCubeSamplePointMethods)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
   )
-  .unimplemented();
+  .fn(async t => { // make depth cube texture + sampler, generate calls, run them, check results
+    const { format, samplePoints, addressMode, minFilter } = t.params;
+
+    const viewDimension: GPUTextureViewDimension = 'cube';
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+    const depthOrArrayLayers = 6; // a cube view spans 6 layers, one per face
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), // compat only
+      size: { width, height, depthOrArrayLayers },
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+      mipLevelCount: 3,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode, // one address mode is applied to all three axes
+      addressModeV: addressMode,
+      addressModeW: addressMode,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec3>[] = generateSamplePointsCube(50, {
+      method: samplePoints,
+      sampler,
+      descriptor,
+      textureBuiltin: 'textureGather',
+      hashInputs: [format, samplePoints, addressMode, minFilter],
+    }).map(({ coords }) => {
+      return {
+        builtin: 'textureGather', // depth overloads take no component argument
+        coordType: 'f',
+        coords,
+      };
+    });
+    const viewDescriptor = {
+      dimension: viewDimension,
+    };
+    const textureType = 'texture_depth_cube';
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('depth_array_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
   .desc(
     `
-C: i32, u32
+A: i32, u32
 
-fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2<f32>, array_index: C) -> vec4<f32>
-fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2<f32>, array_index: C, offset: vec2<i32>) -> vec4<f32>
+fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2<f32>, array_index: A) -> vec4<f32>
+fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2<f32>, array_index: A, offset: vec2<i32>) -> vec4<f32>
 
 Parameters:
  * t: The depth texture to read from
@@ -234,23 +614,86 @@ Parameters:
       Values outside of this range will result in a shader-creation error.
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('C', ['i32', 'u32'] as const)
-      .combine('coords', generateCoordBoundaries(2))
-      /* array_index not param'd as out-of-bounds is implementation specific */
-      .combine('offset', generateOffsets(2))
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kSamplePointMethods)
+      .combine('A', ['i32', 'u32'] as const)
+      .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('offset', [false, true] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureFormatNotSupported(t.params.format);
+    skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format);
+  })
+  .fn(async t => { // make depth array texture + sampler, generate calls, run them, check results
+    const { format, samplePoints, A, addressModeU, addressModeV, minFilter, offset } = t.params;
+
+    // We want at least 4 blocks or something wide enough for 3 mip levels.
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format });
+    const depthOrArrayLayers = 4; // number of array layers in the test texture
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      size: { width, height, depthOrArrayLayers },
+      mipLevelCount: 3,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU,
+      addressModeV,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec2>[] = generateTextureBuiltinInputs2D(50, {
+      method: samplePoints,
+      textureBuiltin: 'textureGather',
+      sampler,
+      descriptor,
+      arrayIndex: { num: texture.depthOrArrayLayers, type: A }, // one index per array layer
+      offset,
+      hashInputs: [format, samplePoints, A, addressModeU, addressModeV, minFilter, offset],
+    }).map(({ coords, arrayIndex, offset }) => {
+      return {
+        builtin: 'textureGather', // depth overloads take no component argument
+        coordType: 'f',
+        coords,
+        arrayIndex,
+        arrayIndexType: A === 'i32' ? 'i' : 'u', // type of the array_index arg, per the A param
+        offset,
+      };
+    });
+    const textureType = 'texture_depth_2d_array';
+    const viewDescriptor = {}; // no view overrides
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('depth_array_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegather')
   .desc(
     `
-C: i32, u32
+A: i32, u32
 
-fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3<f32>, array_index: C) -> vec4<f32>
+fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3<f32>, array_index: A) -> vec4<f32>
 
 Parameters:
  * t: The depth texture to read from
@@ -259,12 +702,73 @@ Parameters:
  * array_index: The 0-based texture array index
 `
   )
-  .paramsSubcasesOnly(
-    u =>
-      u
-        .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-        .combine('C', ['i32', 'u32'] as const)
-        .combine('coords', generateCoordBoundaries(3))
-    /* array_index not param'd as out-of-bounds is implementation specific */
+  .params(u =>
+    u
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kCubeSamplePointMethods)
+      .combine('A', ['i32', 'u32'] as const)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureViewDimensionNotSupported('cube-array');
+  })
+  .fn(async t => { // make depth cube-array texture + sampler, generate calls, run, check results
+    const { format, A, samplePoints, addressMode, minFilter } = t.params;
+
+    const viewDimension: GPUTextureViewDimension = 'cube-array';
+    const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), // compat only
+      size,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+      mipLevelCount: 3,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode, // one address mode is applied to all three axes
+      addressModeV: addressMode,
+      addressModeW: addressMode,
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec3>[] = generateSamplePointsCube(50, {
+      method: samplePoints,
+      sampler,
+      descriptor,
+      textureBuiltin: 'textureGather',
+      arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, // 6 layers per cube
+      hashInputs: [format, samplePoints, A, addressMode, minFilter],
+    }).map(({ coords, arrayIndex }) => {
+      return {
+        builtin: 'textureGather', // depth overloads take no component argument
+        arrayIndex,
+        arrayIndexType: A === 'i32' ? 'i' : 'u', // type of the array_index arg, per the A param
+        coordType: 'f',
+        coords,
+      };
+    });
+    const viewDescriptor = {
+      dimension: viewDimension,
+    };
+    const textureType = 'texture_depth_cube_array';
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts
index c743883ce849..89891738b0f6 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts
@@ -17,20 +17,38 @@ A texture gather compare operation performs a depth comparison on four texels in
 `;
 
 import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
-import { GPUTest } from '../../../../../gpu_test.js';
+import { kCompareFunctions } from '../../../../../capability_info.js';
+import {
+  isDepthTextureFormat,
+  isEncodableTextureFormat,
+  kDepthStencilFormats,
+} from '../../../../../format_info.js';
 
-import { generateCoordBoundaries, generateOffsets } from './utils.js';
+import {
+  checkCallResults,
+  chooseTextureSize,
+  createTextureWithRandomDataAndGetTexels,
+  doTextureCalls,
+  generateSamplePointsCube,
+  generateTextureBuiltinInputs2D,
+  kCubeSamplePointMethods,
+  kSamplePointMethods,
+  TextureCall,
+  vec2,
+  vec3,
+  WGSLTextureSampleTest,
+} from './texture_utils.js';
 
-export const g = makeTestGroup(GPUTest);
+export const g = makeTestGroup(WGSLTextureSampleTest);
 
 g.test('array_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare')
   .desc(
     `
-C: i32, u32
+A: i32, u32
 
-fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2<f32>, array_index: C, depth_ref: f32) -> vec4<f32>
-fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2<f32>, array_index: C, depth_ref: f32, offset: vec2<i32>) -> vec4<f32>
+fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2<f32>, array_index: A, depth_ref: f32) -> vec4<f32>
+fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2<f32>, array_index: A, depth_ref: f32, offset: vec2<i32>) -> vec4<f32>
 
 Parameters:
  * t: The depth texture to read from
@@ -46,24 +64,88 @@ Parameters:
       Values outside of this range will result in a shader-creation error.
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('C', ['i32', 'u32'] as const)
-      .combine('C_value', [-1, 0, 1, 2, 3, 4])
-      .combine('coords', generateCoordBoundaries(2))
-      .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const)
-      .combine('offset', generateOffsets(2))
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kSamplePointMethods)
+      .combine('A', ['i32', 'u32'] as const)
+      .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('compare', kCompareFunctions)
+      .combine('offset', [false, true] as const)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureFormatNotSupported(t.params.format);
+  })
+  .fn(async t => { // make depth array texture + comparison sampler, generate calls, run, check
+    const { format, samplePoints, A, addressModeU, addressModeV, minFilter, compare, offset } =
+      t.params;
+
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format });
+    const depthOrArrayLayers = 4; // number of array layers in the test texture
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      size: { width, height, depthOrArrayLayers },
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU,
+      addressModeV,
+      compare, // comparison function taken from the compare param
+      minFilter,
+      magFilter: minFilter, // mag and mipmap filters follow the minFilter param
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec2>[] = generateTextureBuiltinInputs2D(50, {
+      method: samplePoints,
+      textureBuiltin: 'textureGatherCompare',
+      sampler,
+      descriptor,
+      arrayIndex: { num: texture.depthOrArrayLayers, type: A }, // one index per array layer
+      depthRef: true, // so each generated input includes a depthRef (used below)
+      offset,
+      hashInputs: [format, samplePoints, A, addressModeU, addressModeV, minFilter, offset],
+    }).map(({ coords, arrayIndex, depthRef, offset }) => {
+      return {
+        builtin: 'textureGatherCompare',
+        coordType: 'f',
+        coords,
+        arrayIndex,
+        arrayIndexType: A === 'i32' ? 'i' : 'u', // type of the array_index arg, per the A param
+        depthRef,
+        offset,
+      };
+    });
+    const textureType = 'texture_depth_2d_array';
+    const viewDescriptor = {}; // no view overrides
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
 g.test('array_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare')
   .desc(
     `
-C: i32, u32
+A: i32, u32
 
-fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3<f32>, array_index: C, depth_ref: f32) -> vec4<f32>
+fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3<f32>, array_index: A, depth_ref: f32) -> vec4<f32>
 
 Parameters:
  * t: The depth texture to read from
@@ -73,17 +155,81 @@ Parameters:
  * depth_ref: The reference value to compare the sampled depth value against
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('C', ['i32', 'u32'] as const)
-      .combine('C_value', [-1, 0, 1, 2, 3, 4])
-      .combine('coords', generateCoordBoundaries(3))
-      .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const)
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kCubeSamplePointMethods)
+      .combine('A', ['i32', 'u32'] as const)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('compare', kCompareFunctions)
   )
-  .unimplemented();
+  .beforeAllSubcases(t => {
+    t.skipIfTextureViewDimensionNotSupported('cube-array');
+  })
+  .fn(async t => {
+    const { format, A, samplePoints, addressMode, minFilter, compare } = t.params;
+
+    const viewDimension: GPUTextureViewDimension = 'cube-array';
+    const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }),
+      size,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode,
+      addressModeV: addressMode,
+      addressModeW: addressMode,
+      compare,
+      minFilter,
+      magFilter: minFilter,
+      mipmapFilter: minFilter,
+    };
 
-g.test('sampled_array_2d_coords')
+    const calls: TextureCall<vec3>[] = generateSamplePointsCube(50, {
+      method: samplePoints,
+      sampler,
+      descriptor,
+      textureBuiltin: 'textureGatherCompare',
+      arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, // 6 array layers per cube
+      depthRef: true,
+      hashInputs: [format, samplePoints, A, addressMode, minFilter, compare],
+    }).map(({ coords, depthRef, arrayIndex }) => {
+      return {
+        builtin: 'textureGatherCompare',
+        arrayIndex,
+        arrayIndexType: A === 'i32' ? 'i' : 'u', // WGSL index type of the array_index argument
+        coordType: 'f',
+        coords,
+        depthRef,
+      };
+    });
+    const viewDescriptor = {
+      dimension: viewDimension,
+    };
+    const textureType = 'texture_depth_cube_array';
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
+
+g.test('sampled_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare')
   .desc(
     `
@@ -103,16 +249,72 @@ Parameters:
       Values outside of this range will result in a shader-creation error.
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('coords', generateCoordBoundaries(2))
-      .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const)
-      .combine('offset', generateOffsets(2))
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('C', ['i32', 'u32'] as const)
+      .combine('samplePoints', kSamplePointMethods)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('compare', kCompareFunctions)
+      .combine('offset', [false, true] as const)
   )
-  .unimplemented();
+  .fn(async t => {
+    const { format, C, samplePoints, addressMode, compare, minFilter, offset } = t.params;
+
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format });
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      size: { width, height },
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode,
+      addressModeV: addressMode,
+      compare,
+      minFilter,
+      magFilter: minFilter,
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec2>[] = generateTextureBuiltinInputs2D(50, {
+      method: samplePoints,
+      textureBuiltin: 'textureGatherCompare',
+      sampler,
+      descriptor,
+      offset,
+      depthRef: true,
+      hashInputs: [format, C, samplePoints, addressMode, minFilter, compare, offset],
+    }).map(({ coords, depthRef, offset }) => {
+      return {
+        builtin: 'textureGatherCompare',
+        coordType: 'f',
+        coords,
+        depthRef,
+        offset,
+      };
+    });
+    const textureType = 'texture_depth_2d';
+    const viewDescriptor = {};
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
 
-g.test('sampled_array_3d_coords')
+g.test('sampled_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare')
   .desc(
     `
@@ -125,10 +327,70 @@ Parameters:
  * depth_ref: The reference value to compare the sampled depth value against
 `
   )
-  .paramsSubcasesOnly(u =>
+  .params(u =>
     u
-      .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat'])
-      .combine('coords', generateCoordBoundaries(3))
-      .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const)
+      .combine('format', kDepthStencilFormats)
+      // filter out stencil only formats
+      .filter(t => isDepthTextureFormat(t.format))
+      // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added.
+      .filter(t => isEncodableTextureFormat(t.format))
+      .combine('minFilter', ['nearest', 'linear'] as const)
+      .beginSubcases()
+      .combine('samplePoints', kCubeSamplePointMethods)
+      .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
+      .combine('compare', kCompareFunctions)
   )
-  .unimplemented();
+  .fn(async t => {
+    const { format, samplePoints, addressMode, minFilter, compare } = t.params;
+
+    const viewDimension: GPUTextureViewDimension = 'cube';
+    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+    const depthOrArrayLayers = 6;
+
+    const descriptor: GPUTextureDescriptor = {
+      format,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }),
+      size: { width, height, depthOrArrayLayers },
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
+    };
+    const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
+    const sampler: GPUSamplerDescriptor = {
+      addressModeU: addressMode,
+      addressModeV: addressMode,
+      addressModeW: addressMode,
+      compare,
+      minFilter,
+      magFilter: minFilter,
+      mipmapFilter: minFilter,
+    };
+
+    const calls: TextureCall<vec3>[] = generateSamplePointsCube(50, {
+      method: samplePoints,
+      sampler,
+      descriptor,
+      depthRef: true,
+      textureBuiltin: 'textureGatherCompare',
+      hashInputs: [format, samplePoints, addressMode, minFilter, compare],
+    }).map(({ coords, depthRef }) => {
+      return {
+        builtin: 'textureGatherCompare',
+        coordType: 'f',
+        coords,
+        depthRef,
+      };
+    });
+    const viewDescriptor = {
+      dimension: viewDimension,
+    };
+    const textureType = 'texture_depth_cube';
+    const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls);
+    const res = await checkCallResults(
+      t,
+      { texels, descriptor, viewDescriptor },
+      textureType,
+      sampler,
+      calls,
+      results
+    );
+    t.expectOK(res);
+  });
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts
index 879817ec8ca3..2f42fffeb768 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts
@@ -167,8 +167,7 @@ Parameters:
   .params(u =>
     u
       .combine('format', kTestableColorFormats)
-      // MAINTENANCE_TODO: Update createTextureFromTexelViews to support stencil8 and remove this filter.
-      .filter(t => t.format !== 'stencil8' && !isCompressedFloatTextureFormat(t.format))
+      .filter(t => !isCompressedFloatTextureFormat(t.format))
       .beginSubcases()
       .combine('samplePoints', kSamplePointMethods)
       .combine('C', ['i32', 'u32'] as const)
@@ -188,10 +187,7 @@ Parameters:
     const descriptor: GPUTextureDescriptor = {
       format,
       size,
-      usage:
-        GPUTextureUsage.COPY_DST |
-        GPUTextureUsage.TEXTURE_BINDING |
-        (canUseAsRenderTarget(format) ? GPUTextureUsage.RENDER_ATTACHMENT : 0),
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
       mipLevelCount: maxMipLevelCount({ size }),
     };
     const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
@@ -422,10 +418,7 @@ Parameters:
     const descriptor: GPUTextureDescriptor = {
       format,
       size,
-      usage:
-        GPUTextureUsage.COPY_DST |
-        GPUTextureUsage.TEXTURE_BINDING |
-        GPUTextureUsage.RENDER_ATTACHMENT,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
       mipLevelCount: maxMipLevelCount({ size }),
     };
     const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
@@ -478,6 +471,9 @@ Parameters:
       .combine('C', ['i32', 'u32'] as const)
       .combine('L', ['i32', 'u32'] as const)
   )
+  .beforeAllSubcases(t =>
+    t.skipIf(typeof VideoFrame === 'undefined', 'VideoFrames are not supported')
+  )
   .fn(async t => {
     const { samplePoints, C, L } = t.params;
 
@@ -490,6 +486,7 @@ Parameters:
       size,
       usage: GPUTextureUsage.COPY_DST,
     };
+
     const { texels, videoFrame } = createVideoFrameWithRandomDataAndGetTexels(descriptor.size);
     const texture = t.device.importExternalTexture({ source: videoFrame });
 
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts
index ca7ae3d0655c..500376321444 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts
@@ -51,13 +51,13 @@ Parameters
   .params(u =>
     u
       .combine('texture_type', ['texture_2d_array', 'texture_cube_array'] as const)
+      .combine('view_type', ['full', 'partial'] as const)
       .beginSubcases()
       .combine('sampled_type', ['f32', 'i32', 'u32'] as const)
-      .combine('view_type', ['full', 'partial'] as const)
   )
   .beforeAllSubcases(t => {
     t.skipIf(
-      t.isCompatibility && t.params.view === 'partial',
+      t.isCompatibility && t.params.view_type === 'partial',
       'compatibility mode does not support partial layer views'
     );
     t.skipIf(
@@ -110,12 +110,11 @@ Parameters
   .params(u =>
     u
       .combine('texture_type', ['texture_depth_2d_array', 'texture_depth_cube_array'] as const)
-      .beginSubcases()
       .combine('view_type', ['full', 'partial'] as const)
   )
   .beforeAllSubcases(t => {
     t.skipIf(
-      t.isCompatibility && t.params.view === 'partial',
+      t.isCompatibility && t.params.view_type === 'partial',
       'compatibility mode does not support partial layer views'
     );
     t.skipIf(
@@ -184,14 +183,20 @@ Parameters
   .params(u =>
     u
       .combineWithParams(TexelFormats)
+      .combine('view_type', ['full', 'partial'] as const)
       .beginSubcases()
       .combine('access_mode', ['read', 'write', 'read_write'] as const)
       .filter(
         t => t.access_mode !== 'read_write' || kTextureFormatInfo[t.format].color?.readWriteStorage
       )
-      .combine('view_type', ['full', 'partial'] as const)
   )
-  .beforeAllSubcases(t => t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format))
+  .beforeAllSubcases(t => {
+    t.skipIf(
+      t.isCompatibility && t.params.view_type === 'partial',
+      'compatibility mode does not support partial layer views'
+    );
+    t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format);
+  })
   .fn(t => {
     const { format, access_mode, view_type } = t.params;
 
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts
index 5610701601cb..471a462504d4 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts
@@ -88,6 +88,7 @@ Parameters
     const texture = t.createTextureTracked({
       format,
       dimension,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }),
       usage: GPUTextureUsage.TEXTURE_BINDING,
       size: {
         width,
@@ -157,6 +158,7 @@ Parameters
     const texture = t.createTextureTracked({
       format: 'depth32float',
       dimension,
+      ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }),
       usage: GPUTextureUsage.TEXTURE_BINDING,
       size: {
         width,
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts
index 452c3b4df710..b670c44035f9 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts
@@ -61,6 +61,12 @@ Parameters:
       .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const)
       .combine('minFilter', ['nearest', 'linear'] as const)
   )
+  .beforeAllSubcases(t =>
+    t.skipIf(
+      t.params.textureType === 'texture_external' && typeof VideoFrame === 'undefined',
+      'VideoFrames are not supported'
+    )
+  )
   .fn(async t => {
     const { textureType, samplePoints, addressModeU, addressModeV, minFilter } = t.params;
 
diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts
index 729563553260..5c6e99eb9665 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts
@@ -5,15 +5,6 @@ Must only be used in a fragment shader stage.
 Must only be invoked in uniform control flow.
 
 - TODO: Test un-encodable formats.
-- TODO: set mipLevelCount to 3 for cubemaps. See MAINTENANCE_TODO below
-
-  The issue is sampling a corner of a cubemap is undefined. We try to quantize coordinates
-  so we never get a corner but when sampling smaller mip levels that's more difficult unless we make the textures
-  larger. Larger is slower.
-
-  Solution 1: Fix the quantization
-  Solution 2: special case checking cube corners. Expect some value between the color of the 3 corner texels.
-
 `;
 
 import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
@@ -284,7 +275,7 @@ Parameters:
   .fn(async t => {
     const { format, viewDimension, samplePoints, addressMode, minFilter, offset } = t.params;
 
-    const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension });
+    const [width, height] = chooseTextureSize({ minSize: 32, minBlocks: 2, format, viewDimension });
     const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension);
 
     const descriptor: GPUTextureDescriptor = {
@@ -293,8 +284,7 @@ Parameters:
       ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }),
       size: { width, height, depthOrArrayLayers },
       usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
-      // MAINTENANCE_TODO: make mipLevelCount always 3
-      mipLevelCount: viewDimension === 'cube' ? 1 : 3,
+      mipLevelCount: 3,
     };
     const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
     const sampler: GPUSamplerDescriptor = {
@@ -396,7 +386,7 @@ Parameters:
 
     const viewDimension: GPUTextureViewDimension = 'cube-array';
     const size = chooseTextureSize({
-      minSize: 8,
+      minSize: 32,
       minBlocks: 4,
       format,
       viewDimension,
@@ -405,8 +395,7 @@ Parameters:
       format,
       size,
       usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
-      // MAINTENANCE_TODO: Set this to 3. See above.
-      mipLevelCount: 1,
+      mipLevelCount: 3,
     };
     const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
     const sampler: GPUSamplerDescriptor = {
@@ -423,8 +412,8 @@ Parameters:
       sampler,
       descriptor,
       mipLevel: { num: texture.mipLevelCount, type: 'f32' },
-      arrayIndex: { num: texture.depthOrArrayLayers, type: A },
-      hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter],
+      arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A },
+      hashInputs: [format, viewDimension, A, samplePoints, addressMode, minFilter],
     }).map(({ coords, mipLevel, arrayIndex }) => {
       return {
         builtin: 'textureSampleLevel',
@@ -456,7 +445,7 @@ g.test('depth_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel')
   .desc(
     `
-C is i32 or u32
+L is i32 or u32
 
 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2<f32>, level: L) -> f32
 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2<f32>, level: L, offset: vec2<i32>) -> f32
@@ -504,10 +493,7 @@ Parameters:
       format,
       size: { width, height },
       mipLevelCount: 3,
-      usage:
-        GPUTextureUsage.COPY_DST |
-        GPUTextureUsage.TEXTURE_BINDING |
-        GPUTextureUsage.RENDER_ATTACHMENT,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
     };
     const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
     const sampler: GPUSamplerDescriptor = {
@@ -553,7 +539,8 @@ g.test('depth_array_2d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel')
   .desc(
     `
-C is i32 or u32
+A is i32 or u32
+L is i32 or u32
 
 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2<f32>, array_index: A, level: L) -> f32
 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2<f32>, array_index: A, level: L, offset: vec2<i32>) -> f32
@@ -603,10 +590,7 @@ Parameters:
       format,
       size: { width, height },
       mipLevelCount: 3,
-      usage:
-        GPUTextureUsage.COPY_DST |
-        GPUTextureUsage.TEXTURE_BINDING |
-        GPUTextureUsage.RENDER_ATTACHMENT,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
       ...(t.isCompatibility && { textureBindingViewDimension: '2d-array' }),
     };
     const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor);
@@ -656,7 +640,8 @@ g.test('depth_3d_coords')
   .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel')
   .desc(
     `
-C is i32 or u32
+L is i32 or u32
+A is i32 or u32
 
 fn textureSampleLevel(t: texture_depth_cube, s: sampler, coords: vec3<f32>, level: L) -> f32
 fn textureSampleLevel(t: texture_depth_cube_array, s: sampler, coords: vec3<f32>, array_index: A, level: L) -> f32
@@ -704,7 +689,7 @@ Parameters:
     const { format, viewDimension, samplePoints, A, L, addressMode, minFilter } = t.params;
 
     const size = chooseTextureSize({
-      minSize: 8,
+      minSize: 32,
       minBlocks: 4,
       format,
       viewDimension,
@@ -712,10 +697,7 @@ Parameters:
     const descriptor: GPUTextureDescriptor = {
       format,
       size,
-      usage:
-        GPUTextureUsage.COPY_DST |
-        GPUTextureUsage.TEXTURE_BINDING |
-        GPUTextureUsage.RENDER_ATTACHMENT,
+      usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING,
       mipLevelCount: 3,
       ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }),
     };
@@ -733,8 +715,8 @@ Parameters:
       method: samplePoints,
       sampler,
       descriptor,
-      mipLevel: { num: texture.mipLevelCount, type: L },
-      arrayIndex: A ? { num: texture.depthOrArrayLayers, type: A } : undefined,
+      mipLevel: { num: texture.mipLevelCount - 1, type: L },
+      arrayIndex: A ? { num: texture.depthOrArrayLayers / 6, type: A } : undefined,
       hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter],
     }).map(({ coords, mipLevel, arrayIndex }) => {
       return {
diff --git a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts
index e997833a137f..f5e6b55dd23c 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts
@@ -1,10 +1,11 @@
-import { keysOf } from '../../../../../../common/util/data_tables.js';
 import { assert, range, unreachable } from '../../../../../../common/util/util.js';
 import {
   EncodableTextureFormat,
   isCompressedFloatTextureFormat,
   isCompressedTextureFormat,
   isDepthOrStencilTextureFormat,
+  isDepthTextureFormat,
+  isStencilTextureFormat,
   kEncodableTextureFormats,
   kTextureFormatInfo,
 } from '../../../../../format_info.js';
@@ -77,16 +78,52 @@ export function getTextureTypeForTextureViewDimension(viewDimension: GPUTextureV
   }
 }
 
+const is32Float = (format: GPUTextureFormat) =>
+  ['r32float', 'rg32float', 'rgba32float'].includes(format); // the r/rg/rgba 32-bit float formats
+
+/**
+ * Passes the format's `feature` to t.selectDeviceOrSkipTestCase. When filter === 'linear' it also
+ * skips depth formats and requests 'float32-filterable' for 'unfilterable-float' color formats.
+ */
+export function skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(
+  t: GPUTestSubcaseBatchState,
+  filter: GPUFilterMode,
+  format: GPUTextureFormat
+) {
+  const features = new Set<GPUFeatureName | undefined>();
+  features.add(kTextureFormatInfo[format].feature); // may be undefined
+
+  if (filter === 'linear') {
+    t.skipIf(isDepthTextureFormat(format), 'depth texture are unfilterable');
+
+    const type = kTextureFormatInfo[format].color?.type; // undefined when there is no color aspect
+    if (type === 'unfilterable-float') {
+      assert(is32Float(format)); // only the 32-bit float formats are expected here
+      features.add('float32-filterable');
+    }
+  }
+
+  if (features.size > 0) { // NOTE(review): always true, the add() above is unconditional
+    t.selectDeviceOrSkipTestCase(Array.from(features));
+  }
+}
+
+/**
+ * Tells whether textures of `format` can be populated with random data.
+ */
+export function isFillable(format: GPUTextureFormat) {
+  // Random bytes in a compressed float format may decode to +/- infinity or NaN, while the
+  // data is meant to stay within +/- 1000; only that combination is reported as unfillable.
+  return !(isCompressedTextureFormat(format) && format.endsWith('float'));
+}
+
 /**
  * Returns if a texture format can potentially be filtered and can be filled with random data.
  */
 export function isPotentiallyFilterableAndFillable(format: GPUTextureFormat) {
   const type = kTextureFormatInfo[format].color?.type;
   const canPotentiallyFilter = type === 'float' || type === 'unfilterable-float';
-  // We can't easily put random bytes into compressed textures if they are float formats
-  // since we want the range to be +/- 1000 and not +/- infinity or NaN.
-  const isFillable = !isCompressedTextureFormat(format) || !format.endsWith('float');
-  return canPotentiallyFilter && isFillable;
+  return canPotentiallyFilter && isFillable(format);
 }
 
 /**
@@ -105,6 +142,156 @@ export function skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(
   }
 }
 
+async function queryMipGradientValuesForDevice(t: GPUTest) { // measures mip blend weights
+  const { device } = t;
+  const module = device.createShaderModule({
+    code: `
+      @group(0) @binding(0) var tex: texture_2d<f32>;
+      @group(0) @binding(1) var smp: sampler;
+      @group(0) @binding(2) var<storage, read_write> result: array<f32>;
+
+      @vertex fn vs(@builtin(vertex_index) vNdx: u32) -> @builtin(position) vec4f {
+        let pos = array(
+          vec2f(-1,  3),
+          vec2f( 3, -1),
+          vec2f(-1, -1),
+        );
+        return vec4f(pos[vNdx], 0, 1);
+      }
+      @fragment fn fs(@builtin(position) pos: vec4f) -> @location(0) vec4f {
+        let mipLevel = floor(pos.x) / ${kMipGradientSteps};
+        result[u32(pos.x)] = textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r;
+        return vec4f(0);
+      }
+    `,
+  });
+
+  const pipeline = device.createRenderPipeline({
+    layout: 'auto',
+    vertex: { module },
+    fragment: { module, targets: [{ format: 'rgba8unorm' }] },
+  });
+
+  const target = t.createTextureTracked({ // one pixel per step: 0..kMipGradientSteps
+    size: [kMipGradientSteps + 1, 1, 1],
+    format: 'rgba8unorm',
+    usage: GPUTextureUsage.RENDER_ATTACHMENT,
+  });
+
+  const texture = t.createTextureTracked({ // 2x2 r8unorm with two mip levels
+    size: [2, 2, 1],
+    format: 'r8unorm',
+    usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
+    mipLevelCount: 2,
+  });
+
+  device.queue.writeTexture( // fill mip 1 with 255; mip 0 is never written here
+    { texture, mipLevel: 1 },
+    new Uint8Array([255]),
+    { bytesPerRow: 1 },
+    [1, 1]
+  );
+
+  const sampler = device.createSampler({ // linear mipmapFilter so the two mips are blended
+    minFilter: 'linear',
+    magFilter: 'linear',
+    mipmapFilter: 'linear',
+  });
+
+  const storageBuffer = t.createBufferTracked({ // one f32 result per step
+    size: 4 * (kMipGradientSteps + 1),
+    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+  });
+
+  const resultBuffer = t.createBufferTracked({ // mappable copy of storageBuffer
+    size: storageBuffer.size,
+    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
+  });
+
+  const bindGroup = device.createBindGroup({
+    layout: pipeline.getBindGroupLayout(0),
+    entries: [
+      { binding: 0, resource: texture.createView() },
+      { binding: 1, resource: sampler },
+      { binding: 2, resource: { buffer: storageBuffer } },
+    ],
+  });
+
+  const encoder = device.createCommandEncoder();
+  const pass = encoder.beginRenderPass({
+    colorAttachments: [
+      {
+        view: target.createView(),
+        loadOp: 'clear',
+        storeOp: 'store',
+      },
+    ],
+  });
+  pass.setPipeline(pipeline);
+  pass.setBindGroup(0, bindGroup);
+  pass.draw(3); // the single oversized triangle produced by vs()
+  pass.end();
+  encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size);
+  device.queue.submit([encoder.finish()]);
+
+  await resultBuffer.mapAsync(GPUMapMode.READ);
+  const weights = Array.from(new Float32Array(resultBuffer.getMappedRange()));
+  resultBuffer.unmap();
+
+  texture.destroy(); // NOTE(review): `target` is not destroyed explicitly here
+  storageBuffer.destroy();
+  resultBuffer.destroy();
+
+  const showWeights = () => weights.map((v, i) => `${i.toString().padStart(2)}: ${v}`).join('\n');
+
+  // Validate the weights: exactly 0 and 1 at the ends, ~0.5 in the middle, increasing in between.
+  assert(weights[0] === 0, `weight 0 expected 0 but was ${weights[0]}\n${showWeights()}`);
+  assert(
+    weights[kMipGradientSteps] === 1,
+    `top weight expected 1 but was ${weights[kMipGradientSteps]}\n${showWeights()}`
+  );
+  assert(
+    Math.abs(weights[kMipGradientSteps / 2] - 0.5) < 0.0001,
+    `middle weight expected approximately 0.5 but was ${
+      weights[kMipGradientSteps / 2]
+    }\n${showWeights()}`
+  );
+
+  // Note: for 16 steps, these are the AMD weights
+  //
+  //                 standard
+  // step  mipLevel    gpu        AMD
+  // ----  --------  --------  ----------
+  //  0:   0         0           0
+  //  1:   0.0625    0.0625      0
+  //  2:   0.125     0.125       0.03125
+  //  3:   0.1875    0.1875      0.109375
+  //  4:   0.25      0.25        0.1875
+  //  5:   0.3125    0.3125      0.265625
+  //  6:   0.375     0.375       0.34375
+  //  7:   0.4375    0.4375      0.421875
+  //  8:   0.5       0.5         0.5
+  //  9:   0.5625    0.5625      0.578125
+  // 10:   0.625     0.625       0.65625
+  // 11:   0.6875    0.6875      0.734375
+  // 12:   0.75      0.75        0.8125
+  // 13:   0.8125    0.8125      0.890625
+  // 14:   0.875     0.875       0.96875
+  // 15:   0.9375    0.9375      1
+  // 16:   1         1           1
+  //
+  // notice step 1 is 0 and step 15 is 1.
+  // so we only check the 1 through 14.
+  for (let i = 1; i < kMipGradientSteps - 1; ++i) {
+    assert(
+      weights[i] < weights[i + 1],
+      `weight[${i}] was not less than < weight[${i + 1}]\n${showWeights()}`
+    );
+  }
+
+  s_deviceToMipGradientValues.set(device, weights); // cache the measured weights per device
+}
+
 /**
  * Gets the mip gradient values for the current device.
  * The issue is, different GPUs have different ways of mixing between mip levels.
@@ -121,123 +308,28 @@ export function skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable(
  *
  * There's an assumption that the gradient will be the same for all formats
  * and usages.
+ *
+ * Note: The code below has 2 maps. One device->Promise, the other device->weights
+ * device->weights is meant to be used synchronously by other code so we don't
+ * want to leave initMipGradientValuesForDevice until the weights have been read.
+ * But, multiple subcases will run because this function is async. So, subcase 1
+ * runs, hits this init code, this code waits for the weights. Then, subcase 2
+ * runs and hits this init code. The weights will not be in the device->weights map
+ * yet which is why we have the device->Promise map. This is so subcase 2 waits
+ * for subcase 1's "query the weights" step. Otherwise, all subcases would do the
+ * "get the weights" step separately.
  */
 const kMipGradientSteps = 16;
+const s_deviceToMipGradientValuesPromise = new WeakMap<GPUDevice, Promise<void>>();
 const s_deviceToMipGradientValues = new WeakMap<GPUDevice, number[]>();
 async function initMipGradientValuesForDevice(t: GPUTest) {
   const { device } = t;
-  const weights = s_deviceToMipGradientValues.get(device);
-  if (!weights) {
-    const module = device.createShaderModule({
-      code: `
-        @group(0) @binding(0) var tex: texture_2d<f32>;
-        @group(0) @binding(1) var smp: sampler;
-        @group(0) @binding(2) var<storage, read_write> result: array<f32>;
-
-        @compute @workgroup_size(1) fn cs(@builtin(global_invocation_id) id: vec3u) {
-          let mipLevel = f32(id.x) / ${kMipGradientSteps};
-          result[id.x] = textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r;
-        }
-      `,
-    });
-
-    const pipeline = device.createComputePipeline({
-      layout: 'auto',
-      compute: { module },
-    });
-
-    const texture = t.createTextureTracked({
-      size: [2, 2, 1],
-      format: 'r8unorm',
-      usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST,
-      mipLevelCount: 2,
-    });
-
-    device.queue.writeTexture(
-      { texture, mipLevel: 1 },
-      new Uint8Array([255]),
-      { bytesPerRow: 1 },
-      [1, 1]
-    );
-
-    const sampler = device.createSampler({
-      minFilter: 'linear',
-      magFilter: 'linear',
-      mipmapFilter: 'linear',
-    });
-
-    const storageBuffer = t.createBufferTracked({
-      size: 4 * (kMipGradientSteps + 1),
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
-
-    const resultBuffer = t.createBufferTracked({
-      size: storageBuffer.size,
-      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
-    });
-
-    const bindGroup = device.createBindGroup({
-      layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: texture.createView() },
-        { binding: 1, resource: sampler },
-        { binding: 2, resource: { buffer: storageBuffer } },
-      ],
-    });
-
-    const encoder = device.createCommandEncoder();
-    const pass = encoder.beginComputePass();
-    pass.setPipeline(pipeline);
-    pass.setBindGroup(0, bindGroup);
-    pass.dispatchWorkgroups(kMipGradientSteps + 1);
-    pass.end();
-    encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size);
-    device.queue.submit([encoder.finish()]);
-
-    await resultBuffer.mapAsync(GPUMapMode.READ);
-    const weights = Array.from(new Float32Array(resultBuffer.getMappedRange()));
-    resultBuffer.unmap();
-
-    texture.destroy();
-    storageBuffer.destroy();
-    resultBuffer.destroy();
-
-    // Validate the weights
-    assert(weights[0] === 0);
-    assert(weights[kMipGradientSteps] === 1);
-    assert(weights[kMipGradientSteps / 2] === 0.5);
-
-    // Note: for 16 steps, these are the AMD weights
-    //
-    //                 standard
-    // step  mipLevel    gpu        AMD
-    // ----  --------  --------  ----------
-    //  0:   0         0           1
-    //  1:   0.0625    0.0625      0
-    //  2:   0.125     0.125       0.03125
-    //  3:   0.1875    0.1875      0.109375
-    //  4:   0.25      0.25        0.1875
-    //  5:   0.3125    0.3125      0.265625
-    //  6:   0.375     0.375       0.34375
-    //  7:   0.4375    0.4375      0.421875
-    //  8:   0.5       0.5         0.5
-    //  9:   0.5625    0.5625      0.578125
-    // 10:   0.625     0.625       0.65625
-    // 11:   0.6875    0.6875      0.734375
-    // 12:   0.75      0.75        0.8125
-    // 13:   0.8125    0.8125      0.890625
-    // 14:   0.875     0.875       0.96875
-    // 15:   0.9375    0.9375      1
-    // 16:   1         1           1
-    //
-    // notice step 1 is 0 and step 15 is 1.
-    // so we only check the 1 through 14.
-    for (let i = 1; i < kMipGradientSteps - 1; ++i) {
-      assert(weights[i] < weights[i + 1]);
-    }
-
-    s_deviceToMipGradientValues.set(device, weights);
+  let weightsP = s_deviceToMipGradientValuesPromise.get(device);
+  if (!weightsP) {
+    weightsP = queryMipGradientValuesForDevice(t);
+    s_deviceToMipGradientValuesPromise.set(device, weightsP);
   }
+  return await weightsP;
 }
 
 function getWeightForMipLevel(t: GPUTest, mipLevelCount: number, mipLevel: number) {
@@ -457,33 +549,55 @@ export type Dimensionality = vec1 | vec2 | vec3;
 
 type TextureCallArgKeys = keyof TextureCallArgs<vec1>;
 const kTextureCallArgNames: readonly TextureCallArgKeys[] = [
+  'component',
   'coords',
   'arrayIndex',
   'sampleIndex',
   'mipLevel',
   'ddx',
   'ddy',
+  'depthRef',
   'offset',
 ] as const;
 
 export interface TextureCallArgs<T extends Dimensionality> {
+  component?: number;
   coords?: T;
   mipLevel?: number;
   arrayIndex?: number;
   sampleIndex?: number;
+  depthRef?: number;
   ddx?: T;
   ddy?: T;
   offset?: T;
 }
 
+export type TextureBuiltin =
+  | 'textureGather'
+  | 'textureGatherCompare'
+  | 'textureLoad'
+  | 'textureSample'
+  | 'textureSampleBaseClampToEdge'
+  | 'textureSampleLevel';
+
 export interface TextureCall<T extends Dimensionality> extends TextureCallArgs<T> {
-  builtin: 'textureLoad' | 'textureSample' | 'textureSampleBaseClampToEdge' | 'textureSampleLevel';
+  builtin: TextureBuiltin;
   coordType: 'f' | 'i' | 'u';
   levelType?: 'i' | 'u' | 'f';
   arrayIndexType?: 'i' | 'u';
   sampleIndexType?: 'i' | 'u';
+  componentType?: 'i' | 'u';
 }
 
+const isBuiltinComparison = (builtin: TextureBuiltin) => builtin === 'textureGatherCompare';
+const isBuiltinGather = (builtin: TextureBuiltin | undefined) =>
+  builtin === 'textureGather' || builtin === 'textureGatherCompare';
+const builtinNeedsSampler = (builtin: TextureBuiltin) =>
+  builtin.startsWith('textureSample') || builtin.startsWith('textureGather');
+
+const isCubeViewDimension = (viewDescriptor?: GPUTextureViewDescriptor) =>
+  viewDescriptor?.dimension === 'cube' || viewDescriptor?.dimension === 'cube-array';
+
 const s_u32 = new Uint32Array(1);
 const s_f32 = new Float32Array(s_u32.buffer);
 const s_i32 = new Int32Array(s_u32.buffer);
@@ -512,6 +626,9 @@ function getCallArgType<T extends Dimensionality>(
   switch (argName) {
     case 'coords':
       return call.coordType;
+    case 'component':
+      assert(call.componentType !== undefined);
+      return call.componentType;
     case 'mipLevel':
       assert(call.levelType !== undefined);
       return call.levelType;
@@ -521,6 +638,7 @@ function getCallArgType<T extends Dimensionality>(
     case 'sampleIndex':
       assert(call.sampleIndexType !== undefined);
       return call.sampleIndexType;
+    case 'depthRef':
     case 'ddx':
     case 'ddy':
       return 'f';
@@ -634,6 +752,37 @@ function zeroValuePerTexelComponent(components: TexelComponent[]) {
   return out;
 }
 
+const kSamplerFns: Record<GPUCompareFunction, (ref: number, v: number) => boolean> = {
+  never: () => false,
+  less: (ref, v) => ref < v,
+  equal: (ref, v) => ref === v,
+  'less-equal': (ref, v) => ref <= v,
+  greater: (ref, v) => ref > v,
+  'not-equal': (ref, v) => ref !== v,
+  'greater-equal': (ref, v) => ref >= v,
+  always: () => true,
+} as const;
+
+function applyCompare<T extends Dimensionality>(
+  call: TextureCall<T>,
+  sampler: GPUSamplerDescriptor | undefined,
+  components: TexelComponent[],
+  src: PerTexelComponent<number>
+): PerTexelComponent<number> {
+  // Only comparison builtins replace each value with the 0/1 compare result.
+  if (!isBuiltinComparison(call.builtin)) {
+    return src;
+  }
+  assert(call.depthRef !== undefined, 'comparison builtins require depthRef');
+  assert(sampler?.compare !== undefined, 'comparison builtins require sampler.compare');
+  const compareFn = kSamplerFns[sampler.compare];
+  const out: PerTexelComponent<number> = {};
+  for (const component of components) {
+    out[component] = compareFn(call.depthRef, src[component]!) ? 1 : 0;
+  }
+  return out;
+}
+
 /**
  * Returns the expect value for a WGSL builtin texture function for a single
  * mip level
@@ -661,10 +810,7 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
           sampler?.addressModeW ?? 'clamp-to-edge',
         ];
 
-  const isCube =
-    texture.viewDescriptor.dimension === 'cube' ||
-    texture.viewDescriptor.dimension === 'cube-array';
-
+  const isCube = isCubeViewDimension(texture.viewDescriptor);
   const arrayIndexMult = isCube ? 6 : 1;
   const numLayers = textureSize[2] / arrayIndexMult;
   assert(numLayers % 1 === 0);
@@ -684,6 +830,8 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
   };
 
   switch (call.builtin) {
+    case 'textureGather':
+    case 'textureGatherCompare':
     case 'textureSample':
     case 'textureSampleBaseClampToEdge':
     case 'textureSampleLevel': {
@@ -714,7 +862,7 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
 
       const samples: { at: number[]; weight: number }[] = [];
 
-      const filter = sampler?.minFilter ?? 'nearest';
+      const filter = isBuiltinGather(call.builtin) ? 'linear' : sampler?.minFilter ?? 'nearest';
       switch (filter) {
         case 'linear': {
           // 'p0' is the lower texel for 'at'
@@ -733,10 +881,11 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
               samples.push({ at: p1, weight: p1W[0] });
               break;
             case 2: {
-              samples.push({ at: p0, weight: p0W[0] * p0W[1] });
-              samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] });
+              // Note: These are ordered to match textureGather
               samples.push({ at: [p0[0], p1[1]], weight: p0W[0] * p1W[1] });
               samples.push({ at: p1, weight: p1W[0] * p1W[1] });
+              samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] });
+              samples.push({ at: p0, weight: p0W[0] * p0W[1] });
               break;
             }
             case 3: {
@@ -746,10 +895,11 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
               // the slice they'll be wrapped by wrapFaceCoordToCubeFaceAtEdgeBoundaries
               // below.
               if (isCube) {
-                samples.push({ at: p0, weight: p0W[0] * p0W[1] });
-                samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] });
+                // Note: These are ordered to match textureGather
                 samples.push({ at: [p0[0], p1[1], p0[2]], weight: p0W[0] * p1W[1] });
                 samples.push({ at: p1, weight: p1W[0] * p1W[1] });
+                samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] });
+                samples.push({ at: p0, weight: p0W[0] * p0W[1] });
                 const ndx = getUnusedCubeCornerSampleIndex(textureSize[0], coords as vec3);
                 if (ndx >= 0) {
                   // # Issues with corners of cubemaps
@@ -783,7 +933,16 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
                   // I'm not sure what "average the values of the three available samples"
                   // means. To me that would be (a+b+c)/3 or in other words, set all the
                   // weights to 0.33333 but that's not what the M1 is doing.
-                  unreachable('corners of cubemaps are not testable');
+                  //
+                  // We could check that, given the 3 texels at the corner, if all 3 texels
+                  // are the same value then the result must be the same value. Otherwise,
+                  // the result must be between the 3 values. For now, the code that
+                  // chooses test coordinates avoids corners. This has the restriction
+                  // that the smallest mip level be at least 4x4 so there are some non
+                  // corners to choose from.
+                  unreachable(
+                    `corners of cubemaps are not testable:\n   ${describeTextureCall(call)}`
+                  );
                 }
               } else {
                 const p = [p0, p1];
@@ -813,16 +972,33 @@ export function softwareTextureReadMipLevel<T extends Dimensionality>(
           unreachable();
       }
 
+      if (isBuiltinGather(call.builtin)) {
+        const componentNdx = call.component ?? 0;
+        assert(componentNdx >= 0 && componentNdx < 4);
+        assert(samples.length === 4);
+        const component = kRGBAComponents[componentNdx];
+        const out: PerTexelComponent<number> = {};
+        samples.forEach((sample, i) => {
+          const c = isCube
+            ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3)
+            : applyAddressModesToCoords(addressMode, textureSize, sample.at);
+          const v = load(c);
+          const postV = applyCompare(call, sampler, rep.componentOrder, v);
+          const rgba = convertPerTexelComponentToResultFormat(postV, format);
+          out[kRGBAComponents[i]] = rgba[component];
+        });
+        return out;
+      }
+
       const out: PerTexelComponent<number> = {};
-      const ss = [];
       for (const sample of samples) {
         const c = isCube
           ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3)
           : applyAddressModesToCoords(addressMode, textureSize, sample.at);
         const v = load(c);
-        ss.push(v);
+        const postV = applyCompare(call, sampler, rep.componentOrder, v);
         for (const component of rep.componentOrder) {
-          out[component] = (out[component] ?? 0) + v[component]! * sample.weight;
+          out[component] = (out[component] ?? 0) + postV[component]! * sample.weight;
         }
       }
 
@@ -856,7 +1032,8 @@ export function softwareTextureReadLevel<T extends Dimensionality>(
     return softwareTextureReadMipLevel<T>(call, texture, sampler, mipLevel);
   }
 
-  switch (sampler.mipmapFilter) {
+  const effectiveMipmapFilter = isBuiltinGather(call.builtin) ? 'nearest' : sampler.mipmapFilter;
+  switch (effectiveMipmapFilter) {
     case 'linear': {
       const clampedMipLevel = clamp(mipLevel, { min: 0, max: maxLevel });
       const baseMipLevel = Math.floor(clampedMipLevel);
@@ -1090,6 +1267,50 @@ function texelsApproximatelyEqual(
   return true;
 }
 
+// If it's `textureGather` then we need to convert all values to one component.
+// In other words, imagine the format is rg11b10ufloat. If it was
+// `textureSample` we'd have `r11, g11, b10, a=1` but for `textureGather`
+//
+// component = 0 => `r11, r11, r11, r11`
+// component = 1 => `g11, g11, g11, g11`
+// component = 2 => `b10, b10, b10, b10`
+//
+// etc..., each from a different texel
+//
+// The Texel utils don't handle this. So if `component = 2` we take each value,
+// copy it to the `B` component, run it through the texel utils so it returns
+// the correct ULP for a 10bit float (not an 11 bit float). Then copy it back to
+// the channel it came from.
+function getULPFromZeroForComponents(
+  rgba: PerTexelComponent<number>,
+  format: EncodableTextureFormat,
+  builtin: TextureBuiltin,
+  componentNdx?: number
+): PerTexelComponent<number> {
+  const rep = kTexelRepresentationInfo[format];
+  if (isBuiltinGather(builtin)) {
+    const out: PerTexelComponent<number> = {};
+    const component = kRGBAComponents[componentNdx ?? 0];
+    const temp: PerTexelComponent<number> = { R: 0, G: 0, B: 0, A: 1 };
+    for (const comp of kRGBAComponents) {
+      temp[component] = rgba[comp];
+      const texel = convertResultFormatToTexelViewFormat(temp, format);
+      const ulp = convertPerTexelComponentToResultFormat(
+        rep.bitsToULPFromZero(rep.numberToBits(texel)),
+        format
+      );
+      out[comp] = ulp[component];
+    }
+    return out;
+  } else {
+    const texel = convertResultFormatToTexelViewFormat(rgba, format);
+    return convertPerTexelComponentToResultFormat(
+      rep.bitsToULPFromZero(rep.numberToBits(texel)),
+      format
+    );
+  }
+}
+
 /**
  * Checks the result of each call matches the expected result.
  */
@@ -1099,11 +1320,10 @@ export async function checkCallResults<T extends Dimensionality>(
   textureType: string,
   sampler: GPUSamplerDescriptor | undefined,
   calls: TextureCall<T>[],
-  results: PerTexelComponent<number>[]
+  results: Awaited<ReturnType<typeof doTextureCalls<T>>>
 ) {
   const errs: string[] = [];
   const format = texture.texels[0].format;
-  const rep = kTexelRepresentationInfo[format];
   const size = reifyExtent3D(texture.descriptor.size);
   const maxFractionalDiff =
     sampler?.minFilter === 'linear' ||
@@ -1112,11 +1332,21 @@ export async function checkCallResults<T extends Dimensionality>(
       ? getMaxFractionalDiffForTextureFormat(texture.descriptor.format)
       : 0;
 
-  for (let callIdx = 0; callIdx < calls.length && errs.length === 0; callIdx++) {
+  for (let callIdx = 0; callIdx < calls.length; callIdx++) {
     const call = calls[callIdx];
-    const gotRGBA = results[callIdx];
+    const gotRGBA = results.results[callIdx];
     const expectRGBA = softwareTextureReadLevel(t, call, texture, sampler, call.mipLevel ?? 0);
 
+    // The spec says depth and stencil have implementation defined values for G, B, and A
+    // so if this is `textureGather` and component > 0 then there's nothing to check.
+    if (
+      isDepthOrStencilTextureFormat(format) &&
+      isBuiltinGather(call.builtin) &&
+      call.component! > 0
+    ) {
+      continue;
+    }
+
     if (texelsApproximatelyEqual(gotRGBA, expectRGBA, format, maxFractionalDiff)) {
       continue;
     }
@@ -1125,33 +1355,86 @@ export async function checkCallResults<T extends Dimensionality>(
       continue;
     }
 
-    const got = convertResultFormatToTexelViewFormat(gotRGBA, format);
-    const expect = convertResultFormatToTexelViewFormat(expectRGBA, format);
-    const gULP = rep.bitsToULPFromZero(rep.numberToBits(got));
-    const eULP = rep.bitsToULPFromZero(rep.numberToBits(expect));
-    for (const component of rep.componentOrder) {
-      const g = got[component]!;
-      const e = expect[component]!;
+    const gULP = getULPFromZeroForComponents(gotRGBA, format, call.builtin, call.component);
+    const eULP = getULPFromZeroForComponents(expectRGBA, format, call.builtin, call.component);
+
+    // from the spec: https://gpuweb.github.io/gpuweb/#reading-depth-stencil
+    // depth and stencil values are D, ?, ?, ?
+    const rgbaComponentsToCheck =
+      isBuiltinGather(call.builtin) || !isDepthOrStencilTextureFormat(format)
+        ? kRGBAComponents
+        : kRComponent;
+
+    let bad = false;
+    const diffs = rgbaComponentsToCheck.map(component => {
+      const g = gotRGBA[component]!;
+      const e = expectRGBA[component]!;
       const absDiff = Math.abs(g - e);
       const ulpDiff = Math.abs(gULP[component]! - eULP[component]!);
-      const relDiff = absDiff / Math.max(Math.abs(g), Math.abs(e));
+      assert(!Number.isNaN(ulpDiff));
+      const maxAbs = Math.max(Math.abs(g), Math.abs(e));
+      const relDiff = maxAbs > 0 ? absDiff / maxAbs : 0;
       if (ulpDiff > 3 && absDiff > maxFractionalDiff) {
-        const desc = describeTextureCall(call);
-        errs.push(`component was not as expected:
+        bad = true;
+      }
+      return { absDiff, relDiff, ulpDiff };
+    });
+
+    const isFloatType = (format: GPUTextureFormat) => {
+      const info = kTextureFormatInfo[format];
+      return info.color?.type === 'float' || info.depth?.type === 'depth';
+    };
+    const fix5 = (n: number) => (isFloatType(format) ? n.toFixed(5) : n.toString());
+    const fix5v = (arr: number[]) => arr.map(v => fix5(v)).join(', ');
+    const rgbaToArray = (p: PerTexelComponent<number>): number[] =>
+      rgbaComponentsToCheck.map(component => p[component]!);
+
+    if (bad) {
+      const desc = describeTextureCall(call);
+      errs.push(`result was not as expected:
       size: [${size.width}, ${size.height}, ${size.depthOrArrayLayers}]
   mipCount: ${texture.descriptor.mipLevelCount ?? 1}
-      call: ${desc}  // #${callIdx}
- component: ${component}
-       got: ${g}
-  expected: ${e}
-  abs diff: ${absDiff.toFixed(4)}
-  rel diff: ${(relDiff * 100).toFixed(2)}%
-  ulp diff: ${ulpDiff}
+      call: ${desc}  // #${callIdx}`);
+      if (isCubeViewDimension(texture.viewDescriptor)) {
+        const coord = convertCubeCoordToNormalized3DTextureCoord(call.coords as vec3);
+        const faceNdx = Math.floor(coord[2] * 6);
+        errs.push(`          : as 3D texture coord: (${coord[0]}, ${coord[1]}, ${coord[2]})`);
+        for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) {
+          const mipSize = virtualMipSize(
+            texture.descriptor.dimension ?? '2d',
+            texture.descriptor.size,
+            mipLevel
+          );
+          const t = coord.slice(0, 2).map((v, i) => (v * mipSize[i]).toFixed(3));
+          errs.push(
+            `          : as texel coord mip level[${mipLevel}]: (${t[0]}, ${t[1]}), face: ${faceNdx}(${kFaceNames[faceNdx]})`
+          );
+        }
+      } else {
+        for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) {
+          const mipSize = virtualMipSize(
+            texture.descriptor.dimension ?? '2d',
+            texture.descriptor.size,
+            mipLevel
+          );
+          const t = call.coords!.map((v, i) => (v * mipSize[i]).toFixed(3));
+          errs.push(`          : as texel coord @ mip level[${mipLevel}]: (${t.join(', ')})`);
+        }
+      }
+      errs.push(`\
+       got: ${fix5v(rgbaToArray(gotRGBA))}
+  expected: ${fix5v(rgbaToArray(expectRGBA))}
+  max diff: ${maxFractionalDiff}
+ abs diffs: ${fix5v(diffs.map(({ absDiff }) => absDiff))}
+ rel diffs: ${diffs.map(({ relDiff }) => `${(relDiff * 100).toFixed(2)}%`).join(', ')}
+ ulp diffs: ${diffs.map(({ ulpDiff }) => ulpDiff).join(', ')}
 `);
-        if (sampler) {
+
+      if (sampler) {
+        if (t.rec.debugging) {
           const expectedSamplePoints = [
             'expected:',
-            ...(await identifySamplePoints(texture, (texels: TexelView[]) => {
+            ...(await identifySamplePoints(texture, call, (texels: TexelView[]) => {
               return Promise.resolve(
                 softwareTextureReadLevel(
                   t,
@@ -1169,13 +1452,9 @@ export async function checkCallResults<T extends Dimensionality>(
           ];
           const gotSamplePoints = [
             'got:',
-            ...(await identifySamplePoints(texture, async (texels: TexelView[]) => {
-              const gpuTexture = createTextureFromTexelViews(t, texels, texture.descriptor);
-              const result = (
-                await doTextureCalls(t, gpuTexture, texture.viewDescriptor, textureType, sampler, [
-                  call,
-                ])
-              )[0];
+            ...(await identifySamplePoints(texture, call, async (texels: TexelView[]) => {
+              const gpuTexture = createTextureFromTexelViewsLocal(t, texels, texture.descriptor);
+              const result = (await results.run(gpuTexture))[callIdx];
               gpuTexture.destroy();
               return result;
             })),
@@ -1184,9 +1463,16 @@ export async function checkCallResults<T extends Dimensionality>(
           errs.push(layoutTwoColumns(expectedSamplePoints, gotSamplePoints).join('\n'));
           errs.push('', '');
         }
-      }
-    }
-  }
+      } // if (sampler)
+
+      // Don't report the other errors. There are 50 sample points per subcase
+      // and 50-100 subcases so the log would get enormous if all 50 fail. One
+      // report per subcase is enough.
+      break;
+    } // if (bad)
+  } // for callIdx
+
+  results.destroy();
 
   return errs.length > 0 ? new Error(errs.join('\n')) : undefined;
 }
@@ -1763,6 +2049,19 @@ export async function readTextureToTexelViews(
   return texelViews;
 }
 
+function createTextureFromTexelViewsLocal(
+  t: GPUTest,
+  texelViews: TexelView[],
+  desc: Omit<GPUTextureDescriptor, 'format'>
+): GPUTexture {
+  const modifiedDescriptor = { ...desc };
+  // If it's a depth or stencil texture we need to render to it to fill it with data.
+  if (isDepthOrStencilTextureFormat(texelViews[0].format)) {
+    modifiedDescriptor.usage = desc.usage | GPUTextureUsage.RENDER_ATTACHMENT;
+  }
+  return createTextureFromTexelViews(t, texelViews, modifiedDescriptor);
+}
+
 /**
  * Fills a texture with random data and returns that data as
  * an array of TexelView.
@@ -1791,14 +2090,14 @@ export async function createTextureWithRandomDataAndGetTexels(
     return { texture, texels };
   } else {
     const texels = createRandomTexelViewMipmap(descriptor);
-    const texture = createTextureFromTexelViews(t, texels, descriptor);
+    const texture = createTextureFromTexelViewsLocal(t, texels, descriptor);
     return { texture, texels };
   }
 }
 
 function valueIfAllComponentsAreEqual(
   c: PerTexelComponent<number>,
-  componentOrder: TexelComponent[]
+  componentOrder: readonly TexelComponent[]
 ) {
   const s = new Set(componentOrder.map(component => c[component]!));
   return s.size === 1 ? s.values().next().value : undefined;
@@ -1893,12 +2192,13 @@ const kFaceNames = ['+x', '-x', '+y', '-y', '+z', '-z'] as const;
  * a: at: [7, 1], weights: [R: 0.75000]
  * b: at: [7, 2], weights: [R: 0.25000]
  */
-async function identifySamplePoints(
+async function identifySamplePoints<T extends Dimensionality>(
   texture: Texture,
+  call: TextureCall<T>,
   run: (texels: TexelView[]) => Promise<PerTexelComponent<number>>
 ) {
   const info = texture.descriptor;
-  const isCube = texture.viewDescriptor.dimension === 'cube';
+  const isCube = isCubeViewDimension(texture.viewDescriptor);
   const mipLevelCount = texture.descriptor.mipLevelCount ?? 1;
   const mipLevelSize = range(mipLevelCount, mipLevel =>
     virtualMipSize(texture.descriptor.dimension ?? '2d', texture.descriptor.size, mipLevel)
@@ -1934,6 +2234,11 @@ async function identifySamplePoints(
   ) as EncodableTextureFormat;
   const rep = kTexelRepresentationInfo[format];
 
+  const components = isBuiltinGather(call.builtin) ? kRGBAComponents : rep.componentOrder;
+  const convertResultAsAppropriate = isBuiltinGather(call.builtin)
+    ? <T>(v: T) => v
+    : convertResultFormatToTexelViewFormat;
+
   // Identify all the texels that are sampled, and their weights.
   const sampledTexelWeights = new Map<number, PerTexelComponent<number>>();
   const unclassifiedStack = [new Set<number>(range(numTexels, v => v))];
@@ -1951,8 +2256,8 @@ async function identifySamplePoints(
       unclassifiedStack.push(setB);
     }
 
-    // See if any of the texels in setA were sampled.
-    const results = convertResultFormatToTexelViewFormat(
+    // See if any of the texels in setA were sampled.
+    const results = convertResultAsAppropriate(
       await run(
         range(mipLevelCount, mipLevel =>
           TexelView.fromTexelsAsColors(
@@ -1978,7 +2283,7 @@ async function identifySamplePoints(
       ),
       format
     );
-    if (rep.componentOrder.some(c => results[c] !== 0)) {
+    if (components.some(c => results[c] !== 0)) {
       // One or more texels of setA were sampled.
       if (setA.size === 1) {
         // We identified a specific texel was sampled.
@@ -2040,13 +2345,20 @@ async function identifySamplePoints(
 
     for (let layer = 0; layer < depthOrArrayLayers; ++layer) {
       const layerEntries = level[layer];
-      if (!layerEntries) {
-        continue;
-      }
 
       const orderedTexelIndices: number[] = [];
       lines.push('');
-      lines.push(`layer: ${layer}${isCube ? ` (${kFaceNames[layer]})` : ''}`);
+      const unSampled = layerEntries ? '' : 'un-sampled'; // no texels sampled in this layer
+      if (isCube) {
+        const face = kFaceNames[layer % 6];
+        lines.push(`layer: ${layer}, cube-layer: ${(layer / 6) | 0} (${face}) ${unSampled}`);
+      } else {
+        lines.push(`layer: ${layer} ${unSampled}`);
+      }
+
+      if (!layerEntries) {
+        continue;
+      }
 
       {
         let line = '  ';
@@ -2099,11 +2411,11 @@ async function identifySamplePoints(
         const weights = layerEntries.get(texelIdx)!;
         const y = Math.floor(texelIdx / texelsPerRow);
         const x = texelIdx % texelsPerRow;
-        const singleWeight = valueIfAllComponentsAreEqual(weights, rep.componentOrder);
+        const singleWeight = valueIfAllComponentsAreEqual(weights, components);
         const w =
           singleWeight !== undefined
             ? `weight: ${fix5(singleWeight)}`
-            : `weights: [${rep.componentOrder.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`;
+            : `weights: [${components.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`;
         const coord = `${pad2(x)}, ${pad2(y)}, ${pad2(layer)}`;
         lines.push(`${letter(idCount + i)}: mip(${mipLevel}) at: [${coord}], ${w}`);
       });
@@ -2163,7 +2475,9 @@ export function chooseTextureSize({
   const width = align(Math.max(minSize, blockWidth * minBlocks), blockWidth);
   const height = align(Math.max(minSize, blockHeight * minBlocks), blockHeight);
   if (viewDimension === 'cube' || viewDimension === 'cube-array') {
-    const size = lcm(width, height);
+    const blockLCM = lcm(blockWidth, blockHeight);
+    const largest = Math.max(width, height);
+    const size = align(largest, blockLCM);
     return [size, size, viewDimension === 'cube-array' ? 24 : 6];
   }
   const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension);
@@ -2177,11 +2491,14 @@ export const kCubeSamplePointMethods = ['cube-edges', 'texel-centre', 'spiral']
 export type CubeSamplePointMethods = (typeof kSamplePointMethods)[number];
 
 type TextureBuiltinInputArgs = {
+  textureBuiltin?: TextureBuiltin;
   descriptor: GPUTextureDescriptor;
   sampler?: GPUSamplerDescriptor;
   mipLevel?: RangeDef;
   sampleIndex?: RangeDef;
   arrayIndex?: RangeDef;
+  component?: boolean;
+  depthRef?: boolean;
   offset?: boolean;
   hashInputs: (number | string | boolean)[];
 };
@@ -2201,7 +2518,15 @@ function generateTextureBuiltinInputsImpl<T extends Dimensionality>(
         radius?: number;
         loops?: number;
       })
-): { coords: T; mipLevel: number; sampleIndex?: number; arrayIndex?: number; offset?: T }[] {
+): {
+  coords: T;
+  mipLevel: number;
+  sampleIndex?: number;
+  arrayIndex?: number;
+  offset?: T;
+  component?: number;
+  depthRef?: number;
+}[] {
   const { method, descriptor } = args;
   const dimension = descriptor.dimension ?? '2d';
   const mipLevelCount = descriptor.mipLevelCount ?? 1;
@@ -2234,14 +2559,15 @@ function generateTextureBuiltinInputsImpl<T extends Dimensionality>(
     typeof v === 'string' ? sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v
   );
   const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => {
-    const range = num + type === 'u32' ? 1 : 2;
+    const range = num + (type === 'u32' ? 1 : 2);
     const number =
       (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1);
     return type === 'f32' ? number : Math.floor(number);
   };
-  const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => {
+  // Generates the same values per coord instead of using all the extra `_hashInputs`.
+  const makeIntHashValueRepeatable = (min: number, max: number, ...hashInputs: number[]) => {
     const range = max - min;
-    return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range);
+    return min + Math.floor((hashU32(...hashInputs) / 0x1_0000_0000) * range);
   };
 
   // Samplers across devices use different methods to interpolate.
@@ -2253,7 +2579,77 @@ function generateTextureBuiltinInputsImpl<T extends Dimensionality>(
   // Linux, AMD Radeon Pro WX 3200: 256
   // MacOS, M1 Mac: 256
   const kSubdivisionsPerTexel = 4;
-  const nearest = !args.sampler || args.sampler.minFilter === 'nearest';
+
+  // When filtering is nearest then we want to avoid edges of texels
+  //
+  //             U
+  //             |
+  //     +---+---+---+---+---+---+---+---+
+  //     |   | A | B |   |   |   |   |   |
+  //     +---+---+---+---+---+---+---+---+
+  //
+  // Above, coordinate U could sample either A or B
+  //
+  //               U
+  //               |
+  //     +---+---+---+---+---+---+---+---+
+  //     |   | A | B | C |   |   |   |   |
+  //     +---+---+---+---+---+---+---+---+
+  //
+  // For textureGather we want to avoid texel centers
+  // as coordinate U could gather either A,B or B,C.
+
+  const avoidEdgeCase =
+    !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin);
+  const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0;
+
+  // textureGather issues for 2d/3d textures
+  //
+  // If addressModeU is repeat, then on an 8x1 texture, u = 0.01 or u = 0.99
+  // would gather these texels
+  //
+  //     +---+---+---+---+---+---+---+---+
+  //     | * |   |   |   |   |   |   | * |
+  //     +---+---+---+---+---+---+---+---+
+  //
+  // If addressModeU is clamp-to-edge or mirror-repeat,
+  // then on an 8x1 texture, u = 0.01 would gather this texel
+  //
+  //     +---+---+---+---+---+---+---+---+
+  //     | * |   |   |   |   |   |   |   |
+  //     +---+---+---+---+---+---+---+---+
+  //
+  // and 0.99 would gather this texel
+  //
+  //     +---+---+---+---+---+---+---+---+
+  //     |   |   |   |   |   |   |   | * |
+  //     +---+---+---+---+---+---+---+---+
+  //
+  // This means that if addressMode is not `repeat`, we
+  // need to avoid the edge of the texture.
+  //
+  // Note: we don't have these specific issues with cube maps
+  // as they ignore addressMode
+  const euclideanModulo = (n: number, m: number) => ((n % m) + m) % m;
+  const addressMode: GPUAddressMode[] =
+    args.textureBuiltin === 'textureSampleBaseClampToEdge'
+      ? ['clamp-to-edge', 'clamp-to-edge', 'clamp-to-edge']
+      : [
+          args.sampler?.addressModeU ?? 'clamp-to-edge',
+          args.sampler?.addressModeV ?? 'clamp-to-edge',
+          args.sampler?.addressModeW ?? 'clamp-to-edge',
+        ];
+  const avoidTextureEdge = (axis: number, textureDimensionUnits: number, v: number) => {
+    assert(isBuiltinGather(args.textureBuiltin));
+    if (addressMode[axis] === 'repeat') {
+      return v;
+    }
+    const inside = euclideanModulo(v, textureDimensionUnits);
+    const outside = v - inside;
+    return outside + clamp(inside, { min: 1, max: textureDimensionUnits - 1 });
+  };
+
+  const numComponents = isDepthOrStencilTextureFormat(descriptor.format) ? 1 : 4;
   return coords.map((c, i) => {
     const mipLevel = args.mipLevel
       ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest')
@@ -2265,11 +2661,13 @@ function generateTextureBuiltinInputsImpl<T extends Dimensionality>(
     const coords = c.map((v, i) => {
       // Quantize to kSubdivisionsPerPixel
       const v1 = Math.floor(v * q[i]);
-      // If it's nearest and we're on the edge of a texel then move us off the edge
-      // since the edge could choose one texel or another in nearest mode
-      const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1;
+      // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge
+      // since the edge could choose one texel or another.
+      const isTexelEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder;
+      const v2 = isTexelEdgeCase && avoidEdgeCase ? v1 + 1 : v1;
+      const v3 = isBuiltinGather(args.textureBuiltin) ? avoidTextureEdge(i, q[i], v2) : v2;
       // Convert back to texture coords
-      return v2 / q[i];
+      return v3 / q[i];
     }) as T;
 
     return {
@@ -2277,15 +2675,28 @@ function generateTextureBuiltinInputsImpl<T extends Dimensionality>(
       mipLevel,
       sampleIndex: args.sampleIndex ? makeRangeValue(args.sampleIndex, i, 1) : undefined,
       arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined,
+      depthRef: args.depthRef ? makeRangeValue({ num: 1, type: 'f32' }, i, 5) : undefined,
       offset: args.offset
-        ? (coords.map((_, j) => makeIntHashValue(-8, 8, i, 3 + j)) as T)
+        ? (coords.map((_, j) => makeIntHashValueRepeatable(-8, 8, i, 3 + j)) as T)
         : undefined,
+      component: args.component ? makeIntHashValueRepeatable(0, numComponents, i, 4) : undefined,
     };
   });
 }
 
+/**
+ * When mipmapFilter === 'nearest' we need to stay away from 0.5
+ * because the GPU could decide to choose one mip or the other.
+ *
+ * Some example transition values, the value at which the GPU chooses
+ * mip level 1 over mip level 0:
+ *
+ * M1 Mac: 0.515381
+ * Intel Mac: 0.49999
+ * AMD Mac: 0.5
+ */
 const kMipEpsilon = 0.02;
-function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUFilterMode) {
+function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUMipmapFilterMode) {
   if (mipmapFilter === 'linear') {
     return mipLevel;
   }
@@ -2395,135 +2806,35 @@ export function convertNormalized3DTexCoordToCubeCoord(uvLayer: vec3) {
 }
 
 /**
+ * Wrap a texel based face coord across cube faces
+ *
  * We have a face texture in texels coord where U/V choose a texel and W chooses the face.
  * If U/V are outside the size of the texture then, when normalized and converted
  * to a cube map coordinate, they'll end up pointing to a different face.
  *
  * addressMode is effectively ignored for cube
  *
- *             +-----------+
- *             |0->u       |
- *             |↓          |
- *             |v   +y     |
- *             |    (2)    |
- *             |           |
- * +-----------+-----------+-----------+-----------+
- * |0->u       |0->u       |0->u       |0->u       |
- * |↓          |↓          |↓          |↓          |
- * |v   -x     |v   +z     |v   +x     |v   -z     |
- * |    (1)    |    (4)    |    (0)    |    (5)    |
- * |           |           |           |           |
- * +-----------+-----------+-----------+-----------+
- *             |0->u       |
- *             |↓          |
- *             |v   -y     |
- *             |    (3)    |
- *             |           |
- *             +-----------+
+ * By converting from a texel based coord to a normalized coord and then to a cube map coord,
+ * if the texel was outside of the face, the cube map coord will end up pointing to a different
+ * face. We then convert back cube coord -> normalized face coord -> texel based coord
  */
-const kFaceConversions = {
-  u: (textureSize: number, faceCoord: vec3) => faceCoord[0],
-  v: (textureSize: number, faceCoord: vec3) => faceCoord[1],
-  'u+t': (textureSize: number, faceCoord: vec3) => faceCoord[0] + textureSize,
-  'u-t': (textureSize: number, faceCoord: vec3) => faceCoord[0] - textureSize,
-  'v+t': (textureSize: number, faceCoord: vec3) => faceCoord[1] + textureSize,
-  'v-t': (textureSize: number, faceCoord: vec3) => faceCoord[1] - textureSize,
-  't-v': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1],
-  '1+u': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[0],
-  '1+v': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[1],
-  '-v-1': (textureSize: number, faceCoord: vec3) => -faceCoord[1] - 1,
-  't-u-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[0] - 1,
-  't-v-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1] - 1,
-  '2t-u-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[0] - 1,
-  '2t-v-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[1] - 1,
-} as const;
-const kFaceConversionEnums = keysOf(kFaceConversions);
-type FaceCoordConversion = (typeof kFaceConversionEnums)[number];
-
-// For Each face
-//   face to go if u < 0
-//   face to go if u >= textureSize
-//   face to go if v < 0
-//   face to go if v >= textureSize
-const kFaceToFaceRemap: { to: number; u: FaceCoordConversion; v: FaceCoordConversion }[][] = [
-  // 0
-  [
-    /* -u */ { to: 4, u: 'u+t', v: 'v' },
-    /* +u */ { to: 5, u: 'u-t', v: 'v' },
-    /* -v */ { to: 2, u: 'v+t', v: 't-u-1' },
-    /* +v */ { to: 3, u: '2t-v-1', v: 'u' },
-  ],
-  // 1
-  [
-    /* -u */ { to: 5, u: 'u+t', v: 'v' },
-    /* +u */ { to: 4, u: 'u-t', v: 'v' },
-    /* -v */ { to: 2, u: '-v-1', v: 'u' }, // -1->0, -2->1  -3->2
-    /* +v */ { to: 3, u: 't-v', v: 't-u-1' },
-  ],
-  // 2
-  [
-    /* -u */ { to: 1, u: 'v', v: '1+u' },
-    /* +u */ { to: 0, u: 't-v-1', v: 'u-t' },
-    /* -v */ { to: 5, u: 't-u-1', v: '-v-1' },
-    /* +v */ { to: 4, u: 'u', v: 'v-t' },
-  ],
-  // 3
-  [
-    /* -u */ { to: 1, u: 't-v-1', v: 'u+t' },
-    /* +u */ { to: 0, u: 'v', v: '2t-u-1' },
-    /* -v */ { to: 4, u: 'u', v: 'v+t' },
-    /* +v */ { to: 5, u: 't-u-1', v: '2t-v-1' },
-  ],
-  // 4
-  [
-    /* -u */ { to: 1, u: 'u+t', v: 'v' },
-    /* +u */ { to: 0, u: 'u-t', v: 'v' },
-    /* -v */ { to: 2, u: 'u', v: 'v+t' },
-    /* +v */ { to: 3, u: 'u', v: 'v-t' },
-  ],
-  // 5
-  [
-    /* -u */ { to: 0, u: 'u+t', v: 'v' },
-    /* +u */ { to: 1, u: 'u-t', v: 'v' },
-    /* -v */ { to: 2, u: 't-u-1', v: '1+v' },
-    /* +v */ { to: 3, u: 't-u-1', v: '2t-v-1' },
-  ],
-];
-
-function getFaceWrapIndex(textureSize: number, faceCoord: vec3) {
-  if (faceCoord[0] < 0) {
-    return 0;
-  }
-  if (faceCoord[0] >= textureSize) {
-    return 1;
-  }
-  if (faceCoord[1] < 0) {
-    return 2;
-  }
-  if (faceCoord[1] >= textureSize) {
-    return 3;
-  }
-  return -1;
-}
-
-function applyFaceWrap(textureSize: number, faceCoord: vec3): vec3 {
-  const ndx = getFaceWrapIndex(textureSize, faceCoord);
-  if (ndx < 0) {
-    return faceCoord;
-  }
-  const { to, u, v } = kFaceToFaceRemap[faceCoord[2]][ndx];
-  return [
-    kFaceConversions[u](textureSize, faceCoord),
-    kFaceConversions[v](textureSize, faceCoord),
-    to,
+function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) {
+  // convert texel based face coord to normalized 2d-array coord
+  const nc0: vec3 = [
+    (faceCoord[0] + 0.5) / textureSize,
+    (faceCoord[1] + 0.5) / textureSize,
+    (faceCoord[2] + 0.5) / 6,
+  ];
+  const cc = convertNormalized3DTexCoordToCubeCoord(nc0);
+  const nc1 = convertCubeCoordToNormalized3DTextureCoord(cc);
+  // convert normalized 2d-array coord back to texel based face coord
+  const fc = [
+    Math.floor(nc1[0] * textureSize),
+    Math.floor(nc1[1] * textureSize),
+    Math.floor(nc1[2] * 6),
   ];
-}
 
-function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) {
-  // If we're off both edges we need to wrap twice, once for each edge.
-  const faceCoord1 = applyFaceWrap(textureSize, faceCoord);
-  const faceCoord2 = applyFaceWrap(textureSize, faceCoord1);
-  return faceCoord2;
+  return fc;
 }
 
 function applyAddressModesToCoords(
@@ -2570,6 +2881,8 @@ export function generateSamplePointsCube(
   mipLevel: number;
   arrayIndex?: number;
   offset?: undefined;
+  component?: number;
+  depthRef?: number;
 }[] {
   const { method, descriptor } = args;
   const mipLevelCount = descriptor.mipLevelCount ?? 1;
@@ -2610,20 +2923,38 @@ export function generateSamplePointsCube(
       /* prettier-ignore */
       coords.push(
         // between edges
-        [-1.01, -1.02,  0],
-        [ 1.01, -1.02,  0],
-        [-1.01,  1.02,  0],
-        [ 1.01,  1.02,  0],
-
-        [-1.01,  0, -1.02],
-        [ 1.01,  0, -1.02],
-        [-1.01,  0,  1.02],
-        [ 1.01,  0,  1.02],
-
-        [-1.01, -1.02,  0],
-        [ 1.01, -1.02,  0],
-        [-1.01,  1.02,  0],
-        [ 1.01,  1.02,  0],
+        // +x
+        [  1   , -1.01,  0    ],  // wrap -y
+        [  1   , +1.01,  0    ],  // wrap +y
+        [  1   ,  0   , -1.01 ],  // wrap -z
+        [  1   ,  0   , +1.01 ],  // wrap +z
+        // -x
+        [ -1   , -1.01,  0    ],  // wrap -y
+        [ -1   , +1.01,  0    ],  // wrap +y
+        [ -1   ,  0   , -1.01 ],  // wrap -z
+        [ -1   ,  0   , +1.01 ],  // wrap +z
+
+        // +y
+        [ -1.01,  1   ,  0    ],  // wrap -x
+        [ +1.01,  1   ,  0    ],  // wrap +x
+        [  0   ,  1   , -1.01 ],  // wrap -z
+        [  0   ,  1   , +1.01 ],  // wrap +z
+        // -y
+        [ -1.01, -1   ,  0    ],  // wrap -x
+        [ +1.01, -1   ,  0    ],  // wrap +x
+        [  0   , -1   , -1.01 ],  // wrap -z
+        [  0   , -1   , +1.01 ],  // wrap +z
+
+        // +z
+        [ -1.01,  0   ,  1    ],  // wrap -x
+        [ +1.01,  0   ,  1    ],  // wrap +x
+        [  0   , -1.01,  1    ],  // wrap -y
+        [  0   , +1.01,  1    ],  // wrap +y
+        // -z
+        [ -1.01,  0   , -1    ],  // wrap -x
+        [ +1.01,  0   , -1    ],  // wrap +x
+        [  0   , -1.01, -1    ],  // wrap -y
+        [  0   , +1.01, -1    ],  // wrap +y
 
         // corners (see comment "Issues with corners of cubemaps")
         // for why these are commented out.
@@ -2644,11 +2975,15 @@ export function generateSamplePointsCube(
     typeof v === 'string' ? sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v
   );
   const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => {
-    const range = num + type === 'u32' ? 1 : 2;
+    const range = num + (type === 'u32' ? 1 : 2);
     const number =
       (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1);
     return type === 'f32' ? number : Math.floor(number);
   };
+  const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => {
+    const range = max - min;
+    return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range);
+  };
 
   // Samplers across devices use different methods to interpolate.
   // Quantizing the texture coordinates seems to hit coords that produce
@@ -2658,12 +2993,102 @@ export function generateSamplePointsCube(
   // Win 11, NVidia 2070 Super: 16
   // Linux, AMD Radeon Pro WX 3200: 256
   // MacOS, M1 Mac: 256
+  //
+  // Note: When doing `textureGather...` we can't use texel centers
+  // because which 4 pixels will be gathered jumps if we're slightly under
+  // or slightly over the center
+  //
+  // Similarly, if we're using 'nearest' filtering then we don't want texel
+  // edges for the same reason.
+  //
+  // Also note that for textureGather, the way it works for cube maps is to
+  // first convert from cube map coordinate to a 2D texture coordinate and
+  // a face. Then, choose 4 texels just like normal 2D texture coordinates.
+  // If one of the 4 texels is outside the current face, wrap it to the correct
+  // face.
+  //
+  // This brings up an issue though. Imagine a 2D texture with addressMode = 'repeat'
+  //
+  //       2d texture   (same texture repeated to show 'repeat')
+  //     ┌───┬───┬───┐     ┌───┬───┬───┐
+  //     │   │   │   │     │   │   │   │
+  //     ├───┼───┼───┤     ├───┼───┼───┤
+  //     │   │   │  a│     │c  │   │   │
+  //     ├───┼───┼───┤     ├───┼───┼───┤
+  //     │   │   │  b│     │d  │   │   │
+  //     └───┴───┴───┘     └───┴───┴───┘
+  //
+  // Assume the texture coordinate is at the bottom right corner of a.
+  // Then textureGather will grab c, d, b, a (no idea why that order).
+  // but think of it as top-right, bottom-right, bottom-left, top-left.
+  // Similarly, if the texture coordinate is at the top left of d it
+  // will select the same 4 texels.
+  //
+  // But, in the case of a cubemap, each face is in different direction
+  // relative to the face next to it.
+  //
+  //             +-----------+
+  //             |0->u       |
+  //             |↓          |
+  //             |v   +y     |
+  //             |    (2)    |
+  //             |           |
+  // +-----------+-----------+-----------+-----------+
+  // |0->u       |0->u       |0->u       |0->u       |
+  // |↓          |↓          |↓          |↓          |
+  // |v   -x     |v   +z     |v   +x     |v   -z     |
+  // |    (1)    |    (4)    |    (0)    |    (5)    |
+  // |           |           |           |           |
+  // +-----------+-----------+-----------+-----------+
+  //             |0->u       |
+  //             |↓          |
+  //             |v   -y     |
+  //             |    (3)    |
+  //             |           |
+  //             +-----------+
+  //
+  // As an example, imagine going from the +y to the +x face.
+  // See diagram above, the right edge of the +y face wraps
+  // to the top edge of the +x face.
+  //
+  //                             +---+---+
+  //                             |  a|c  |
+  //     ┌───┬───┬───┐           ┌───┬───┬───┐
+  //     │   │   │   │           │  b│d  │   │
+  //     ├───┼───┼───┤---+       ├───┼───┼───┤
+  //     │   │   │  a│ c |       │   │   │   │
+  //     ├───┼───┼───┤---+       ├───┼───┼───┤
+  //     │   │   │  b│ d |       │   │   │   │
+  //     └───┴───┴───┘---+       └───┴───┴───┘
+  //        +y face                 +x face
+  //
+  // If the texture coordinate is in the bottom right corner of a,
+  // the rectangle of texels we read are a,b,c,d and, if the
+  // texture coordinate is in the top left corner of d we also
+  // read a,b,c,d according to the 2 diagrams above.
+  //
+  // But, notice that when reading from the POV of +y vs +x,
+  // the actual a,b,c,d texels are different.
+  //
+  // From the POV of face +x: a,b are in face +x and c,d are in face +y
+  // From the POV of face +y: a,c are in face +x and b,d are in face +y
+  //
+  // This is all the long way of saying that if we're on the edge of a cube
+  // face we could get drastically different results because the orientation
+  // of the rectangle of the 4 texels we use, rotates. So, we need to avoid
+  // any values too close to the edge just in case our math is different than
+  // the GPU's.
+  //
   const kSubdivisionsPerTexel = 4;
-  const nearest = !args.sampler || args.sampler.minFilter === 'nearest';
+  const avoidEdgeCase =
+    !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin);
+  const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0;
   return coords.map((c, i) => {
-    const mipLevel = args.mipLevel ? makeRangeValue(args.mipLevel, i) : 0;
+    const mipLevel = args.mipLevel
+      ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest')
+      : 0;
     const clampedMipLevel = clamp(mipLevel, { min: 0, max: mipLevelCount - 1 });
-    const mipSize = virtualMipSize('2d', size, clampedMipLevel);
+    const mipSize = virtualMipSize('2d', size, Math.ceil(clampedMipLevel));
     const q = [
       mipSize[0] * kSubdivisionsPerTexel,
       mipSize[0] * kSubdivisionsPerTexel,
@@ -2683,17 +3108,21 @@ export function generateSamplePointsCube(
     const quantizedUVW = uvw.map((v, i) => {
       // Quantize to kSubdivisionsPerPixel
       const v1 = Math.floor(v * q[i]);
-      // If it's nearest and we're on the edge of a texel then move us off the edge
-      // since the edge could choose one texel or another in nearest mode
-      const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1;
-      // Convert back to texture coords
-      return v2 / q[i];
+      // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge
+      // since the edge could choose one texel or another.
+      const isEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder;
+      const v2 = isEdgeCase && avoidEdgeCase ? v1 + 1 : v1;
+      // Convert back to texture coords slightly off
+      return (v2 + 1 / 16) / q[i];
     }) as vec3;
+
     const coords = convertNormalized3DTexCoordToCubeCoord(quantizedUVW);
     return {
       coords,
       mipLevel,
       arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined,
+      depthRef: args.depthRef ? makeRangeValue({ num: 1, type: 'f32' }, i, 5) : undefined,
+      component: args.component ? makeIntHashValue(0, 4, i, 4) : undefined,
     };
   });
 }
@@ -2751,8 +3180,8 @@ function binKey<T extends Dimensionality>(call: TextureCall<T>): string {
   for (const name of kTextureCallArgNames) {
     const value = call[name];
     if (value !== undefined) {
-      if (name === 'offset') {
-        // offset must be a constant expression
+      if (name === 'offset' || name === 'component') {
+        // offset and component must be constant expressions
         keys.push(`${name}: ${wgslExpr(value)}`);
       } else {
         keys.push(`${name}: ${wgslTypeFor(value, call.coordType)}`);
@@ -2763,12 +3192,19 @@ function binKey<T extends Dimensionality>(call: TextureCall<T>): string {
 }
 
 function buildBinnedCalls<T extends Dimensionality>(calls: TextureCall<T>[]) {
-  const args: string[] = ['T']; // All texture builtins take the texture as the first argument
+  const args: string[] = [];
   const fields: string[] = [];
   const data: number[] = [];
-
   const prototype = calls[0];
-  if (prototype.builtin.startsWith('textureSample')) {
+
+  if (isBuiltinGather(prototype.builtin) && prototype['componentType']) {
+    args.push(`/* component */ ${wgslExpr(prototype['component']!)}`);
+  }
+
+  // All texture builtins take a Texture
+  args.push('T');
+
+  if (builtinNeedsSampler(prototype.builtin)) {
     // textureSample*() builtins take a sampler as the second argument
     args.push('S');
   }
@@ -2778,6 +3214,8 @@ function buildBinnedCalls<T extends Dimensionality>(calls: TextureCall<T>[]) {
     if (value !== undefined) {
       if (name === 'offset') {
         args.push(`/* offset */ ${wgslExpr(value)}`);
+      } else if (name === 'component') {
+        // was handled above
       } else {
         const type =
           name === 'mipLevel'
@@ -2786,6 +3224,8 @@ function buildBinnedCalls<T extends Dimensionality>(calls: TextureCall<T>[]) {
             ? prototype.arrayIndexType!
             : name === 'sampleIndex'
             ? prototype.sampleIndexType!
+            : name === 'depthRef'
+            ? 'f'
             : prototype.coordType;
         args.push(`args.${name}`);
         fields.push(`@align(16) ${name} : ${wgslTypeFor(value, type)}`);
@@ -2800,7 +3240,7 @@ function buildBinnedCalls<T extends Dimensionality>(calls: TextureCall<T>[]) {
         (prototype[name] === undefined) === (value === undefined),
         'texture calls are not binned correctly'
       );
-      if (value !== undefined && name !== 'offset') {
+      if (value !== undefined && name !== 'offset' && name !== 'component') {
         const type = getCallArgType<T>(call, name);
         const bitcastToU32 = kBitCastFunctions[type];
         if (value instanceof Array) {
@@ -2840,13 +3280,17 @@ function binCalls<T extends Dimensionality>(calls: TextureCall<T>[]): number[][]
 }
 
 export function describeTextureCall<T extends Dimensionality>(call: TextureCall<T>): string {
-  const args: string[] = ['texture: T'];
-  if (call.builtin.startsWith('textureSample')) {
+  const args: string[] = [];
+  if (isBuiltinGather(call.builtin) && call.componentType) {
+    args.push(`component: ${wgslExprFor(call.component!, call.componentType)}`);
+  }
+  args.push('texture: T');
+  if (builtinNeedsSampler(call.builtin)) {
     args.push('sampler: S');
   }
   for (const name of kTextureCallArgNames) {
     const value = call[name];
-    if (value !== undefined) {
+    if (value !== undefined && name !== 'component') {
       if (name === 'coords') {
         args.push(`${name}: ${wgslExprFor(value, call.coordType)}`);
       } else if (name === 'mipLevel') {
@@ -2855,6 +3299,8 @@ export function describeTextureCall<T extends Dimensionality>(call: TextureCall<
         args.push(`${name}: ${wgslExprFor(value, call.arrayIndexType!)}`);
       } else if (name === 'sampleIndex') {
         args.push(`${name}: ${wgslExprFor(value, call.sampleIndexType!)}`);
+      } else if (name === 'depthRef') {
+        args.push(`${name}: ${wgslExprFor(value, 'f')}`);
       } else {
         args.push(`${name}: ${wgslExpr(value)}`);
       }
@@ -2876,6 +3322,18 @@ const s_deviceToPipelines = new WeakMap<GPUDevice, Map<string, GPURenderPipeline
  * Calls are "binned" by call parameters. Each bin has its own structure and
  * field in the storage buffer. This allows the calls to be non-homogenous and
  * each have their own data type for coordinates.
+ *
+ * Note: this function returns:
+ *
+ * 'results': an array of results, one for each call.
+ *
+ * 'run': a function that accepts a texture and runs the same class pipeline with
+ *        that texture as input, returning an array of results. This can be used by
+ *        identifySamplePoints to query the mix-weights used. We do this so we're
+ *        using the same shader that generated the original results when querying
+ *        the weights.
+ *
+ * 'destroy': a function that cleans up the buffers used by `run`.
  */
 export async function doTextureCalls<T extends Dimensionality>(
   t: GPUTest,
@@ -2885,6 +3343,21 @@ export async function doTextureCalls<T extends Dimensionality>(
   sampler: GPUSamplerDescriptor | undefined,
   calls: TextureCall<T>[]
 ) {
+  const {
+    format,
+    dimension,
+    depthOrArrayLayers,
+    sampleCount,
+  }: {
+    format: GPUTextureFormat;
+    dimension: GPUTextureDimension;
+    depthOrArrayLayers: number;
+    sampleCount: number;
+  } =
+    gpuTexture instanceof GPUExternalTexture
+      ? { format: 'rgba8unorm', dimension: '2d', depthOrArrayLayers: 1, sampleCount: 1 }
+      : gpuTexture;
+
   let structs = '';
   let body = '';
   let dataFields = '';
@@ -2917,14 +3390,20 @@ export async function doTextureCalls<T extends Dimensionality>(
   });
   t.device.queue.writeBuffer(dataBuffer, 0, new Uint32Array(data));
 
-  const { resultType, resultFormat, componentType } =
-    gpuTexture instanceof GPUExternalTexture
-      ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const)
-      : textureType.includes('depth')
-      ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const)
-      : getTextureFormatTypeInfo(gpuTexture.format);
+  const builtin = calls[0].builtin;
+  const isCompare = isBuiltinComparison(builtin);
+
+  const { resultType, resultFormat, componentType } = isBuiltinGather(builtin)
+    ? getTextureFormatTypeInfo(format)
+    : gpuTexture instanceof GPUExternalTexture
+    ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const)
+    : textureType.includes('depth')
+    ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const)
+    : getTextureFormatTypeInfo(format);
   const returnType = `vec4<${componentType}>`;
 
+  const samplerType = isCompare ? 'sampler_comparison' : 'sampler';
+
   const rtWidth = 256;
   const renderTarget = t.createTextureTracked({
     format: resultFormat,
@@ -2949,7 +3428,7 @@ fn vs_main(@builtin(vertex_index) vertex_index : u32) -> @builtin(position) vec4
 }
 
 @group(0) @binding(0) var          T    : ${textureType};
-${sampler ? '@group(0) @binding(1) var          S    : sampler' : ''};
+${sampler ? `@group(0) @binding(1) var          S    : ${samplerType}` : ''};
 @group(0) @binding(2) var<storage> data : Data;
 
 @fragment
@@ -2964,13 +3443,98 @@ ${body}
   const pipelines = s_deviceToPipelines.get(t.device) ?? new Map<string, GPURenderPipeline>();
   s_deviceToPipelines.set(t.device, pipelines);
 
-  const id = `${renderTarget.format}:${code}`;
+  // unfilterable-float textures can only be used with manually created bindGroupLayouts
+  // since the default 'auto' layout requires filterable textures/samplers.
+  // So, if we don't need filtering, don't request a filtering sampler. If we require
+  // filtering then check if the format is 32float format and if float32-filterable
+  // is enabled.
+  const info = kTextureFormatInfo[format ?? 'rgba8unorm'];
+  const isFiltering =
+    !!sampler &&
+    (sampler.minFilter === 'linear' ||
+      sampler.magFilter === 'linear' ||
+      sampler.mipmapFilter === 'linear');
+  let sampleType: GPUTextureSampleType = textureType.startsWith('texture_depth')
+    ? 'depth'
+    : isDepthTextureFormat(format)
+    ? 'unfilterable-float'
+    : isStencilTextureFormat(format)
+    ? 'uint'
+    : info.color?.type ?? 'float';
+  if (isFiltering && sampleType === 'unfilterable-float') {
+    assert(is32Float(format));
+    assert(t.device.features.has('float32-filterable'));
+    sampleType = 'float';
+  }
+  if (sampleCount > 1 && sampleType === 'float') {
+    sampleType = 'unfilterable-float';
+  }
+
+  const entries: GPUBindGroupLayoutEntry[] = [
+    {
+      binding: 2,
+      visibility: GPUShaderStage.FRAGMENT,
+      buffer: {
+        type: 'read-only-storage',
+      },
+    },
+  ];
+
+  const viewDimension = effectiveViewDimensionForDimension(
+    viewDescriptor.dimension,
+    dimension,
+    depthOrArrayLayers
+  );
+
+  if (textureType.includes('storage')) {
+    entries.push({
+      binding: 0,
+      visibility: GPUShaderStage.FRAGMENT,
+      storageTexture: {
+        access: 'read-only',
+        viewDimension,
+        format,
+      },
+    });
+  } else if (gpuTexture instanceof GPUExternalTexture) {
+    entries.push({
+      binding: 0,
+      visibility: GPUShaderStage.FRAGMENT,
+      externalTexture: {},
+    });
+  } else {
+    entries.push({
+      binding: 0,
+      visibility: GPUShaderStage.FRAGMENT,
+      texture: {
+        sampleType,
+        viewDimension,
+        multisampled: sampleCount > 1,
+      },
+    });
+  }
+
+  if (sampler) {
+    entries.push({
+      binding: 1,
+      visibility: GPUShaderStage.FRAGMENT,
+      sampler: {
+        type: isCompare ? 'comparison' : isFiltering ? 'filtering' : 'non-filtering',
+      },
+    });
+  }
+
+  const id = `${renderTarget.format}:${JSON.stringify(entries)}:${code}`;
   let pipeline = pipelines.get(id);
   if (!pipeline) {
     const shaderModule = t.device.createShaderModule({ code });
+    const bindGroupLayout = t.device.createBindGroupLayout({ entries });
+    const layout = t.device.createPipelineLayout({
+      bindGroupLayouts: [bindGroupLayout],
+    });
 
-    pipeline = await t.device.createRenderPipelineAsync({
-      layout: 'auto',
+    pipeline = t.device.createRenderPipeline({
+      layout,
       vertex: { module: shaderModule },
       fragment: {
         module: shaderModule,
@@ -2984,75 +3548,88 @@ ${body}
 
   const gpuSampler = sampler ? t.device.createSampler(sampler) : undefined;
 
-  const bindGroup = t.device.createBindGroup({
-    layout: pipeline.getBindGroupLayout(0),
-    entries: [
-      {
-        binding: 0,
-        resource:
-          gpuTexture instanceof GPUExternalTexture
-            ? gpuTexture
-            : gpuTexture.createView(viewDescriptor),
-      },
-      ...(sampler ? [{ binding: 1, resource: gpuSampler! }] : []),
-      { binding: 2, resource: { buffer: dataBuffer } },
-    ],
-  });
+  const run = async (gpuTexture: GPUTexture | GPUExternalTexture) => {
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline!.getBindGroupLayout(0),
+      entries: [
+        {
+          binding: 0,
+          resource:
+            gpuTexture instanceof GPUExternalTexture
+              ? gpuTexture
+              : gpuTexture.createView(viewDescriptor),
+        },
+        ...(sampler ? [{ binding: 1, resource: gpuSampler! }] : []),
+        { binding: 2, resource: { buffer: dataBuffer } },
+      ],
+    });
 
-  const bytesPerRow = align(16 * renderTarget.width, 256);
-  const resultBuffer = t.createBufferTracked({
-    size: renderTarget.height * bytesPerRow,
-    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
-  });
-  const encoder = t.device.createCommandEncoder();
+    const bytesPerRow = align(16 * renderTarget.width, 256);
+    const resultBuffer = t.createBufferTracked({
+      size: renderTarget.height * bytesPerRow,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
+    });
 
-  const renderPass = encoder.beginRenderPass({
-    colorAttachments: [
-      {
-        view: renderTarget.createView(),
-        loadOp: 'clear',
-        storeOp: 'store',
-      },
-    ],
-  });
+    const encoder = t.device.createCommandEncoder();
 
-  renderPass.setPipeline(pipeline);
-  renderPass.setBindGroup(0, bindGroup);
-  renderPass.draw(4);
-  renderPass.end();
-  encoder.copyTextureToBuffer(
-    { texture: renderTarget },
-    { buffer: resultBuffer, bytesPerRow },
-    { width: renderTarget.width, height: renderTarget.height }
-  );
-  t.device.queue.submit([encoder.finish()]);
+    const renderPass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: renderTarget.createView(),
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
 
-  await resultBuffer.mapAsync(GPUMapMode.READ);
+    renderPass.setPipeline(pipeline!);
+    renderPass.setBindGroup(0, bindGroup);
+    renderPass.draw(4);
+    renderPass.end();
+    encoder.copyTextureToBuffer(
+      { texture: renderTarget },
+      { buffer: resultBuffer, bytesPerRow },
+      { width: renderTarget.width, height: renderTarget.height }
+    );
+    t.device.queue.submit([encoder.finish()]);
 
-  const view = TexelView.fromTextureDataByReference(
-    renderTarget.format as EncodableTextureFormat,
-    new Uint8Array(resultBuffer.getMappedRange()),
-    {
-      bytesPerRow,
-      rowsPerImage: renderTarget.height,
-      subrectOrigin: [0, 0, 0],
-      subrectSize: [renderTarget.width, renderTarget.height],
-    }
-  );
+    await resultBuffer.mapAsync(GPUMapMode.READ);
 
-  let outIdx = 0;
-  const out = new Array<PerTexelComponent<number>>(calls.length);
-  for (const bin of binned) {
-    for (const callIdx of bin) {
-      const x = outIdx % rtWidth;
-      const y = Math.floor(outIdx / rtWidth);
-      out[callIdx] = view.color({ x, y, z: 0 });
-      outIdx++;
+    const view = TexelView.fromTextureDataByReference(
+      renderTarget.format as EncodableTextureFormat,
+      new Uint8Array(resultBuffer.getMappedRange()),
+      {
+        bytesPerRow,
+        rowsPerImage: renderTarget.height,
+        subrectOrigin: [0, 0, 0],
+        subrectSize: [renderTarget.width, renderTarget.height],
+      }
+    );
+
+    let outIdx = 0;
+    const out = new Array<PerTexelComponent<number>>(calls.length);
+    for (const bin of binned) {
+      for (const callIdx of bin) {
+        const x = outIdx % rtWidth;
+        const y = Math.floor(outIdx / rtWidth);
+        out[callIdx] = view.color({ x, y, z: 0 });
+        outIdx++;
+      }
     }
-  }
 
-  renderTarget.destroy();
-  resultBuffer.destroy();
+    resultBuffer.destroy();
 
-  return out;
+    return out;
+  };
+
+  const results = await run(gpuTexture);
+
+  return {
+    run,
+    results,
+    destroy() {
+      dataBuffer.destroy();
+      renderTarget.destroy();
+    },
+  };
 }
diff --git a/src/webgpu/shader/execution/robust_access_vertex.spec.ts b/src/webgpu/shader/execution/robust_access_vertex.spec.ts
index d5792de1185f..91933aa2048f 100644
--- a/src/webgpu/shader/execution/robust_access_vertex.spec.ts
+++ b/src/webgpu/shader/execution/robust_access_vertex.spec.ts
@@ -63,6 +63,10 @@ import { makeTestGroup } from '../../../common/framework/test_group.js';
 import { assert } from '../../../common/util/util.js';
 import { GPUTest, TextureTestMixin } from '../../gpu_test.js';
 
+// This tolerance should be less strict than oneULP(X) of an f32, where X is any of arbitraryValues or 0,
+// given that in GLSL compat a highp float can have fewer than 32 bits.
+const kFloatTolerance = 0.000001;
+
 // Encapsulates a draw call (either indexed or non-indexed)
 class DrawCall {
   private test: GPUTest;
@@ -265,11 +269,15 @@ const typeInfoMap: { [k: string]: VertexInfo } = {
     sizeInBytes: 12,
     validationFunc: 'return valid(v.x) && valid(v.y) && valid(v.z);',
   },
+  // It is valid to return (0, 0, 0, X) for an OOB access. (X can be anything)
+  // https://gpuweb.github.io/gpuweb/#security-shader
   float32x4: {
     wgslType: 'vec4<f32>',
     sizeInBytes: 16,
     validationFunc: `return (valid(v.x) && valid(v.y) && valid(v.z) && valid(v.w)) ||
-                            (v.x == 0.0 && v.y == 0.0 && v.z == 0.0 && (v.w == 0.0 || v.w == 1.0));`,
+                            (abs(v.x - 0.0) <= ${kFloatTolerance} &&
+                             abs(v.y - 0.0) <= ${kFloatTolerance} &&
+                             abs(v.z - 0.0) <= ${kFloatTolerance});`,
   },
 };
 
@@ -363,7 +371,7 @@ class F extends TextureTestMixin(GPUTest) {
       ${layoutStr}
 
       fn valid(f : f32) -> bool {
-        return ${validValues.map(v => `f == ${v}.0`).join(' || ')};
+        return ${validValues.map(v => `abs(f - ${v}.0) <= ${kFloatTolerance}`).join(' || ')};
       }
 
       fn validationFunc(v : ${typeInfo.wgslType}) -> bool {
diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts
index ffd58976fc88..7a6aa8901e28 100644
--- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts
+++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts
@@ -20,14 +20,17 @@ is evaluated per-fragment or per-sample. With @interpolate(, sample) or usage of
 import { makeTestGroup } from '../../../../common/framework/test_group.js';
 import { ErrorWithExtra, assert, range, unreachable } from '../../../../common/util/util.js';
 import { InterpolationSampling, InterpolationType } from '../../../constants.js';
-import { GPUTest } from '../../../gpu_test.js';
+import { kTextureFormatInfo } from '../../../format_info.js';
+import { GPUTest, TextureTestMixin } from '../../../gpu_test.js';
 import { getProvokingVertexForFlatInterpolationEitherSampling } from '../../../inter_stage.js';
 import { getMultisampleFragmentOffsets } from '../../../multisample_info.js';
-import { dotProduct, subtractVectors } from '../../../util/math.js';
+import { dotProduct, subtractVectors, align } from '../../../util/math.js';
 import { TexelView } from '../../../util/texture/texel_view.js';
 import { findFailedPixels } from '../../../util/texture/texture_ok.js';
 
-export const g = makeTestGroup(GPUTest);
+class FragmentBuiltinTest extends TextureTestMixin(GPUTest) {}
+
+export const g = makeTestGroup(FragmentBuiltinTest);
 
 const s_deviceToPipelineMap = new WeakMap<
   GPUDevice,
@@ -589,7 +592,7 @@ async function renderFragmentShaderInputsTo4TexturesAndReadbackValues(
 
       struct FragmentIn {
         @builtin(position) position: vec4f,
-        @location(0) @interpolate(${interpolate}) interpolatedValue: vec4f,
+@location(0) @interpolate(${interpolate}) interpolatedValue: vec4f,
         ${fragInCode}
       };
 
@@ -1424,6 +1427,385 @@ g.test('inputs,sample_mask')
     );
   });
 
-g.test('subgroup_size').unimplemented();
+const kSizes = [
+  [15, 15],
+  [16, 16],
+  [17, 17],
+  [19, 13],
+  [13, 10],
+  [111, 2],
+  [2, 111],
+  [35, 2],
+  [2, 35],
+  [53, 13],
+  [13, 53],
+] as const;
+
+/**
+ * Counts set bits by summing adjacent bit fields in parallel (pairs, nibbles, then bytes).
+ * @returns The population count of input.
+ * @param input Treated as an unsigned 32-bit integer
+ */
+function popcount(input: number): number {
+  let n = input;
+  n = n - ((n >> 1) & 0x55555555);
+  n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
+  return (((n + (n >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24;
+}
+
+/**
+ * Checks subgroup_size builtin value consistency.
+ *
+ * The builtin subgroup_size is not assumed to be uniform in fragment shaders.
+ * Therefore, this function checks that each value is a power of two within the device limits,
+ * that the ballot size does not exceed it, and that the in-subgroup comparison flag is 1.
+ * @param data An array of vec4u that contains (per texel):
+ *             * builtin value
+ *             * ballot size
+ *             * comparison to other invocations
+ *             * 0
+ * @param format The texture format for data
+ * @param min The minimum subgroup size from the device
+ * @param max The maximum subgroup size from the device
+ * @param width The width of the framebuffer
+ * @param height The height of the framebuffer
+ */
+function checkSubgroupSizeConsistency(
+  data: Uint32Array,
+  format: GPUTextureFormat,
+  min: number,
+  max: number,
+  width: number,
+  height: number
+): Error | undefined {
+  const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
+  const blocksPerRow = width / blockWidth;
+  // Image copies require bytesPerRow to be a multiple of 256.
+  const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
+  const uintsPerRow = bytesPerRow / 4;
+  const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4;
+
+  for (let row = 0; row < height; row++) {
+    for (let col = 0; col < width; col++) {
+      const offset = uintsPerRow * row + col * uintsPerTexel;
+      const builtinSize = data[offset];
+      const ballotSize = data[offset + 1];
+      const comparison = data[offset + 2];
+      if (builtinSize === 0) {
+        continue;
+      }
+
+      if (popcount(builtinSize) !== 1) {
+        return new Error(`Subgroup size '${builtinSize}' is not a power of two`);
+      }
+
+      if (builtinSize < min) {
+        return new Error(`Subgroup size '${builtinSize}' is less than minimum '${min}'`);
+      }
+      if (max < builtinSize) {
+        return new Error(`Subgroup size '${builtinSize}' is greater than maximum '${max}'`);
+      }
+
+      if (builtinSize < ballotSize) {
+        return new Error(`Inconsistent subgroup ballot size
+-   icoord: (${row}, ${col})
+- expected: ${builtinSize}
+-      got: ${ballotSize}`);
+      }
+
+      if (comparison !== 1) {
+        return new Error(`Not all invocations in subgroup have same view of the size
+- icoord: (${row}, ${col})`);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Runs a subgroup builtin test for fragment shaders
+ *
+ * This test draws a full screen in 2 separate draw calls (half screen each):
+ * draw i renders triangle i, i.e. vertices [3 * i, 3 * i + 2]. Results are checked per draw.
+ * @param t The base test
+ * @param format The framebuffer format
+ * @param fsShader The fragment shader with the following interface:
+ *                 Location 0 output is framebuffer with format
+ *                 Group 0 binding 0 is a u32 sized data
+ * @param width The framebuffer width
+ * @param height The framebuffer height
+ * @param checker A functor to check the framebuffer values
+ */
+async function runSubgroupTest(
+  t: FragmentBuiltinTest,
+  format: GPUTextureFormat,
+  fsShader: string,
+  width: number,
+  height: number,
+  checker: (data: Uint32Array) => Error | undefined
+) {
+  const vsShader = `
+@vertex
+fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f {
+  const vertices = array(
+    vec2(-1, -1), vec2(-1,  1), vec2( 1,  1),
+    vec2(-1, -1), vec2( 1, -1), vec2( 1,  1),
+  );
+  return vec4f(vec2f(vertices[index]), 0, 1);
+}`;
+
+  const pipeline = t.device.createRenderPipeline({
+    layout: 'auto',
+    vertex: {
+      module: t.device.createShaderModule({ code: vsShader }),
+    },
+    fragment: {
+      module: t.device.createShaderModule({ code: fsShader }),
+      targets: [{ format }],
+    },
+    primitive: {
+      topology: 'triangle-list',
+    },
+  });
+
+  const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
+  assert(bytesPerBlock !== undefined);
+
+  const blocksPerRow = width / blockWidth;
+  const blocksPerColumn = height / blockHeight;
+  const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
+  const byteLength = bytesPerRow * blocksPerColumn;
+  const uintLength = byteLength / 4;
+
+  const buffer = t.makeBufferWithContents(
+    new Uint32Array([1]),
+    GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
+  );
+
+  const bg = t.device.createBindGroup({
+    layout: pipeline.getBindGroupLayout(0),
+    entries: [
+      {
+        binding: 0,
+        resource: {
+          buffer,
+        },
+      },
+    ],
+  });
 
-g.test('subgroup_invocation_id').unimplemented();
+  for (let i = 0; i < 2; i++) {
+    const framebuffer = t.createTextureTracked({
+      size: [width, height],
+      usage:
+        GPUTextureUsage.COPY_SRC |
+        GPUTextureUsage.COPY_DST |
+        GPUTextureUsage.RENDER_ATTACHMENT |
+        GPUTextureUsage.TEXTURE_BINDING,
+      format,
+    });
+
+    const encoder = t.device.createCommandEncoder();
+    const pass = encoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: framebuffer.createView(),
+          loadOp: 'clear',
+          storeOp: 'store',
+        },
+      ],
+    });
+    pass.setPipeline(pipeline);
+    pass.setBindGroup(0, bg);
+    pass.draw(3, 1, i * 3); // firstVertex selects triangle i (vertices 3i..3i+2).
+    pass.end();
+    t.queue.submit([encoder.finish()]);
+
+    const readbackBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0);
+    const readback = await t.readGPUBufferRangeTyped(readbackBuffer, {
+      srcByteOffset: 0,
+      type: Uint32Array,
+      typedLength: uintLength,
+      method: 'copy',
+    });
+    const data: Uint32Array = readback.data;
+
+    t.expectOK(checker(data));
+  }
+}
+
+g.test('subgroup_size')
+  .desc('Tests subgroup_size values')
+  .params(u =>
+    u
+      .combine('size', kSizes)
+      .beginSubcases()
+      .combineWithParams([{ format: 'rgba32uint' }] as const)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    interface SubgroupLimits extends GPUSupportedLimits {
+      minSubgroupSize: number;
+      maxSubgroupSize: number;
+    }
+    const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits;
+
+    const fsShader = `
+enable subgroups;
+
+const width = ${t.params.size[0]};
+const height = ${t.params.size[1]};
+
+@group(0) @binding(0) var<storage, read_write> for_layout : u32;
+
+@fragment
+fn fsMain(
+  @builtin(position) pos : vec4f,
+  @builtin(subgroup_size) sg_size : u32,
+) -> @location(0) vec4u {
+  _ = for_layout;
+
+  let ballot = countOneBits(subgroupBallot(true));
+  let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w;
+
+  // Do all invocations in the subgroup see the same subgroup size?
+  let firstSize = subgroupBroadcast(sg_size, 0);
+  let compareBallot = countOneBits(subgroupBallot(firstSize == sg_size));
+  let compareSize = compareBallot.x + compareBallot.y + compareBallot.z + compareBallot.w;
+  let sameSize = select(0u, 1u, compareSize == ballotSize);
+
+  return vec4u(sg_size, ballotSize, sameSize, 0);
+}`;
+
+    await runSubgroupTest(
+      t,
+      t.params.format,
+      fsShader,
+      t.params.size[0],
+      t.params.size[1],
+      (data: Uint32Array) => {
+        return checkSubgroupSizeConsistency(
+          data,
+          t.params.format,
+          minSubgroupSize,
+          maxSubgroupSize,
+          t.params.size[0],
+          t.params.size[1]
+        );
+      }
+    );
+  });
+
+/**
+ * Checks subgroup_invocation_id value consistency
+ *
+ * Very little uniformity is expected for subgroup_invocation_id.
+ * This function checks that no id exceeds the ballot size recorded for its texel
+ * and that no id is repeated within a subgroup (keyed by the representative id).
+ * @param data An array of vec4u that contains (per texel):
+ *             * subgroup_invocation_id
+ *             * ballot size
+ *             * non-zero ID unique to each subgroup
+ *             * 0
+ * @param format The texture format of data
+ * @param width The width of the framebuffer
+ * @param height The height of the framebuffer
+ */
+function checkSubgroupInvocationIdConsistency(
+  data: Uint32Array,
+  format: GPUTextureFormat,
+  width: number,
+  height: number
+): Error | undefined {
+  const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format];
+  const blocksPerRow = width / blockWidth;
+  const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256);
+  const uintsPerRow = bytesPerRow / 4;
+  const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4;
+
+  const mappings = new Map<number, bigint>();
+  for (let row = 0; row < height; row++) {
+    for (let col = 0; col < width; col++) {
+      const offset = uintsPerRow * row + col * uintsPerTexel;
+      const id = data[offset];
+      const size = data[offset + 1];
+      const repId = data[offset + 2];
+
+      if (repId === 0) {
+        continue;
+      }
+
+      if (size < id) {
+        return new Error(
+          `Invocation id '${id}' is greater than subgroup size '${size}' for (${row}, ${col})`
+        );
+      }
+
+      let v = mappings.get(repId) ?? 0n;
+      const mask = 1n << BigInt(id);
+      if ((mask & v) !== 0n) {
+        return new Error(`Multiple invocations with id '${id}' in subgroup '${repId}'`);
+      }
+      v |= mask;
+      mappings.set(repId, v);
+    }
+  }
+
+  return undefined;
+}
+
+g.test('subgroup_invocation_id')
+  .desc('Tests subgroup_invocation_id built-in value')
+  .params(u =>
+    u
+      .combine('size', kSizes)
+      .beginSubcases()
+      .combineWithParams([{ format: 'rgba32uint' }] as const)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(async t => {
+    const fsShader = `
+enable subgroups;
+
+const width = ${t.params.size[0]};
+const height = ${t.params.size[1]};
+
+@group(0) @binding(0) var<storage, read_write> counter : atomic<u32>;
+
+@fragment
+fn fsMain(
+  @builtin(position) pos : vec4f,
+  @builtin(subgroup_invocation_id) id : u32,
+  @builtin(subgroup_size) sg_size : u32,
+) -> @location(0) vec4u {
+  let ballot = countOneBits(subgroupBallot(true));
+  let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w;
+
+  // Generate representative id for this subgroup.
+  var repId = atomicAdd(&counter, 1);
+  repId = subgroupBroadcast(repId, 0);
+
+  return vec4u(id, ballotSize, repId, 0);
+}`;
+
+    await runSubgroupTest(
+      t,
+      t.params.format,
+      fsShader,
+      t.params.size[0],
+      t.params.size[1],
+      (data: Uint32Array) => {
+        return checkSubgroupInvocationIdConsistency(
+          data,
+          t.params.format,
+          t.params.size[0],
+          t.params.size[1]
+        );
+      }
+    );
+  });
diff --git a/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts
new file mode 100644
index 000000000000..baf5c98326a8
--- /dev/null
+++ b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts
@@ -0,0 +1,150 @@
+export const description = `Test vertex shader builtin variables
+
+* test builtin(clip_distances)
+`;
+
+import { makeTestGroup } from '../../../../common/framework/test_group.js';
+import { GPUTest, TextureTestMixin } from '../../../gpu_test.js';
+
+class VertexBuiltinTest extends TextureTestMixin(GPUTest) {}
+
+export const g = makeTestGroup(VertexBuiltinTest);
+
+g.test('outputs,clip_distances')
+  .desc(
+    `
+    Test vertex shader builtin(clip_distances) values.
+
+    In the tests, we draw a square with two triangles (top-right and bottom left), whose vertices
+    have different clip distances values. (Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0)
+    1. The clip distances values of the pixels in the top-left region should be less than 0 so these
+       pixels will all be invisible
+    2. The clip distances values of the pixels on the top-right-to-bottom-left diagonal line should
+       be equal to 0
+    3. The clip distances values of the pixels in the bottom-right region should be greater than 0
+
+    -1 - - - - - 0
+     | \\      x x
+     |   \\  x x x
+     |    \\ x x x
+     |   x x\\ x x
+     | x x x x\\ x
+     0 x x x x x 1
+  `
+  )
+  .params(u => u.combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('clip-distances');
+  })
+  .fn(t => {
+    const { clipDistances } = t.params;
+
+    // Draw two triangles (top-right and bottom left) into Red, whose vertices have different clip
+    // distances values. (Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0)
+    const code = `
+    enable clip_distances;
+    const kClipDistancesSize = ${clipDistances};
+    struct VertexOutputs {
+        @builtin(position) position : vec4f,
+        @builtin(clip_distances) clipDistances : array<f32, kClipDistancesSize>,
+    }
+    @vertex
+    fn vsMain(@builtin(vertex_index) vertexIndex : u32) -> VertexOutputs {
+          var posAndClipDistances = array(
+              vec3f(-1.0,  1.0, -1.0),
+              vec3f( 1.0, -1.0,  1.0),
+              vec3f( 1.0,  1.0,  0.0),
+              vec3f(-1.0, -1.0,  0.0),
+              vec3f( 1.0, -1.0,  1.0),
+              vec3f(-1.0,  1.0, -1.0));
+          var vertexOutput : VertexOutputs;
+          vertexOutput.position = vec4f(posAndClipDistances[vertexIndex].xy, 0.0, 1.0);
+          vertexOutput.clipDistances[kClipDistancesSize - 1] = posAndClipDistances[vertexIndex].z;
+          return vertexOutput;
+    }
+    @fragment
+    fn fsMain() -> @location(0) vec4f {
+        return vec4f(1.0, 0.0, 0.0, 1.0);
+    }`;
+    const module = t.device.createShaderModule({ code });
+    const renderPipeline = t.device.createRenderPipeline({
+      layout: 'auto',
+      vertex: {
+        module,
+      },
+      fragment: {
+        module,
+        targets: [
+          {
+            format: 'rgba8unorm',
+          },
+        ],
+      },
+    });
+
+    const kSize = 7;
+    const outputTexture = t.createTextureTracked({
+      format: 'rgba8unorm',
+      size: [kSize, kSize, 1] as const,
+      usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
+    });
+
+    // Clear outputTexture to Green
+    const commandEncoder = t.device.createCommandEncoder();
+    const renderPassEncoder = commandEncoder.beginRenderPass({
+      colorAttachments: [
+        {
+          view: outputTexture.createView(),
+          loadOp: 'clear',
+          clearValue: { r: 0.0, g: 1.0, b: 0.0, a: 1.0 },
+          storeOp: 'store',
+        },
+      ],
+    });
+    renderPassEncoder.setPipeline(renderPipeline);
+    renderPassEncoder.draw(6);
+    renderPassEncoder.end();
+
+    const kBytesPerRow = 256;
+    const kBytesPerPixel = 4;
+    const outputDataSize = kBytesPerRow * (kSize - 1) + kSize * kBytesPerPixel;
+    const outputBuffer = t.createBufferTracked({
+      size: outputDataSize,
+      usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
+    });
+
+    commandEncoder.copyTextureToBuffer(
+      {
+        texture: outputTexture,
+      },
+      {
+        buffer: outputBuffer,
+        bytesPerRow: kBytesPerRow,
+        rowsPerImage: kSize,
+      },
+      [kSize, kSize, 1]
+    );
+    t.queue.submit([commandEncoder.finish()]);
+
+    // The top-left part should be Green and the bottom-right part should be Red
+    const expectedData = new Uint8Array(outputDataSize);
+    for (let y = 0; y < kSize; ++y) {
+      const baseOffset = kBytesPerRow * y;
+      // Each row is filled exactly once: pixels left of the top-right-to-bottom-left diagonal
+      // are clipped and keep the clear color (Green); the remaining pixels are drawn (Red).
+      const lastRed = kSize - y - 1;
+      for (let i = 0; i < lastRed; ++i) {
+        expectedData[baseOffset + i * 4] = 0;
+        expectedData[baseOffset + i * 4 + 1] = 255;
+        expectedData[baseOffset + i * 4 + 2] = 0;
+        expectedData[baseOffset + i * 4 + 3] = 255;
+      }
+      for (let j = lastRed; j < kSize; ++j) {
+        expectedData[baseOffset + j * 4] = 255;
+        expectedData[baseOffset + j * 4 + 1] = 0;
+        expectedData[baseOffset + j * 4 + 2] = 0;
+        expectedData[baseOffset + j * 4 + 3] = 255;
+      }
+    }
+    t.expectGPUBufferValuesEqual(outputBuffer, expectedData);
+  });
diff --git a/src/webgpu/shader/execution/statement/phony.spec.ts b/src/webgpu/shader/execution/statement/phony.spec.ts
index 1f28d040f2d8..309d8848523d 100644
--- a/src/webgpu/shader/execution/statement/phony.spec.ts
+++ b/src/webgpu/shader/execution/statement/phony.spec.ts
@@ -88,6 +88,10 @@ const kTests = {
     src: `_ = put(42i);`,
     values: [42, 0],
   },
+  call_in_subexpr: {
+    src: `_ = put(42i) + 1;`,
+    values: [42, 0],
+  },
   nested_call: {
     src: `_ = put(put(42)+1);`,
     values: [42, 43, 0],
diff --git a/src/webgpu/shader/validation/decl/var.spec.ts b/src/webgpu/shader/validation/decl/var.spec.ts
index f9e15bd6e2a6..1abf8bcf4fdb 100644
--- a/src/webgpu/shader/validation/decl/var.spec.ts
+++ b/src/webgpu/shader/validation/decl/var.spec.ts
@@ -749,7 +749,8 @@ g.test('var_access_mode_bad_other_template_contents')
   .fn(t => {
     const prog = `@group(0) @binding(0)
                   var<${t.params.prefix}${t.params.accessMode}${t.params.suffix}> x: i32;`;
-    const ok = t.params.prefix === 'storage,' && t.params.suffix === '';
+    const ok =
+      t.params.prefix === 'storage,' && (t.params.suffix === '' || t.params.suffix === ',');
     t.expectCompileResult(ok, prog);
   });
 
diff --git a/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts
new file mode 100644
index 000000000000..30f521e54944
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts
@@ -0,0 +1,264 @@
+export const description = `
+Validation tests for short-circuiting && and || expressions.
+`;
+
+import { makeTestGroup } from '../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../common/util/data_tables.js';
+import {
+  kAllScalarsAndVectors,
+  ScalarType,
+  scalarTypeOf,
+  Type,
+} from '../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+// A list of scalar and vector types.
+const kScalarAndVectorTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('scalar_vector')
+  .desc(
+    `
+  Validates that scalar and vector short-circuiting operators are only accepted for scalar booleans.
+  `
+  )
+  .params(u =>
+    u
+      .combine('op', ['&&', '||'])
+      .combine('lhs', keysOf(kScalarAndVectorTypes))
+      .combine(
+        'rhs',
+        // Skip vec3 and vec4 on the RHS to keep the number of subcases down.
+        keysOf(kScalarAndVectorTypes).filter(
+          value => !(value.startsWith('vec3') || value.startsWith('vec4'))
+        )
+      )
+      .beginSubcases()
+  )
+  .beforeAllSubcases(t => {
+    if (
+      scalarTypeOf(kScalarAndVectorTypes[t.params.lhs]) === Type.f16 ||
+      scalarTypeOf(kScalarAndVectorTypes[t.params.rhs]) === Type.f16
+    ) {
+      t.selectDeviceOrSkipTestCase('shader-f16');
+    }
+  })
+  .fn(t => {
+    const lhs = kScalarAndVectorTypes[t.params.lhs];
+    const rhs = kScalarAndVectorTypes[t.params.rhs];
+    const lhsElement = scalarTypeOf(lhs);
+    const rhsElement = scalarTypeOf(rhs);
+    const hasF16 = lhsElement === Type.f16 || rhsElement === Type.f16;
+    const code = `
+${hasF16 ? 'enable f16;' : ''}
+const lhs = ${lhs.create(0).wgsl()};
+const rhs = ${rhs.create(0).wgsl()};
+const foo = lhs ${t.params.op} rhs;
+`;
+
+    // Determine if the types are compatible.
+    let valid = false;
+    if (lhs instanceof ScalarType && rhs instanceof ScalarType) {
+      valid = lhsElement === Type.bool && rhsElement === Type.bool;
+    }
+
+    t.expectCompileResult(valid, code);
+  });
+
+interface InvalidTypeConfig {
+  // An expression that produces a value of the target type.
+  expr: string;
+  // A function that converts an expression of the target type into a valid boolean operand.
+  control: (x: string) => string;
+}
+const kInvalidTypes: Record<string, InvalidTypeConfig> = {
+  mat2x2f: {
+    expr: 'm',
+    control: e => `bool(${e}[0][0])`,
+  },
+
+  array: {
+    expr: 'arr',
+    control: e => `${e}[0]`,
+  },
+
+  ptr: {
+    expr: '(&b)',
+    control: e => `*${e}`,
+  },
+
+  atomic: {
+    expr: 'a',
+    control: e => `bool(atomicLoad(&${e}))`,
+  },
+
+  texture: {
+    expr: 't',
+    control: e => `bool(textureLoad(${e}, vec2(), 0).x)`,
+  },
+
+  sampler: {
+    expr: 's',
+    control: e => `bool(textureSampleLevel(t, ${e}, vec2(), 0).x)`,
+  },
+
+  struct: {
+    expr: 'str',
+    control: e => `${e}.b`,
+  },
+};
+
+g.test('invalid_types')
+  .desc(
+    `
+  Validates that short-circuiting expressions are never accepted for non-scalar and non-vector types.
+  `
+  )
+  .params(u =>
+    u
+      .combine('op', ['&&', '||'])
+      .combine('type', keysOf(kInvalidTypes))
+      .combine('control', [true, false])
+      .beginSubcases()
+  )
+  .fn(t => {
+    const type = kInvalidTypes[t.params.type];
+    const expr = t.params.control ? type.control(type.expr) : type.expr;
+    const code = `
+@group(0) @binding(0) var t : texture_2d<f32>;
+@group(0) @binding(1) var s : sampler;
+@group(0) @binding(2) var<storage, read_write> a : atomic<i32>;
+
+struct S { b : bool }
+
+var<private> b : bool;
+var<private> m : mat2x2f;
+var<private> arr : array<bool, 4>;
+var<private> str : S;
+
+@compute @workgroup_size(1)
+fn main() {
+  let foo = ${expr} ${t.params.op} ${expr};
+}
+`;
+
+    t.expectCompileResult(t.params.control, code);
+  });
+
+// A map from operator to the value of the LHS that will cause short-circuiting.
+const kLhsForShortCircuit: Record<string, boolean> = {
+  '&&': false,
+  '||': true,
+};
+
+// A list of expressions that are invalid unless guarded by a short-circuiting expression.
+const kInvalidRhsExpressions: Record<string, string> = {
+  overflow: 'i32(1<<thirty_one) < 0',
+  div_zero_i32: '(1 / zero_i32) == 0',
+  div_zero_f32: '(one_f32 / 0) == 0',
+  builtin: 'sqrt(-one_f32) == 0',
+};
+
+g.test('invalid_rhs_const')
+  .desc(
+    `
+  Validates that a short-circuiting expression with a const-expression LHS guards the evaluation of its RHS expression.
+  `
+  )
+  .params(u =>
+    u
+      .combine('op', ['&&', '||'])
+      .combine('rhs', keysOf(kInvalidRhsExpressions))
+      .combine('short_circuit', [true, false])
+      .beginSubcases()
+  )
+  .fn(t => {
+    let lhs = kLhsForShortCircuit[t.params.op];
+    if (!t.params.short_circuit) {
+      lhs = !lhs;
+    }
+    const code = `
+const thirty_one = 31u;
+const zero_i32 = 0i;
+const one_f32 = 1.0f;
+
+@compute @workgroup_size(1)
+fn main() {
+  let foo = ${lhs} ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]};
+}
+`;
+
+    t.expectCompileResult(t.params.short_circuit, code);
+  });
+
+g.test('invalid_rhs_override')
+  .desc(
+    `
+  Validates that a short-circuiting expression with an override-expression LHS guards the evaluation of its RHS expression.
+  `
+  )
+  .params(u =>
+    u
+      .combine('op', ['&&', '||'])
+      .combine('rhs', keysOf(kInvalidRhsExpressions))
+      .combine('short_circuit', [true, false])
+      .beginSubcases()
+  )
+  .fn(t => {
+    let lhs = kLhsForShortCircuit[t.params.op];
+    if (!t.params.short_circuit) {
+      lhs = !lhs;
+    }
+    const code = `
+override cond : bool;
+override zero_i32 = 0i;
+override one_f32 = 1.0f;
+override thirty_one = 31u;
+override foo = cond ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]};
+`;
+
+    const constants: Record<string, number> = {};
+    constants['cond'] = lhs ? 1 : 0;
+    t.expectPipelineResult({
+      expectedResult: t.params.short_circuit,
+      code,
+      constants,
+      reference: ['foo'],
+    });
+  });
+
+// A list of expressions that are invalid unless guarded by a short-circuiting expression.
+// The control case will use `value = 10`, the failure case will use `value = 1`.
+const kInvalidArrayCounts: Record<string, string> = {
+  negative: 'value - 2',
+  sqrt_neg1: 'u32(sqrt(value - 2))',
+  nested: '10 + array<i32, value - 2>()[0]',
+};
+
+g.test('invalid_array_count_on_rhs')
+  .desc(
+    `
+  Validates that an invalid array count expression is not guarded by a short-circuiting expression.
+  `
+  )
+  .params(u =>
+    u
+      .combine('op', ['&&', '||'])
+      .combine('rhs', keysOf(kInvalidArrayCounts))
+      .combine('control', [true, false])
+      .beginSubcases()
+  )
+  .fn(t => {
+    const lhs = t.params.op === '&&' ? 'false' : 'true';
+    const code = `
+const value = ${t.params.control ? '10' : '1'};
+
+@compute @workgroup_size(1)
+fn main() {
+  let foo = ${lhs} ${t.params.op} array<bool, ${kInvalidArrayCounts[t.params.rhs]}>()[0];
+}
+`;
+
+    t.expectCompileResult(t.params.control, code);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts
index 1ac752a3bfa9..ff0114097f90 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts
@@ -127,6 +127,8 @@ Validates that low <= high.
         const scalar = scalarTypeOf(ty);
         return scalar !== Type.abstractInt && scalar !== Type.abstractFloat;
       })
+      // in_shader: Is the function call statically accessed by the entry point?
+      .combine('in_shader', [false, true] as const)
   )
   .beforeAllSubcases(t => {
     const ty = kValuesTypes[t.params.type];
@@ -176,7 +178,10 @@ fn foo() {
     const shader_error =
       error && t.params.lowStage === 'constant' && t.params.highStage === 'constant';
     const pipeline_error =
-      error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime';
+      t.params.in_shader &&
+      error &&
+      t.params.lowStage !== 'runtime' &&
+      t.params.highStage !== 'runtime';
     t.expectCompileResult(!shader_error, wgsl);
     if (!shader_error) {
       const constants: Record<string, number> = {};
@@ -187,6 +192,7 @@ fn foo() {
         code: wgsl,
         constants,
         reference: ['o_low', 'o_high'],
+        statements: t.params.in_shader ? ['foo();'] : [],
       });
     }
   });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts
index 80fe7ccaca5e..32abc477ee8f 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts
@@ -98,6 +98,8 @@ Validates that count and offset must be smaller than the size of the primitive.
         { offset: 0, count: 33 },
         { offset: 1, count: 33 },
       ] as const)
+      // in_shader: Is the function call statically accessed by the entry point?
+      .combine('in_shader', [false, true] as const)
   )
   .fn(t => {
     let offsetArg = '';
@@ -138,7 +140,10 @@ fn foo() {
     const shader_error =
       error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant';
     const pipeline_error =
-      error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime';
+      t.params.in_shader &&
+      error &&
+      t.params.offsetStage !== 'runtime' &&
+      t.params.countStage !== 'runtime';
     t.expectCompileResult(!shader_error, wgsl);
     if (!shader_error) {
       const constants: Record<string, number> = {};
@@ -149,6 +154,7 @@ fn foo() {
         code: wgsl,
         constants,
         reference: ['o_offset', 'o_count'],
+        statements: t.params.in_shader ? ['foo();'] : [],
       });
     }
   });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts
index 57644ad36fb4..b302bfd14677 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts
@@ -119,6 +119,8 @@ Validates that count and offset must be smaller than the size of the primitive.
         { offset: 0, count: 33 },
         { offset: 1, count: 33 },
       ] as const)
+      // in_shader: Is the function call statically accessed by the entry point?
+      .combine('in_shader', [false, true] as const)
   )
   .fn(t => {
     let offsetArg = '';
@@ -160,7 +162,10 @@ fn foo() {
     const shader_error =
       error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant';
     const pipeline_error =
-      error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime';
+      t.params.in_shader &&
+      error &&
+      t.params.offsetStage !== 'runtime' &&
+      t.params.countStage !== 'runtime';
     t.expectCompileResult(!shader_error, wgsl);
     if (!shader_error) {
       const constants: Record<string, number> = {};
@@ -171,6 +176,7 @@ fn foo() {
         code: wgsl,
         constants,
         reference: ['o_offset', 'o_count'],
+        statements: t.params.in_shader ? ['foo();'] : [],
       });
     }
   });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts
index 826354d1ff08..55a702d71f0a 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts
@@ -143,6 +143,8 @@ g.test('partial_values')
         cases.push({ value: bias + 2 });
         return cases;
       })
+      // in_shader: Is the function call statically accessed by the entry point?
+      .combine('in_shader', [false, true] as const)
   )
   .beforeAllSubcases(t => {
     const ty = kValidArgumentTypesA[t.params.typeA];
@@ -179,7 +181,7 @@ fn foo() {
     const bias = biasForType(scalarTypeOf(tyA));
     const error = t.params.value > bias + 1;
     const shader_error = error && t.params.stage === 'constant';
-    const pipeline_error = error && t.params.stage === 'override';
+    const pipeline_error = t.params.in_shader && error && t.params.stage === 'override';
     t.expectCompileResult(!shader_error, wgsl);
     if (!shader_error) {
       const constants: Record<string, number> = {};
@@ -189,6 +191,7 @@ fn foo() {
         code: wgsl,
         constants,
         reference: ['o_b'],
+        statements: t.params.in_shader ? ['foo();'] : [],
       });
     }
   });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts
index 28e1d9cdc61b..bed18020632d 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts
@@ -12,7 +12,13 @@ import {
   scalarTypeOf,
   ScalarType,
 } from '../../../../../util/conversion.js';
-import { QuantizeFunc, quantizeToF16, quantizeToF32 } from '../../../../../util/math.js';
+import {
+  QuantizeFunc,
+  quantizeToF16,
+  quantizeToF32,
+  isSubnormalNumberF16,
+  isSubnormalNumberF32,
+} from '../../../../../util/math.js';
 import { ShaderValidationTest } from '../../../shader_validation_test.js';
 
 import {
@@ -37,6 +43,17 @@ function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc<number> {
   }
 }
 
+function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean {
+  switch (type) {
+    case Type.f32:
+      return isSubnormalNumberF32;
+    case Type.f16:
+      return isSubnormalNumberF16;
+    default:
+      return (v: number) => false;
+  }
+}
+
 g.test('values')
   .desc(
     `
@@ -73,6 +90,11 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec
       expectedResult = false;
     }
 
+    // We skip tests with values that would involve subnormal computations in
+    // order to avoid defining a specific behavior (flush to zero).
+    const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType);
+    t.skipIf(isSubnormalFn(vv) || isSubnormalFn(dp) || isSubnormalFn(len));
+
     validateConstOrOverrideBuiltinEval(
       t,
       builtin,
diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts
new file mode 100644
index 000000000000..6988f17b9ede
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts
@@ -0,0 +1,286 @@
+export const description = `
+Validation tests for quadBroadcast
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import {
+  isConvertible,
+  Type,
+  elementTypeOf,
+  kAllScalarsAndVectors,
+} from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+${t.params.enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = quadBroadcast(0, 0);
+}`;
+
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+g.test('requires_subgroups_f16')
+  .desc('Validates that the subgroups_f16 feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName];
+    if (t.params.enable) {
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const wgsl = `
+enable f16;
+enable subgroups;
+${t.params.enable ? 'enable subgroups_f16;' : ''}
+fn foo() {
+  _ = quadBroadcast(0h, 0);
+}`;
+
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors);
+
+const kStages: Record<string, string> = {
+  constant: `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = quadBroadcast(0, 0);
+}`,
+  override: `
+enable subgroups;
+override o = quadBroadcast(0, 0);`,
+  runtime: `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = quadBroadcast(0, 0);
+}`,
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const code = kStages[t.params.stage];
+    t.expectCompileResult(t.params.stage === 'runtime', code);
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${t.params.must_use ? '_ = ' : ''}quadBroadcast(0, 0);
+}`;
+
+    t.expectCompileResult(t.params.must_use, wgsl);
+  });
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kArgumentTypes)))
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const type = kArgumentTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kArgumentTypes[t.params.type];
+    let enables = `enable subgroups;\n`;
+    if (type.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = quadBroadcast(${type.create(0).wgsl()}, 0);
+}`;
+
+    t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('dataType', keysOf(kArgumentTypes))
+      .combine('retType', keysOf(kArgumentTypes))
+      .filter(t => {
+        const retType = kArgumentTypes[t.retType];
+        const retEleTy = elementTypeOf(retType);
+        const dataType = kArgumentTypes[t.dataType];
+        const dataEleTy = elementTypeOf(dataType);
+        return (
+          retEleTy !== Type.abstractInt &&
+          retEleTy !== Type.abstractFloat &&
+          dataEleTy !== Type.abstractInt &&
+          dataEleTy !== Type.abstractFloat
+        );
+      })
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    let enables = `enable subgroups;\n`;
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = quadBroadcast(${dataType.create(0).wgsl()}, 0);
+}`;
+
+    const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType;
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('id_type')
+  .desc('Validates id parameter type')
+  .params(u => u.combine('type', keysOf(kArgumentTypes)))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const type = kArgumentTypes[t.params.type];
+    const wgsl = `
+enable subgroups;
+@compute @workgroup_size(1)
+fn main() {
+  _ = quadBroadcast(0, ${type.create(0).wgsl()});
+}`;
+
+    const expect = isConvertible(type, Type.u32) || isConvertible(type, Type.i32);
+    t.expectCompileResult(expect, wgsl);
+  });
+
+const kIdCases = {
+  const_decl: {
+    code: 'const_decl',
+    valid: true,
+  },
+  const_literal: {
+    code: '0',
+    valid: true,
+  },
+  const_expr: {
+    code: 'const_decl + 2',
+    valid: true,
+  },
+  let_decl: {
+    code: 'let_decl',
+    valid: false,
+  },
+  override_decl: {
+    code: 'override_decl',
+    valid: false,
+  },
+  var_func_decl: {
+    code: 'var_func_decl',
+    valid: false,
+  },
+  var_priv_decl: {
+    code: 'var_priv_decl',
+    valid: false,
+  },
+};
+
+g.test('id_constness')
+  .desc('Validates that id must be a const-expression')
+  .params(u => u.combine('value', keysOf(kIdCases)))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+enable subgroups;
+override override_decl : u32;
+var<private> var_priv_decl : u32;
+fn foo() {
+  var var_func_decl : u32;
+  let let_decl = var_func_decl;
+  const const_decl = 0u;
+  _ = quadBroadcast(0, ${kIdCases[t.params.value].code});
+}`;
+
+    t.expectCompileResult(kIdCases[t.params.value].valid, wgsl);
+  });
+
+g.test('stage')
+  .desc('Validates it is only usable in correct stage')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const compute = `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`;
+
+    const fragment = `
+@fragment
+fn main() {
+  foo();
+}`;
+
+    const vertex = `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`;
+
+    const entry = { compute, fragment, vertex }[t.params.stage];
+    const wgsl = `
+enable subgroups;
+fn foo() {
+  _ = quadBroadcast(0, 0);
+}
+
+${entry}
+`;
+
+    t.expectCompileResult(t.params.stage !== 'vertex', wgsl);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts
new file mode 100644
index 000000000000..3812ba057ed6
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts
@@ -0,0 +1,227 @@
+export const description = `
+Validation tests for quadSwapX, quadSwapY, and quadSwapDiagonal.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import {
+  Type,
+  elementTypeOf,
+  kAllScalarsAndVectors,
+  isConvertible,
+} from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kOps = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal'] as const;
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+${t.params.enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = ${t.params.op}(0);
+}`;
+
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+g.test('requires_subgroups_f16')
+  .desc('Validates that the subgroups_f16 feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName];
+    if (t.params.enable) {
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const wgsl = `
+enable f16;
+enable subgroups;
+${t.params.enable ? 'enable subgroups_f16;' : ''}
+fn foo() {
+  _ = ${t.params.op}(0h);
+}`;
+
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+const kStages: Record<string, (op: string) => string> = {
+  constant: (op: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${op}(0);
+}`;
+  },
+  override: (op: string) => {
+    return `
+enable subgroups;
+override o = ${op}(0);`;
+  },
+  runtime: (op: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${op}(0);
+}`;
+  },
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const code = kStages[t.params.stage](t.params.op);
+    t.expectCompileResult(t.params.stage === 'runtime', code);
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0);
+}`;
+
+    t.expectCompileResult(t.params.must_use, wgsl);
+  });
+
+const kTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const type = kTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    let enables = `enable subgroups;\n`;
+    if (type.requiresF16()) {
+      enables += `enable f16;\nenable subgroups_f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.op}(${type.create(0).wgsl()});
+}`;
+
+    const eleType = elementTypeOf(type);
+    t.expectCompileResult(eleType !== Type.bool, wgsl);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('retType', keysOf(kTypes))
+      .filter(t => {
+        const type = kTypes[t.retType];
+        const eleType = elementTypeOf(type);
+        return eleType !== Type.abstractInt && eleType !== Type.abstractFloat;
+      })
+      .combine('op', kOps)
+      .combine('paramType', keysOf(kTypes))
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const retType = kTypes[t.params.retType];
+    const paramType = kTypes[t.params.paramType];
+    if (retType.requiresF16() || paramType.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const retType = kTypes[t.params.retType];
+    const paramType = kTypes[t.params.paramType];
+    let enables = `enable subgroups;\n`;
+    if (retType.requiresF16() || paramType.requiresF16()) {
+      enables += `enable f16;\nenable subgroups_f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()});
+}`;
+
+    // Can't just use isConvertible since functions must concretize the parameter
+    // type before examining the whole statement.
+    const eleParamType = elementTypeOf(paramType);
+    const eleRetType = elementTypeOf(retType);
+    let expect = paramType === retType && eleRetType !== Type.bool;
+    if (eleParamType === Type.abstractInt) {
+      expect = eleRetType === Type.i32 && isConvertible(paramType, retType);
+    } else if (eleParamType === Type.abstractFloat) {
+      expect = eleRetType === Type.f32 && isConvertible(paramType, retType);
+    }
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('stage')
+  .desc('validates builtin is only usable in the correct stages')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const compute = `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`;
+
+    const fragment = `
+@fragment
+fn main() {
+  foo();
+}`;
+
+    const vertex = `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`;
+
+    const entry = { compute, fragment, vertex }[t.params.stage];
+    const wgsl = `
+enable subgroups;
+fn foo() {
+  _ = ${t.params.op}(0);
+}
+
+${entry}
+`;
+
+    t.expectCompileResult(t.params.stage !== 'vertex', wgsl);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts
index 51cf9553785c..387340f80e9f 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts
@@ -2,10 +2,21 @@ const builtin = 'refract';
 export const description = `
 Validation tests for the ${builtin}() builtin.
 `;
-
 import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
 import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
-import { Type, kConvertableToFloatVectors, scalarTypeOf } from '../../../../../util/conversion.js';
+import {
+  Type,
+  kConvertableToFloatVectors,
+  scalarTypeOf,
+  ScalarType,
+} from '../../../../../util/conversion.js';
+import {
+  QuantizeFunc,
+  quantizeToF16,
+  quantizeToF32,
+  isSubnormalNumberF16,
+  isSubnormalNumberF32,
+} from '../../../../../util/math.js';
 import { ShaderValidationTest } from '../../../shader_validation_test.js';
 
 import {
@@ -20,6 +31,28 @@ export const g = makeTestGroup(ShaderValidationTest);
 
 const kValidArgumentTypes = objectsToRecord(kConvertableToFloatVectors);
 
+function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc<number> {
+  switch (type) {
+    case Type.f32:
+      return quantizeToF32;
+    case Type.f16:
+      return quantizeToF16;
+    default:
+      return (v: number) => v;
+  }
+}
+
+function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean {
+  switch (type) {
+    case Type.f32:
+      return isSubnormalNumberF32;
+    case Type.f16:
+      return isSubnormalNumberF16;
+    default:
+      return (v: number) => false;
+  }
+}
+
 g.test('values')
   .desc(
     `
@@ -64,6 +97,17 @@ where a the calculations result in a non-representable value for the given type.
     const c2_one_minus_b_dot_a_2 = vCheck.checkedResult(c2 * one_minus_b_dot_a_2);
     const k = vCheck.checkedResult(1.0 - c2_one_minus_b_dot_a_2);
 
+    const quantizeFn = quantizeFunctionForScalarType(scalarType);
+    const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType);
+    // We skip tests with values that would involve subnormal computations in
+    // order to avoid defining a specific behavior (flush to zero).
+    t.skipIf(
+      isSubnormalFn(quantizeFn(b_dot_a)) ||
+        isSubnormalFn(quantizeFn(b_dot_a_2)) ||
+        isSubnormalFn(quantizeFn(c2)) ||
+        isSubnormalFn(quantizeFn(k))
+    );
+
     if (k >= 0) {
       // If the k is near zero it may fail on some implementations which implement sqrt as
       // 1/inversesqrt, so skip the test.
diff --git a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts
index 5a5a28fc7362..2879055ab216 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts
@@ -51,16 +51,15 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec
   .fn(t => {
     const type = kValuesTypes[t.params.type];
 
-    // We expect to fail if low >= high as it results in a DBZ
-    const expectedResult = t.params.value1 >= t.params.value2;
+    // We expect to fail if low >= high.
+    const expectedResult = t.params.value1 < t.params.value2;
 
     validateConstOrOverrideBuiltinEval(
       t,
       builtin,
       expectedResult,
       [type.create(t.params.value1), type.create(t.params.value2), type.create(0)],
-      t.params.stage,
-      /* returnType */ concreteTypeOf(type, [Type.f32])
+      t.params.stage
     );
   });
 
@@ -81,6 +80,8 @@ g.test('partial_eval_errors')
       .beginSubcases()
       .expand('low', u => [0, 10])
       .expand('high', u => [0, 10])
+      // in_shader: Is the function call statically accessed by the entry point?
+      .combine('in_shader', [false, true] as const)
   )
   .beforeAllSubcases(t => {
     if (scalarTypeOf(kValuesTypes[t.params.type]) === Type.f16) {
@@ -130,7 +131,10 @@ fn foo() {
     const shader_error =
       error && t.params.lowStage === 'constant' && t.params.highStage === 'constant';
     const pipeline_error =
-      error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime';
+      t.params.in_shader &&
+      error &&
+      t.params.lowStage !== 'runtime' &&
+      t.params.highStage !== 'runtime';
     t.expectCompileResult(!shader_error, wgsl);
     if (!shader_error) {
       const constants: Record<string, number> = {};
@@ -141,6 +145,7 @@ fn foo() {
         code: wgsl,
         constants,
         reference: ['o_low', 'o_high'],
+        statements: t.params.in_shader ? ['foo();'] : [],
       });
     }
   });
@@ -159,10 +164,11 @@ Validates that scalar and vector arguments are rejected by ${builtin}() if not f
   })
   .fn(t => {
     const type = kArgumentTypes[t.params.type];
+    const expectedResult = isConvertibleToFloatType(elementTypeOf(type));
     validateConstOrOverrideBuiltinEval(
       t,
       builtin,
-      /* expectedResult */ isConvertibleToFloatType(elementTypeOf(type)),
+      expectedResult,
       [type.create(0), type.create(1), type.create(2)],
       'constant',
       /* returnType */ concreteTypeOf(type, [Type.f32])
@@ -344,7 +350,7 @@ g.test('early_eval_errors')
       t,
       builtin,
       /* expectedResult */ t.params.low < t.params.high,
-      [f32(0), f32(t.params.low), f32(t.params.high)],
+      [f32(t.params.low), f32(t.params.high), f32(0)],
       t.params.stage
     );
   });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts
new file mode 100644
index 000000000000..4f2a2af52197
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts
@@ -0,0 +1,235 @@
+export const description = `
+Validation tests for subgroupAdd, subgroupExclusiveAdd, and subgroupInclusiveAdd
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kBuiltins = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const;
+
+const kStages: Record<string, (builtin: string) => string> = {
+  constant: (builtin: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${builtin}(0);
+}`;
+  },
+  override: (builtin: string) => {
+    return `
+enable subgroups;
+override o = ${builtin}(0);`;
+  },
+  runtime: (builtin: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${builtin}(0);
+}`;
+  },
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const code = kStages[t.params.stage](t.params.builtin);
+    t.expectCompileResult(t.params.stage === 'runtime', code);
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u =>
+    u
+      .combine('must_use', [true, false] as const)
+      .beginSubcases()
+      .combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${t.params.must_use ? '_ = ' : ''}${t.params.builtin}(0);
+}`;
+
+    t.expectCompileResult(t.params.must_use, wgsl);
+  });
+
+const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u =>
+    u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const type = kArgumentTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kArgumentTypes[t.params.type];
+    let enables = `enable subgroups;\n`;
+    if (type.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.builtin}(${type.create(0).wgsl()});
+}`;
+
+    t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('dataType', keysOf(kArgumentTypes))
+      .combine('retType', keysOf(kArgumentTypes))
+      .filter(t => {
+        const retType = kArgumentTypes[t.retType];
+        const retEleTy = elementTypeOf(retType);
+        const dataType = kArgumentTypes[t.dataType];
+        const dataEleTy = elementTypeOf(dataType);
+        return (
+          retEleTy !== Type.abstractInt &&
+          retEleTy !== Type.abstractFloat &&
+          dataEleTy !== Type.abstractInt &&
+          dataEleTy !== Type.abstractFloat
+        );
+      })
+      .beginSubcases()
+      .combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    let enables = `enable subgroups;\n`;
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()});
+}`;
+
+    const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType;
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('stage')
+  .desc('Validates it is only usable in correct stage')
+  .params(u =>
+    u
+      .combine('stage', ['compute', 'fragment', 'vertex'] as const)
+      .beginSubcases()
+      .combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const compute = `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`;
+
+    const fragment = `
+@fragment
+fn main() {
+  foo();
+}`;
+
+    const vertex = `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`;
+
+    const entry = { compute, fragment, vertex }[t.params.stage];
+    const wgsl = `
+enable subgroups;
+fn foo() {
+  _ = ${t.params.builtin}(0);
+}
+
+${entry}
+`;
+
+    t.expectCompileResult(t.params.stage !== 'vertex', wgsl);
+  });
+
+const kInvalidTypeCases: Record<string, string> = {
+  array_u32: `array(1u,2u,3u)`,
+  array_f32: `array<f32, 4>()`,
+  struct_s: `S()`,
+  struct_t: `T(1, 1)`,
+  ptr_func: `&func_var`,
+  ptr_priv: `&priv_var`,
+  frexp_ret: `frexp(0)`,
+};
+
+g.test('invalid_types')
+  .desc('Tests that invalid non-plain types are rejected')
+  .params(u =>
+    u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const val = kInvalidTypeCases[t.params.case];
+    const wgsl = `
+enable subgroups;
+
+struct S {
+  x : u32
+}
+
+struct T {
+  a : f32,
+  b : u32,
+}
+
+var<private> priv_var : f32;
+fn foo() {
+  var func_var : vec4u;
+  _ = ${t.params.builtin}(${val});
+}`;
+
+    t.expectCompileResult(false, wgsl);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts
new file mode 100644
index 000000000000..eaee33e62cff
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts
@@ -0,0 +1,186 @@
+export const description = `
+Validation tests for subgroupAny and subgroupAll.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kOps = ['subgroupAny', 'subgroupAll'] as const;
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable, op } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = ${op}(true);
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
+// Shader sources for 'early_eval', keyed by the stage in which the builtin call is evaluated.
+// Each factory takes the builtin name and returns a complete WGSL module.
+const kStages: Record<string, (op: string) => string> = {
+  // Call inside a const-declaration initializer.
+  constant: (op: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${op}(true);
+}`,
+  // Call inside an override initializer. The enable directive needs its ';' so that the module
+  // is rejected because of the call itself rather than a parse error in the directive.
+  override: (op: string) => `
+enable subgroups;
+override o = select(0, 1, ${op}(true));`,
+  // Call inside a let-declaration initializer.
+  runtime: (op: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${op}(true);
+}`,
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage, op } = t.params;
+    t.expectCompileResult(stage === 'runtime', kStages[stage](op)); // only 'runtime' compiles
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { must_use: assigned, op } = t.params;
+    const shader = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${assigned ? '_ = ' : ''}${op}(false);
+}`;
+    t.expectCompileResult(assigned, shader); // a discarded result must not compile
+  });
+
+const kTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.op}(${type.create(0).wgsl()});
+}`;
+
+    // Only the bool type is expected to be accepted as the argument.
+    t.expectCompileResult(type === Type.bool, shader);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('type', keysOf(kTypes))
+      .filter(t => {
+        // Abstract types are dropped: the type is used as the declared type of 'res'.
+        const elem = elementTypeOf(kTypes[t.type]);
+        return !(elem === Type.abstractInt || elem === Type.abstractFloat);
+      })
+      .combine('op', kOps)
+  )
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${type.toString()} = ${t.params.op}(true);
+}`;
+
+    // The result is expected to initialize only a variable declared as bool.
+    t.expectCompileResult(type === Type.bool, shader);
+  });
+
+g.test('stage')
+  .desc('validates builtin is only usable in the correct stages')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage, op } = t.params;
+    const entryPoints = {
+      compute: `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`,
+      fragment: `
+@fragment
+fn main() {
+  foo();
+}`,
+      vertex: `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`,
+    };
+    // The builtin is called from a helper function that the entry point calls.
+    const shader = `
+enable subgroups;
+fn foo() {
+  _ = ${op}(true);
+}
+
+${entryPoints[stage]}
+`;
+    // Only compute and fragment entry points are expected to compile.
+    t.expectCompileResult(stage !== 'vertex', shader);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts
index afbe33e93c56..5f53847be25c 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts
@@ -9,6 +9,22 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js';
 
 export const g = makeTestGroup(ShaderValidationTest);
 
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = subgroupBallot(true);
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
 const kStages: Record<string, string> = {
   constant: `
 enable subgroups;
@@ -38,6 +54,23 @@ g.test('early_eval')
     t.expectCompileResult(t.params.stage === 'runtime', code);
   });
 
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { must_use: assigned } = t.params;
+    const shader = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${assigned ? '_ = ' : ''}subgroupBallot(true);
+}`;
+    t.expectCompileResult(assigned, shader); // a discarded result must not compile
+  });
+
 const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors);
 
 g.test('data_type')
@@ -69,7 +102,7 @@ fn main() {
   });
 
 g.test('return_type')
-  .desc('Validates data parameter type')
+  .desc('Validates return type')
   .params(u =>
     u.combine('type', keysOf(kArgumentTypes)).filter(t => {
       const type = kArgumentTypes[t.type];
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts
new file mode 100644
index 000000000000..ca0dfb6fd719
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts
@@ -0,0 +1,204 @@
+export const description = `
+Validation tests for subgroupAnd, subgroupOr, and subgroupXor.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import {
+  Type,
+  elementTypeOf,
+  kAllScalarsAndVectors,
+  isConvertible,
+} from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kOps = ['subgroupAnd', 'subgroupOr', 'subgroupXor'] as const;
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable, op } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = ${op}(0);
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
+// Shader sources for 'early_eval', keyed by the stage in which the builtin call is evaluated.
+// Each factory takes the builtin name and returns a complete WGSL module.
+const kStages: Record<string, (op: string) => string> = {
+  // Call inside a const-declaration initializer.
+  constant: (op: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${op}(0);
+}`,
+  // Call inside an override initializer. The enable directive needs its ';' so that the module
+  // is rejected because of the call itself rather than a parse error in the directive.
+  override: (op: string) => `
+enable subgroups;
+override o = ${op}(0);`,
+  // Call inside a let-declaration initializer.
+  runtime: (op: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${op}(0);
+}`,
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage, op } = t.params;
+    t.expectCompileResult(stage === 'runtime', kStages[stage](op)); // only 'runtime' compiles
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { must_use: assigned, op } = t.params;
+    const shader = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${assigned ? '_ = ' : ''}${op}(0);
+}`;
+    t.expectCompileResult(assigned, shader); // a discarded result must not compile
+  });
+
+const kTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.op}(${type.create(0).wgsl()});
+}`;
+
+    // Accepted when the element type converts to u32 or to i32.
+    const elem = elementTypeOf(type);
+    const accepted = [Type.u32, Type.i32].some(target => isConvertible(elem, target));
+    t.expectCompileResult(accepted, shader);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('retType', keysOf(kTypes))
+      .filter(t => {
+        // Abstract types are dropped: retType is used as the declared type of 'res'.
+        const elem = elementTypeOf(kTypes[t.retType]);
+        return !(elem === Type.abstractInt || elem === Type.abstractFloat);
+      })
+      .combine('op', kOps)
+      .combine('paramType', keysOf(kTypes))
+  )
+  .beforeAllSubcases(t => {
+    // f16 support is needed when either the declared or the argument type is f16-based.
+    const { retType, paramType } = t.params;
+    const needsF16 = kTypes[retType].requiresF16() || kTypes[paramType].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const retType = kTypes[t.params.retType];
+    const paramType = kTypes[t.params.paramType];
+    // Either type being f16-based requires the f16 enable directives.
+    const needsF16 = retType.requiresF16() || paramType.requiresF16();
+    const f16Enables = needsF16 ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()});
+}`;
+
+    // isConvertible alone is not enough: the call concretizes its argument type before the
+    // declared type of 'res' is considered, so an abstract-int argument needs an i32-based
+    // declared type and any other argument type must equal the declared type exactly.
+    const elemRet = elementTypeOf(retType);
+    const fromAbstractInt = elementTypeOf(paramType) === Type.abstractInt;
+    const expected = fromAbstractInt
+      ? elemRet === Type.i32 && isConvertible(paramType, retType)
+      : paramType === retType && (elemRet === Type.i32 || elemRet === Type.u32);
+    t.expectCompileResult(expected, shader);
+  });
+
+g.test('stage')
+  .desc('validates builtin is only usable in the correct stages')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage, op } = t.params;
+    const entryPoints = {
+      compute: `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`,
+      fragment: `
+@fragment
+fn main() {
+  foo();
+}`,
+      vertex: `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`,
+    };
+    // The builtin is called from a helper function that the entry point calls.
+    const shader = `
+enable subgroups;
+fn foo() {
+  _ = ${op}(0);
+}
+
+${entryPoints[stage]}
+`;
+    // Only compute and fragment entry points are expected to compile.
+    t.expectCompileResult(stage !== 'vertex', shader);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts
index a71b145092c8..fd76cd419b7f 100644
--- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts
@@ -14,6 +14,44 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js';
 
 export const g = makeTestGroup(ShaderValidationTest);
 
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = subgroupBroadcast(0, 0);
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
+g.test('requires_subgroups_f16')
+  .desc('Validates that the subgroups_f16 feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName];
+    if (t.params.enable) {
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const wgsl = `
+enable f16;
+enable subgroups;
+${t.params.enable ? 'enable subgroups_f16;' : ''}
+fn foo() {
+  _ = subgroupBroadcast(0h, 0);
+}`;
+    // The f16 argument is expected to compile only when 'enable subgroups_f16;' is present.
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
 const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors);
 
 const kStages: Record<string, string> = {
@@ -156,6 +194,58 @@ fn main() {
     t.expectCompileResult(expect, wgsl);
   });
 
+const kIdCases = {
+  const_decl: {
+    valid: true, // identifier of a function-scope const
+    code: 'const_decl',
+  },
+  const_literal: {
+    valid: true, // integer literal
+    code: '0',
+  },
+  const_expr: {
+    valid: true, // arithmetic on a const
+    code: 'const_decl + 2',
+  },
+  let_decl: {
+    valid: false, // identifier of a let
+    code: 'let_decl',
+  },
+  override_decl: {
+    valid: false, // identifier of a module-scope override
+    code: 'override_decl',
+  },
+  var_func_decl: {
+    valid: false, // identifier of a function-scope var
+    code: 'var_func_decl',
+  },
+  var_priv_decl: {
+    valid: false, // identifier of a private-scope var
+    code: 'var_priv_decl',
+  },
+};
+
+g.test('id_constness')
+  .desc('Validates that id must be a const-expression')
+  .params(u => u.combine('value', keysOf(kIdCases)))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { code, valid } = kIdCases[t.params.value];
+    const shader = `
+enable subgroups;
+override override_decl : u32;
+var<private> var_priv_decl : u32;
+fn foo() {
+  var var_func_decl : u32;
+  let let_decl = var_func_decl;
+  const const_decl = 0u;
+  _ = subgroupBroadcast(0, ${code});
+}`;
+    t.expectCompileResult(valid, shader); // 'valid' is the per-case expectation
+  });
+
 g.test('stage')
   .desc('Validates it is only usable in correct stage')
   .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const))
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts
new file mode 100644
index 000000000000..4525b6b97ef8
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts
@@ -0,0 +1,210 @@
+export const description = `
+Validation tests for subgroupBroadcastFirst
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = subgroupBroadcastFirst(0);
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
+g.test('requires_subgroups_f16')
+  .desc('Validates that the subgroups_f16 feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName];
+    if (t.params.enable) {
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const wgsl = `
+enable f16;
+enable subgroups;
+${t.params.enable ? 'enable subgroups_f16;' : ''}
+fn foo() {
+  _ = subgroupBroadcastFirst(0h);
+}`;
+    // The f16 argument is expected to compile only when 'enable subgroups_f16;' is present.
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors);
+
+// Wraps one statement in a compute entry point of a module that enables subgroups.
+const inComputeMain = (stmt: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${stmt}
+}`;
+
+// Shader sources for 'early_eval', keyed by where the builtin call is evaluated.
+const kStages: Record<string, string> = {
+  constant: inComputeMain('const x = subgroupBroadcastFirst(0);'),
+  // Used at module scope in an override initializer.
+  override: `
+enable subgroups;
+override o = subgroupBroadcastFirst(0);`,
+  runtime: inComputeMain('let x = subgroupBroadcastFirst(0);'),
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage } = t.params;
+    t.expectCompileResult(stage === 'runtime', kStages[stage]); // only 'runtime' compiles
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { must_use: assigned } = t.params;
+    const shader = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${assigned ? '_ = ' : ''}subgroupBroadcastFirst(0);
+}`;
+    t.expectCompileResult(assigned, shader); // a discarded result must not compile
+  });
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kArgumentTypes)))
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kArgumentTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('subgroups-f16' as GPUFeatureName, 'shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kArgumentTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable subgroups_f16;\nenable f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = subgroupBroadcastFirst(${type.create(0).wgsl()});
+}`;
+
+    // Only bool-based types are expected to be rejected.
+    t.expectCompileResult(elementTypeOf(type) !== Type.bool, shader);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('dataType', keysOf(kArgumentTypes))
+      .combine('retType', keysOf(kArgumentTypes))
+      .filter(t => {
+        // Keep only pairs in which neither the declared type of 'res' nor the argument
+        // type is based on an abstract numeric type.
+        const retEleTy = elementTypeOf(kArgumentTypes[t.retType]);
+        const dataEleTy = elementTypeOf(kArgumentTypes[t.dataType]);
+        return (
+          retEleTy !== Type.abstractInt &&
+          retEleTy !== Type.abstractFloat &&
+          dataEleTy !== Type.abstractInt &&
+          dataEleTy !== Type.abstractFloat
+        );
+      })
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    let enables = `enable subgroups;\n`;
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = subgroupBroadcastFirst(${dataType.create(0).wgsl()});
+}`;
+    // Expected to compile only when the declared type equals a non-bool argument type.
+    const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType;
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('stage')
+  .desc('Validates it is only usable in correct stage')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage } = t.params;
+    const entryPoints = {
+      compute: `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`,
+      fragment: `
+@fragment
+fn main() {
+  foo();
+}`,
+      vertex: `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`,
+    };
+    // The builtin is called from a helper function that the entry point calls.
+    const shader = `
+enable subgroups;
+fn foo() {
+  _ = subgroupBroadcastFirst(0);
+}
+
+${entryPoints[stage]}
+`;
+    // Only compute and fragment entry points are expected to compile.
+    t.expectCompileResult(stage !== 'vertex', shader);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts
new file mode 100644
index 000000000000..5637860c59ce
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts
@@ -0,0 +1,175 @@
+export const description = `
+Validation tests for subgroupElect.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = subgroupElect();
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
+// Wraps one statement in a compute entry point of a module that enables subgroups.
+const inComputeMain = (stmt: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${stmt}
+}`;
+
+// Shader sources for 'early_eval', keyed by where the subgroupElect call is evaluated.
+const kStages: Record<string, string> = {
+  constant: inComputeMain('const x = subgroupElect();'),
+  // The directive needs its ';' so the rejection comes from the call, not a parse error.
+  override: `
+enable subgroups;
+override o = select(0, 1, subgroupElect());`,
+  runtime: inComputeMain('let x = subgroupElect();'),
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage } = t.params;
+    t.expectCompileResult(stage === 'runtime', kStages[stage]); // only 'runtime' compiles
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { must_use: assigned } = t.params;
+    const shader = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${assigned ? '_ = ' : ''}subgroupElect();
+}`;
+    t.expectCompileResult(assigned, shader); // a discarded result must not compile
+  });
+
+const kTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates there are no valid data parameters')
+  .params(u => u.combine('type', keysOf(kTypes)))
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = subgroupElect(${type.create(0).wgsl()});
+}`;
+
+    // Passing any argument at all is expected to be rejected.
+    t.expectCompileResult(false, shader);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u.combine('type', keysOf(kTypes)).filter(t => {
+      // Abstract types are dropped: the type is used as the declared type of 'res'.
+      const elem = elementTypeOf(kTypes[t.type]);
+      return !(elem === Type.abstractInt || elem === Type.abstractFloat);
+    })
+  )
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${type.toString()} = subgroupElect();
+}`;
+
+    // The result is expected to initialize only a variable declared as bool.
+    t.expectCompileResult(type === Type.bool, shader);
+  });
+
+g.test('stage')
+  .desc('validates builtin is only usable in the correct stages')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage } = t.params;
+    const entryPoints = {
+      compute: `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`,
+      fragment: `
+@fragment
+fn main() {
+  foo();
+}`,
+      vertex: `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`,
+    };
+    // The builtin is called from a helper function that the entry point calls.
+    const shader = `
+enable subgroups;
+fn foo() {
+  _ = subgroupElect();
+}
+
+${entryPoints[stage]}
+`;
+    // Only compute and fragment entry points are expected to compile.
+    t.expectCompileResult(stage !== 'vertex', shader);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts
new file mode 100644
index 000000000000..84c1860019ee
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts
@@ -0,0 +1,227 @@
+export const description = `
+Validation tests for subgroupMin and subgroupMax.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import {
+  Type,
+  elementTypeOf,
+  kAllScalarsAndVectors,
+  isConvertible,
+} from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kOps = ['subgroupMin', 'subgroupMax'] as const;
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { enable, op } = t.params;
+    const shader = `
+${enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = ${op}(0);
+}`;
+    t.expectCompileResult(enable, shader); // compiles only with the enable directive
+  });
+
+g.test('requires_subgroups_f16')
+  .desc('Validates that the subgroups_f16 feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName];
+    if (t.params.enable) {
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const wgsl = `
+enable f16;
+enable subgroups;
+${t.params.enable ? 'enable subgroups_f16;' : ''}
+fn foo() {
+  _ = ${t.params.op}(0h);
+}`;
+    // The f16 argument is expected to compile only when 'enable subgroups_f16;' is present.
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+// Shader sources for 'early_eval', keyed by the stage in which the builtin call is evaluated.
+// Each factory takes the builtin name and returns a complete WGSL module.
+const kStages: Record<string, (op: string) => string> = {
+  // Call inside a const-declaration initializer.
+  constant: (op: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${op}(0);
+}`,
+  // Call inside an override initializer. The enable directive needs its ';' so that the module
+  // is rejected because of the call itself rather than a parse error in the directive.
+  override: (op: string) => `
+enable subgroups;
+override o = ${op}(0);`,
+  // Call inside a let-declaration initializer.
+  runtime: (op: string) => `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${op}(0);
+}`,
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage, op } = t.params;
+    t.expectCompileResult(stage === 'runtime', kStages[stage](op)); // only 'runtime' compiles
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { must_use: assigned, op } = t.params;
+    const shader = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${assigned ? '_ = ' : ''}${op}(0);
+}`;
+    t.expectCompileResult(assigned, shader); // a discarded result must not compile
+  });
+
+const kTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    // Every case needs 'subgroups'; f16-based types additionally need both f16 features.
+    const needsF16 = kTypes[t.params.type].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    // f16-based types need two extra enable directives after 'enable subgroups;'.
+    const f16Enables = type.requiresF16() ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.op}(${type.create(0).wgsl()});
+}`;
+
+    // Only bool-based types are expected to be rejected.
+    const elem = elementTypeOf(type);
+    t.expectCompileResult(elem !== Type.bool, shader);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('retType', keysOf(kTypes))
+      .filter(t => {
+        // Abstract types are dropped: retType is used as the declared type of 'res'.
+        const elem = elementTypeOf(kTypes[t.retType]);
+        return !(elem === Type.abstractInt || elem === Type.abstractFloat);
+      })
+      .combine('op', kOps)
+      .combine('paramType', keysOf(kTypes))
+  )
+  .beforeAllSubcases(t => {
+    // f16 support is needed when either the declared or the argument type is f16-based.
+    const { retType, paramType } = t.params;
+    const needsF16 = kTypes[retType].requiresF16() || kTypes[paramType].requiresF16();
+    const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName];
+    if (needsF16) {
+      features.push('shader-f16', 'subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const retType = kTypes[t.params.retType];
+    const paramType = kTypes[t.params.paramType];
+    const needsF16 = retType.requiresF16() || paramType.requiresF16();
+    const f16Enables = needsF16 ? 'enable f16;\nenable subgroups_f16;' : '';
+    const enables = `enable subgroups;\n${f16Enables}`;
+    const shader = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()});
+}`;
+    // isConvertible alone is not enough: the call concretizes its argument type before the
+    // declared type of 'res' is considered, so abstract arguments are handled separately.
+    const elemParam = elementTypeOf(paramType);
+    const elemRet = elementTypeOf(retType);
+    let expected: boolean;
+    if (elemParam === Type.abstractInt) {
+      expected = elemRet === Type.i32 && isConvertible(paramType, retType);
+    } else if (elemParam === Type.abstractFloat) {
+      expected = elemRet === Type.f32 && isConvertible(paramType, retType);
+    } else {
+      expected = paramType === retType && elemRet !== Type.bool;
+    }
+    t.expectCompileResult(expected, shader);
+  });
+
+g.test('stage')
+  .desc('validates builtin is only usable in the correct stages')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const { stage, op } = t.params;
+    const entryPoints = {
+      compute: `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`,
+      fragment: `
+@fragment
+fn main() {
+  foo();
+}`,
+      vertex: `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`,
+    };
+    // The builtin is called from a helper function that the entry point calls.
+    const shader = `
+enable subgroups;
+fn foo() {
+  _ = ${op}(0);
+}
+
+${entryPoints[stage]}
+`;
+    // Only compute and fragment entry points are expected to compile.
+    t.expectCompileResult(stage !== 'vertex', shader);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts
new file mode 100644
index 000000000000..0b50d4c9df2d
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts
@@ -0,0 +1,235 @@
+export const description = `
+Validation tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kBuiltins = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const;
+
+const kStages: Record<string, (builtin: string) => string> = {
+  constant: (builtin: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${builtin}(0);
+}`;
+  },
+  override: (builtin: string) => {
+    return `
+enable subgroups;
+override o = ${builtin}(0);`;
+  },
+  runtime: (builtin: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${builtin}(0);
+}`;
+  },
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const code = kStages[t.params.stage](t.params.builtin);
+    t.expectCompileResult(t.params.stage === 'runtime', code);
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u =>
+    u
+      .combine('must_use', [true, false] as const)
+      .beginSubcases()
+      .combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${t.params.must_use ? '_ = ' : ''}${t.params.builtin}(0);
+}`;
+
+    t.expectCompileResult(t.params.must_use, wgsl);
+  });
+
+const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u =>
+    u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const type = kArgumentTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kArgumentTypes[t.params.type];
+    let enables = `enable subgroups;\n`;
+    if (type.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.builtin}(${type.create(0).wgsl()});
+}`;
+
+    t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('dataType', keysOf(kArgumentTypes))
+      .combine('retType', keysOf(kArgumentTypes))
+      .filter(t => {
+        const retType = kArgumentTypes[t.retType];
+        const retEleTy = elementTypeOf(retType);
+        const dataType = kArgumentTypes[t.dataType];
+        const dataEleTy = elementTypeOf(dataType);
+        return (
+          retEleTy !== Type.abstractInt &&
+          retEleTy !== Type.abstractFloat &&
+          dataEleTy !== Type.abstractInt &&
+          dataEleTy !== Type.abstractFloat
+        );
+      })
+      .beginSubcases()
+      .combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      features.push('subgroups-f16' as GPUFeatureName);
+      features.push('shader-f16');
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const dataType = kArgumentTypes[t.params.dataType];
+    const retType = kArgumentTypes[t.params.retType];
+    let enables = `enable subgroups;\n`;
+    if (dataType.requiresF16() || retType.requiresF16()) {
+      enables += `enable subgroups_f16;\nenable f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()});
+}`;
+
+    const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType;
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('stage')
+  .desc('Validates it is only usable in correct stage')
+  .params(u =>
+    u
+      .combine('stage', ['compute', 'fragment', 'vertex'] as const)
+      .beginSubcases()
+      .combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const compute = `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`;
+
+    const fragment = `
+@fragment
+fn main() {
+  foo();
+}`;
+
+    const vertex = `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`;
+
+    const entry = { compute, fragment, vertex }[t.params.stage];
+    const wgsl = `
+enable subgroups;
+fn foo() {
+  _ = ${t.params.builtin}(0);
+}
+
+${entry}
+`;
+
+    t.expectCompileResult(t.params.stage !== 'vertex', wgsl);
+  });
+
+const kInvalidTypeCases: Record<string, string> = {
+  array_u32: `array(1u,2u,3u)`,
+  array_f32: `array<f32, 4>()`,
+  struct_s: `S()`,
+  struct_t: `T(1, 1)`,
+  ptr_func: `&func_var`,
+  ptr_priv: `&priv_var`,
+  frexp_ret: `frexp(0)`,
+};
+
+g.test('invalid_types')
+  .desc('Tests that invalid non-plain types are rejected')
+  .params(u =>
+    u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins)
+  )
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const val = kInvalidTypeCases[t.params.case];
+    const wgsl = `
+enable subgroups;
+
+struct S {
+  x : u32
+}
+
+struct T {
+  a : f32,
+  b : u32,
+}
+
+var<private> priv_var : f32;
+fn foo() {
+  var func_var : vec4u;
+  _ = ${t.params.builtin}(${val});
+}`;
+
+    t.expectCompileResult(false, wgsl);
+  });
diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts
new file mode 100644
index 000000000000..62ffb5af36dd
--- /dev/null
+++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts
@@ -0,0 +1,262 @@
+export const description = `
+Validation tests for subgroupShuffle, subgroupShuffleXor, subgroupShuffleUp, and subgroupShuffleDown.
+`;
+
+import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
+import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js';
+import {
+  Type,
+  elementTypeOf,
+  kAllScalarsAndVectors,
+  isConvertible,
+} from '../../../../../util/conversion.js';
+import { ShaderValidationTest } from '../../../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+const kOps = [
+  'subgroupShuffle',
+  'subgroupShuffleXor',
+  'subgroupShuffleUp',
+  'subgroupShuffleDown',
+] as const;
+
+g.test('requires_subgroups')
+  .desc('Validates that the subgroups feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+${t.params.enable ? 'enable subgroups;' : ''}
+fn foo() {
+  _ = ${t.params.op}(0, 0);
+}`;
+
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+g.test('requires_subgroups_f16')
+  .desc('Validates that the subgroups_f16 feature is required')
+  .params(u => u.combine('enable', [false, true] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName];
+    if (t.params.enable) {
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const wgsl = `
+enable f16;
+enable subgroups;
+${t.params.enable ? 'enable subgroups_f16;' : ''}
+fn foo() {
+  _ = ${t.params.op}(0h, 0);
+}`;
+
+    t.expectCompileResult(t.params.enable, wgsl);
+  });
+
+const kStages: Record<string, (op: string) => string> = {
+  constant: (op: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  const x = ${op}(0, 0);
+}`;
+  },
+  override: (op: string) => {
+    return `
+enable subgroups;
+override o = ${op}(0, 0);`;
+  },
+  runtime: (op: string) => {
+    return `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  let x = ${op}(0, 0);
+}`;
+  },
+};
+
+g.test('early_eval')
+  .desc('Ensures the builtin is not able to be compile time evaluated')
+  .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const code = kStages[t.params.stage](t.params.op);
+    t.expectCompileResult(t.params.stage === 'runtime', code);
+  });
+
+g.test('must_use')
+  .desc('Tests that the builtin has the @must_use attribute')
+  .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const wgsl = `
+enable subgroups;
+@compute @workgroup_size(16)
+fn main() {
+  ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0, 0);
+}`;
+
+    t.expectCompileResult(t.params.must_use, wgsl);
+  });
+
+const kTypes = objectsToRecord(kAllScalarsAndVectors);
+
+g.test('data_type')
+  .desc('Validates data parameter type')
+  .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const type = kTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    let enables = `enable subgroups;\n`;
+    if (type.requiresF16()) {
+      enables += `enable f16;\nenable subgroups_f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.op}(${type.create(0).wgsl()}, 0);
+}`;
+
+    const eleType = elementTypeOf(type);
+    t.expectCompileResult(eleType !== Type.bool, wgsl);
+  });
+
+g.test('return_type')
+  .desc('Validates return type')
+  .params(u =>
+    u
+      .combine('retType', keysOf(kTypes))
+      .filter(t => {
+        const type = kTypes[t.retType];
+        const eleType = elementTypeOf(type);
+        return eleType !== Type.abstractInt && eleType !== Type.abstractFloat;
+      })
+      .combine('op', kOps)
+      .combine('paramType', keysOf(kTypes))
+  )
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const retType = kTypes[t.params.retType];
+    const paramType = kTypes[t.params.paramType];
+    if (retType.requiresF16() || paramType.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const retType = kTypes[t.params.retType];
+    const paramType = kTypes[t.params.paramType];
+    let enables = `enable subgroups;\n`;
+    if (retType.requiresF16() || paramType.requiresF16()) {
+      enables += `enable f16;\nenable subgroups_f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}, 0);
+}`;
+
+    // Can't just use isConvertible since functions must concretize the parameter
+    // type before examining the whole statement.
+    const eleParamType = elementTypeOf(paramType);
+    const eleRetType = elementTypeOf(retType);
+    let expect = paramType === retType && eleRetType !== Type.bool;
+    if (eleParamType === Type.abstractInt) {
+      expect = eleRetType === Type.i32 && isConvertible(paramType, retType);
+    } else if (eleParamType === Type.abstractFloat) {
+      expect = eleRetType === Type.f32 && isConvertible(paramType, retType);
+    }
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('param2_type')
+  .desc('Validates shuffle parameter type')
+  .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    const features = ['subgroups' as GPUFeatureName];
+    const type = kTypes[t.params.type];
+    if (type.requiresF16()) {
+      features.push('shader-f16');
+      features.push('subgroups-f16' as GPUFeatureName);
+    }
+    t.selectDeviceOrSkipTestCase(features);
+  })
+  .fn(t => {
+    const type = kTypes[t.params.type];
+    let enables = `enable subgroups;\n`;
+    if (type.requiresF16()) {
+      enables += `enable f16;\nenable subgroups_f16;`;
+    }
+    const wgsl = `
+${enables}
+@compute @workgroup_size(1)
+fn main() {
+  _ = ${t.params.op}(0, ${type.create(0).wgsl()});
+}`;
+
+    const expect =
+      isConvertible(type, Type.u32) || (type === Type.i32 && t.params.op === 'subgroupShuffle');
+    t.expectCompileResult(expect, wgsl);
+  });
+
+g.test('stage')
+  .desc('validates builtin is only usable in the correct stages')
+  .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps))
+  .beforeAllSubcases(t => {
+    t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+  })
+  .fn(t => {
+    const compute = `
+@compute @workgroup_size(1)
+fn main() {
+  foo();
+}`;
+
+    const fragment = `
+@fragment
+fn main() {
+  foo();
+}`;
+
+    const vertex = `
+@vertex
+fn main() -> @builtin(position) vec4f {
+  foo();
+  return vec4f();
+}`;
+
+    const entry = { compute, fragment, vertex }[t.params.stage];
+    const wgsl = `
+enable subgroups;
+fn foo() {
+  _ = ${t.params.op}(0, 0);
+}
+
+${entry}
+`;
+
+    t.expectCompileResult(t.params.stage !== 'vertex', wgsl);
+  });
diff --git a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts
index 85bed5228482..d162ba3286b9 100644
--- a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts
+++ b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts
@@ -275,7 +275,7 @@ g.test('underflow_f16')
     let rhs = `mat${t.params.c}x${t.params.r}h(`;
     for (let i = 0; i < t.params.c; i++) {
       for (let k = 0; k < t.params.r; k++) {
-        lhs += `${kValue.f32.negative.min / 2},`;
+        lhs += `${kValue.f16.negative.min / 2},`;
         rhs += `${t.params.rhs},`;
       }
     }
diff --git a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts
index e76e40265e09..a3a5d368dc2d 100644
--- a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts
+++ b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts
@@ -631,7 +631,7 @@ g.test('overflow_mat_f16_internal')
     for (let i = 0; i < t.params.c; i++) {
       for (let k = 0; k < t.params.r; k++) {
         lhs += `${t.params.lhs},`;
-        rhs += `1`;
+        rhs += `1,`;
       }
     }
     rhs += ')';
diff --git a/src/webgpu/shader/validation/extension/clip_distances.spec.ts b/src/webgpu/shader/validation/extension/clip_distances.spec.ts
new file mode 100644
index 000000000000..88957d8e8e62
--- /dev/null
+++ b/src/webgpu/shader/validation/extension/clip_distances.spec.ts
@@ -0,0 +1,43 @@
+export const description = `
+Validation tests for the clip_distances extension
+`;
+
+import { makeTestGroup } from '../../../../common/framework/test_group.js';
+import { ShaderValidationTest } from '../shader_validation_test.js';
+
+export const g = makeTestGroup(ShaderValidationTest);
+
+g.test('use_clip_distances_requires_extension_enabled')
+  .desc(
+    `Checks that the clip_distances built-in variable is only allowed with the WGSL extension
+     clip_distances enabled in shader and the WebGPU extension clip-distances supported on the
+     device.`
+  )
+  .params(u =>
+    u.combine('requireExtension', [true, false]).combine('enableExtension', [true, false])
+  )
+  .beforeAllSubcases(t => {
+    if (t.params.requireExtension) {
+      t.selectDeviceOrSkipTestCase({ requiredFeatures: ['clip-distances'] });
+    }
+  })
+  .fn(t => {
+    const { requireExtension, enableExtension } = t.params;
+
+    t.expectCompileResult(
+      requireExtension && enableExtension,
+      `
+        ${enableExtension ? 'enable clip_distances;' : ''}
+        struct VertexOut {
+          @builtin(clip_distances) my_clip_distances : array<f32, 1>,
+          @builtin(position) my_position : vec4f,
+        }
+        @vertex fn main() -> VertexOut {
+          var output : VertexOut;
+          output.my_clip_distances[0] = 1.0;
+          output.my_position = vec4f(0.0, 0.0, 0.0, 1.0);
+          return output;
+        }
+    `
+    );
+  });
diff --git a/src/webgpu/shader/validation/parse/identifiers.spec.ts b/src/webgpu/shader/validation/parse/identifiers.spec.ts
index 0dd429d0a72c..4a7ec70120ff 100644
--- a/src/webgpu/shader/validation/parse/identifiers.spec.ts
+++ b/src/webgpu/shader/validation/parse/identifiers.spec.ts
@@ -199,6 +199,8 @@ const kInvalidIdentifiers = new Set([
   'noexcept',
   'noinline',
   'nointerpolation',
+  'non_coherent',
+  'noncoherent',
   'noperspective',
   'null',
   'nullptr',
diff --git a/src/webgpu/shader/validation/shader_io/builtins.spec.ts b/src/webgpu/shader/validation/shader_io/builtins.spec.ts
index 85a30fa0ec60..3d01f8f23a3e 100644
--- a/src/webgpu/shader/validation/shader_io/builtins.spec.ts
+++ b/src/webgpu/shader/validation/shader_io/builtins.spec.ts
@@ -10,7 +10,7 @@ export const g = makeTestGroup(ShaderValidationTest);
 
 // List of all built-in variables and their stage, in|out usage, and type.
 // Taken from table in Section 15:
-// https://www.w3.org/TR/2021/WD-WGSL-20211013/#builtin-variables
+// https://www.w3.org/TR/WGSL/#builtin-inputs-outputs
 export const kBuiltins = [
   { name: 'vertex_index', stage: 'vertex', io: 'in', type: 'u32' },
   { name: 'instance_index', stage: 'vertex', io: 'in', type: 'u32' },
@@ -30,6 +30,14 @@ export const kBuiltins = [
   { name: 'subgroup_size', stage: 'compute', io: 'in', type: 'u32' },
   { name: 'subgroup_invocation_id', stage: 'fragment', io: 'in', type: 'u32' },
   { name: 'subgroup_size', stage: 'fragment', io: 'in', type: 'u32' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,1>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,2>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,3>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,4>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,5>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,6>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,7>' },
+  { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array<f32,8>' },
 ] as const;
 
 // List of types to test against.
@@ -64,7 +72,15 @@ const kTestTypes = [
   'array<bool,4>',
   'array<u32,4>',
   'array<i32,4>',
+  'array<f32,1>',
+  'array<f32,2>',
+  'array<f32,3>',
   'array<f32,4>',
+  'array<f32,5>',
+  'array<f32,6>',
+  'array<f32,7>',
+  'array<f32,8>',
+  'array<f32,9>',
   'MyStruct',
 ] as const;
 
@@ -87,7 +103,16 @@ g.test('stage_inout')
     );
     if (t.params.name.includes('subgroup')) {
       t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+    } else if (t.params.name === 'clip_distances') {
+      t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName);
     }
+    t.skipIf(
+      t.params.name !== 'position' &&
+        t.params.target_stage === 'vertex' &&
+        t.params.target_io === 'out' &&
+        !t.params.use_struct,
+      'missing @builtin(position) in the vertex output when the vertex output is not a struct'
+    );
   })
   .fn(t => {
     const code = generateShader({
@@ -117,9 +142,9 @@ g.test('type')
   .params(u =>
     u
       .combineWithParams(kBuiltins)
+      .combine('use_struct', [true, false] as const)
       .beginSubcases()
       .combine('target_type', kTestTypes)
-      .combine('use_struct', [true, false] as const)
   )
   .beforeAllSubcases(t => {
     t.skipIf(
@@ -128,7 +153,16 @@ g.test('type')
     );
     if (t.params.name.includes('subgroup')) {
       t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+    } else if (t.params.name === 'clip_distances') {
+      t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName);
     }
+    t.skipIf(
+      t.params.name !== 'position' &&
+        t.params.stage === 'vertex' &&
+        t.params.io === 'out' &&
+        !t.params.use_struct,
+      'missing @builtin(position) in the vertex output'
+    );
   })
   .fn(t => {
     let code = '';
@@ -297,14 +331,30 @@ g.test('reuse_builtin_name')
     u
       .combineWithParams(kBuiltins)
       .combine('use', ['alias', 'struct', 'function', 'module-var', 'function-var'])
+      .combine('enable_extension', [true, false])
+      .unless(
+        t => t.enable_extension && !(t.name.includes('subgroup') || t.name === 'clip_distances')
+      )
   )
   .beforeAllSubcases(t => {
+    if (!t.params.enable_extension) {
+      return;
+    }
     if (t.params.name.includes('subgroup')) {
       t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
+    } else if (t.params.name === 'clip_distances') {
+      t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName);
     }
   })
   .fn(t => {
     let code = '';
+    if (t.params.enable_extension) {
+      if (t.params.name.includes('subgroup')) {
+        code += 'enable subgroups;\n';
+      } else if (t.params.name === 'clip_distances') {
+        code += 'enable clip_distances;\n';
+      }
+    }
     if (t.params.use === 'alias') {
       code += `alias ${t.params.name} = i32;`;
     } else if (t.params.use === `struct`) {
diff --git a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts
index 933093e16f0f..b716093144b0 100644
--- a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts
+++ b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts
@@ -9,15 +9,11 @@ import { generateShader } from './util.js';
 export const g = makeTestGroup(ShaderValidationTest);
 
 // List of valid interpolation attributes.
-const kValidCompatInterpolationAttributes = new Set([
+const kValidInterpolationAttributes = new Set([
   '',
-  '@interpolate(flat, either)',
   '@interpolate(perspective)',
   '@interpolate(perspective, center)',
   '@interpolate(perspective, centroid)',
-]);
-const kValidInterpolationAttributes = new Set([
-  ...kValidCompatInterpolationAttributes,
   '@interpolate(flat)',
   '@interpolate(flat, first)',
   '@interpolate(flat, either)',
@@ -83,10 +79,7 @@ g.test('type_and_sampling')
       io: t.params.io,
       use_struct: t.params.use_struct,
     });
-    const validInterpolationAttributes = t.isCompatibility
-      ? kValidCompatInterpolationAttributes
-      : kValidInterpolationAttributes;
-    t.expectCompileResult(validInterpolationAttributes.has(interpolate), code);
+    t.expectCompileResult(kValidInterpolationAttributes.has(interpolate), code);
   });
 
 g.test('require_location')
@@ -140,9 +133,7 @@ g.test('integral_types')
       use_struct: t.params.use_struct,
     });
 
-    const expectSuccess = t.isCompatibility
-      ? t.params.attribute === '@interpolate(flat, either)'
-      : t.params.attribute.startsWith('@interpolate(flat');
+    const expectSuccess = t.params.attribute.startsWith('@interpolate(flat');
     t.expectCompileResult(expectSuccess, code);
   });
 
@@ -160,7 +151,7 @@ g.test('duplicate')
     t.expectCompileResult(t.params.attr === '', code);
   });
 
-const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPass?: boolean } } = {
+const kValidationTests: { [key: string]: { src: string; pass: boolean } } = {
   valid: {
     src: `@interpolate(perspective)`,
     pass: true,
@@ -172,7 +163,6 @@ const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPas
   trailing_comma_one_arg: {
     src: `@interpolate(flat,)`,
     pass: true,
-    compatPass: false,
   },
   trailing_comma_two_arg: {
     src: `@interpolate(perspective, center,)`,
@@ -230,9 +220,6 @@ g.test('interpolation_validation')
     @builtin(position) vec4<f32> {
   return vec4f(0);
 }`;
-    const expectSuccess =
-      kValidationTests[t.params.attr].pass &&
-      (t.isCompatibility ? kValidationTests[t.params.attr].compatPass ?? true : true);
-
+    const expectSuccess = kValidationTests[t.params.attr].pass;
     t.expectCompileResult(expectSuccess, code);
   });
diff --git a/src/webgpu/shader/validation/shader_io/util.ts b/src/webgpu/shader/validation/shader_io/util.ts
index d115d79328b4..b71fd2aab42a 100644
--- a/src/webgpu/shader/validation/shader_io/util.ts
+++ b/src/webgpu/shader/validation/shader_io/util.ts
@@ -27,6 +27,9 @@ export function generateShader({
   if (attribute.includes('subgroup')) {
     code += 'enable subgroups;\n';
   }
+  if (attribute.includes('clip_distances')) {
+    code += 'enable clip_distances;\n';
+  }
 
   if (use_struct) {
     // Generate a struct that wraps the entry point IO variable.
diff --git a/src/webgpu/shader/validation/shader_validation_test.ts b/src/webgpu/shader/validation/shader_validation_test.ts
index 6a4cae331766..5db47bd586ba 100644
--- a/src/webgpu/shader/validation/shader_validation_test.ts
+++ b/src/webgpu/shader/validation/shader_validation_test.ts
@@ -119,9 +119,14 @@ export class ShaderValidationTest extends GPUTest {
     constants?: Record<string, GPUPipelineConstantValue>;
     // List of additional module-scope variable the entrypoint needs to reference
     reference?: string[];
+    // List of additional statements to insert in the entry point.
+    statements?: string[];
   }) {
     const phonies: Array<string> = [];
 
+    if (args.statements !== undefined) {
+      phonies.push(...args.statements);
+    }
     if (args.constants !== undefined) {
       phonies.push(...keysOf(args.constants).map(c => `_ = ${c};`));
     }
diff --git a/src/webgpu/shader/validation/types/textures.spec.ts b/src/webgpu/shader/validation/types/textures.spec.ts
index 7b8f1748c113..f619877e2bc1 100644
--- a/src/webgpu/shader/validation/types/textures.spec.ts
+++ b/src/webgpu/shader/validation/types/textures.spec.ts
@@ -120,7 +120,7 @@ Besides, the shader compilation should always pass regardless of whether the for
     const { format, access, comma } = t.params;
     // bgra8unorm is considered a valid storage format at shader compilation stage
     const isFormatValid =
-      isTextureFormatUsableAsStorageFormat(format, t.isCompatibility) || format === 'bgra8unorm';
+      isTextureFormatUsableAsStorageFormat(format, false) || format === 'bgra8unorm';
     const isAccessValid = kAccessModes.includes(access);
     const wgsl = `@group(0) @binding(0) var tex: texture_storage_2d<${format}, ${access}${comma}>;`;
     t.expectCompileResult(isFormatValid && isAccessValid, wgsl);
diff --git a/src/webgpu/util/math.ts b/src/webgpu/util/math.ts
index 20d7818df65d..d5ca2b41320e 100644
--- a/src/webgpu/util/math.ts
+++ b/src/webgpu/util/math.ts
@@ -961,6 +961,17 @@ export function scalarF32Range(
   counts.neg_norm = counts.neg_norm === undefined ? counts.pos_norm : counts.neg_norm;
   counts.neg_sub = counts.neg_sub === undefined ? counts.pos_sub : counts.neg_sub;
 
+  let special_pos: number[] = [];
+  // The first interior point for 'pos_norm' is at 3. Because we have two special values we start allowing these
+  // special values as soon as they will fit as interior values.
+  if (counts.pos_norm >= 4) {
+    special_pos = [
+      // Largest float as signed integer
+      0x4effffff,
+      // Largest float as unsigned integer
+      0x4f7fffff,
+    ];
+  }
   // Generating bit fields first and then converting to f32, so that the spread across the possible f32 values is more
   // even. Generating against the bounds of f32 values directly results in the values being extremely biased towards the
   // extremes, since they are so much larger.
@@ -980,7 +991,14 @@ export function scalarF32Range(
       kBit.f32.positive.subnormal.max,
       counts.pos_sub
     ),
-    ...linearRange(kBit.f32.positive.min, kBit.f32.positive.max, counts.pos_norm),
+    ...[
+      ...linearRange(
+        kBit.f32.positive.min,
+        kBit.f32.positive.max,
+        counts.pos_norm - special_pos.length
+      ),
+      ...special_pos,
+    ].sort((n1, n2) => n1 - n2),
   ].map(Math.trunc);
   return bit_fields.map(reinterpretU32AsF32);
 }
diff --git a/src/webgpu/util/texture.ts b/src/webgpu/util/texture.ts
index badce71baa34..20e99fdfad4d 100644
--- a/src/webgpu/util/texture.ts
+++ b/src/webgpu/util/texture.ts
@@ -17,6 +17,7 @@ const kLoadValueFromStorageInfo: Partial<{
     texelType: string;
     unpackWGSL: string;
     useFragDepth?: boolean;
+    discardWithStencil?: boolean;
   };
 }> = {
   r8unorm: {
@@ -233,17 +234,27 @@ const kLoadValueFromStorageInfo: Partial<{
     `,
     useFragDepth: true,
   },
+  stencil8: {
+    storageType: 'u32',
+    texelType: 'vec4u',
+    unpackWGSL: `
+      return vec4u(unpack4xU8(src[byteOffset / 4])[byteOffset % 4], 123, 123, 123)
+    `,
+    discardWithStencil: true,
+  },
 };
 
 function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) {
   const info = kLoadValueFromStorageInfo[format];
   assert(!!info);
-  const { storageType, texelType, unpackWGSL, useFragDepth } = info;
+  const { storageType, texelType, unpackWGSL, useFragDepth, discardWithStencil } = info;
 
   const [depthDecl, depthCode] = useFragDepth
     ? ['@builtin(frag_depth) d: f32,', 'fs.d = fs.v[0];']
     : ['', ''];
 
+  const stencilCode = discardWithStencil ? 'if ((fs.v.r & vin.stencilMask) == 0) { discard; }' : '';
+
   return `
     struct Uniforms {
       numTexelRows: u32,
@@ -255,9 +266,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) {
     struct VSOutput {
       @builtin(position) pos: vec4f,
       @location(0) @interpolate(flat, either) sampleIndex: u32,
+      @location(1) @interpolate(flat, either) stencilMask: u32,
     };
 
-    @vertex fn vs(@builtin(vertex_index) vNdx: u32) -> VSOutput {
+    @vertex fn vs(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput {
       let points = array(
         vec2f(0, 0), vec2f(1, 0), vec2f(0, 1), vec2f(1, 1),
       );
@@ -266,7 +278,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) {
       let rowOffset = f32(sampleRow) / numSampleRows;
       let rowMult = 1.0 / numSampleRows;
       let p = (points[vNdx % 4] * vec2f(1, rowMult) + vec2f(0, rowOffset)) * 2.0 - 1.0;
-      return VSOutput(vec4f(p, 0, 1), uni.sampleCount - sampleRow % uni.sampleCount - 1);
+      return VSOutput(
+        vec4f(p, 0, 1),
+        uni.sampleCount - sampleRow % uni.sampleCount - 1,
+        1u << iNdx);
     }
 
     @group(0) @binding(0) var<uniform> uni: Uniforms;
@@ -289,6 +304,7 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) {
       var fs: FSOutput;
       fs.v = unpack(byteOffset);
       ${depthCode}
+      ${stencilCode}
       return fs;
     }
     `;
@@ -312,114 +328,158 @@ function copyBufferToTextureViaRender(
 
   const msInfo = kLoadValueFromStorageInfo[format];
   assert(!!msInfo);
-  const { useFragDepth } = msInfo;
+  const { useFragDepth, discardWithStencil } = msInfo;
 
   const { device } = t;
-  const code = getCopyBufferToTextureViaRenderCode(format);
-  const id = JSON.stringify({ format, useFragDepth, sampleCount, code });
-  const pipelines =
-    s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map<string, GPURenderPipeline>();
-  s_copyBufferToTextureViaRenderPipelines.set(device, pipelines);
-  let pipeline = pipelines.get(id);
-  if (!pipeline) {
-    const module = device.createShaderModule({ code });
-    pipeline = device.createRenderPipeline({
-      layout: 'auto',
-      vertex: { module },
-      ...(useFragDepth
-        ? {
-            fragment: {
-              module,
-              targets: [],
-            },
-            depthStencil: {
-              depthWriteEnabled: true,
-              depthCompare: 'always',
-              format,
-            },
-          }
-        : {
-            fragment: {
-              module,
-              targets: [{ format }],
-            },
-          }),
-      primitive: {
-        topology: 'triangle-strip',
-      },
-      ...(sampleCount > 1 && { multisample: { count: sampleCount } }),
+  const numBlits = discardWithStencil ? 8 : 1;
+  for (let blitCount = 0; blitCount < numBlits; ++blitCount) {
+    const code = getCopyBufferToTextureViaRenderCode(format);
+    const stencilWriteMask = 1 << blitCount;
+    const id = JSON.stringify({
+      format,
+      useFragDepth,
+      stencilWriteMask,
+      discardWithStencil,
+      sampleCount,
+      code,
     });
-    pipelines.set(id, pipeline);
-  }
+    const pipelines =
+      s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map<string, GPURenderPipeline>();
+    s_copyBufferToTextureViaRenderPipelines.set(device, pipelines);
+    let pipeline = pipelines.get(id);
+    if (!pipeline) {
+      const module = device.createShaderModule({ code });
+      pipeline = device.createRenderPipeline({
+        label: `blitCopyFor-${format}`,
+        layout: 'auto',
+        vertex: { module },
+        ...(discardWithStencil
+          ? {
+              fragment: {
+                module,
+                targets: [],
+              },
+              depthStencil: {
+                depthWriteEnabled: false,
+                depthCompare: 'always',
+                format,
+                stencilWriteMask,
+                stencilFront: {
+                  passOp: 'replace',
+                },
+              },
+            }
+          : useFragDepth
+          ? {
+              fragment: {
+                module,
+                targets: [],
+              },
+              depthStencil: {
+                depthWriteEnabled: true,
+                depthCompare: 'always',
+                format,
+              },
+            }
+          : {
+              fragment: {
+                module,
+                targets: [{ format }],
+              },
+            }),
+        primitive: {
+          topology: 'triangle-strip',
+        },
+        ...(sampleCount > 1 && { multisample: { count: sampleCount } }),
+      });
+      pipelines.set(id, pipeline);
+    }
 
-  const info = kTextureFormatInfo[format];
-  const uniforms = new Uint32Array([
-    copySize.height, //  numTexelRows: u32,
-    source.bytesPerRow!, //  bytesPerRow: u32,
-    info.bytesPerBlock!, //  bytesPerSample: u32,
-    dest.texture.sampleCount, //  sampleCount: u32,
-  ]);
-  const uniformBuffer = t.makeBufferWithContents(
-    uniforms,
-    GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM
-  );
-  const storageBuffer = t.createBufferTracked({
-    size: source.buffer.size,
-    usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
-  });
-  encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size);
-  const baseMipLevel = dest.mipLevel;
-  for (let l = 0; l < copySize.depthOrArrayLayers; ++l) {
-    const baseArrayLayer = origin.z + l;
-    const mipLevelCount = 1;
-    const arrayLayerCount = 1;
-    const pass = encoder.beginRenderPass(
-      useFragDepth
-        ? {
-            colorAttachments: [],
-            depthStencilAttachment: {
-              view: dest.texture.createView({
-                baseMipLevel,
-                baseArrayLayer,
-                mipLevelCount,
-                arrayLayerCount,
-              }),
-              depthClearValue: 0,
-              depthLoadOp: 'clear',
-              depthStoreOp: 'store',
-            },
-          }
-        : {
-            colorAttachments: [
-              {
+    const info = kTextureFormatInfo[format];
+    const uniforms = new Uint32Array([
+      copySize.height, //  numTexelRows: u32,
+      source.bytesPerRow!, //  bytesPerRow: u32,
+      info.bytesPerBlock!, //  bytesPerSample: u32,
+      dest.texture.sampleCount, //  sampleCount: u32,
+    ]);
+    const uniformBuffer = t.makeBufferWithContents(
+      uniforms,
+      GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM
+    );
+    const storageBuffer = t.createBufferTracked({
+      size: source.buffer.size,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE,
+    });
+    encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size);
+    const baseMipLevel = dest.mipLevel;
+    for (let l = 0; l < copySize.depthOrArrayLayers; ++l) {
+      const baseArrayLayer = origin.z + l;
+      const mipLevelCount = 1;
+      const arrayLayerCount = 1;
+      const pass = encoder.beginRenderPass(
+        discardWithStencil
+          ? {
+              colorAttachments: [],
+              depthStencilAttachment: {
                 view: dest.texture.createView({
                   baseMipLevel,
                   baseArrayLayer,
                   mipLevelCount,
                   arrayLayerCount,
                 }),
-                loadOp: 'clear',
-                storeOp: 'store',
+                stencilClearValue: 0,
+                stencilLoadOp: 'load',
+                stencilStoreOp: 'store',
               },
-            ],
-          }
-    );
-    pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1);
-    pass.setPipeline(pipeline);
+            }
+          : useFragDepth
+          ? {
+              colorAttachments: [],
+              depthStencilAttachment: {
+                view: dest.texture.createView({
+                  baseMipLevel,
+                  baseArrayLayer,
+                  mipLevelCount,
+                  arrayLayerCount,
+                }),
+                depthClearValue: 0,
+                depthLoadOp: 'clear',
+                depthStoreOp: 'store',
+              },
+            }
+          : {
+              colorAttachments: [
+                {
+                  view: dest.texture.createView({
+                    baseMipLevel,
+                    baseArrayLayer,
+                    mipLevelCount,
+                    arrayLayerCount,
+                  }),
+                  loadOp: 'clear',
+                  storeOp: 'store',
+                },
+              ],
+            }
+      );
+      pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1);
+      pass.setPipeline(pipeline);
 
-    const offset =
-      (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l;
-    const bindGroup = device.createBindGroup({
-      layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: { buffer: uniformBuffer } },
-        { binding: 1, resource: { buffer: storageBuffer, offset } },
-      ],
-    });
+      const offset =
+        (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l;
+      const bindGroup = device.createBindGroup({
+        layout: pipeline.getBindGroupLayout(0),
+        entries: [
+          { binding: 0, resource: { buffer: uniformBuffer } },
+          { binding: 1, resource: { buffer: storageBuffer, offset } },
+        ],
+      });
 
-    pass.setBindGroup(0, bindGroup);
-    pass.draw(4 * copySize.height * dest.texture.sampleCount);
-    pass.end();
+      pass.setBindGroup(0, bindGroup);
+      pass.setStencilReference(0xff);
+      pass.draw(4 * copySize.height * dest.texture.sampleCount, 1, 0, blitCount);
+      pass.end();
+    }
   }
 }
 
diff --git a/src/webgpu/util/texture/base.ts b/src/webgpu/util/texture/base.ts
index c5c6aaf20579..0cf6b7387318 100644
--- a/src/webgpu/util/texture/base.ts
+++ b/src/webgpu/util/texture/base.ts
@@ -255,6 +255,7 @@ export function reifyTextureViewDescriptor(
     format,
     dimension,
     aspect,
+    usage: texture.usage,
     baseMipLevel,
     mipLevelCount,
     baseArrayLayer,
diff --git a/standalone/index.html b/standalone/index.html
index d087d6584cd9..5c1daa89fcf8 100644
--- a/standalone/index.html
+++ b/standalone/index.html
@@ -13,8 +13,10 @@
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
     <link href="https://fonts.googleapis.com/css2?family=Poppins&display=swap" rel="stylesheet">
     <meta name="viewport" content="width=device-width" />
-    <!-- Chrome Origin Trial token for https://gpuweb.github.io (see dev_server.ts for localhost tokens) -->
-    <meta http-equiv="origin-trial" content="AmV1vLgjOQ01SlGnVhpoKXy7gLW+K/plXHwHKnYn4S4US98WaSesKBI+XSUMo95unQARyMGDvW70KsfyeYblZQ0AAABQeyJvcmlnaW4iOiJodHRwczovL2dwdXdlYi5naXRodWIuaW86NDQzIiwiZmVhdHVyZSI6IldlYkdQVSIsImV4cGlyeSI6MTY2MzcxODM5OX0=">
+    <!-- Chrome "WebGPU Subgroups Features" origin trial token for https://gpuweb.github.io -->
+    <meta http-equiv="origin-trial" content="AiZbfNa6FSBMZg2Ak2xeb7upejmg3jb1Ll47edOTVs7fkZLvrV4jjPh7p4J7quB9Lx6Z7l0IDc97gpPKb4F6OQcAAABheyJvcmlnaW4iOiJodHRwczovL2dwdXdlYi5naXRodWIuaW86NDQzIiwiZmVhdHVyZSI6IldlYkdQVVN1Ymdyb3Vwc0ZlYXR1cmVzIiwiZXhwaXJ5IjoxNzM5OTIzMTk5fQ==">
+    <!-- Chrome "WebGPU Subgroups Features" origin trial token for http://localhost:8080 -->
+    <meta http-equiv="origin-trial" content="AkMLfHisU+Fsbpi9g6tfKSZF4ngpsmjW4Oai360fUvZE2rgSPZDWSWb8ryrliJX5HR/Rw0yig0ir9el2hrnODwcAAABaeyJvcmlnaW4iOiJodHRwOi8vbG9jYWxob3N0OjgwODAiLCJmZWF0dXJlIjoiV2ViR1BVU3ViZ3JvdXBzRmVhdHVyZXMiLCJleHBpcnkiOjE3Mzk5MjMxOTl9">
     <link rel="stylesheet" href="third_party/normalize.min.css" />
     <script src="third_party/jquery/jquery-3.3.1.min.js"></script>
     <style>