diff --git a/src/webgpu/compat/api/validation/createBindGroupLayout_limits.spec.ts b/src/webgpu/compat/api/validation/createBindGroupLayout_limits.spec.ts
new file mode 100644
index 000000000000..d5bf7a063aaf
--- /dev/null
+++ b/src/webgpu/compat/api/validation/createBindGroupLayout_limits.spec.ts
@@ -0,0 +1,87 @@
+export const description = `
+Tests that, in compat mode, you can not create a bind group layout with
+more than the max in stage limit even if the per stage limit is higher.
+`;
+
+import { makeTestGroup } from '../../../../common/framework/test_group.js';
+import { range } from '../../../../common/util/util.js';
+import { RequiredLimitsTestMixin } from '../../../gpu_test.js';
+import { CompatibilityTest } from '../../compatibility_test.js';
+
+export const g = makeTestGroup(
+  RequiredLimitsTestMixin(CompatibilityTest, {
+    getRequiredLimits(adapter: GPUAdapter) {
+      return {
+        maxStorageBuffersInFragmentStage: adapter.limits.maxStorageBuffersInFragmentStage! / 2,
+        maxStorageBuffersInVertexStage: adapter.limits.maxStorageBuffersInVertexStage! / 2,
+        maxStorageBuffersPerShaderStage: adapter.limits.maxStorageBuffersPerShaderStage,
+        maxStorageTexturesInFragmentStage: adapter.limits.maxStorageTexturesInFragmentStage! / 2,
+        maxStorageTexturesInVertexStage: adapter.limits.maxStorageTexturesInVertexStage! / 2,
+        maxStorageTexturesPerShaderStage: adapter.limits.maxStorageTexturesPerShaderStage,
+      }; // in-stage limits: half the adapter value; per-stage limits: full adapter value
+    },
+    key() {
+      return `
+      maxStorageBuffersInFragmentStage/2,
+      maxStorageBuffersInVertexStage/2,
+      maxStorageTexturesInFragmentStage/2,
+      maxStorageTexturesInVertexStage/2,
+      maxStorageBuffersPerShaderStage
+      maxStorageTexturesPerShaderStage
+    `; // names every limit requested by getRequiredLimits() and the math applied to it
+    },
+  })
+);
+
+g.test('maxStorageBuffersTexturesInVertexFragmentStage')
+  .desc(
+    `
+      Tests that you can't use more than maxStorage(Buffers/Textures)In(Fragment/Vertex)Stage when
+      the limit is less than maxStorage(Buffers/Textures)PerShaderStage
+    `
+  )
+  .params(u =>
+    u
+      .combine('limit', [
+        'maxStorageBuffersInFragmentStage',
+        'maxStorageBuffersInVertexStage',
+        'maxStorageTexturesInFragmentStage',
+        'maxStorageTexturesInVertexStage',
+      ] as const)
+      .beginSubcases()
+      .combine('extra', [0, 1] as const)
+  )
+  .fn(t => {
+    const { limit, extra } = t.params;
+    const { device } = t;
+
+    const isBuffer = limit.includes('Buffers'); // otherwise a storage-texture limit
+    const inStageLimit = device.limits[limit]!; // value reported by the device under test
+    const perStageLimitName = isBuffer
+      ? 'maxStorageBuffersPerShaderStage'
+      : 'maxStorageTexturesPerShaderStage';
+    const perStageLimit = device.limits[perStageLimitName];
+
+    t.debug(`${limit}(${inStageLimit}), ${perStageLimitName}(${perStageLimit})`);
+
+    t.skipIf(inStageLimit === 0, `${limit} is 0`);
+    t.skipIf(
+      !(inStageLimit < perStageLimit),
+      `${limit}(${inStageLimit}) is not less than ${perStageLimitName}(${perStageLimit})`
+    ); // the test only runs when the in-stage limit is the stricter of the two
+
+    const visibility = limit.includes('Fragment') ? GPUShaderStage.FRAGMENT : GPUShaderStage.VERTEX;
+
+    const expectFailure = extra > 0; // extra === 1 puts one binding over the in-stage limit
+    t.expectValidationError(() => {
+      device.createBindGroupLayout({
+        entries: range(inStageLimit + extra, i => ({
+          binding: i,
+          visibility,
+          ...(isBuffer
+            ? { buffer: { type: 'read-only-storage' } }
+            : { storageTexture: { format: 'r32float', access: 'read-only' } }),
+        })),
+      });
+    }, expectFailure);
+  });
diff --git a/src/webgpu/compat/api/validation/createPipelineLayout.spec.ts b/src/webgpu/compat/api/validation/createPipelineLayout.spec.ts
new file mode 100644
index 000000000000..a7c6c1429c98
--- /dev/null
+++ b/src/webgpu/compat/api/validation/createPipelineLayout.spec.ts
@@ -0,0 +1,91 @@
+export const description = `
+Tests that, in compat mode, you can not create a pipeline layout with
+more than the max in stage limit even if the per stage limit is higher.
+`;
+
+import { makeTestGroup } from '../../../../common/framework/test_group.js';
+import { range } from '../../../../common/util/util.js';
+import { RequiredLimitsTestMixin } from '../../../gpu_test.js';
+import { CompatibilityTest } from '../../compatibility_test.js';
+
+export const g = makeTestGroup(
+  RequiredLimitsTestMixin(CompatibilityTest, {
+    getRequiredLimits(adapter: GPUAdapter) {
+      return {
+        maxStorageBuffersInFragmentStage: adapter.limits.maxStorageBuffersInFragmentStage! / 2,
+        maxStorageBuffersInVertexStage: adapter.limits.maxStorageBuffersInVertexStage! / 2,
+        maxStorageBuffersPerShaderStage: adapter.limits.maxStorageBuffersPerShaderStage,
+        maxStorageTexturesInFragmentStage: adapter.limits.maxStorageTexturesInFragmentStage! / 2,
+        maxStorageTexturesInVertexStage: adapter.limits.maxStorageTexturesInVertexStage! / 2,
+        maxStorageTexturesPerShaderStage: adapter.limits.maxStorageTexturesPerShaderStage,
+      }; // in-stage limits: half the adapter value; per-stage limits: full adapter value
+    },
+    key() {
+      return `
+      maxStorageBuffersInFragmentStage/2,
+      maxStorageBuffersInVertexStage/2,
+      maxStorageTexturesInFragmentStage/2,
+      maxStorageTexturesInVertexStage/2,
+      maxStorageBuffersPerShaderStage
+      maxStorageTexturesPerShaderStage
+    `; // names every limit requested by getRequiredLimits() and the math applied to it
+    },
+  })
+);
+
+g.test('maxStorageBuffersTexturesInVertexFragmentStage')
+  .desc(
+    `
+      Tests that you can't use more than maxStorage(Buffers/Textures)In(Fragment/Vertex)Stage when
+      the limit is less than maxStorage(Buffers/Textures)PerShaderStage
+    `
+  )
+  .params(u =>
+    u
+      .combine('limit', [
+        'maxStorageBuffersInFragmentStage',
+        'maxStorageBuffersInVertexStage',
+        'maxStorageTexturesInFragmentStage',
+        'maxStorageTexturesInVertexStage',
+      ] as const)
+      .beginSubcases()
+      .combine('extra', [0, 1] as const)
+  )
+  .fn(t => {
+    const { limit, extra } = t.params;
+    const { device } = t;
+
+    const isBuffer = limit.includes('Buffers'); // otherwise a storage-texture limit
+    const inStageLimit = device.limits[limit]!; // value reported by the device under test
+    const perStageLimitName = isBuffer
+      ? 'maxStorageBuffersPerShaderStage'
+      : 'maxStorageTexturesPerShaderStage';
+    const perStageLimit = device.limits[perStageLimitName];
+
+    t.debug(`${limit}(${inStageLimit}), ${perStageLimitName}(${perStageLimit})`);
+
+    t.skipIf(inStageLimit === 0, `${limit} is 0`);
+    t.skipIf(
+      !(inStageLimit < perStageLimit),
+      `${limit}(${inStageLimit}) is not less than ${perStageLimitName}(${perStageLimit})`
+    ); // the test only runs when the in-stage limit is the stricter of the two
+
+    const visibility = limit.includes('Fragment') ? GPUShaderStage.FRAGMENT : GPUShaderStage.VERTEX;
+
+    const bindGroupLayouts = [inStageLimit, extra].map(count =>
+      device.createBindGroupLayout({
+        entries: range(count, i => ({
+          binding: i,
+          visibility,
+          ...(isBuffer
+            ? { buffer: { type: 'read-only-storage' } }
+            : { storageTexture: { format: 'r32float', access: 'read-only' } }),
+        })),
+      })
+    ); // layout 0 sits exactly at the in-stage limit; layout 1 holds the `extra` bindings
+
+    const expectFailure = extra > 0; // the combined layouts then exceed the in-stage limit
+    t.expectValidationError(() => {
+      device.createPipelineLayout({ bindGroupLayouts });
+    }, expectFailure);
+  });
diff --git a/src/webgpu/compat/api/validation/render_pipeline/in_stage_limits.spec.ts b/src/webgpu/compat/api/validation/render_pipeline/in_stage_limits.spec.ts
new file mode 100644
index 000000000000..5e07c274958f
--- /dev/null
+++ b/src/webgpu/compat/api/validation/render_pipeline/in_stage_limits.spec.ts
@@ -0,0 +1,123 @@
+export const description = `
+Tests that, in compat mode, you can not create a render pipeline that uses
+more than the max in stage limit even if the per stage limit is higher.
+`;
+
+import { makeTestGroup } from '../../../../../common/framework/test_group.js';
+import { range } from '../../../../../common/util/util.js';
+import { RequiredLimitsTestMixin } from '../../../../gpu_test.js';
+import { CompatibilityTest } from '../../../compatibility_test.js';
+
+export const g = makeTestGroup(
+  RequiredLimitsTestMixin(CompatibilityTest, {
+    getRequiredLimits(adapter: GPUAdapter) {
+      return {
+        maxStorageBuffersInFragmentStage: adapter.limits.maxStorageBuffersInFragmentStage! / 2,
+        maxStorageBuffersInVertexStage: adapter.limits.maxStorageBuffersInVertexStage! / 2,
+        maxStorageBuffersPerShaderStage: adapter.limits.maxStorageBuffersPerShaderStage,
+        maxStorageTexturesInFragmentStage: adapter.limits.maxStorageTexturesInFragmentStage! / 2,
+        maxStorageTexturesInVertexStage: adapter.limits.maxStorageTexturesInVertexStage! / 2,
+        maxStorageTexturesPerShaderStage: adapter.limits.maxStorageTexturesPerShaderStage,
+      }; // in-stage limits: half the adapter value; per-stage limits: full adapter value
+    },
+    key() {
+      return `
+      maxStorageBuffersInFragmentStage/2,
+      maxStorageBuffersInVertexStage/2,
+      maxStorageTexturesInFragmentStage/2,
+      maxStorageTexturesInVertexStage/2,
+      maxStorageBuffersPerShaderStage
+      maxStorageTexturesPerShaderStage
+    `; // names every limit requested by getRequiredLimits() and the math applied to it
+    },
+  })
+);
+
+g.test('maxStorageBuffersTexturesInVertexFragmentStage')
+  .desc(
+    `
+      Tests that you can't use more than maxStorage(Buffers/Textures)In(Fragment/Vertex)Stage when
+      the limit is less than maxStorage(Buffers/Textures)PerShaderStage
+    `
+  )
+  .params(u =>
+    u
+      .combine('limit', [
+        'maxStorageBuffersInFragmentStage',
+        'maxStorageBuffersInVertexStage',
+        'maxStorageTexturesInFragmentStage',
+        'maxStorageTexturesInVertexStage',
+      ] as const)
+      .beginSubcases()
+      .combine('async', [false, true] as const)
+      .combine('extra', [0, 1] as const)
+  )
+  .fn(t => {
+    const { limit, extra, async } = t.params;
+    const { device } = t;
+
+    const isBuffer = limit.includes('Buffers'); // otherwise a storage-texture limit
+    const inStageLimit = device.limits[limit]!; // value reported by the device under test
+    const perStageLimitName = isBuffer
+      ? 'maxStorageBuffersPerShaderStage'
+      : 'maxStorageTexturesPerShaderStage';
+    const perStageLimit = device.limits[perStageLimitName];
+
+    t.debug(`${limit}(${inStageLimit}), ${perStageLimitName}(${perStageLimit})`);
+
+    t.skipIf(inStageLimit === 0, `${limit} is 0`);
+    t.skipIf(
+      !(inStageLimit < perStageLimit),
+      `${limit}(${inStageLimit}) is not less than ${perStageLimitName}(${perStageLimit})`
+    ); // the test only runs when the in-stage limit is the stricter of the two
+
+    const typeWGSLFn = isBuffer
+      ? (i: number) => `var<storage, read> v${i}: f32;`
+      : (i: number) => `var v${i}: texture_storage_2d<r32float, read>;`; // WGSL for resource i
+
+    const count = inStageLimit + extra; // number of resources declared and referenced
+    const code = `
+    ${range(count, i => `@group(0) @binding(${i}) ${typeWGSLFn(i)}`).join('\n')}
+
+    fn useResources() {
+      ${range(count, i => `_ = v${i};`).join('\n')}
+    }
+
+    @vertex fn vsNoUse() -> @builtin(position) vec4f {
+      return vec4f(0);
+    }
+
+    @vertex fn vsUse() -> @builtin(position) vec4f {
+      useResources();
+      return vec4f(0);
+    }
+
+    @fragment fn fsNoUse() -> @location(0) vec4f {
+      return vec4f(0);
+    }
+
+    @fragment fn fsUse() -> @location(0) vec4f {
+      useResources();
+      return vec4f(0);
+    }
+    `; // only the *Use entry points call useResources(); the *NoUse ones reference nothing
+
+    const module = device.createShaderModule({ code });
+
+    const isFragment = limit.includes('Fragment'); // which stage's limit is under test
+    const pipelineDescriptor: GPURenderPipelineDescriptor = {
+      layout: 'auto',
+      vertex: {
+        module,
+        entryPoint: isFragment ? 'vsNoUse' : 'vsUse',
+      },
+      fragment: {
+        module,
+        entryPoint: isFragment ? 'fsUse' : 'fsNoUse',
+        targets: [{ format: 'rgba8unorm' }],
+      },
+    }; // the stage under test gets the *Use entry point, the other stage the *NoUse one
+
+    const success = extra === 0; // one resource over the in-stage limit must fail
+    t.doCreateRenderPipelineTest(async, success, pipelineDescriptor);
+  });
diff --git a/src/webgpu/gpu_test.ts b/src/webgpu/gpu_test.ts
index 53b286d8103c..1e7fac4a984e 100644
--- a/src/webgpu/gpu_test.ts
+++ b/src/webgpu/gpu_test.ts
@@ -1298,63 +1298,145 @@ function getAdapterLimitsAsDeviceRequiredLimits(adapter: GPUAdapter) {
   return requiredLimits;
 }
 
-function setAllLimitsToAdapterLimits(
+/**
+ * Removes limits that don't exist on the adapter.
+ * A test might request a new limit that not all implementations support. The test itself
+ * should check the requested limit using code that expects undefined.
+ *
+ * ```ts
+ *    t.skipIf(limit < 2);     // BAD! Doesn't skip if unsupported because undefined is never less than 2.
+ *    t.skipIf(!(limit >= 2)); // Good. Skips if limit is not >= 2. undefined is not >= 2.
+ * ```
+ */
+function removeNonExistantLimits(adapter: GPUAdapter, limits: Record<string, GPUSize64>) {
+  const filteredLimits: Record<string, GPUSize64> = {};
+  const adapterLimits = adapter.limits as unknown as Record<string, GPUSize64>;
+  for (const [limit, value] of Object.entries(limits)) {
+    if (adapterLimits[limit] !== undefined) {
+      filteredLimits[limit] = value;
+    }
+  }
+  return filteredLimits;
+}
+
+function applyLimitsToDescriptor(
   adapter: GPUAdapter,
-  desc: CanonicalDeviceDescriptor | undefined
+  desc: CanonicalDeviceDescriptor | undefined,
+  getRequiredLimits: (adapter: GPUAdapter) => Record<string, number>
 ) {
   const descWithMaxLimits: CanonicalDeviceDescriptor = {
     requiredFeatures: [],
     defaultQueue: {},
     ...desc,
-    requiredLimits: getAdapterLimitsAsDeviceRequiredLimits(adapter),
+    requiredLimits: removeNonExistantLimits(adapter, getRequiredLimits(adapter)),
   };
   return descWithMaxLimits;
 }
 
 /**
- * Used by MaxLimitsTest to request a device with all the max limits of the adapter.
+ * Used by RequiredLimitsTestMixin to allow you to request specific limits
+ *
+ * Supply a `getRequiredLimits` function that, given a GPUAdapter, returns the limits
+ * you want.
+ *
+ * Also supply a key function that returns a device key. You should generally return
+ * the name of each limit you request and any math you did on the limit. For example
+ *
+ * ```js
+ * {
+ *   getRequiredLimits(adapter) {
+ *     return {
+ *       maxBindGroups: adapter.limits.maxBindGroups / 2,
+ *       maxTextureDimension2D: Math.max(adapter.limits.maxTextureDimension2D, 8192),
+ *     };
+ *   },
+ *   key() {
+ *     return `
+ *       maxBindGroups / 2,
+ *       max(maxTextureDimension2D, 8192),
+ *     `;
+ *   },
+ * }
+ * ```
+ *
+ * It's important to note that the key is used BEFORE knowing the adapter limits to get a device
+ * that was already created with the same key.
  */
-export class MaxLimitsGPUTestSubcaseBatchState extends GPUTestSubcaseBatchState {
+interface RequiredLimitsHelper {
+  getRequiredLimits: (adapter: GPUAdapter) => Record<string, number>;
+  key(): string;
+}
+
+/**
+ * Used by RequiredLimitsTestMixin to request a device with the limits the helper asks for.
+ */
+export class RequiredLimitsGPUTestSubcaseBatchState extends GPUTestSubcaseBatchState {
+  private requiredLimitsHelper: RequiredLimitsHelper;
+  constructor(
+    protected override readonly recorder: TestCaseRecorder,
+    public override readonly params: TestParams,
+    requiredLimitsHelper: RequiredLimitsHelper
+  ) {
+    super(recorder, params);
+    this.requiredLimitsHelper = requiredLimitsHelper;
+  }
   override selectDeviceOrSkipTestCase(
     descriptor: DeviceSelectionDescriptor,
     descriptorModifier?: DescriptorModifier
   ): void {
+    const requiredLimitsHelper = this.requiredLimitsHelper;
     const mod: DescriptorModifier = {
       descriptorModifier(adapter: GPUAdapter, desc: CanonicalDeviceDescriptor | undefined) {
         desc = descriptorModifier?.descriptorModifier
           ? descriptorModifier.descriptorModifier(adapter, desc)
           : desc;
-        return setAllLimitsToAdapterLimits(adapter, desc);
+        return applyLimitsToDescriptor(adapter, desc, requiredLimitsHelper.getRequiredLimits);
       },
       keyModifier(baseKey: string) {
-        return `${baseKey}:MaxLimits`;
+        return `${baseKey}:${requiredLimitsHelper.key()}`;
       },
     };
     super.selectDeviceOrSkipTestCase(initUncanonicalizedDeviceDescriptor(descriptor), mod);
   }
 }
 
-export type MaxLimitsTestMixinType = {
+export type RequiredLimitsTestMixinType = {
   // placeholder. Change to an interface if we need MaxLimits specific methods.
 };
 
-export function MaxLimitsTestMixin<F extends FixtureClass<GPUTestBase>>(
-  Base: F
-): FixtureClassWithMixin<F, MaxLimitsTestMixinType> {
-  class MaxLimitsImpl
+/**
+ * A test mixin to make it relatively easy to request specific limits.
+ */
+export function RequiredLimitsTestMixin<F extends FixtureClass<GPUTestBase>>(
+  Base: F,
+  requiredLimitsHelper: RequiredLimitsHelper
+): FixtureClassWithMixin<F, RequiredLimitsTestMixinType> {
+  class RequiredLimitsImpl
     extends (Base as FixtureClassInterface<GPUTestBase>)
-    implements MaxLimitsTestMixinType
+    implements RequiredLimitsTestMixinType
   {
     //
     public static override MakeSharedState(
       recorder: TestCaseRecorder,
       params: TestParams
     ): GPUTestSubcaseBatchState {
-      return new MaxLimitsGPUTestSubcaseBatchState(recorder, params);
+      return new RequiredLimitsGPUTestSubcaseBatchState(recorder, params, requiredLimitsHelper);
     }
   }
 
-  return MaxLimitsImpl as unknown as FixtureClassWithMixin<F, MaxLimitsTestMixinType>;
+  return RequiredLimitsImpl as unknown as FixtureClassWithMixin<F, RequiredLimitsTestMixinType>;
+}
+
+/**
+ * Test mixin that requests a device with every limit set to the adapter's own limit value.
+ */
+export function MaxLimitsTestMixin<F extends FixtureClass<GPUTestBase>>(Base: F) {
+  return RequiredLimitsTestMixin(Base, {
+    getRequiredLimits: getAdapterLimitsAsDeviceRequiredLimits,
+    key() {
+      return 'AllLimits'; // constant key: the same string for every test using this mixin
+    },
+  });
 }
 
 /**
diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json
index 618753748601..6b8ced65d043 100644
--- a/src/webgpu/listing_meta.json
+++ b/src/webgpu/listing_meta.json
@@ -867,6 +867,8 @@
   "webgpu:api,validation,texture,rg11b10ufloat_renderable:create_texture:*": { "subcaseMS": 12.700 },
   "webgpu:compat,api,validation,createBindGroup:viewDimension_matches_textureBindingViewDimension:*": { "subcaseMS": 6.523 },
   "webgpu:compat,api,validation,createBindGroupLayout:unsupportedStorageTextureFormats:*": { "subcaseMS": 0.601 },
+  "webgpu:compat,api,validation,createBindGroupLayout_limits:maxStorageBuffersTexturesInVertexFragmentStage:*": { "subcaseMS": 21.765 },
+  "webgpu:compat,api,validation,createPipelineLayout:maxStorageBuffersTexturesInVertexFragmentStage:*": { "subcaseMS": 7.776 },
   "webgpu:compat,api,validation,encoding,cmds,copyTextureToBuffer:compressed:*": { "subcaseMS": 202.929 },
   "webgpu:compat,api,validation,encoding,cmds,copyTextureToTexture:compressed:*": { "subcaseMS": 0.600 },
   "webgpu:compat,api,validation,encoding,cmds,copyTextureToTexture:multisample:*": { "subcaseMS": 0.600 },
@@ -874,8 +876,11 @@
   "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,compute_pass,used:*": { "subcaseMS": 49.405 },
   "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,render_pass,unused:*": { "subcaseMS": 16.002 },
   "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,render_pass,used:*": { "subcaseMS": 0.000 },
+  "webgpu:compat,api,validation,pipeline_creation:depth_textures:*": { "subcaseMS": 335.073 },
+  "webgpu:compat,api,validation,pipeline_creation:texture_sampler_combos:*": { "subcaseMS": 2072.005 },
   "webgpu:compat,api,validation,render_pipeline,depth_stencil_state:depthBiasClamp:*": { "subcaseMS": 1.604 },
   "webgpu:compat,api,validation,render_pipeline,fragment_state:colorState:*": { "subcaseMS": 32.604 },
+  "webgpu:compat,api,validation,render_pipeline,in_stage_limits:maxStorageBuffersTexturesInVertexFragmentStage:*": { "subcaseMS": 275.162 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:interpolate:*": { "subcaseMS": 3.488 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_index:*": { "subcaseMS": 0.487 },
   "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_mask:*": { "subcaseMS": 0.408 },
diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts
index f8d9ec9a362b..8ecd864ed6b1 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts
@@ -496,7 +496,7 @@ g.test('fragment,all_active')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>; // unused
+var<uniform> inputs : array<vec4u, 1>; // unused
 
 @fragment
 fn main(
diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts
index 7e754152c9b5..1e28e23a692a 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts
@@ -506,7 +506,7 @@ g.test('fragment,all_active')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>; // unused
+var<uniform> inputs : array<vec4u, 1>; // unused
 
 @fragment
 fn main(
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts
index a4f5b04f0529..6763c9de8a98 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts
@@ -504,7 +504,7 @@ g.test('fragment')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage> inputs : array<u32>;
+var<uniform> inputs : array<vec4u, 1>;
 
 @fragment
 fn main(
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts
index 5b8515c05762..86feaa3b5165 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts
@@ -342,7 +342,7 @@ g.test('fragment,all_active')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>;
+var<uniform> inputs : array<vec4u, ${inputData.length}>;
 
 @fragment
 fn main(
@@ -357,7 +357,7 @@ fn main(
   let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1);
   let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
   let in_range = x_in_range && y_in_range;
-  let input = select(1u, inputs[linear], in_range);
+  let input = select(1u, inputs[linear].x, in_range);
 
   let res = select(0u, 1u, subgroupAll(bool(input)));
   return vec2u(res, subgroup_id);
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts
index cad48235eceb..7fbdee0cfd68 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts
@@ -342,7 +342,7 @@ g.test('fragment,all_active')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>;
+var<uniform> inputs : array<vec4u, ${inputData.length}>;
 
 @fragment
 fn main(
@@ -357,7 +357,7 @@ fn main(
   let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1);
   let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
   let in_range = x_in_range && y_in_range;
-  let input = select(0u, inputs[linear], in_range);
+  let input = select(0u, inputs[linear].x, in_range);
 
   let res = select(0u, 1u, subgroupAny(bool(input)));
   return vec2u(res, subgroup_id);
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts
index b134e5db633d..14503fdf46b6 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts
@@ -561,7 +561,7 @@ g.test('fragment,all_active')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>;
+var<uniform> inputs : array<vec4u, ${inputData.length}>;
 
 @fragment
 fn main(
@@ -575,7 +575,7 @@ fn main(
   let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1);
   let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
   let in_range = x_in_range && y_in_range;
-  let input = select(${ident}, inputs[linear], in_range);
+  let input = select(${ident}, inputs[linear].x, in_range);
 
   let res = ${t.params.op}(input);
   return vec2u(res, subgroup_id);
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts
index 211cf1285340..cabc465af54f 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts
@@ -676,14 +676,16 @@ g.test('fragment')
 
     const broadcast =
       t.params.id === 0
-        ? `subgroupBroadcastFirst(input[linear])`
-        : `subgroupBroadcast(input[linear], ${t.params.id})`;
+        ? `subgroupBroadcastFirst(input[linear].x)`
+        : `subgroupBroadcast(input[linear].x, ${t.params.id})`;
+    const texels = t.params.size[0] * t.params.size[1];
+    const inputData = new Uint32Array([...iterRange(texels, x => x)]);
 
     const fsShader = `
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage> input : array<u32>;
+var<uniform> input : array<vec4u, ${inputData.length}>;
 
 @fragment
 fn main(
@@ -696,8 +698,6 @@ fn main(
   return vec4u(${broadcast}, id, size, linear);
 }`;
 
-    const texels = t.params.size[0] * t.params.size[1];
-    const inputData = new Uint32Array([...iterRange(texels, x => x)]);
     await runFragmentTest(
       t,
       t.params.format,
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupElect.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupElect.spec.ts
index 074d8545dea7..23b86fe2a378 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupElect.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupElect.spec.ts
@@ -351,7 +351,7 @@ g.test('fragment')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>; // unused
+var<uniform> inputs : array<vec4u, 1>; // unused
 
 @fragment
 fn main(
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts
index f070632488c0..d9bb796c4934 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts
@@ -598,7 +598,7 @@ g.test('fragment')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>;
+var<uniform> inputs : array<vec4u, ${inputData.length}>;
 
 @fragment
 fn main(
@@ -612,7 +612,7 @@ fn main(
   let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1);
   let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
   let in_range = x_in_range && y_in_range;
-  let input = select(${identity}, inputs[linear], in_range);
+  let input = select(${identity}, inputs[linear].x, in_range);
 
   let res = ${t.params.op}(input);
   return vec2u(res, subgroup_id);
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts
index 2a6a8648a333..210a138b77fc 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts
@@ -528,11 +528,29 @@ g.test('fragment')
     t.skipIf(innerTexels < subgroupMinSize, 'Too few texels to be reliable');
     t.skipIf(subgroupMaxSize === 4 && t.params.quadIndex !== 0, 'Duplicate test');
 
+    // Max possible subgroup size is 128 which is too large so we reduce the
+    // multiplication by a factor of 4. We populate one element of each quad with a
+    // non-identity value. subgroupMaxSize of 4 is a special case where all
+    // elements are populated.
+    const numInputs = t.params.size[0] * t.params.size[1];
+    const inputData = new Uint32Array([
+      ...iterRange(numInputs, x => {
+        if (subgroupMaxSize === 4) {
+          return 2;
+        } else {
+          const row = Math.floor(x / t.params.size[0]);
+          const col = x % t.params.size[0];
+          const idx = (col % 2) + 2 * (row % 2);
+          return idx === t.params.quadIndex ? 2 : kIdentity;
+        }
+      }),
+    ]);
+
     const fsShader = `
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage> inputs : array<u32>;
+var<uniform> inputs : array<vec4u, ${inputData.length}>;
 
 @fragment
 fn main(
@@ -547,28 +565,10 @@ fn main(
   let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1);
   let in_range = x_in_range && y_in_range;
 
-  let value = select(${kIdentity}, inputs[linear], in_range);
+  let value = select(${kIdentity}, inputs[linear].x, in_range);
   return vec4u(${t.params.op}(value), id, subgroup_id, 0);
 };`;
 
-    // Max possible subgroup size is 128 which is too large so we reduce the
-    // multiplication by a factor of 4. We populate one element of each quad with a
-    // non-identity value. subgroupMaxSize of 4 is a special case where all
-    // elements are populated.
-    const numInputs = t.params.size[0] * t.params.size[1];
-    const inputData = new Uint32Array([
-      ...iterRange(numInputs, x => {
-        if (subgroupMaxSize === 4) {
-          return 2;
-        } else {
-          const row = Math.floor(x / t.params.size[0]);
-          const col = x % t.params.size[0];
-          const idx = (col % 2) + 2 * (row % 2);
-          return idx === t.params.quadIndex ? 2 : kIdentity;
-        }
-      }),
-    ]);
-
     await runFragmentTest(
       t,
       t.params.format,
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts
index 73e1fe6bcaa7..60c95edbc334 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts
@@ -893,7 +893,7 @@ g.test('fragment')
 enable subgroups;
 
 @group(0) @binding(0)
-var<storage, read_write> inputs : array<u32>; // unused
+var<uniform> inputs : array<vec4u, 1>; // unused
 
 @fragment
 fn main(
diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts
index 71852c6a315c..3566490d7ca3 100644
--- a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts
+++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts
@@ -521,9 +521,19 @@ fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f {
   const byteLength = bytesPerRow * blocksPerColumn;
   const uintLength = byteLength / 4;
 
+  const expandedInputData = new (
+    inputData instanceof Uint32Array
+      ? Uint32Array
+      : inputData instanceof Float32Array
+      ? Float32Array
+      : Float16Array
+  )(inputData.length * 4);
+  for (let i = 0; i < inputData.length; ++i) {
+    expandedInputData[i * 4] = inputData[i];
+  }
   const buffer = t.makeBufferWithContents(
-    inputData,
-    GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
+    expandedInputData,
+    GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
   );
 
   const bg = t.device.createBindGroup({