From 4affbefcfc2c063c7ca71b4b0b18aa23dcfb937f Mon Sep 17 00:00:00 2001 From: Greggman Date: Thu, 5 Dec 2024 02:43:12 -0800 Subject: [PATCH 1/5] Fix texture builtin weight reading (#4078) The code was using the wrong size for compute stages. --- .../shader/execution/expression/call/builtin/texture_utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts index 2e4123c6cfeb..ba8a58ae861c 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts @@ -522,7 +522,7 @@ export async function queryMipLevelMixWeightsForDevice(t: GPUTest, stage: Shader pass.setBindGroup(0, createBindGroup(pipeline)); pass.dispatchWorkgroups(kMipLevelWeightSteps + 1); pass.end(); - encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); + encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, storageBuffer.size); break; } case 'fragment': { From 81c809751682a4bf9d1252d6cb8ee51a14cc8174 Mon Sep 17 00:00:00 2001 From: David Neto Date: Thu, 5 Dec 2024 10:32:03 -0500 Subject: [PATCH 2/5] Use subgroupMinSize, subgroupMaxSize from GPUAdapterInfo (#4058) --- .../call/builtin/subgroupBitwise.spec.ts | 8 +++---- .../call/builtin/subgroupBroadcast.spec.ts | 18 +++++++------- .../shader_io/compute_builtins.spec.ts | 12 +++++----- .../shader_io/fragment_builtins.spec.ts | 24 +++++++++---------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts index a8f423e1f729..b134e5db633d 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts @@ -547,12 
+547,12 @@ g.test('fragment,all_active') .fn(async t => { const numInputs = t.params.size[0] * t.params.size[1]; - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; } - const { minSubgroupSize } = t.device.limits as SubgroupLimits; + const { subgroupMinSize } = t.device.adapterInfo as SubgroupProperties; const innerTexels = (t.params.size[0] - 1) * (t.params.size[1] - 1); - t.skipIf(innerTexels < minSubgroupSize, 'Too few texels to be reliable'); + t.skipIf(innerTexels < subgroupMinSize, 'Too few texels to be reliable'); const inputData = generateInputData(t.params.case, numInputs, identity(t.params.op)); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index 5c9650778a68..50dac8c4d894 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -444,12 +444,12 @@ g.test('compute,split') const testcase = kPredicateCases[t.params.predicate]; const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; } - const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; - for (let size = minSubgroupSize; size <= maxSubgroupSize; size *= 2) { + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + for (let size = subgroupMinSize; size <= subgroupMaxSize; size *= 2) { t.skipIf(!testcase.filter(t.params.id, size), 'Skipping potential undefined behavior'); } @@ -669,11 +669,11 @@ g.test('fragment') }) .fn(async t => { const innerTexels = 
(t.params.size[0] - 1) * (t.params.size[1] - 1); - interface SubgroupLimits extends GPUSupportedLimits { - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMaxSize: number; } - const { maxSubgroupSize } = t.device.limits as SubgroupLimits; - t.skipIf(innerTexels < maxSubgroupSize, 'Too few texels to be reliable'); + const { subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + t.skipIf(innerTexels < subgroupMaxSize, 'Too few texels to be reliable'); const broadcast = t.params.id === 0 diff --git a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts index a08d6eb39533..f264052f8cb5 100644 --- a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts @@ -398,11 +398,11 @@ g.test('subgroup_size') t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); }) .fn(async t => { - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; } - const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; const wgx = t.params.sizes[0]; const wgy = t.params.sizes[1]; @@ -518,8 +518,8 @@ fn main(@builtin(subgroup_size) size : u32, checkSubgroupSizeConsistency( sizesData, compareData, - minSubgroupSize, - maxSubgroupSize, + subgroupMinSize, + subgroupMaxSize, wgThreads ) ); diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts index bb7f3b113e26..5d10a3dafe2b 100644 --- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts @@ -1655,16 +1655,16 
@@ g.test('subgroup_size') t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); }) .fn(async t => { - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; } - const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; const fsShader = ` enable subgroups; -const maxSubgroupSize = ${kMaximiumSubgroupSize}u; +const subgroupMaxSize = ${kMaximiumSubgroupSize}u; const noError = ${kSubgroupShaderNoError}u; const width = ${t.params.size[0]}; @@ -1686,7 +1686,7 @@ fn fsMain( var subgroupSizeBallotedInvocations: u32 = 0u; var ballotedSubgroupSize: u32 = 0u; - for (var i: u32 = 0; i <= maxSubgroupSize; i++) { + for (var i: u32 = 0; i <= subgroupMaxSize; i++) { let ballotSubgroupSizeEqualI = countOneBits(subgroupBallot(sg_size == i)); let countSubgroupSizeEqualI = ballotSubgroupSizeEqualI.x + ballotSubgroupSizeEqualI.y + ballotSubgroupSizeEqualI.z + ballotSubgroupSizeEqualI.w; subgroupSizeBallotedInvocations += countSubgroupSizeEqualI; @@ -1716,8 +1716,8 @@ fn fsMain( return checkSubgroupSizeConsistency( data, t.params.format, - minSubgroupSize, - maxSubgroupSize, + subgroupMinSize, + subgroupMaxSize, t.params.size[0], t.params.size[1] ); @@ -1816,7 +1816,7 @@ enable subgroups; const width = ${t.params.size[0]}; const height = ${t.params.size[1]}; -const maxSubgroupSize = ${kMaximiumSubgroupSize}u; +const subgroupMaxSize = ${kMaximiumSubgroupSize}u; // A non-zero magic number indicating no expectation error, in order to prevent the // false no-error result from zero-initialization. 
const noError = ${kSubgroupShaderNoError}u; @@ -1830,8 +1830,8 @@ fn fsMain( var error: u32 = noError; - // Validate that reported subgroup size is no larger than maxSubgroupSize - if (sg_size > maxSubgroupSize) { + // Validate that reported subgroup size is no larger than subgroupMaxSize + if (sg_size > subgroupMaxSize) { error++; } @@ -1843,7 +1843,7 @@ fn fsMain( // Validate that each subgroup id is assigned to at most one active invocation // in the subgroup var countAssignedId: u32 = 0u; - for (var i: u32 = 0; i < maxSubgroupSize; i++) { + for (var i: u32 = 0; i < subgroupMaxSize; i++) { let ballotIdEqualsI = countOneBits(subgroupBallot(id == i)); let countInvocationIdEqualsI = ballotIdEqualsI.x + ballotIdEqualsI.y + ballotIdEqualsI.z + ballotIdEqualsI.w; // Validate an id assigned at most once From ed1b78a79ad5220701330e7311b528f174097dfe Mon Sep 17 00:00:00 2001 From: David Neto Date: Thu, 5 Dec 2024 11:32:43 -0500 Subject: [PATCH 3/5] Revert "Use subgroupMinSize, subgroupMaxSize from GPUAdapterInfo" (#4079) This reverts commit aa0b241a871ba24c35d38e0df97705e231298280. 
--- .../call/builtin/subgroupBitwise.spec.ts | 8 +++---- .../call/builtin/subgroupBroadcast.spec.ts | 18 +++++++------- .../shader_io/compute_builtins.spec.ts | 12 +++++----- .../shader_io/fragment_builtins.spec.ts | 24 +++++++++---------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts index b134e5db633d..a8f423e1f729 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts @@ -547,12 +547,12 @@ g.test('fragment,all_active') .fn(async t => { const numInputs = t.params.size[0] * t.params.size[1]; - interface SubgroupProperties extends GPUAdapterInfo { - subgroupMinSize: number; + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; } - const { subgroupMinSize } = t.device.adapterInfo as SubgroupProperties; + const { minSubgroupSize } = t.device.limits as SubgroupLimits; const innerTexels = (t.params.size[0] - 1) * (t.params.size[1] - 1); - t.skipIf(innerTexels < subgroupMinSize, 'Too few texels to be reliable'); + t.skipIf(innerTexels < minSubgroupSize, 'Too few texels to be reliable'); const inputData = generateInputData(t.params.case, numInputs, identity(t.params.op)); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index 50dac8c4d894..5c9650778a68 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -444,12 +444,12 @@ g.test('compute,split') const testcase = kPredicateCases[t.params.predicate]; const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; - interface SubgroupProperties extends 
GPUAdapterInfo { - subgroupMinSize: number; - subgroupMaxSize: number; + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; + maxSubgroupSize: number; } - const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; - for (let size = subgroupMinSize; size <= subgroupMaxSize; size *= 2) { + const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + for (let size = minSubgroupSize; size <= maxSubgroupSize; size *= 2) { t.skipIf(!testcase.filter(t.params.id, size), 'Skipping potential undefined behavior'); } @@ -669,11 +669,11 @@ g.test('fragment') }) .fn(async t => { const innerTexels = (t.params.size[0] - 1) * (t.params.size[1] - 1); - interface SubgroupProperties extends GPUAdapterInfo { - subgroupMaxSize: number; + interface SubgroupLimits extends GPUSupportedLimits { + maxSubgroupSize: number; } - const { subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; - t.skipIf(innerTexels < subgroupMaxSize, 'Too few texels to be reliable'); + const { maxSubgroupSize } = t.device.limits as SubgroupLimits; + t.skipIf(innerTexels < maxSubgroupSize, 'Too few texels to be reliable'); const broadcast = t.params.id === 0 diff --git a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts index f264052f8cb5..a08d6eb39533 100644 --- a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts @@ -398,11 +398,11 @@ g.test('subgroup_size') t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); }) .fn(async t => { - interface SubgroupProperties extends GPUAdapterInfo { - subgroupMinSize: number; - subgroupMaxSize: number; + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; + maxSubgroupSize: number; } - const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + const { 
minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; const wgx = t.params.sizes[0]; const wgy = t.params.sizes[1]; @@ -518,8 +518,8 @@ fn main(@builtin(subgroup_size) size : u32, checkSubgroupSizeConsistency( sizesData, compareData, - subgroupMinSize, - subgroupMaxSize, + minSubgroupSize, + maxSubgroupSize, wgThreads ) ); diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts index 5d10a3dafe2b..bb7f3b113e26 100644 --- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts @@ -1655,16 +1655,16 @@ g.test('subgroup_size') t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); }) .fn(async t => { - interface SubgroupProperties extends GPUAdapterInfo { - subgroupMinSize: number; - subgroupMaxSize: number; + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; + maxSubgroupSize: number; } - const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; const fsShader = ` enable subgroups; -const subgroupMaxSize = ${kMaximiumSubgroupSize}u; +const maxSubgroupSize = ${kMaximiumSubgroupSize}u; const noError = ${kSubgroupShaderNoError}u; const width = ${t.params.size[0]}; @@ -1686,7 +1686,7 @@ fn fsMain( var subgroupSizeBallotedInvocations: u32 = 0u; var ballotedSubgroupSize: u32 = 0u; - for (var i: u32 = 0; i <= subgroupMaxSize; i++) { + for (var i: u32 = 0; i <= maxSubgroupSize; i++) { let ballotSubgroupSizeEqualI = countOneBits(subgroupBallot(sg_size == i)); let countSubgroupSizeEqualI = ballotSubgroupSizeEqualI.x + ballotSubgroupSizeEqualI.y + ballotSubgroupSizeEqualI.z + ballotSubgroupSizeEqualI.w; subgroupSizeBallotedInvocations += countSubgroupSizeEqualI; @@ -1716,8 +1716,8 @@ fn fsMain( return checkSubgroupSizeConsistency( data, 
t.params.format, - subgroupMinSize, - subgroupMaxSize, + minSubgroupSize, + maxSubgroupSize, t.params.size[0], t.params.size[1] ); @@ -1816,7 +1816,7 @@ enable subgroups; const width = ${t.params.size[0]}; const height = ${t.params.size[1]}; -const subgroupMaxSize = ${kMaximiumSubgroupSize}u; +const maxSubgroupSize = ${kMaximiumSubgroupSize}u; // A non-zero magic number indicating no expectation error, in order to prevent the // false no-error result from zero-initialization. const noError = ${kSubgroupShaderNoError}u; @@ -1830,8 +1830,8 @@ fn fsMain( var error: u32 = noError; - // Validate that reported subgroup size is no larger than subgroupMaxSize - if (sg_size > subgroupMaxSize) { + // Validate that reported subgroup size is no larger than maxSubgroupSize + if (sg_size > maxSubgroupSize) { error++; } @@ -1843,7 +1843,7 @@ fn fsMain( // Validate that each subgroup id is assigned to at most one active invocation // in the subgroup var countAssignedId: u32 = 0u; - for (var i: u32 = 0; i < subgroupMaxSize; i++) { + for (var i: u32 = 0; i < maxSubgroupSize; i++) { let ballotIdEqualsI = countOneBits(subgroupBallot(id == i)); let countInvocationIdEqualsI = ballotIdEqualsI.x + ballotIdEqualsI.y + ballotIdEqualsI.z + ballotIdEqualsI.w; // Validate an id assigned at most once From b05322e2845dd138b8b395d75bbd4a0371ec0799 Mon Sep 17 00:00:00 2001 From: petermcneeleychromium <96925679+petermcneeleychromium@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:53:17 -0500 Subject: [PATCH 4/5] Move towards deprecating subgroups_f16 enable (feature (#4072) * Initial commit * Use always compiles * Remove subgroup f16 requirement check * Update listing file --------- Co-authored-by: Peter McNeeley --- src/webgpu/listing_meta.json | 6 ----- .../call/builtin/quadBroadcast.spec.ts | 22 ------------------- .../expression/call/builtin/quadSwap.spec.ts | 22 ------------------- .../call/builtin/subgroupBroadcast.spec.ts | 22 ------------------- .../builtin/subgroupBroadcastFirst.spec.ts 
| 22 ------------------- .../call/builtin/subgroupMinMax.spec.ts | 22 ------------------- .../call/builtin/subgroupShuffle.spec.ts | 22 ------------------- 7 files changed, 138 deletions(-) diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index 6b3e8a361e3d..ce8fb3a2bcb4 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -2284,14 +2284,12 @@ "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_values:*": { "subcaseMS": 7.315 }, "webgpu:shader,validation,expression,call,builtin,quadBroadcast:must_use:*": { "subcaseMS": 41.658 }, "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups:*": { "subcaseMS": 42.565 }, - "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups_f16:*": { "subcaseMS": 44.998 }, "webgpu:shader,validation,expression,call,builtin,quadBroadcast:return_type:*": { "subcaseMS": 363.607 }, "webgpu:shader,validation,expression,call,builtin,quadBroadcast:stage:*": { "subcaseMS": 3.050 }, "webgpu:shader,validation,expression,call,builtin,quadSwap:data_type:*": { "subcaseMS": 89.379 }, "webgpu:shader,validation,expression,call,builtin,quadSwap:early_eval:*": { "subcaseMS": 108.243 }, "webgpu:shader,validation,expression,call,builtin,quadSwap:must_use:*": { "subcaseMS": 5.557 }, "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups:*": { "subcaseMS": 113.624 }, - "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups_f16:*": { "subcaseMS": 12.712 }, "webgpu:shader,validation,expression,call,builtin,quadSwap:return_type:*": { "subcaseMS": 1424.551 }, "webgpu:shader,validation,expression,call,builtin,quadSwap:stage:*": { "subcaseMS": 7.664 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:args:*": { "subcaseMS": 1.000 }, @@ -2377,14 +2375,12 @@ "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_values:*": { "subcaseMS": 7.763 }, 
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:must_use:*": { "subcaseMS": 232.030 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups:*": { "subcaseMS": 47.231 }, - "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups_f16:*": { "subcaseMS": 38.503 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:return_type:*": { "subcaseMS": 496.031 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:stage:*": { "subcaseMS": 3.715 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:data_type:*": { "subcaseMS": 32.168 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:early_eval:*": { "subcaseMS": 57.922 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:must_use:*": { "subcaseMS": 36.296 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups:*": { "subcaseMS": 42.522 }, - "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups_f16:*": { "subcaseMS": 47.111 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:return_type:*": { "subcaseMS": 402.558 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:stage:*": { "subcaseMS": 2.869 }, "webgpu:shader,validation,expression,call,builtin,subgroupElect:data_type:*": { "subcaseMS": 72.441 }, @@ -2397,7 +2393,6 @@ "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:early_eval:*": { "subcaseMS": 551.671 }, "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:must_use:*": { "subcaseMS": 4.403 }, "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups:*": { "subcaseMS": 87.208 }, - "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups_f16:*": { "subcaseMS": 25.190 }, 
"webgpu:shader,validation,expression,call,builtin,subgroupMinMax:return_type:*": { "subcaseMS": 911.454 }, "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:stage:*": { "subcaseMS": 6.395 }, "webgpu:shader,validation,expression,call,builtin,subgroupMul:data_type:*": { "subcaseMS": 45.396 }, @@ -2412,7 +2407,6 @@ "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:param2_early_eval:*": { "subcaseMS": 133.389 }, "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:param2_type:*": { "subcaseMS": 88.305 }, "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups:*": { "subcaseMS": 102.779 }, - "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups_f16:*": { "subcaseMS": 13.121 }, "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:return_type:*": { "subcaseMS": 1930.309 }, "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:stage:*": { "subcaseMS": 9.527 }, "webgpu:shader,validation,expression,call,builtin,tan:args:*": { "subcaseMS": 43.560 }, diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts index f96d46aad7d7..80b1c2a38a69 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts @@ -30,28 +30,6 @@ fn foo() { t.expectCompileResult(t.params.enable, wgsl); }); -g.test('requires_subgroups_f16') - .desc('Validates that the subgroups feature is required') - .params(u => u.combine('enable', [false, true] as const)) - .beforeAllSubcases(t => { - const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; - if (t.params.enable) { - features.push('subgroups-f16' as GPUFeatureName); - } - t.selectDeviceOrSkipTestCase(features); - }) - .fn(t => { - const wgsl = ` -enable f16; -enable subgroups; 
-${t.params.enable ? 'enable subgroups_f16;' : ''} -fn foo() { - _ = quadBroadcast(0h, 0); -}`; - - t.expectCompileResult(t.params.enable, wgsl); - }); - const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); const kStages: Record = { diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts index 3812ba057ed6..af5c397c52ae 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts @@ -32,28 +32,6 @@ fn foo() { t.expectCompileResult(t.params.enable, wgsl); }); -g.test('requires_subgroups_f16') - .desc('Validates that the subgroups feature is required') - .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) - .beforeAllSubcases(t => { - const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; - if (t.params.enable) { - features.push('subgroups-f16' as GPUFeatureName); - } - t.selectDeviceOrSkipTestCase(features); - }) - .fn(t => { - const wgsl = ` -enable f16; -enable subgroups; -${t.params.enable ? 
'enable subgroups_f16;' : ''} -fn foo() { - _ = ${t.params.op}(0h); -}`; - - t.expectCompileResult(t.params.enable, wgsl); - }); - const kStages: Record string> = { constant: (op: string) => { return ` diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts index 33e5fd8622af..5feef1d2811b 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts @@ -30,28 +30,6 @@ fn foo() { t.expectCompileResult(t.params.enable, wgsl); }); -g.test('requires_subgroups_f16') - .desc('Validates that the subgroups feature is required') - .params(u => u.combine('enable', [false, true] as const)) - .beforeAllSubcases(t => { - const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; - if (t.params.enable) { - features.push('subgroups-f16' as GPUFeatureName); - } - t.selectDeviceOrSkipTestCase(features); - }) - .fn(t => { - const wgsl = ` -enable f16; -enable subgroups; -${t.params.enable ? 
'enable subgroups_f16;' : ''} -fn foo() { - _ = subgroupBroadcast(0h, 0); -}`; - - t.expectCompileResult(t.params.enable, wgsl); - }); - const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); const kStages: Record = { diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts index 4525b6b97ef8..6f35c3350e04 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts @@ -25,28 +25,6 @@ fn foo() { t.expectCompileResult(t.params.enable, wgsl); }); -g.test('requires_subgroups_f16') - .desc('Validates that the subgroups feature is required') - .params(u => u.combine('enable', [false, true] as const)) - .beforeAllSubcases(t => { - const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; - if (t.params.enable) { - features.push('subgroups-f16' as GPUFeatureName); - } - t.selectDeviceOrSkipTestCase(features); - }) - .fn(t => { - const wgsl = ` -enable f16; -enable subgroups; -${t.params.enable ? 
'enable subgroups_f16;' : ''} -fn foo() { - _ = subgroupBroadcastFirst(0h); -}`; - - t.expectCompileResult(t.params.enable, wgsl); - }); - const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); const kStages: Record = { diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts index 84c1860019ee..566fdc665ecc 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts @@ -32,28 +32,6 @@ fn foo() { t.expectCompileResult(t.params.enable, wgsl); }); -g.test('requires_subgroups_f16') - .desc('Validates that the subgroups feature is required') - .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) - .beforeAllSubcases(t => { - const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; - if (t.params.enable) { - features.push('subgroups-f16' as GPUFeatureName); - } - t.selectDeviceOrSkipTestCase(features); - }) - .fn(t => { - const wgsl = ` -enable f16; -enable subgroups; -${t.params.enable ? 
'enable subgroups_f16;' : ''} -fn foo() { - _ = ${t.params.op}(0h); -}`; - - t.expectCompileResult(t.params.enable, wgsl); - }); - const kStages: Record string> = { constant: (op: string) => { return ` diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts index b46f90c545b4..4f14503d579d 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts @@ -37,28 +37,6 @@ fn foo() { t.expectCompileResult(t.params.enable, wgsl); }); -g.test('requires_subgroups_f16') - .desc('Validates that the subgroups feature is required') - .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) - .beforeAllSubcases(t => { - const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; - if (t.params.enable) { - features.push('subgroups-f16' as GPUFeatureName); - } - t.selectDeviceOrSkipTestCase(features); - }) - .fn(t => { - const wgsl = ` -enable f16; -enable subgroups; -${t.params.enable ? 'enable subgroups_f16;' : ''} -fn foo() { - _ = ${t.params.op}(0h, 0); -}`; - - t.expectCompileResult(t.params.enable, wgsl); - }); - const kStages: Record string> = { constant: (op: string) => { return ` From 08731e9049bd8c70662a04778821c50a9b301b4e Mon Sep 17 00:00:00 2001 From: David Neto Date: Thu, 5 Dec 2024 15:35:55 -0500 Subject: [PATCH 5/5] Reapply "Use subgroupMinSize, subgroupMaxSize from GPUAdapterInfo" (#4079) (#4080) This reverts commit ed1b78a79ad5220701330e7311b528f174097dfe. 
Also: Test adapter.info.subgroupMinSize, subgroupMaxSize --- src/webgpu/api/operation/adapter/info.spec.ts | 51 +++++++++++++++++++ src/webgpu/listing_meta.json | 3 ++ .../call/builtin/subgroupBitwise.spec.ts | 8 +-- .../call/builtin/subgroupBroadcast.spec.ts | 18 +++---- .../shader_io/compute_builtins.spec.ts | 12 ++--- .../shader_io/fragment_builtins.spec.ts | 24 ++++----- 6 files changed, 85 insertions(+), 31 deletions(-) diff --git a/src/webgpu/api/operation/adapter/info.spec.ts b/src/webgpu/api/operation/adapter/info.spec.ts index 4fef9a56ddf7..0d9e120015eb 100644 --- a/src/webgpu/api/operation/adapter/info.spec.ts +++ b/src/webgpu/api/operation/adapter/info.spec.ts @@ -7,6 +7,7 @@ import { makeTestGroup } from '../../../../common/framework/test_group.js'; import { keysOf } from '../../../../common/util/data_tables.js'; import { getGPU } from '../../../../common/util/navigator_gpu.js'; import { assert, objectEquals } from '../../../../common/util/util.js'; +import { isPowerOfTwo } from '../../../util/math.js'; export const g = makeTestGroup(Fixture); @@ -136,3 +137,53 @@ different orders to make sure that they are consistent regardless of the access t.expect(objectEquals(deviceInfo, adapterInfo)); } }); + +// This can be removed once 'subgroups' lands. +// See https://github.com/gpuweb/gpuweb/pull/4963 +interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize?: number; + subgroupMaxSize?: number; +} + +const kSubgroupMinSizeBound = 4; +const kSubgroupMaxSizeBound = 128; + +g.test('subgroup_sizes') + .desc( + ` +Verify GPUAdapterInfo.subgroupMinSize, GPUAdapterInfo.subgroupMaxSize. +If the subgroups feature is supported, they must both exist. +If they exist, they must both exist and be powers of two, and +4 <= subgroupMinSize <= subgroupMaxSize <= 128.
+` + ) + .fn(async t => { + const gpu = getGPU(t.rec); + const adapter = await gpu.requestAdapter(); + assert(adapter !== null); + const { subgroupMinSize, subgroupMaxSize } = adapter.info as SubgroupProperties; + // Once 'subgroups' lands, the properties should be defined with default values 4 and 128 + // when adapter does not support the feature. + // https://github.com/gpuweb/gpuweb/pull/4963 + if (adapter.features.has('subgroups')) { + t.expect( + subgroupMinSize !== undefined, + 'GPUAdapterInfo.subgroupMinSize must exist when subgroups supported' + ); + t.expect( + subgroupMaxSize !== undefined, + 'GPUAdapterInfo.subgroupMaxSize must exist when subgroups supported' + ); + } + t.expect( + (subgroupMinSize === undefined) === (subgroupMaxSize === undefined), + 'GPUAdapterInfo.subgroupMinSize and GPUAdapterInfo.subgroupMaxSize must both be defined, or neither should be' + ); + if (subgroupMinSize !== undefined && subgroupMaxSize !== undefined) { + t.expect(isPowerOfTwo(subgroupMinSize)); + t.expect(isPowerOfTwo(subgroupMaxSize)); + t.expect(kSubgroupMinSizeBound <= subgroupMinSize); + t.expect(subgroupMinSize <= subgroupMaxSize); + t.expect(subgroupMaxSize <= kSubgroupMaxSizeBound); + } + }); diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index ce8fb3a2bcb4..b764febcc02e 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -1,6 +1,9 @@ { "_comment": "SEMI AUTO-GENERATED. This list is NOT exhaustive.
Please read docs/adding_timing_metadata.md.", "webgpu:api,operation,adapter,info:adapter_info:*": { "subcaseMS": 32.901 }, + "webgpu:api,operation,adapter,info:device_matches_adapter:*": { "subcaseMS": 14.708 }, + "webgpu:api,operation,adapter,info:same_object:*": { "subcaseMS": 25.153 }, + "webgpu:api,operation,adapter,info:subgroup_sizes:*": { "subcaseMS": 18.831 }, "webgpu:api,operation,adapter,requestAdapter:requestAdapter:*": { "subcaseMS": 152.083 }, "webgpu:api,operation,adapter,requestAdapter:requestAdapter_no_parameters:*": { "subcaseMS": 384.601 }, "webgpu:api,operation,adapter,requestDevice:always_returns_device:*": { "subcaseMS": 19.450 }, diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts index a8f423e1f729..b134e5db633d 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts @@ -547,12 +547,12 @@ g.test('fragment,all_active') .fn(async t => { const numInputs = t.params.size[0] * t.params.size[1]; - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; } - const { minSubgroupSize } = t.device.limits as SubgroupLimits; + const { subgroupMinSize } = t.device.adapterInfo as SubgroupProperties; const innerTexels = (t.params.size[0] - 1) * (t.params.size[1] - 1); - t.skipIf(innerTexels < minSubgroupSize, 'Too few texels to be reliable'); + t.skipIf(innerTexels < subgroupMinSize, 'Too few texels to be reliable'); const inputData = generateInputData(t.params.case, numInputs, identity(t.params.op)); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index 5c9650778a68..50dac8c4d894 100644 --- 
a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -444,12 +444,12 @@ g.test('compute,split') const testcase = kPredicateCases[t.params.predicate]; const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; } - const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; - for (let size = minSubgroupSize; size <= maxSubgroupSize; size *= 2) { + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + for (let size = subgroupMinSize; size <= subgroupMaxSize; size *= 2) { t.skipIf(!testcase.filter(t.params.id, size), 'Skipping potential undefined behavior'); } @@ -669,11 +669,11 @@ g.test('fragment') }) .fn(async t => { const innerTexels = (t.params.size[0] - 1) * (t.params.size[1] - 1); - interface SubgroupLimits extends GPUSupportedLimits { - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMaxSize: number; } - const { maxSubgroupSize } = t.device.limits as SubgroupLimits; - t.skipIf(innerTexels < maxSubgroupSize, 'Too few texels to be reliable'); + const { subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; + t.skipIf(innerTexels < subgroupMaxSize, 'Too few texels to be reliable'); const broadcast = t.params.id === 0 diff --git a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts index a08d6eb39533..f264052f8cb5 100644 --- a/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts @@ -398,11 +398,11 @@ g.test('subgroup_size') 
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); }) .fn(async t => { - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; } - const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; const wgx = t.params.sizes[0]; const wgy = t.params.sizes[1]; @@ -518,8 +518,8 @@ fn main(@builtin(subgroup_size) size : u32, checkSubgroupSizeConsistency( sizesData, compareData, - minSubgroupSize, - maxSubgroupSize, + subgroupMinSize, + subgroupMaxSize, wgThreads ) ); diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts index bb7f3b113e26..5d10a3dafe2b 100644 --- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts @@ -1655,16 +1655,16 @@ g.test('subgroup_size') t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); }) .fn(async t => { - interface SubgroupLimits extends GPUSupportedLimits { - minSubgroupSize: number; - maxSubgroupSize: number; + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + subgroupMaxSize: number; } - const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties; const fsShader = ` enable subgroups; -const maxSubgroupSize = ${kMaximiumSubgroupSize}u; +const subgroupMaxSize = ${kMaximiumSubgroupSize}u; const noError = ${kSubgroupShaderNoError}u; const width = ${t.params.size[0]}; @@ -1686,7 +1686,7 @@ fn fsMain( var subgroupSizeBallotedInvocations: u32 = 0u; var ballotedSubgroupSize: u32 = 0u; - for (var i: u32 = 0; i <= maxSubgroupSize; i++) { + for (var i: u32 
= 0; i <= subgroupMaxSize; i++) { let ballotSubgroupSizeEqualI = countOneBits(subgroupBallot(sg_size == i)); let countSubgroupSizeEqualI = ballotSubgroupSizeEqualI.x + ballotSubgroupSizeEqualI.y + ballotSubgroupSizeEqualI.z + ballotSubgroupSizeEqualI.w; subgroupSizeBallotedInvocations += countSubgroupSizeEqualI; @@ -1716,8 +1716,8 @@ fn fsMain( return checkSubgroupSizeConsistency( data, t.params.format, - minSubgroupSize, - maxSubgroupSize, + subgroupMinSize, + subgroupMaxSize, t.params.size[0], t.params.size[1] ); @@ -1816,7 +1816,7 @@ enable subgroups; const width = ${t.params.size[0]}; const height = ${t.params.size[1]}; -const maxSubgroupSize = ${kMaximiumSubgroupSize}u; +const subgroupMaxSize = ${kMaximiumSubgroupSize}u; // A non-zero magic number indicating no expectation error, in order to prevent the // false no-error result from zero-initialization. const noError = ${kSubgroupShaderNoError}u; @@ -1830,8 +1830,8 @@ fn fsMain( var error: u32 = noError; - // Validate that reported subgroup size is no larger than maxSubgroupSize - if (sg_size > maxSubgroupSize) { + // Validate that reported subgroup size is no larger than subgroupMaxSize + if (sg_size > subgroupMaxSize) { error++; } @@ -1843,7 +1843,7 @@ fn fsMain( // Validate that each subgroup id is assigned to at most one active invocation // in the subgroup var countAssignedId: u32 = 0u; - for (var i: u32 = 0; i < maxSubgroupSize; i++) { + for (var i: u32 = 0; i < subgroupMaxSize; i++) { let ballotIdEqualsI = countOneBits(subgroupBallot(id == i)); let countInvocationIdEqualsI = ballotIdEqualsI.x + ballotIdEqualsI.y + ballotIdEqualsI.z + ballotIdEqualsI.w; // Validate an id assigned at most once