From c052d66b2b7994e619c2e950c5d779b001f307fa Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Fri, 30 Aug 2024 10:47:06 -0400 Subject: [PATCH 1/4] Execution tests for subgroupAny and subgroupAll * Compute tests with all active invocation and partially active invocations * Fragment tests with all active invocations * Removed unimplemented dynamically uniform subgroupBroadcast test (due to const requirement) --- src/webgpu/listing_meta.json | 7 +- .../call/builtin/subgroupAll.spec.ts | 401 ++++++++++++++++++ .../call/builtin/subgroupAny.spec.ts | 401 ++++++++++++++++++ .../call/builtin/subgroupBroadcast.spec.ts | 2 - .../expression/call/builtin/subgroup_util.ts | 132 +++++- 5 files changed, 938 insertions(+), 5 deletions(-) create mode 100644 src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts create mode 100644 src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index 0290bae1808a..9e73f590e65c 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -1529,13 +1529,18 @@ "webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 }, "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 }, "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment:*": { "subcaseMS": 0.172 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment:*": { "subcaseMS": 0.227 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 }, - "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 }, "webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 }, diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts new file mode 100644 index 000000000000..71de510d1e8f --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -0,0 +1,401 @@ +export const description = ` +Execution tests for subgroupAll. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kFramebufferSizes, + runComputeTest, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 10; + +/** + * Checks the result of a subgroupAll operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. + * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAll + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAll( + metadata: Uint32Array, // unused + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 1; + v &= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== 999) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAll`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. + outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const prng = new PRNG(t.params.case); + // Case 0 is all 0s. + // Case 1 is all 1s. + // Other cases are filled with random 0s and 1s. + const inputData = new Uint32Array([ + ...iterRange(wgThreads, x => { + if (t.params.case === 0) { + return 0; + } else if (t.params.case === 1) { + return 1; + } + return prng.uniformInt(2); + }), + ]); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. + outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + } else { + return; + } +}`; + + const prng = new PRNG(t.params.case); + // Case 0 is all 0s. + // Case 1 is all 1s. + // Other cases are filled with random 0s and 1s. + const inputData = new Uint32Array([ + ...iterRange(wgThreads, x => { + if (t.params.case === 0) { + return 0; + } else if (t.params.case === 1) { + return 1; + } + return prng.uniformInt(2); + }), + ]); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAll results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data offset by 1 uint + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAll( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const expected = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 1; + // First index of input is an atomic counter. + v &= input[1 + row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment') + .desc('Tests subgroupAll in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const prng = new PRNG(t.params.case); + // Case 0 is all 0s. + // Case 1 is all 1s. + // Other cases are filled with random 0s and 1s. + // + // Note: the first index is used as an atomic counter for subgroup ids. + const numInputs = t.params.size[0] * t.params.size[1] + 1; + const inputData = new Uint32Array([ + ...iterRange(numInputs, x => { + if (x === 0) { + // All subgroup ids start from index 1. + return 1; + } else if (t.params.case === 0) { + return 0; + } else if (t.params.case === 1) { + return 1; + } + return prng.uniformInt(2); + }), + ]); + + const fsShader = ` +enable subgroups; + +struct Inputs { + subgroup_id : atomic, + data : array, +} + +@group(0) @binding(0) +var inputs : Inputs; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + var subgroup_id = 0u; + if subgroupElect() { + subgroup_id = atomicAdd(&inputs.subgroup_id, 1); + } + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out texels outside the frame (possible helper invocations). + var input = 1u; + if (u32(pos.x) >= 0 && u32(pos.x) < ${t.params.size[0]} && + u32(pos.y) >= 0 && u32(pos.y) < ${t.params.size[1]}) { + input = inputs.data[u32(pos.y) * ${t.params.size[0]} + u32(pos.x)]; + } + let res = select(0u, 1u, subgroupAll(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAll( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts new file mode 100644 index 000000000000..6bb2df6b3d75 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -0,0 +1,401 @@ +export const description = ` +Execution tests for subgroupAny. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + runComputeTest, + runFragmentTest, + kFramebufferSizes, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 10; + +/** + * Checks the result of a subgroupAny operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. + * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAny + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAny( + metadata: Uint32Array, // unused + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 0; + v |= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== 999) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAny`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. + outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const prng = new PRNG(t.params.case); + // Case 0 is all 0s. + // Case 1 is all 1s. + // Other cases are filled with random 0s and 1s. + const inputData = new Uint32Array([ + ...iterRange(wgThreads, x => { + if (t.params.case === 0) { + return 0; + } else if (t.params.case === 1) { + return 1; + } + return prng.uniformInt(2); + }), + ]); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. + outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + } else { + return; + } +}`; + + const prng = new PRNG(t.params.case); + // Case 0 is all 0s. + // Case 1 is all 1s. + // Other cases are filled with random 0s and 1s. + const inputData = new Uint32Array([ + ...iterRange(wgThreads, x => { + if (t.params.case === 0) { + return 0; + } else if (t.params.case === 1) { + return 1; + } + return prng.uniformInt(2); + }), + ]); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAny results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data offset by 1 uint + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAny( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + const expected = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 0; + // First index of input is an atomic counter. + v |= input[1 + row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment') + .desc('Tests subgroupAny in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const prng = new PRNG(t.params.case); + // Case 0 is all 0s. + // Case 1 is all 1s. + // Other cases are filled with random 0s and 1s. + // + // Note: the first index is used as an atomic counter for subgroup ids. + const numInputs = t.params.size[0] * t.params.size[1] + 1; + const inputData = new Uint32Array([ + ...iterRange(numInputs, x => { + if (x === 0) { + // All subgroup ids start from index 1. + return 1; + } else if (t.params.case === 0) { + return 0; + } else if (t.params.case === 1) { + return 1; + } + return prng.uniformInt(2); + }), + ]); + + const fsShader = ` +enable subgroups; + +struct Inputs { + subgroup_id : atomic, + data : array, +} + +@group(0) @binding(0) +var inputs : Inputs; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + var subgroup_id = 0u; + if subgroupElect() { + subgroup_id = atomicAdd(&inputs.subgroup_id, 1); + } + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out texels outside the frame (possible helper invocations). + var input = 0u; + if (u32(pos.x) >= 0 && u32(pos.x) < ${t.params.size[0]} && + u32(pos.y) >= 0 && u32(pos.y) < ${t.params.size[1]}) { + input = inputs.data[u32(pos.y) * ${t.params.size[0]} + u32(pos.x)]; + } + let res = select(0u, 1u, subgroupAny(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAny( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index b2fa9e46ec7a..75fe27e8cb5d 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -318,6 +318,4 @@ fn main(@builtin(subgroup_invocation_id) id : u32, t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expect)); }); -g.test('dynamically_uniform_id').unimplemented(); - g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts index 60cf9f12a155..4544f624ca8f 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -1,10 +1,13 @@ import { assert, iterRange } from '../../../../../../common/util/util.js'; import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js'; import { FPInterval } from '../../../../../util/floating_point.js'; -import { sparseScalarF16Range, sparseScalarF32Range } from '../../../../../util/math.js'; +import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; +export class SubgroupTest extends TextureTestMixin(GPUTest) {} + export const kNumCases = 1000; export const kStride = 128; @@ -415,3 +418,128 @@ export async function runComputeTest( t.expectOK(checkFunction(metadata, output)); } + +export const kFramebufferSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 2], + [2, 111], + [35, 2], + [2, 35], + [53, 13], + [13, 53], +] as const; + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen triangle. + * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is input data + * @param width The framebuffer width + * @param height The framebuffer height + * @param inputData The input data + * @param checker A functor to check the framebuffer values + */ +export async function runFragmentTest( + t: SubgroupTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + inputData: Uint32Array | Float32Array | Float16Array, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-2, 4), vec2(-2, -4), vec2(2, 0), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + // 256 minimum arises from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); + + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3); + pass.end(); + t.queue.submit([encoder.finish()]); + + const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(copyBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); +} From 568f84bc1aee9fd5cade3ca677045a2954904ba6 Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Fri, 30 Aug 2024 14:40:55 -0400 Subject: [PATCH 2/4] Changes for review * Remove fragment tests for now * Add helpers to generate input data and add new class of cases * Increase number of cases to 15 per variants * Export data sentinel value --- .../call/builtin/subgroupAll.spec.ts | 225 ++++-------------- .../call/builtin/subgroupAny.spec.ts | 225 ++++-------------- .../expression/call/builtin/subgroup_util.ts | 131 +--------- 3 files changed, 94 insertions(+), 487 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts index 71de510d1e8f..b81c6fb003af 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -18,14 +18,50 @@ import { kWGSizes, kPredicateCases, SubgroupTest, - kFramebufferSizes, runComputeTest, - runFragmentTest, + kDataSentinel } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); -const kNumCases = 10; +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 1s except for a zero randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + * @param addCounter If true, treats the first index as an atomic counter + */ +function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + //console.log(`bound = ${bound}, index = ${index}`); + + return new Uint32Array([...iterRange(num, x => { + if (addCounter && x === 0) { + // Counter should start at 1 to avoid clear value. + return 1; + } + + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = (addCounter ? x + 1 : x) % bound; + return bounded === index ? 0 : 1; + } + return prng.uniformInt(2); + })]); +} /** * Checks the result of a subgroupAll operation @@ -80,7 +116,7 @@ function checkAll( - got: ${res}`); } } else { - if (res !== 999) { + if (res !== kDataSentinel) { return new Error(`Invocation ${inv} unexpected write: - subgroup invocation id: ${id} - subgroup size: ${size}`); @@ -139,20 +175,8 @@ fn main( outputs[lid] = res; }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -223,20 +247,8 @@ fn main( } }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -251,151 +263,4 @@ fn main( ); }); -/** - * Checks subgroupAll results from a fragment shader. - * - * @param data Framebuffer output - * * component 0 is result - * * component 1 is generated subgroup id - * @param input An array of input data offset by 1 uint - * @param format The framebuffer format - * @param width Framebuffer width - * @param height Framebuffer height - */ -function checkFragmentAll( - data: Uint32Array, - input: Uint32Array, - format: GPUTextureFormat, - width: number, - height: number -): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; - - const expected = new Map(); - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - return new Error(`Internal error: helper invocation at (${col}, ${row})`); - } - - let v = expected.get(subgroup_id) ?? 1; - // First index of input is an atomic counter. - v &= input[1 + row * width + col]; - expected.set(subgroup_id, v); - } - } - - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const res = data[offset]; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - // Inactive in the fragment. - continue; - } - - const expected_v = expected.get(subgroup_id) ?? 0; - if (expected_v !== res) { - return new Error(`Row ${row}, col ${col}: incorrect results: -- expected: ${expected_v} -- got: ${res}`); - } - } - } - - return undefined; -} - -g.test('fragment') - .desc('Tests subgroupAll in fragment shaders') - .params(u => - u - .combine('size', kFramebufferSizes) - .beginSubcases() - .combine('case', [...iterRange(kNumCases, x => x)]) - .combineWithParams([{ format: 'rg32uint' }] as const) - ) - .beforeAllSubcases(t => { - t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); - }) - .fn(async t => { - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - // - // Note: the first index is used as an atomic counter for subgroup ids. - const numInputs = t.params.size[0] * t.params.size[1] + 1; - const inputData = new Uint32Array([ - ...iterRange(numInputs, x => { - if (x === 0) { - // All subgroup ids start from index 1. - return 1; - } else if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); - - const fsShader = ` -enable subgroups; - -struct Inputs { - subgroup_id : atomic, - data : array, -} - -@group(0) @binding(0) -var inputs : Inputs; - -@fragment -fn main( - @builtin(position) pos : vec4f, -) -> @location(0) vec2u { - var subgroup_id = 0u; - if subgroupElect() { - subgroup_id = atomicAdd(&inputs.subgroup_id, 1); - } - subgroup_id = subgroupBroadcastFirst(subgroup_id); - - // Filter out texels outside the frame (possible helper invocations). - var input = 1u; - if (u32(pos.x) >= 0 && u32(pos.x) < ${t.params.size[0]} && - u32(pos.y) >= 0 && u32(pos.y) < ${t.params.size[1]}) { - input = inputs.data[u32(pos.y) * ${t.params.size[0]} + u32(pos.x)]; - } - let res = select(0u, 1u, subgroupAll(bool(input))); - return vec2u(res, subgroup_id); -}`; - - await runFragmentTest( - t, - t.params.format, - fsShader, - t.params.size[0], - t.params.size[1], - inputData, - (data: Uint32Array) => { - return checkFragmentAll( - data, - inputData, - t.params.format, - t.params.size[0], - t.params.size[1] - ); - } - ); - }); +g.test('fragment').unimplemented() diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts index 6bb2df6b3d75..5b01b1593e46 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -19,13 +19,49 @@ import { kPredicateCases, SubgroupTest, runComputeTest, - runFragmentTest, - kFramebufferSizes, + kDataSentinel } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); -const kNumCases = 10; +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 0s except for a one randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + * @param addCounter If true, treats the first index as an atomic counter + */ +function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + //console.log(`bound = ${bound}, index = ${index}`); + + return new Uint32Array([...iterRange(num, x => { + if (addCounter && x === 0) { + // Counter should start at 1 to avoid clear value. + return 1; + } + + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = (addCounter ? x + 1 : x) % bound; + return bounded === index ? 1 : 0; + } + return prng.uniformInt(2); + })]); +} /** * Checks the result of a subgroupAny operation @@ -80,7 +116,7 @@ function checkAny( - got: ${res}`); } } else { - if (res !== 999) { + if (res !== kDataSentinel) { return new Error(`Invocation ${inv} unexpected write: - subgroup invocation id: ${id} - subgroup size: ${size}`); @@ -139,20 +175,8 @@ fn main( outputs[lid] = res; }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -223,20 +247,8 @@ fn main( } }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -251,151 +263,4 @@ fn main( ); }); -/** - * Checks subgroupAny results from a fragment shader. - * - * @param data Framebuffer output - * * component 0 is result - * * component 1 is generated subgroup id - * @param input An array of input data offset by 1 uint - * @param format The framebuffer format - * @param width Framebuffer width - * @param height Framebuffer height - */ -function checkFragmentAny( - data: Uint32Array, - input: Uint32Array, - format: GPUTextureFormat, - width: number, - height: number -): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; - - const expected = new Map(); - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - return new Error(`Internal error: helper invocation at (${col}, ${row})`); - } - - let v = expected.get(subgroup_id) ?? 0; - // First index of input is an atomic counter. - v |= input[1 + row * width + col]; - expected.set(subgroup_id, v); - } - } - - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const res = data[offset]; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - // Inactive in the fragment. - continue; - } - - const expected_v = expected.get(subgroup_id) ?? 0; - if (expected_v !== res) { - return new Error(`Row ${row}, col ${col}: incorrect results: -- expected: ${expected_v} -- got: ${res}`); - } - } - } - - return undefined; -} - -g.test('fragment') - .desc('Tests subgroupAny in fragment shaders') - .params(u => - u - .combine('size', kFramebufferSizes) - .beginSubcases() - .combine('case', [...iterRange(kNumCases, x => x)]) - .combineWithParams([{ format: 'rg32uint' }] as const) - ) - .beforeAllSubcases(t => { - t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); - }) - .fn(async t => { - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - // - // Note: the first index is used as an atomic counter for subgroup ids. - const numInputs = t.params.size[0] * t.params.size[1] + 1; - const inputData = new Uint32Array([ - ...iterRange(numInputs, x => { - if (x === 0) { - // All subgroup ids start from index 1. - return 1; - } else if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); - - const fsShader = ` -enable subgroups; - -struct Inputs { - subgroup_id : atomic, - data : array, -} - -@group(0) @binding(0) -var inputs : Inputs; - -@fragment -fn main( - @builtin(position) pos : vec4f, -) -> @location(0) vec2u { - var subgroup_id = 0u; - if subgroupElect() { - subgroup_id = atomicAdd(&inputs.subgroup_id, 1); - } - subgroup_id = subgroupBroadcastFirst(subgroup_id); - - // Filter out texels outside the frame (possible helper invocations). - var input = 0u; - if (u32(pos.x) >= 0 && u32(pos.x) < ${t.params.size[0]} && - u32(pos.y) >= 0 && u32(pos.y) < ${t.params.size[1]}) { - input = inputs.data[u32(pos.y) * ${t.params.size[0]} + u32(pos.x)]; - } - let res = select(0u, 1u, subgroupAny(bool(input))); - return vec2u(res, subgroup_id); -}`; - - await runFragmentTest( - t, - t.params.format, - fsShader, - t.params.size[0], - t.params.size[1], - inputData, - (data: Uint32Array) => { - return checkFragmentAny( - data, - inputData, - t.params.format, - t.params.size[0], - t.params.size[1] - ); - } - ); - }); +g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts index 4544f624ca8f..c4000e4335ca 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -303,6 +303,8 @@ fn main( t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen)); } +export const kDataSentinel = 999; + /** * Runs compute shader subgroup test * @@ -349,14 +351,14 @@ export async function runComputeTest( const outputUints = outputUintsPerElement * wgThreads; const outputBuffer = t.makeBufferWithContents( - new Uint32Array([...iterRange(outputUints, x => 999)]), + new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]), GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE ); t.trackForCleanup(outputBuffer); const numMetadata = 2 * wgThreads; const metadataBuffer = t.makeBufferWithContents( - new Uint32Array([...iterRange(numMetadata, x => 999)]), + new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]), GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE ); @@ -418,128 +420,3 @@ export async function runComputeTest( t.expectOK(checkFunction(metadata, output)); } - -export const kFramebufferSizes = [ - [15, 15], - [16, 16], - [17, 17], - [19, 13], - [13, 10], - [111, 2], - [2, 111], - [35, 2], - [2, 35], - [53, 13], - [13, 53], -] as const; - -/** - * Runs a subgroup builtin test for fragment shaders - * - * This test draws a full screen triangle. - * @param t The base test - * @param format The framebuffer format - * @param fsShader The fragment shader with the following interface: - * Location 0 output is framebuffer with format - * Group 0 binding 0 is input data - * @param width The framebuffer width - * @param height The framebuffer height - * @param inputData The input data - * @param checker A functor to check the framebuffer values - */ -export async function runFragmentTest( - t: SubgroupTest, - format: GPUTextureFormat, - fsShader: string, - width: number, - height: number, - inputData: Uint32Array | Float32Array | Float16Array, - checker: (data: Uint32Array) => Error | undefined -) { - const vsShader = ` -@vertex -fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { - const vertices = array( - vec2(-2, 4), vec2(-2, -4), vec2(2, 0), - ); - return vec4f(vec2f(vertices[index]), 0, 1); -}`; - - const pipeline = t.device.createRenderPipeline({ - layout: 'auto', - vertex: { - module: t.device.createShaderModule({ code: vsShader }), - }, - fragment: { - module: t.device.createShaderModule({ code: fsShader }), - targets: [{ format }], - }, - primitive: { - topology: 'triangle-list', - }, - }); - - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - assert(bytesPerBlock !== undefined); - - const blocksPerRow = width / blockWidth; - const blocksPerColumn = height / blockHeight; - // 256 minimum arises from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const byteLength = bytesPerRow * blocksPerColumn; - const uintLength = byteLength / 4; - - const buffer = t.makeBufferWithContents( - inputData, - GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST - ); - - const bg = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { - binding: 0, - resource: { - buffer, - }, - }, - ], - }); - - const framebuffer = t.createTextureTracked({ - size: [width, height], - usage: - GPUTextureUsage.COPY_SRC | - GPUTextureUsage.COPY_DST | - GPUTextureUsage.RENDER_ATTACHMENT | - GPUTextureUsage.TEXTURE_BINDING, - format, - }); - - const encoder = t.device.createCommandEncoder(); - const pass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: framebuffer.createView(), - loadOp: 'clear', - storeOp: 'store', - }, - ], - }); - pass.setPipeline(pipeline); - pass.setBindGroup(0, bg); - pass.draw(3); - pass.end(); - t.queue.submit([encoder.finish()]); - - const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); - const readback = await t.readGPUBufferRangeTyped(copyBuffer, { - srcByteOffset: 0, - type: Uint32Array, - typedLength: uintLength, - method: 'copy', - }); - const data: Uint32Array = readback.data; - - t.expectOK(checker(data)); -} From 86327fb36b04b82144ab553942647bb55be7df79 Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Fri, 30 Aug 2024 14:45:28 -0400 Subject: [PATCH 3/4] formatting --- .../call/builtin/subgroupAll.spec.ts | 38 +++++++++---------- .../call/builtin/subgroupAny.spec.ts | 36 +++++++++--------- .../expression/call/builtin/subgroup_util.ts | 3 +- 3 files changed, 38 insertions(+), 39 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts index b81c6fb003af..8aef5a681aaa 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -10,8 +10,6 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf } from '../../../../../../common/util/data_tables.js'; import { iterRange } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; -import { align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; import { @@ -19,7 +17,7 @@ import { kPredicateCases, SubgroupTest, runComputeTest, - kDataSentinel + kDataSentinel, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); @@ -45,22 +43,24 @@ function generateInputData(seed: number, num: number, addCounter: boolean): Uint const index = prng.uniformInt(bound); //console.log(`bound = ${bound}, index = ${index}`); - return new Uint32Array([...iterRange(num, x => { - if (addCounter && x === 0) { - // Counter should start at 1 to avoid clear value. - return 1; - } + return new Uint32Array([ + ...iterRange(num, x => { + if (addCounter && x === 0) { + // Counter should start at 1 to avoid clear value. + return 1; + } - if (seed === 0) { - return 0; - } else if (seed === 1) { - return 1; - } else if (seed < 10) { - const bounded = (addCounter ? x + 1 : x) % bound; - return bounded === index ? 0 : 1; - } - return prng.uniformInt(2); - })]); + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = (addCounter ? x + 1 : x) % bound; + return bounded === index ? 0 : 1; + } + return prng.uniformInt(2); + }), + ]); } /** @@ -263,4 +263,4 @@ fn main( ); }); -g.test('fragment').unimplemented() +g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts index 5b01b1593e46..4b915979afe8 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -10,8 +10,6 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf } from '../../../../../../common/util/data_tables.js'; import { iterRange } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; -import { align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; import { @@ -19,7 +17,7 @@ import { kPredicateCases, SubgroupTest, runComputeTest, - kDataSentinel + kDataSentinel, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); @@ -45,22 +43,24 @@ function generateInputData(seed: number, num: number, addCounter: boolean): Uint const index = prng.uniformInt(bound); //console.log(`bound = ${bound}, index = ${index}`); - return new Uint32Array([...iterRange(num, x => { - if (addCounter && x === 0) { - // Counter should start at 1 to avoid clear value. - return 1; - } + return new Uint32Array([ + ...iterRange(num, x => { + if (addCounter && x === 0) { + // Counter should start at 1 to avoid clear value. + return 1; + } - if (seed === 0) { - return 0; - } else if (seed === 1) { - return 1; - } else if (seed < 10) { - const bounded = (addCounter ? x + 1 : x) % bound; - return bounded === index ? 1 : 0; - } - return prng.uniformInt(2); - })]); + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = (addCounter ? x + 1 : x) % bound; + return bounded === index ? 1 : 0; + } + return prng.uniformInt(2); + }), + ]); } /** diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts index c4000e4335ca..8749c136c3d2 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -1,9 +1,8 @@ import { assert, iterRange } from '../../../../../../common/util/util.js'; import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js'; import { FPInterval } from '../../../../../util/floating_point.js'; -import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js'; +import { sparseScalarF16Range, sparseScalarF32Range } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; export class SubgroupTest extends TextureTestMixin(GPUTest) {} From 833f9c580f4f06e0db3f8c3a7b986f41371cef7d Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Fri, 30 Aug 2024 14:50:05 -0400 Subject: [PATCH 4/4] remove dead code --- .../shader/execution/expression/call/builtin/subgroupAll.spec.ts | 1 - .../shader/execution/expression/call/builtin/subgroupAny.spec.ts | 1 - 2 files changed, 2 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts index 8aef5a681aaa..831c33d8ed1c 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -41,7 +41,6 @@ function generateInputData(seed: number, num: number, addCounter: boolean): Uint const bound = Math.min(num, 32); const index = prng.uniformInt(bound); - //console.log(`bound = ${bound}, index = ${index}`); return new Uint32Array([ ...iterRange(num, x => { diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts index 4b915979afe8..6418eb141dc5 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -41,7 +41,6 @@ function generateInputData(seed: number, num: number, addCounter: boolean): Uint const bound = Math.min(num, 32); const index = prng.uniformInt(bound); - //console.log(`bound = ${bound}, index = ${index}`); return new Uint32Array([ ...iterRange(num, x => {