From 568f84bc1aee9fd5cade3ca677045a2954904ba6 Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Fri, 30 Aug 2024 14:40:55 -0400 Subject: [PATCH] Changes for review * Remove fragment tests for now * Add helpers to generate input data and add a new class of cases * Increase number of cases to 15 per variant * Export data sentinel value --- .../call/builtin/subgroupAll.spec.ts | 225 ++++-------------- .../call/builtin/subgroupAny.spec.ts | 225 ++++-------------- .../expression/call/builtin/subgroup_util.ts | 131 +--------- 3 files changed, 94 insertions(+), 487 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts index 71de510d1e8f..b81c6fb003af 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -18,14 +18,50 @@ import { kWGSizes, kPredicateCases, SubgroupTest, - kFramebufferSizes, runComputeTest, - runFragmentTest, + kDataSentinel } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); -const kNumCases = 10; +const kNumCases = 15; + +/** + * Generate input data for testing.
+ * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generate all 1s except for a single 0 at a random offset within each run of up to 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + * @param addCounter If true, treats the first index as an atomic counter + */ +function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + //console.log(`bound = ${bound}, index = ${index}`); + + return new Uint32Array([...iterRange(num, x => { + if (addCounter && x === 0) { + // Counter should start at 1 to avoid clear value. + return 1; + } + + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = (addCounter ? x + 1 : x) % bound; + return bounded === index ? 0 : 1; + } + return prng.uniformInt(2); + })]); +} /** * Checks the result of a subgroupAll operation @@ -80,7 +116,7 @@ function checkAll( - got: ${res}`); } } else { - if (res !== 999) { + if (res !== kDataSentinel) { return new Error(`Invocation ${inv} unexpected write: - subgroup invocation id: ${id} - subgroup size: ${size}`); @@ -139,20 +175,8 @@ fn main( outputs[lid] = res; }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -223,20 +247,8 @@ fn main( } }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s.
- // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -251,151 +263,4 @@ fn main( ); }); -/** - * Checks subgroupAll results from a fragment shader. - * - * @param data Framebuffer output - * * component 0 is result - * * component 1 is generated subgroup id - * @param input An array of input data offset by 1 uint - * @param format The framebuffer format - * @param width Framebuffer width - * @param height Framebuffer height - */ -function checkFragmentAll( - data: Uint32Array, - input: Uint32Array, - format: GPUTextureFormat, - width: number, - height: number -): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; - - const expected = new Map(); - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - return new Error(`Internal error: helper invocation at (${col}, ${row})`); - } - - let v = expected.get(subgroup_id) ?? 1; - // First index of input is an atomic counter. 
- v &= input[1 + row * width + col]; - expected.set(subgroup_id, v); - } - } - - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const res = data[offset]; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - // Inactive in the fragment. - continue; - } - - const expected_v = expected.get(subgroup_id) ?? 0; - if (expected_v !== res) { - return new Error(`Row ${row}, col ${col}: incorrect results: -- expected: ${expected_v} -- got: ${res}`); - } - } - } - - return undefined; -} - -g.test('fragment') - .desc('Tests subgroupAll in fragment shaders') - .params(u => - u - .combine('size', kFramebufferSizes) - .beginSubcases() - .combine('case', [...iterRange(kNumCases, x => x)]) - .combineWithParams([{ format: 'rg32uint' }] as const) - ) - .beforeAllSubcases(t => { - t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); - }) - .fn(async t => { - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - // - // Note: the first index is used as an atomic counter for subgroup ids. - const numInputs = t.params.size[0] * t.params.size[1] + 1; - const inputData = new Uint32Array([ - ...iterRange(numInputs, x => { - if (x === 0) { - // All subgroup ids start from index 1. - return 1; - } else if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); - - const fsShader = ` -enable subgroups; - -struct Inputs { - subgroup_id : atomic, - data : array, -} - -@group(0) @binding(0) -var inputs : Inputs; - -@fragment -fn main( - @builtin(position) pos : vec4f, -) -> @location(0) vec2u { - var subgroup_id = 0u; - if subgroupElect() { - subgroup_id = atomicAdd(&inputs.subgroup_id, 1); - } - subgroup_id = subgroupBroadcastFirst(subgroup_id); - - // Filter out texels outside the frame (possible helper invocations). 
- var input = 1u; - if (u32(pos.x) >= 0 && u32(pos.x) < ${t.params.size[0]} && - u32(pos.y) >= 0 && u32(pos.y) < ${t.params.size[1]}) { - input = inputs.data[u32(pos.y) * ${t.params.size[0]} + u32(pos.x)]; - } - let res = select(0u, 1u, subgroupAll(bool(input))); - return vec2u(res, subgroup_id); -}`; - - await runFragmentTest( - t, - t.params.format, - fsShader, - t.params.size[0], - t.params.size[1], - inputData, - (data: Uint32Array) => { - return checkFragmentAll( - data, - inputData, - t.params.format, - t.params.size[0], - t.params.size[1] - ); - } - ); - }); +g.test('fragment').unimplemented() diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts index 6bb2df6b3d75..5b01b1593e46 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -19,13 +19,49 @@ import { kPredicateCases, SubgroupTest, runComputeTest, - runFragmentTest, - kFramebufferSizes, + kDataSentinel } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); -const kNumCases = 10; +const kNumCases = 15; + +/** + * Generate input data for testing. 
+ * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generate all 0s except for a single 1 at a random offset within each run of up to 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + * @param addCounter If true, treats the first index as an atomic counter + */ +function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + //console.log(`bound = ${bound}, index = ${index}`); + + return new Uint32Array([...iterRange(num, x => { + if (addCounter && x === 0) { + // Counter should start at 1 to avoid clear value. + return 1; + } + + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = (addCounter ? x + 1 : x) % bound; + return bounded === index ? 1 : 0; + } + return prng.uniformInt(2); + })]); +} /** * Checks the result of a subgroupAny operation @@ -80,7 +116,7 @@ function checkAny( - got: ${res}`); } } else { - if (res !== 999) { + if (res !== kDataSentinel) { return new Error(`Invocation ${inv} unexpected write: - subgroup invocation id: ${id} - subgroup size: ${size}`); @@ -139,20 +175,8 @@ fn main( outputs[lid] = res; }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -223,20 +247,8 @@ fn main( } }`; - const prng = new PRNG(t.params.case); - // Case 0 is all 0s.
- // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - const inputData = new Uint32Array([ - ...iterRange(wgThreads, x => { - if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); + const includeCounter = false; + const inputData = generateInputData(t.params.case, wgThreads, includeCounter); const uintsPerOutput = 2; await runComputeTest( @@ -251,151 +263,4 @@ fn main( ); }); -/** - * Checks subgroupAny results from a fragment shader. - * - * @param data Framebuffer output - * * component 0 is result - * * component 1 is generated subgroup id - * @param input An array of input data offset by 1 uint - * @param format The framebuffer format - * @param width Framebuffer width - * @param height Framebuffer height - */ -function checkFragmentAny( - data: Uint32Array, - input: Uint32Array, - format: GPUTextureFormat, - width: number, - height: number -): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; - - const expected = new Map(); - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - return new Error(`Internal error: helper invocation at (${col}, ${row})`); - } - - let v = expected.get(subgroup_id) ?? 0; - // First index of input is an atomic counter. 
- v |= input[1 + row * width + col]; - expected.set(subgroup_id, v); - } - } - - for (let row = 0; row < height; row++) { - for (let col = 0; col < width; col++) { - const offset = uintsPerRow * row + col * uintsPerTexel; - const res = data[offset]; - const subgroup_id = data[offset + 1]; - - if (subgroup_id === 0) { - // Inactive in the fragment. - continue; - } - - const expected_v = expected.get(subgroup_id) ?? 0; - if (expected_v !== res) { - return new Error(`Row ${row}, col ${col}: incorrect results: -- expected: ${expected_v} -- got: ${res}`); - } - } - } - - return undefined; -} - -g.test('fragment') - .desc('Tests subgroupAny in fragment shaders') - .params(u => - u - .combine('size', kFramebufferSizes) - .beginSubcases() - .combine('case', [...iterRange(kNumCases, x => x)]) - .combineWithParams([{ format: 'rg32uint' }] as const) - ) - .beforeAllSubcases(t => { - t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); - }) - .fn(async t => { - const prng = new PRNG(t.params.case); - // Case 0 is all 0s. - // Case 1 is all 1s. - // Other cases are filled with random 0s and 1s. - // - // Note: the first index is used as an atomic counter for subgroup ids. - const numInputs = t.params.size[0] * t.params.size[1] + 1; - const inputData = new Uint32Array([ - ...iterRange(numInputs, x => { - if (x === 0) { - // All subgroup ids start from index 1. - return 1; - } else if (t.params.case === 0) { - return 0; - } else if (t.params.case === 1) { - return 1; - } - return prng.uniformInt(2); - }), - ]); - - const fsShader = ` -enable subgroups; - -struct Inputs { - subgroup_id : atomic, - data : array, -} - -@group(0) @binding(0) -var inputs : Inputs; - -@fragment -fn main( - @builtin(position) pos : vec4f, -) -> @location(0) vec2u { - var subgroup_id = 0u; - if subgroupElect() { - subgroup_id = atomicAdd(&inputs.subgroup_id, 1); - } - subgroup_id = subgroupBroadcastFirst(subgroup_id); - - // Filter out texels outside the frame (possible helper invocations). 
- var input = 0u; - if (u32(pos.x) >= 0 && u32(pos.x) < ${t.params.size[0]} && - u32(pos.y) >= 0 && u32(pos.y) < ${t.params.size[1]}) { - input = inputs.data[u32(pos.y) * ${t.params.size[0]} + u32(pos.x)]; - } - let res = select(0u, 1u, subgroupAny(bool(input))); - return vec2u(res, subgroup_id); -}`; - - await runFragmentTest( - t, - t.params.format, - fsShader, - t.params.size[0], - t.params.size[1], - inputData, - (data: Uint32Array) => { - return checkFragmentAny( - data, - inputData, - t.params.format, - t.params.size[0], - t.params.size[1] - ); - } - ); - }); +g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts index 4544f624ca8f..c4000e4335ca 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -303,6 +303,8 @@ fn main( t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen)); } +export const kDataSentinel = 999; + /** * Runs compute shader subgroup test * @@ -349,14 +351,14 @@ export async function runComputeTest( const outputUints = outputUintsPerElement * wgThreads; const outputBuffer = t.makeBufferWithContents( - new Uint32Array([...iterRange(outputUints, x => 999)]), + new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]), GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE ); t.trackForCleanup(outputBuffer); const numMetadata = 2 * wgThreads; const metadataBuffer = t.makeBufferWithContents( - new Uint32Array([...iterRange(numMetadata, x => 999)]), + new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]), GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE ); @@ -418,128 +420,3 @@ export async function runComputeTest( t.expectOK(checkFunction(metadata, output)); } - -export const kFramebufferSizes = [ - [15, 
15], - [16, 16], - [17, 17], - [19, 13], - [13, 10], - [111, 2], - [2, 111], - [35, 2], - [2, 35], - [53, 13], - [13, 53], -] as const; - -/** - * Runs a subgroup builtin test for fragment shaders - * - * This test draws a full screen triangle. - * @param t The base test - * @param format The framebuffer format - * @param fsShader The fragment shader with the following interface: - * Location 0 output is framebuffer with format - * Group 0 binding 0 is input data - * @param width The framebuffer width - * @param height The framebuffer height - * @param inputData The input data - * @param checker A functor to check the framebuffer values - */ -export async function runFragmentTest( - t: SubgroupTest, - format: GPUTextureFormat, - fsShader: string, - width: number, - height: number, - inputData: Uint32Array | Float32Array | Float16Array, - checker: (data: Uint32Array) => Error | undefined -) { - const vsShader = ` -@vertex -fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { - const vertices = array( - vec2(-2, 4), vec2(-2, -4), vec2(2, 0), - ); - return vec4f(vec2f(vertices[index]), 0, 1); -}`; - - const pipeline = t.device.createRenderPipeline({ - layout: 'auto', - vertex: { - module: t.device.createShaderModule({ code: vsShader }), - }, - fragment: { - module: t.device.createShaderModule({ code: fsShader }), - targets: [{ format }], - }, - primitive: { - topology: 'triangle-list', - }, - }); - - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - assert(bytesPerBlock !== undefined); - - const blocksPerRow = width / blockWidth; - const blocksPerColumn = height / blockHeight; - // 256 minimum arises from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 
1), 256); - const byteLength = bytesPerRow * blocksPerColumn; - const uintLength = byteLength / 4; - - const buffer = t.makeBufferWithContents( - inputData, - GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST - ); - - const bg = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { - binding: 0, - resource: { - buffer, - }, - }, - ], - }); - - const framebuffer = t.createTextureTracked({ - size: [width, height], - usage: - GPUTextureUsage.COPY_SRC | - GPUTextureUsage.COPY_DST | - GPUTextureUsage.RENDER_ATTACHMENT | - GPUTextureUsage.TEXTURE_BINDING, - format, - }); - - const encoder = t.device.createCommandEncoder(); - const pass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: framebuffer.createView(), - loadOp: 'clear', - storeOp: 'store', - }, - ], - }); - pass.setPipeline(pipeline); - pass.setBindGroup(0, bg); - pass.draw(3); - pass.end(); - t.queue.submit([encoder.finish()]); - - const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); - const readback = await t.readGPUBufferRangeTyped(copyBuffer, { - srcByteOffset: 0, - type: Uint32Array, - typedLength: uintLength, - method: 'copy', - }); - const data: Uint32Array = readback.data; - - t.expectOK(checker(data)); -}