From ab7820468064213c9220e07cbfbc72e87b8c1e17 Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Tue, 10 Dec 2024 15:46:59 -0500 Subject: [PATCH 1/2] Execution tests for subgroupMin and subgroupMax --- src/webgpu/listing_meta.json | 5 + .../call/builtin/subgroupMinMax.spec.ts | 641 ++++++++++++++++++ 2 files changed, 646 insertions(+) create mode 100644 src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index b764febcc02e..5d8f5ba0c256 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -1577,6 +1577,11 @@ "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMinMax:compute,all_active:*": { "subcaseMS": 6123.068 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMinMax:compute,split:*": { "subcaseMS": 4848.217 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMinMax:data_types:*": { "subcaseMS": 579.073 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMinMax:fp_accuracy:*": { "subcaseMS": 71390.771 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMinMax:fragment:*": { "subcaseMS": 6858.504 }, "webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 }, "webgpu:shader,execution,expression,call,builtin,subgroupMul:data_types:*": { "subcaseMS": 11861.865 }, "webgpu:shader,execution,expression,call,builtin,subgroupMul:fp_accuracy:*": { "subcaseMS": 35606.717 }, diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts new file mode 100644 index 000000000000..68e33ab2eb08 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts @@ -0,0 +1,641 @@ +export const description = ` +Execution tests for subgroupMin and subgroupMax. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { assert, iterRange } from '../../../../../../common/util/util.js'; +import { kValue } from '../../../../../util/constants.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, +} from '../../../../../util/conversion.js'; +import { FP, FPInterval } from '../../../../../util/floating_point.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, + generateTypedInputs, + getUintsPerFramebuffer, + kFramebufferSizes, + runFragmentTest, + SubgroupTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +type Op = 'subgroupMin' | 'subgroupMax'; + +const kOps: Op[] = ['subgroupMin', 'subgroupMax']; + +/** + * Returns an identity value for the given operation and type. + * + * This function should use positive or negative infinity for min and max + * identities respectively, but implementations may assume infinities are not + * present so max value is used instead. + * @param op Min or max + * @param type The type (f16 or f32) + */ +function identity(op: Op, type: string): number { + assert(type === 'f16' || type === 'f32'); + if (op === 'subgroupMin') { + return type === 'f16' ? kValue.f16.positive.max : kValue.f32.positive.max; + } else { + return type === 'f16' ? kValue.f16.negative.min : kValue.f32.negative.min; + } +} + +/** + * Returns the interval generator for the given operation and type. + * + * @param op Min or max + * @param type The type (f16 or f32) + */ +function interval( + op: Op, + type: string +): (x: number | FPInterval, y: number | FPInterval) => FPInterval { + assert(type === 'f16' || type === 'f32'); + if (op === 'subgroupMin') { + return type === 'f16' ? FP.f16.minInterval : FP.f32.minInterval; + } else { + return type === 'f16' ? FP.f16.maxInterval : FP.f32.maxInterval; + } +} + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point addition. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. +These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('op', kOps) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + t.params.op, + t.params.type, + identity(t.params.op, t.params.type), + interval(t.params.op, t.params.type) + ); + }); + +/** + * Checks the results of subgroupMin and subgroupMax for allowed data types. + * + * The shader performs a subgroup operation and equivalent single invocation + * operation and the results are compared. Since the subgroup operation is a + * reduction all invocations should have the same expected result. + * @param metadata The expected reduction results + * @param output The subgroup operation outputs + * @param type The data type + */ +function checkDataTypes(metadata: Uint32Array, output: Uint32Array, type: Type): Error | undefined { + if (type.requiresF16() && !(type instanceof VectorType)) { + const expected = metadata[0]; + const expectF16 = expected & 0xffff; + for (let i = 0; i < 4; i++) { + const index = Math.floor(i / 2); + const shift = i % 2 === 1; + let res = output[index]; + if (shift) { + res >>= 16; + } + res &= 0xffff; + + if (res !== expectF16) { + return new Error(`Invocation ${i}: incorrect results +- expected: ${expectF16.toString(16)} +- got: ${res.toString(16)}`); + } + } + } else { + let uints = 1; + if (type instanceof VectorType) { + uints = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uints = Math.floor(uints / 2); + } + } + for (let i = 0; i < 4; i++) { + for (let j = 0; j < uints; j++) { + const expect = metadata[j]; + const res = output[i * uints + j]; + if (res !== expect) { + return new Error(`${uints * i + j}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + } + + return undefined; +} + +g.test('data_types') + .desc('Test allowed data types') + .params(u => + u + .combine('op', kOps) + .combine('type', keysOf(kDataTypes)) + .beginSubcases() + .combine('idx1', [0, 1, 2, 3] as const) + .combine('idx2', [0, 1, 2, 3] as const) + .combine('idx1Id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const wgSize = [4, 1, 1]; + const type = kDataTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} + +@group(0) @binding(0) +var input : array<${type.toString()}>; + +@group(0) @binding(1) +var output : array<${type.toString()}>; + +@group(0) @binding(2) +var metadata : array<${type.toString()}>; + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + let value = select(input[${t.params.idx2}], input[${t.params.idx1}], id == ${t.params.idx1Id}); + output[id] = ${t.params.op}(value); + + if (id == 0) { + metadata[0] = ${t.params.op === 'subgroupMin' ? 'min' : 'max'}(input[${t.params.idx1}], input[${ + t.params.idx2 + }]); + } +}`; + + const inputData = generateTypedInputs(type); + let uintsPerOutput = 1; + if (type instanceof VectorType) { + uintsPerOutput = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uintsPerOutput = Math.floor(uintsPerOutput / 2); + } + } + await runComputeTest( + t, + wgsl, + wgSize, + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, type); + } + ); + }); + +/** + * Returns a Uint32Array of randomized integers in the range [0, 2**30) + * + * @param seed The PRNG seed + * @param num The number of integers to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + return new Uint32Array([ + ...iterRange(num, x => { + return prng.uniformInt(1 << 30); + }), + ]); +} + +/** + * Checks results from compute shaders + * + * @param metadata An array of uint32s containing: + * * subgroup_invocation_id + * * generated unique subgroup id + * @param output An array of uint32s containing: + * * subgroup operation results + * * subgroup_size + * @param input An array of uint32s used as input data + * @param numInvs The number of invocations + * @param op The subgroup operation + * @param filter A functor for filtering active invocations + */ +function checkCompute( + metadata: Uint32Array, + output: Uint32Array, + input: Uint32Array, + numInvs: number, + op: Op, + filter: (id: number, size: number) => boolean +): Error | undefined { + const identity = op === 'subgroupMin' ? 0x7fffffff : 0; + const func = op === 'subgroupMin' ? Math.min : Math.max; + const expected = new Map(); + for (let i = 0; i < numInvs; i++) { + const id = metadata[i]; + const subgroup_id = metadata[numInvs + i]; + const size = output[numInvs + i]; + if (!filter(id, size)) { + continue; + } + + let e = expected.get(subgroup_id) ?? identity; + e = func(e, input[i]); + expected.set(subgroup_id, e); + } + + for (let i = 0; i < numInvs; i++) { + const id = metadata[i]; + const subgroup_id = metadata[numInvs + i]; + const size = output[numInvs + i]; + if (!filter(id, size)) { + continue; + } + + const res = output[i]; + const e = expected.get(subgroup_id) ?? identity; + if (res !== e) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${e} +- got: ${res}`); + } + } + + return undefined; +} + +const kNumRandomCases = 15; + +g.test('compute,all_active') + .desc( + 'Test subgroupMin/Max in compute shader with all active invocations and varied workgroup sizes' + ) + .params(u => + u + .combine('op', kOps) + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumRandomCases, x => x)] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +struct Output { + res : array, + size : array +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.id[lid] = id; + metadata.subgroup_id[lid] = subgroupBroadcastFirst(lid + 1); // avoid 0 + + output.size[lid] = subgroupSize; + output.res[lid] = ${t.params.op}(input[lid]); +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkCompute( + metadata, + output, + inputData, + wgThreads, + t.params.op, + (id: number, size: number) => { + return true; + } + ); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocations participate') + .params(u => + u + .combine('op', kOps) + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumRandomCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +diagnostic(off, subgroup_uniformity); + +@group(0) @binding(0) +var input : array; + +struct Output { + res : array, + size : array +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.id[lid] = id; + metadata.subgroup_id[lid] = subgroupBroadcastFirst(lid + 1); // avoid 0 + + output.size[lid] = subgroupSize; + if ${testcase.cond} { + output.res[lid] = ${t.params.op}(input[lid]); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkCompute(metadata, output, inputData, wgThreads, t.params.op, testcase.filter); + } + ); + }); + +/** + * Checks min/max ops results from a fragment shader. + * + * Avoids subgroups in last row or column to skip potential helper invocations. + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param op The subgroup operation + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragment( + data: Uint32Array, + input: Uint32Array, + op: Op, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); + + // Determine if the subgroup should be included in the checks. + const inBounds = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let ok = inBounds.get(subgroup_id) ?? true; + ok = ok && row !== height - 1 && col !== width - 1; + inBounds.set(subgroup_id, ok); + } + } + + let anyInBounds = false; + for (const [_, value] of inBounds) { + const ok = Boolean(value); + anyInBounds = anyInBounds || ok; + } + if (!anyInBounds) { + // This variant would not reliably test behavior. + return undefined; + } + + const identity = op === 'subgroupMin' ? 0x7fffffff : 0; + + // Iteration skips subgroups in the last row or column to avoid helper + // invocations because it is not guaranteed whether or not they participate + // in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + const subgroupInBounds = inBounds.get(subgroup_id) ?? true; + if (!subgroupInBounds) { + continue; + } + + const func = op === 'subgroupMin' ? Math.min : Math.max; + let v = expected.get(subgroup_id) ?? identity; + v = func(v, input[row * width + col]); + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const subgroupInBounds = inBounds.get(subgroup_id) ?? true; + if (!subgroupInBounds) { + continue; + } + + const expected_v = expected.get(subgroup_id) ?? identity; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment') + .desc('Test subgroupMin/Max in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .combine('op', kOps) + .beginSubcases() + .combine('case', [...iterRange(kNumRandomCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + + interface SubgroupProperties extends GPUAdapterInfo { + subgroupMinSize: number; + } + const { subgroupMinSize } = t.device.adapterInfo as SubgroupProperties; + const innerTexels = (t.params.size[0] - 1) * (t.params.size[1] - 1); + t.skipIf(innerTexels < subgroupMinSize, 'Too few texels to be reliable'); + + const inputData = generateInputData(t.params.case, numInputs); + + const identity = t.params.op === 'subgroupMin' ? 0x7fffffff : 0; + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + let subgroup_id = subgroupBroadcastFirst(linear + 1); + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(${identity}, inputs[linear], in_range); + + let res = ${t.params.op}(input); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragment( + data, + inputData, + t.params.op, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); From 23b6fe658d3ecd4dace461288b81ee0431beb25e Mon Sep 17 00:00:00 2001 From: Alan Baker Date: Tue, 10 Dec 2024 23:15:31 -0500 Subject: [PATCH 2/2] remove subgroups_f16 --- .../execution/expression/call/builtin/subgroupMinMax.spec.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts index 68e33ab2eb08..f070632488c0 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMinMax.spec.ts @@ -102,7 +102,6 @@ and limit the number of permutations needed to calculate the final result.` const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; if (t.params.type === 'f16') { features.push('shader-f16'); - features.push('subgroups-f16' as GPUFeatureName); } t.selectDeviceOrSkipTestCase(features); }) @@ -186,7 +185,6 @@ g.test('data_types') const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; const type = kDataTypes[t.params.type]; if (type.requiresF16()) { - features.push('subgroups-f16' as GPUFeatureName); features.push('shader-f16' as GPUFeatureName); } t.selectDeviceOrSkipTestCase(features); @@ -196,7 +194,7 @@ g.test('data_types') const type = kDataTypes[t.params.type]; let enables = `enable subgroups;\n`; if (type.requiresF16()) { - enables += `enable f16;\nenable subgroups_f16;`; + enables += `enable f16;`; } const wgsl = ` ${enables}