diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index a45fa21e1ef8..81f2449c9e7c 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -1575,6 +1575,13 @@ "webgpu:shader,execution,expression,call,builtin,subgroupMul:data_types:*": { "subcaseMS": 11861.865 }, "webgpu:shader,execution,expression,call,builtin,subgroupMul:fp_accuracy:*": { "subcaseMS": 35606.717 }, "webgpu:shader,execution,expression,call,builtin,subgroupMul:fragment:*": { "subcaseMS": 0.263 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:compute,all_active:*": { "subcaseMS": 39.191 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:compute,split:*": { "subcaseMS": 3074.451 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:data_types:*": { "subcaseMS": 5767.334 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:fragment:*": { "subcaseMS": 49.537 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:shuffle,id:*": { "subcaseMS": 924.078 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:shuffleUpDown,delta:*": { "subcaseMS": 81.870 }, + "webgpu:shader,execution,expression,call,builtin,subgroupShuffle:shuffleXor,mask:*": { "subcaseMS": 62.127 }, "webgpu:shader,execution,expression,call,builtin,tan:abstract_float:*": { "subcaseMS": 17043.428 }, "webgpu:shader,execution,expression,call,builtin,tan:f16:*": { "subcaseMS": 116.157 }, "webgpu:shader,execution,expression,call,builtin,tan:f32:*": { "subcaseMS": 13.532 }, diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts index 691bc5e8a30e..521ae2925354 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/quadBroadcast.spec.ts @@ -9,16 +9,12 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; -import { assert, unreachable } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; -import { kBit } from '../../../../../util/constants.js'; +import { assert } from '../../../../../../common/util/util.js'; import { kConcreteNumericScalarsAndVectors, Type, VectorType, - scalarTypeOf, } from '../../../../../util/conversion.js'; -import { align } from '../../../../../util/math.js'; import { kWGSizes, @@ -28,120 +24,14 @@ import { SubgroupTest, kFramebufferSizes, runFragmentTest, + generateTypedInputs, + getUintsPerFramebuffer, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); const kTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); -/** - * Generates scalar values for type - * - * Generates 4 32-bit values whose bit patterns represent - * interesting values of the data type. - * @param type The data type - */ -function generateScalarValues(type: Type): number[] { - const scalarTy = scalarTypeOf(type); - switch (scalarTy) { - case Type.u32: - return [kBit.u32.min, kBit.u32.max, 1111, 2222]; - case Type.i32: - return [ - kBit.i32.positive.min, - kBit.i32.positive.max, - kBit.i32.negative.min, - 0xffffffff, // -1 - ]; - case Type.f32: - return [ - kBit.f32.positive.zero, - kBit.f32.positive.nearest_max, - kBit.f32.negative.nearest_min, - 0xbf800000, // -1 - ]; - case Type.f16: - return [ - kBit.f16.positive.zero, - kBit.f16.positive.nearest_max, - kBit.f16.negative.nearest_min, - 0xbc00, // -1 - ]; - default: - unreachable(`Unsupported type: ${type.toString()}`); - } - return [0, 0, 0, 0]; -} - -/** - * Generates input bit patterns for the input type - * - * Generates 4 values of type in a Uint32Array. - * 16-bit types are appropriately packed. - * @param type The data type - */ -function generateTypedInputs(type: Type): Uint32Array { - const scalarValues = generateScalarValues(type); - let elements = 1; - if (type instanceof VectorType) { - elements = type.width; - } - if (type.requiresF16()) { - switch (elements) { - case 1: - return new Uint32Array([ - scalarValues[0] | (scalarValues[1] << 16), - scalarValues[2] | (scalarValues[3] << 16), - ]); - case 2: - return new Uint32Array([ - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[3] | (scalarValues[3] << 16), - ]); - case 3: - return new Uint32Array([ - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[0] | (kDataSentinel << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[1] | (kDataSentinel << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[2] | (kDataSentinel << 16), - scalarValues[3] | (scalarValues[3] << 16), - scalarValues[3] | (kDataSentinel << 16), - ]); - case 4: - return new Uint32Array([ - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[3] | (scalarValues[3] << 16), - scalarValues[3] | (scalarValues[3] << 16), - ]); - default: - unreachable(`Unsupported type: ${type.toString()}`); - } - return new Uint32Array([0]); - } else { - const bound = elements === 3 ? 4 : elements; - const values: number[] = []; - for (let i = 0; i < 4; i++) { - for (let j = 0; j < bound; j++) { - if (j < elements) { - values.push(scalarValues[i]); - } else { - values.push(kDataSentinel); - } - } - } - return new Uint32Array(values); - } -} - /** * Checks results from data types test * @@ -528,12 +418,7 @@ function checkFragment( ); } - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); const coordToIndex = (row: number, col: number) => { return uintsPerRow * row + col * uintsPerTexel; diff --git a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts index 49489df0085c..0985b331b903 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/quadSwap.spec.ts @@ -10,15 +10,11 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; import { assert, unreachable } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; -import { kBit } from '../../../../../util/constants.js'; import { kConcreteNumericScalarsAndVectors, Type, VectorType, - scalarTypeOf, } from '../../../../../util/conversion.js'; -import { align } from '../../../../../util/math.js'; import { kWGSizes, @@ -28,6 +24,8 @@ import { SubgroupTest, kFramebufferSizes, runFragmentTest, + generateTypedInputs, + getUintsPerFramebuffer, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); @@ -38,114 +36,6 @@ type SwapOp = 'quadSwapX' | 'quadSwapY' | 'quadSwapDiagonal'; const kOps: SwapOp[] = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal']; -/** - * Generates scalar values for type - * - * Generates 4 32-bit values whose bit patterns represent - * interesting values of the data type. - * @param type The data type - */ -function generateScalarValues(type: Type): number[] { - const scalarTy = scalarTypeOf(type); - switch (scalarTy) { - case Type.u32: - return [kBit.u32.min, kBit.u32.max, 1111, 2222]; - case Type.i32: - return [ - kBit.i32.positive.min, - kBit.i32.positive.max, - kBit.i32.negative.min, - 0xffffffff, // -1 - ]; - case Type.f32: - return [ - kBit.f32.positive.zero, - kBit.f32.positive.nearest_max, - kBit.f32.negative.nearest_min, - 0xbf800000, // -1 - ]; - case Type.f16: - return [ - kBit.f16.positive.zero, - kBit.f16.positive.nearest_max, - kBit.f16.negative.nearest_min, - 0xbc00, // -1 - ]; - default: - unreachable(`Unsupported type: ${type.toString()}`); - } - return [0, 0, 0, 0]; -} - -/** - * Generates input bit patterns for the input type - * - * Generates 4 values of type in a Uint32Array. - * 16-bit types are appropriately packed. - * @param type The data type - */ -function generateTypedInputs(type: Type): Uint32Array { - const scalarValues = generateScalarValues(type); - let elements = 1; - if (type instanceof VectorType) { - elements = type.width; - } - if (type.requiresF16()) { - switch (elements) { - case 1: - return new Uint32Array([ - scalarValues[0] | (scalarValues[1] << 16), - scalarValues[2] | (scalarValues[3] << 16), - ]); - case 2: - return new Uint32Array([ - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[3] | (scalarValues[3] << 16), - ]); - case 3: - return new Uint32Array([ - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[0] | (kDataSentinel << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[1] | (kDataSentinel << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[2] | (kDataSentinel << 16), - scalarValues[3] | (scalarValues[3] << 16), - scalarValues[3] | (kDataSentinel << 16), - ]); - case 4: - return new Uint32Array([ - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[0] | (scalarValues[0] << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[1] | (scalarValues[1] << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[2] | (scalarValues[2] << 16), - scalarValues[3] | (scalarValues[3] << 16), - scalarValues[3] | (scalarValues[3] << 16), - ]); - default: - unreachable(`Unsupported type: ${type.toString()}`); - } - return new Uint32Array([0]); - } else { - const bound = elements === 3 ? 4 : elements; - const values: number[] = []; - for (let i = 0; i < 4; i++) { - for (let j = 0; j < bound; j++) { - if (j < elements) { - values.push(scalarValues[i]); - } else { - values.push(kDataSentinel); - } - } - } - return new Uint32Array(values); - } -} - /** * Returns the swapped quad invocation id for the given op * @@ -544,12 +434,7 @@ function checkFragment( ); } - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); const coordToIndex = (row: number, col: number) => { return uintsPerRow * row + col * uintsPerTexel; diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts index 2b762053028e..5b8515c05762 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -10,8 +10,6 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf } from '../../../../../../common/util/data_tables.js'; import { iterRange } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; -import { align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; import { @@ -22,6 +20,7 @@ import { kFramebufferSizes, runComputeTest, runFragmentTest, + getUintsPerFramebuffer, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); @@ -279,12 +278,7 @@ function checkFragmentAll( width: number, height: number ): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); // Iteration skips last row and column to avoid helper invocations because it is not // guaranteed whether or not they participate in the subgroup operation. diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts index 5254ade1739a..cad48235eceb 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -10,8 +10,6 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf } from '../../../../../../common/util/data_tables.js'; import { iterRange } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; -import { align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; import { @@ -22,6 +20,7 @@ import { runComputeTest, runFragmentTest, kFramebufferSizes, + getUintsPerFramebuffer, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); @@ -279,12 +278,7 @@ function checkFragmentAny( width: number, height: number ): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); // Iteration skips last row and column to avoid helper invocations because it is not // guaranteed whether or not they participate in the subgroup operation. diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts index e95c486c3e59..dc46310937a0 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBitwise.spec.ts @@ -10,7 +10,6 @@ local_invocation_index. Tests should avoid assuming there is. import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; import { iterRange } from '../../../../../../common/util/util.js'; -import { kTextureFormatInfo } from '../../../../../format_info.js'; import { kConcreteSignedIntegerScalarsAndVectors, kConcreteUnsignedIntegerScalarsAndVectors, @@ -18,7 +17,6 @@ import { Type, VectorType, } from '../../../../../util/conversion.js'; -import { align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; import { @@ -29,6 +27,7 @@ import { runComputeTest, runFragmentTest, kFramebufferSizes, + getUintsPerFramebuffer, } from './subgroup_util.js'; export const g = makeTestGroup(SubgroupTest); @@ -452,12 +451,7 @@ function checkBitwiseFragment( width: number, height: number ): Error | undefined { - const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; - const blocksPerRow = width / blockWidth; - // 256 minimum comes from image copy requirements. - const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); - const uintsPerRow = bytesPerRow / 4; - const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); // Iteration skips last row and column to avoid helper invocations because it is not // guaranteed whether or not they participate in the subgroup operation. diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts new file mode 100644 index 000000000000..3e0e17eefc19 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupShuffle.spec.ts @@ -0,0 +1,938 @@ +export const description = ` +Execution tests for subgroupShuffle, subgroupShuffleUp, subgroupShuffleDown, and subgroupShuffleXor. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { assert, iterRange } from '../../../../../../common/util/util.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, +} from '../../../../../util/conversion.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + runComputeTest, + runFragmentTest, + kFramebufferSizes, + generateTypedInputs, + getUintsPerFramebuffer, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +type ShuffleOp = + | 'subgroupShuffle' + | 'subgroupShuffleUp' + | 'subgroupShuffleDown' + | 'subgroupShuffleXor'; + +const kUpDownOps: ShuffleOp[] = ['subgroupShuffleUp', 'subgroupShuffleDown']; + +const kOps: ShuffleOp[] = ['subgroupShuffle', 'subgroupShuffleXor', ...kUpDownOps]; + +const kNumCases = 16; + +const kTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +// This size is selected to guarantee a single subgroup. +const kSize = 4; +const kShuffleCases = { + no_shuffle: { + id: `id`, + expected: (input: Uint32Array, id: number) => { + return input[id]; + }, + }, + broadcast: { + id: `input[2]`, + expected: (input: Uint32Array, id: number) => { + return input[2]; + }, + }, + rotate_1_up: { + id: `select(id - 1, ${kSize} - 1, id == 0)`, + expected: (input: Uint32Array, id: number) => { + const idx = id === 0 ? kSize - 1 : id - 1; + return input[idx]; + }, + }, + rotate_2_down: { + id: `(id + 2) % ${kSize}`, + expected: (input: Uint32Array, id: number) => { + const idx = (id + 2) % kSize; + return input[idx]; + }, + }, + reversed: { + id: `${kSize} - id - 1`, + expected: (input: Uint32Array, id: number) => { + return input[kSize - id - 1]; + }, + }, + clamped: { + id: `clamp(id + 2, 1, 3)`, + expected: (input: Uint32Array, id: number) => { + const idx = Math.max(Math.min(id + 2, 3), 1); + return input[idx]; + }, + }, +}; + +function checkShuffleId( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + expected: (input: Uint32Array, id: number) => number +): Error | undefined { + for (let i = 0; i < kSize; i++) { + const expect = expected(input, i); + const res = output[i]; + if (res !== expect) { + return new Error(`Invocation ${i}: incorrect results +- expected: ${expect} +- got: ${res}`); + } + } + + return undefined; +} + +g.test('shuffle,id') + .desc(`Tests various ways to shuffle invocations`) + .params(u => u.combine('case', keysOf(kShuffleCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kShuffleCases[t.params.case]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var output : array; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${kSize}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = subgroupShuffle(input[id], ${testcase.id}); +}`; + + const inputData = new Uint32Array([...iterRange(kSize, x => x)]); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [kSize, 1, 1], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkShuffleId(metadata, output, inputData, testcase.expected); + } + ); + }); + +interface UpDownCase { + delta: string; + expected: (input: Uint32Array, id: number, op: ShuffleOp) => number | undefined; + diagnostic: string; +} + +// Delta must be dynamically uniform +const kUpDownCases: Record = { + no_shuffle: { + delta: `0`, + expected: (input: Uint32Array, id: number, op: ShuffleOp) => { + return input[id]; + }, + diagnostic: `error`, + }, + dynamic_1: { + delta: `input[1]`, + expected: (input: Uint32Array, id: number, op: ShuffleOp) => { + let idx = id; + if (op === 'subgroupShuffleUp') { + idx = id - 1; + if (idx < 0) { + return undefined; + } + return input[idx]; + } else { + idx = id + 1; + if (idx > 3) { + return undefined; + } + } + return input[idx]; + }, + diagnostic: `off`, + }, + override_2: { + delta: `override_idx`, + expected: (input: Uint32Array, id: number, op: ShuffleOp) => { + let idx = id; + if (op === 'subgroupShuffleUp') { + idx = id - 2; + if (idx < 0) { + return undefined; + } + return input[idx]; + } else { + idx = id + 2; + if (idx > 3) { + return undefined; + } + } + return input[idx]; + }, + diagnostic: `error`, + }, +}; + +function checkShuffleUpDownDelta( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + op: ShuffleOp, + expected: (input: Uint32Array, id: number, op: ShuffleOp) => number | undefined +): Error | undefined { + assert(op === 'subgroupShuffleUp' || op === 'subgroupShuffleDown'); + + for (let i = 0; i < kSize; i++) { + const expect = expected(input, i, op); + const res = output[i]; + if (expect && expect !== res) { + return new Error(`Invocation ${i}: incorrect results +- expected: ${expect} +- got: ${res}`); + } + } + + return undefined; +} + +g.test('shuffleUpDown,delta') + .desc(`Test ShuffleUp and ShuffleDown deltas`) + .params(u => u.combine('op', kUpDownOps).combine('case', keysOf(kUpDownCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kUpDownCases[t.params.case]; + + const wgsl = ` +enable subgroups; +diagnostic(${testcase.diagnostic}, subgroup_uniformity); + +override override_idx = 2u; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var output : array; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${kSize}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = ${t.params.op}(input[id], ${testcase.delta}); +}`; + + const inputData = new Uint32Array([...iterRange(kSize, x => x)]); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [kSize, 1, 1], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkShuffleUpDownDelta(metadata, output, inputData, t.params.op, testcase.expected); + } + ); + }); + +const kMaskCases = { + no_shuffle: { + mask: `0`, + value: 0, + diagnostic: `error`, + }, + dynamic_1: { + mask: `input[1]`, + value: 1, + diagnostic: `off`, + }, + override_2: { + mask: `override_idx`, + value: 2, + diagnostic: `error`, + }, + expr_3: { + mask: `input[1] + input[2]`, + value: 3, + diagnostic: `off`, + }, +}; + +function checkShuffleMask( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + mask: number +): Error | undefined { + assert(mask === Math.trunc(mask)); + assert(0 <= mask && mask <= 3); + + for (let i = 0; i < kSize; i++) { + const expect = input[i ^ mask]; + const res = output[i]; + if (res !== expect) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + + return undefined; +} + +g.test('shuffleXor,mask') + .desc(`Test ShuffleXor masks`) + .params(u => u.combine('case', keysOf(kMaskCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kMaskCases[t.params.case]; + + const wgsl = ` +enable subgroups; +diagnostic(${testcase.diagnostic}, subgroup_uniformity); + +override override_idx = 2u; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var output : array; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${kSize}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = subgroupShuffleXor(input[id], ${testcase.mask}); +}`; + + const inputData = new Uint32Array([...iterRange(kSize, x => x)]); + const uintsPerOutput = 1; + await runComputeTest( + t, + wgsl, + [kSize, 1, 1], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkShuffleMask(metadata, output, inputData, testcase.value); + } + ); + }); + +/** + * Generate randomized inputs for testing shuffles + * + * 1/4 of the cases will be bounded to values in the range [0, 8) + * 1/4 of the cases will be bounded to values in the range [0, 16) + * 1/4 of the cases will be bounded to values in the range [0, 32) + * 1/4 of the cases will be bounded to values in the range [0, 128) + * @param seed The seed for the PRNG + * @param numInputs The number of inputs to generate + */ +function generateInputs(seed: number, numInputs: number): Uint32Array { + const prng = new PRNG(seed); + + let bound = 128; + if (seed < Math.floor(kNumCases / 4)) { + bound = 8; + } else if (seed < Math.floor(kNumCases / 2)) { + bound = 16; + } else if (seed < 3 * Math.floor(kNumCases / 4)) { + bound = 32; + } + return new Uint32Array([ + ...iterRange(numInputs, x => { + return prng.uniformInt(bound); + }), + ]); +} + +/** + * Returns the subgroup invocation id of requested shuffle + * + * @param id The invocation's subgroup_invocation_id + * @param value The shuffle value + * @param size The subgroup size + * @param op The shuffle operation + */ +function getShuffledId(id: number, value: number, op: ShuffleOp): number { + switch (op) { + case 'subgroupShuffle': + return value; + case 'subgroupShuffleUp': + return id - value; + case 'subgroupShuffleDown': + return id + value; + case 'subgroupShuffleXor': + return id ^ value; + } + assert(false); + return 0; +} + +/** + * Checks results of compute passes + * + * @param metadata An array of uint32 values + * * first half is subgroup_invocation_id + * * second half is unique generated subgroup id + * @param output An array of uint32 values + * * first half is shuffle results + * * second half is subgroup_size + * @param input An array of uint32 input values + * @param op The shuffle + * @param numInvs The number of invocations + * @param filter A predicate to filter invocations + */ +function checkCompute( + metadata: Uint32Array, + output: Uint32Array, + input: Uint32Array, + op: ShuffleOp, + numInvs: number, + filter: (id: number, size: number) => boolean +): Error | undefined { + const mapping = new Map(); + const empty = [...iterRange(128, x => -1)]; + for (let i = 0; i < numInvs; i++) { + const id = metadata[i]; + const subgroup_id = metadata[i + numInvs]; + const v = mapping.get(subgroup_id) ?? Array.from(empty); + v[id] = i; + mapping.set(subgroup_id, v); + } + + for (let i = 0; i < numInvs; i++) { + const id = metadata[i]; + const subgroup_id = metadata[i + numInvs]; + + const subgroupMapping = mapping.get(subgroup_id) ?? empty; + + const res = output[i]; + const size = output[i + numInvs]; + + if (!filter(id, size)) { + continue; + } + + let inputValue = input[i]; + if (op !== 'subgroupShuffle') { + inputValue = input[subgroupMapping[0]]; + } + + const index = getShuffledId(id, inputValue, op); + if (index < 0 || index >= 128 || subgroupMapping[index] === -1) { + continue; + } + + if (!filter(index, size)) { + continue; + } + + if (res !== subgroupMapping[index]) { + return new Error(`Invocation ${i}: unexpected result +- expected: ${subgroupMapping[index]} +- got: ${res}`); + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test randomized inputs across many workgroup sizes`) + .params(u => + u + .combine('wgSize', kWGSizes) + .combine('op', kOps) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + let selectValue = `input[lid]`; + if (t.params.op !== 'subgroupShuffle') { + // delta and mask operands must be subgroup uniform + selectValue = `subgroupBroadcastFirst(input[lid])`; + } + + const wgsl = ` +enable subgroups; +diagnostic(off, subgroup_uniformity); + +@group(0) @binding(0) +var input : array; + +struct Output { + res : array, + size : array +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.id[lid] = id; + metadata.subgroup_id[lid] = subgroupBroadcastFirst(lid + 1); // avoid 0 + + output.size[lid] = subgroupSize; + output.res[lid] = ${t.params.op}(lid, ${selectValue}); +}`; + + const inputArray = generateInputs(t.params.case, wgThreads); + const numUintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUintsPerOutput, + inputArray, + (metadata: Uint32Array, output: Uint32Array) => { + return checkCompute( + metadata, + output, + inputArray, + t.params.op, + wgThreads, + (id: number, size: number) => { + return true; + } + ); + } + ); + }); + +g.test('compute,split') + .desc(`Test randomized inputs across many workgroup sizes`) + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .combine('op', kOps) + .beginSubcases() + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + let value = `input[id]`; + if (t.params.op !== 'subgroupShuffle') { + value = `subgroupBroadcastFirst(input[id])`; + } + + const wgsl = ` +enable subgroups; +diagnostic(off, subgroup_uniformity); + +@group(0) @binding(0) +var input : array; + +struct Output { + res : array, + size : array +} + +@group(0) @binding(1) +var output : Output; + +struct Metadata { + id : array, + subgroup_id : array +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + _ = input[0]; + metadata.id[lid] = id; + metadata.subgroup_id[lid] = subgroupBroadcastFirst(lid + 1); // avoid 0 + + output.size[lid] = subgroupSize; + let value = ${value}; + if ${testcase.cond} { + output.res[lid] = ${t.params.op}(lid, value); + } else { + return; + } +}`; + + const inputArray = new Uint32Array([...iterRange(128, x => x)]); + const numUintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUintsPerOutput, + inputArray, + (metadata: Uint32Array, output: Uint32Array) => { + return checkCompute(metadata, output, inputArray, t.params.op, wgThreads, testcase.filter); + } + ); + }); + +/** + * Checks the results of the data types test + * + * The outputs for a given index are expected to match the input values + * for the given shuffle (op and id). + * @param metadata An unused parameter + * @param output The output data + * @param op The shuffle + * @param id The shuffle id/mask/delta parameter + * @param type The data type + */ +function checkDataTypes( + metadata: Uint32Array, // unused + output: Uint32Array, + input: Uint32Array, + op: ShuffleOp, + id: number, + type: Type +): Error | undefined { + if (type.requiresF16() && !(type instanceof VectorType)) { + for (let i = 0; i < 4; i++) { + const index = getShuffledId(i, id, op); + if (index < 0 || index >= 4) { + continue; + } + + const expectIdx = Math.floor(index / 2); + const expectShift = index % 2 === 1; + let expect = input[expectIdx]; + if (expectShift) { + expect >>= 16; + } + expect &= 0xffff; + + const resIdx = Math.floor(i / 2); + const resShift = i % 2 === 1; + let res = output[resIdx]; + if (resShift) { + res >>= 16; + } + res &= 0xffff; + + if (res !== expect) { + return new Error(`${i}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } else { + let uints = 1; + if (type instanceof VectorType) { + uints = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uints = Math.floor(uints / 2); + } + } + for (let i = 0; i < 4; i++) { + for (let j = 0; j < uints; j++) { + const index = getShuffledId(i, id, op); + if (index < 0 || index >= 4) { + continue; + } + + const expect = input[index * uints + j]; + const res = output[i * uints + j]; + if (res !== expect) { + return new Error(`${uints * i + j}: incorrect result +- expected: ${expect} +- got: ${res}`); + } + } + } + } + + return undefined; +} + +g.test('data_types') + .params(u => + u + .combine('op', kOps) + .combine('type', keysOf(kTypes)) + .beginSubcases() + .combine('id', [0, 1, 2, 3] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const wgSize = [4, 1, 1]; + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} + +@group(0) @binding(0) +var input : array<${type.toString()}>; + +@group(0) @binding(1) +var output : array<${type.toString()}>; + +@group(0) @binding(2) +var metadata : array; // unused + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(subgroup_invocation_id) id : u32, +) { + // Force usage + _ = metadata[0]; + + output[id] = ${t.params.op}(input[id], ${t.params.id}); +}`; + + const inputData = generateTypedInputs(type); + let uintsPerOutput = 1; + if (type instanceof VectorType) { + uintsPerOutput = type.width === 3 ? 4 : type.width; + if (type.requiresF16()) { + uintsPerOutput = Math.floor(uintsPerOutput / 2); + } + } + await runComputeTest( + t, + wgsl, + wgSize, + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkDataTypes(metadata, output, inputData, t.params.op, t.params.id, type); + } + ); + }); + +/** + * Check subgroup shuffles in fragment shaders + * + * @param data The framebuffer output + * * component 0 is the result + * * component 1 is the subgroup_invocation_id + * * component 2 is a unique generated subgroup_id + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + * @param shuffleId The value of the shuffle parameter (e.g. id/mask/delta) + * @param op The shuffle operation + */ +function checkFragment( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number, + shuffleId: number, + op: ShuffleOp +): Error | undefined { + if (width < 3 || height < 3) { + return new Error( + `Insufficient framebuffer size [${width}w x ${height}h]. Minimum is [3w x 3h].` + ); + } + + const { uintsPerRow, uintsPerTexel } = getUintsPerFramebuffer(format, width, height); + + const coordToIndex = (row: number, col: number) => { + return uintsPerRow * row + col * uintsPerTexel; + }; + + const mapping = new Map(); + const empty = [...iterRange(128, x => -1)]; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = coordToIndex(row, col); + + const id = data[offset + 1]; + const subgroup_id = data[offset + 2]; + + const v = mapping.get(subgroup_id) ?? Array.from(empty); + v[id] = col + row * width; + mapping.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = coordToIndex(row, col); + + const res = data[offset]; + const id = data[offset + 1]; + const subgroup_id = data[offset + 2]; + + const subgroupMapping = mapping.get(subgroup_id) ?? empty; + + const index = getShuffledId(id, shuffleId, op); + if (index < 0 || index >= 128 || subgroupMapping[index] === -1) { + continue; + } + + const shuffleLinear = subgroupMapping[index]; + const shuffleRow = Math.floor(shuffleLinear / width); + const shuffleCol = shuffleLinear % width; + if (shuffleRow === height - 1 || shuffleCol === width - 1) { + continue; + } + + if (res !== subgroupMapping[index]) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${subgroupMapping[index]} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment') + .desc(`Test shuffles in fragment shaders`) + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('op', kOps) + .combine('id', [0, 1, 2, 3] as const) + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + //t.skipIf(t.params.id !== 2); + //t.skipIf(t.params.op !== 'subgroupShuffleUp'); + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; // unused + +@fragment +fn main( + @builtin(position) pos : vec4f, + @builtin(subgroup_invocation_id) id : u32, +) -> @location(0) vec4u { + // Force usage + _ = inputs[0]; + + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + let subgroup_id = subgroupBroadcastFirst(linear + 1); + + // Filter out possible helper invocations. + let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + + return vec4u(${t.params.op}(linear, ${t.params.id}), id, subgroup_id, linear); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + new Uint32Array([0]), // unused, + (data: Uint32Array) => { + return checkFragment( + data, + t.params.format, + t.params.size[0], + t.params.size[1], + t.params.id, + t.params.op + ); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts index 9438c265d7df..2d97eade1904 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -1,7 +1,9 @@ -import { assert, iterRange } from '../../../../../../common/util/util.js'; +import { assert, iterRange, unreachable } from '../../../../../../common/util/util.js'; import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; import { kTextureFormatInfo } from '../../../../../format_info.js'; import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js'; +import { kBit } from '../../../../../util/constants.js'; +import { Type, VectorType, scalarTypeOf } from '../../../../../util/conversion.js'; import { FPInterval } from '../../../../../util/floating_point.js'; import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js'; import { PRNG } from '../../../../../util/prng.js'; @@ -438,6 +440,26 @@ export const kFramebufferSizes = [ [3, 3], ] as const; +/** + * Returns the number of uints per row and per texel in the framebuffer + * + * @param format The format + * @param width The width + * @param height The height + */ +export function getUintsPerFramebuffer(format: GPUTextureFormat, width: number, height: number) { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + // 256 minimum arises from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + return { uintsPerRow, uintsPerTexel }; +} + /** * Runs a subgroup builtin test for fragment shaders * @@ -553,3 +575,111 @@ fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { t.expectOK(checker(data)); } + +/** + * Generates scalar values for type + * + * Generates 4 32-bit values whose bit patterns represent + * interesting values of the data type. + * @param type The data type + */ +function generateScalarValues(type: Type): number[] { + const scalarTy = scalarTypeOf(type); + switch (scalarTy) { + case Type.u32: + return [kBit.u32.min, kBit.u32.max, 1111, 2222]; + case Type.i32: + return [ + kBit.i32.positive.min, + kBit.i32.positive.max, + kBit.i32.negative.min, + 0xffffffff, // -1 + ]; + case Type.f32: + return [ + kBit.f32.positive.zero, + kBit.f32.positive.nearest_max, + kBit.f32.negative.nearest_min, + 0xbf800000, // -1 + ]; + case Type.f16: + return [ + kBit.f16.positive.zero, + kBit.f16.positive.nearest_max, + kBit.f16.negative.nearest_min, + 0xbc00, // -1 + ]; + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return [0, 0, 0, 0]; +} + +/** + * Generates input bit patterns for the input type + * + * Generates 4 values of type in a Uint32Array. + * 16-bit types are appropriately packed. + * @param type The data type + */ +export function generateTypedInputs(type: Type): Uint32Array { + const scalarValues = generateScalarValues(type); + let elements = 1; + if (type instanceof VectorType) { + elements = type.width; + } + if (type.requiresF16()) { + switch (elements) { + case 1: + return new Uint32Array([ + scalarValues[0] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[3] << 16), + ]); + case 2: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + case 3: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (kDataSentinel << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (kDataSentinel << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (kDataSentinel << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (kDataSentinel << 16), + ]); + case 4: + return new Uint32Array([ + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[0] | (scalarValues[0] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[1] | (scalarValues[1] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[2] | (scalarValues[2] << 16), + scalarValues[3] | (scalarValues[3] << 16), + scalarValues[3] | (scalarValues[3] << 16), + ]); + default: + unreachable(`Unsupported type: ${type.toString()}`); + } + return new Uint32Array([0]); + } else { + const bound = elements === 3 ? 4 : elements; + const values: number[] = []; + for (let i = 0; i < 4; i++) { + for (let j = 0; j < bound; j++) { + if (j < elements) { + values.push(scalarValues[i]); + } else { + values.push(kDataSentinel); + } + } + } + return new Uint32Array(values); + } +}