Skip to content

Commit

Permalink
Execution tests for subgroupAny and subgroupAll (#3924)
Browse files Browse the repository at this point in the history
* Compute tests with all active invocation and partially active
  invocations
* Removed unimplemented dynamically uniform subgroupBroadcast test (due
  to const requirement)
alan-baker authored Aug 30, 2024
1 parent 3d24384 commit 9b30f7a
Showing 5 changed files with 543 additions and 6 deletions.
7 changes: 6 additions & 1 deletion src/webgpu/listing_meta.json
Original file line number Diff line number Diff line change
@@ -1529,13 +1529,18 @@
"webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 },
"webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 },
"webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 },
"webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 },
"webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 },
"webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment:*": { "subcaseMS": 0.172 },
"webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 },
"webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 },
"webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment:*": { "subcaseMS": 0.227 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 },
"webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 },
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
export const description = `
Execution tests for subgroupAll.
Note: There is a lack of portability for non-uniform execution so these tests
restrict themselves to uniform control flow.
Note: There is no guaranteed mapping between subgroup_invocation_id and
local_invocation_index. Tests should avoid assuming there is.
`;

import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
import { keysOf } from '../../../../../../common/util/data_tables.js';
import { iterRange } from '../../../../../../common/util/util.js';
import { PRNG } from '../../../../../util/prng.js';

import {
kWGSizes,
kPredicateCases,
SubgroupTest,
runComputeTest,
kDataSentinel,
} from './subgroup_util.js';

export const g = makeTestGroup(SubgroupTest);

const kNumCases = 15;

/**
* Generate input data for testing.
*
* Data is generated in the following categories:
* Seed 0 generates all 0 data
* Seed 1 generates all 1 data
* Seeds 2-9 generates all 1s except for a zero randomly once per 32 elements
* Seeds 10+ generate all random data
* @param seed The seed for the PRNG
* @param num The number of data items to generate
* @param addCounter If true, treats the first index as an atomic counter
*/
function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array {
const prng = new PRNG(seed);

const bound = Math.min(num, 32);
const index = prng.uniformInt(bound);

return new Uint32Array([
...iterRange(num, x => {
if (addCounter && x === 0) {
// Counter should start at 1 to avoid clear value.
return 1;
}

if (seed === 0) {
return 0;
} else if (seed === 1) {
return 1;
} else if (seed < 10) {
const bounded = (addCounter ? x + 1 : x) % bound;
return bounded === index ? 0 : 1;
}
return prng.uniformInt(2);
}),
]);
}

/**
* Checks the result of a subgroupAll operation
*
* Since subgroup size depends on the pipeline compile, we calculate the expected
* results after execution. The shader generates a subgroup id and records it for
* each invocation. The check first calculates the expected result for each subgroup
* and then compares to the actual result for each invocation. The filter functor
* ensures only the correct invocations contribute to the calculation.
* @param metadata An array of uints:
* * first half containing subgroup sizes (from builtin value)
* * second half subgroup invocation id
* @param output An array of uints containing:
* * first half is the outputs of subgroupAll
* * second half is a generated subgroup id
* @param numInvs Number of invocations executed
* @param input The input data (equal size to output)
* @param filter A functor to filter active invocations
*/
function checkAll(
metadata: Uint32Array, // unused
output: Uint32Array,
numInvs: number,
input: Uint32Array,
filter: (id: number, size: number) => boolean
): Error | undefined {
// First, generate expected results.
const expected = new Map<number, number>();
for (let inv = 0; inv < numInvs; inv++) {
const size = metadata[inv];
const id = metadata[inv + numInvs];
if (!filter(id, size)) {
continue;
}
const subgroup_id = output[numInvs + inv];
let v = expected.get(subgroup_id) ?? 1;
v &= input[inv];
expected.set(subgroup_id, v);
}

// Second, check against actual results.
for (let inv = 0; inv < numInvs; inv++) {
const size = metadata[inv];
const id = metadata[inv + numInvs];
const res = output[inv];
if (filter(id, size)) {
const subgroup_id = output[numInvs + inv];
const expected_v = expected.get(subgroup_id) ?? 0;
if (expected_v !== res) {
return new Error(`Invocation ${inv}:
- expected: ${expected_v}
- got: ${res}`);
}
} else {
if (res !== kDataSentinel) {
return new Error(`Invocation ${inv} unexpected write:
- subgroup invocation id: ${id}
- subgroup size: ${size}`);
}
}
}

return undefined;
}

g.test('compute,all_active')
.desc(`Test compute subgroupAll`)
.params(u =>
u
.combine('wgSize', kWGSizes)
.beginSubcases()
.combine('case', [...iterRange(kNumCases, x => x)])
)
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];

const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage> inputs : array<u32>;
@group(0) @binding(1)
var<storage, read_write> outputs : array<u32>;
struct Metadata {
subgroup_size: array<u32, ${wgThreads}>,
subgroup_invocation_id: array<u32, ${wgThreads}>,
}
@group(0) @binding(2)
var<storage, read_write> metadata : Metadata;
@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
fn main(
@builtin(local_invocation_index) lid : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(subgroup_size) subgroupSize : u32,
) {
metadata.subgroup_size[lid] = subgroupSize;
metadata.subgroup_invocation_id[lid] = id;
// Record a representative subgroup id.
outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid);
let res = select(0u, 1u, subgroupAll(bool(inputs[lid])));
outputs[lid] = res;
}`;

const includeCounter = false;
const inputData = generateInputData(t.params.case, wgThreads, includeCounter);

const uintsPerOutput = 2;
await runComputeTest(
t,
wgsl,
[t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
uintsPerOutput,
inputData,
(metadata: Uint32Array, output: Uint32Array) => {
return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => {
return true;
});
}
);
});

g.test('compute,split')
.desc('Test that only active invocation participate')
.params(u =>
u
.combine('predicate', keysOf(kPredicateCases))
.beginSubcases()
.combine('wgSize', kWGSizes)
.combine('case', [...iterRange(kNumCases, x => x)])
)
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const testcase = kPredicateCases[t.params.predicate];
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];

const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage> inputs : array<u32>;
@group(0) @binding(1)
var<storage, read_write> outputs : array<u32>;
struct Metadata {
subgroup_size : array<u32, ${wgThreads}>,
subgroup_invocation_id : array<u32, ${wgThreads}>,
}
@group(0) @binding(2)
var<storage, read_write> metadata : Metadata;
@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
fn main(
@builtin(local_invocation_index) lid : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(subgroup_size) subgroupSize : u32,
) {
metadata.subgroup_size[lid] = subgroupSize;
// Record subgroup invocation id for this invocation.
metadata.subgroup_invocation_id[lid] = id;
// Record a generated subgroup id.
outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid);
if ${testcase.cond} {
outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid])));
} else {
return;
}
}`;

const includeCounter = false;
const inputData = generateInputData(t.params.case, wgThreads, includeCounter);

const uintsPerOutput = 2;
await runComputeTest(
t,
wgsl,
[t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
uintsPerOutput,
inputData,
(metadata: Uint32Array, output: Uint32Array) => {
return checkAll(metadata, output, wgThreads, inputData, testcase.filter);
}
);
});

g.test('fragment').unimplemented();
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
export const description = `
Execution tests for subgroupAny.
Note: There is a lack of portability for non-uniform execution so these tests
restrict themselves to uniform control flow.
Note: There is no guaranteed mapping between subgroup_invocation_id and
local_invocation_index. Tests should avoid assuming there is.
`;

import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
import { keysOf } from '../../../../../../common/util/data_tables.js';
import { iterRange } from '../../../../../../common/util/util.js';
import { PRNG } from '../../../../../util/prng.js';

import {
kWGSizes,
kPredicateCases,
SubgroupTest,
runComputeTest,
kDataSentinel,
} from './subgroup_util.js';

export const g = makeTestGroup(SubgroupTest);

const kNumCases = 15;

/**
* Generate input data for testing.
*
* Data is generated in the following categories:
* Seed 0 generates all 0 data
* Seed 1 generates all 1 data
* Seeds 2-9 generates all 0s except for a one randomly once per 32 elements
* Seeds 10+ generate all random data
* @param seed The seed for the PRNG
* @param num The number of data items to generate
* @param addCounter If true, treats the first index as an atomic counter
*/
function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array {
const prng = new PRNG(seed);

const bound = Math.min(num, 32);
const index = prng.uniformInt(bound);

return new Uint32Array([
...iterRange(num, x => {
if (addCounter && x === 0) {
// Counter should start at 1 to avoid clear value.
return 1;
}

if (seed === 0) {
return 0;
} else if (seed === 1) {
return 1;
} else if (seed < 10) {
const bounded = (addCounter ? x + 1 : x) % bound;
return bounded === index ? 1 : 0;
}
return prng.uniformInt(2);
}),
]);
}

/**
* Checks the result of a subgroupAny operation
*
* Since subgroup size depends on the pipeline compile, we calculate the expected
* results after execution. The shader generates a subgroup id and records it for
* each invocation. The check first calculates the expected result for each subgroup
* and then compares to the actual result for each invocation. The filter functor
* ensures only the correct invocations contribute to the calculation.
* @param metadata An array of uints:
* * first half containing subgroup sizes (from builtin value)
* * second half subgroup invocation id
* @param output An array of uints containing:
* * first half is the outputs of subgroupAny
* * second half is a generated subgroup id
* @param numInvs Number of invocations executed
* @param input The input data (equal size to output)
* @param filter A functor to filter active invocations
*/
function checkAny(
metadata: Uint32Array, // unused
output: Uint32Array,
numInvs: number,
input: Uint32Array,
filter: (id: number, size: number) => boolean
): Error | undefined {
// First, generate expected results.
const expected = new Map<number, number>();
for (let inv = 0; inv < numInvs; inv++) {
const size = metadata[inv];
const id = metadata[inv + numInvs];
if (!filter(id, size)) {
continue;
}
const subgroup_id = output[numInvs + inv];
let v = expected.get(subgroup_id) ?? 0;
v |= input[inv];
expected.set(subgroup_id, v);
}

// Second, check against actual results.
for (let inv = 0; inv < numInvs; inv++) {
const size = metadata[inv];
const id = metadata[inv + numInvs];
const res = output[inv];
if (filter(id, size)) {
const subgroup_id = output[numInvs + inv];
const expected_v = expected.get(subgroup_id) ?? 0;
if (expected_v !== res) {
return new Error(`Invocation ${inv}:
- expected: ${expected_v}
- got: ${res}`);
}
} else {
if (res !== kDataSentinel) {
return new Error(`Invocation ${inv} unexpected write:
- subgroup invocation id: ${id}
- subgroup size: ${size}`);
}
}
}

return undefined;
}

g.test('compute,all_active')
.desc(`Test compute subgroupAny`)
.params(u =>
u
.combine('wgSize', kWGSizes)
.beginSubcases()
.combine('case', [...iterRange(kNumCases, x => x)])
)
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];

const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage> inputs : array<u32>;
@group(0) @binding(1)
var<storage, read_write> outputs : array<u32>;
struct Metadata {
subgroup_size: array<u32, ${wgThreads}>,
subgroup_invocation_id: array<u32, ${wgThreads}>,
}
@group(0) @binding(2)
var<storage, read_write> metadata : Metadata;
@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
fn main(
@builtin(local_invocation_index) lid : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(subgroup_size) subgroupSize : u32,
) {
metadata.subgroup_size[lid] = subgroupSize;
metadata.subgroup_invocation_id[lid] = id;
// Record a representative subgroup id.
outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid);
let res = select(0u, 1u, subgroupAny(bool(inputs[lid])));
outputs[lid] = res;
}`;

const includeCounter = false;
const inputData = generateInputData(t.params.case, wgThreads, includeCounter);

const uintsPerOutput = 2;
await runComputeTest(
t,
wgsl,
[t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
uintsPerOutput,
inputData,
(metadata: Uint32Array, output: Uint32Array) => {
return checkAny(metadata, output, wgThreads, inputData, (id: number, size: number) => {
return true;
});
}
);
});

g.test('compute,split')
.desc('Test that only active invocation participate')
.params(u =>
u
.combine('predicate', keysOf(kPredicateCases))
.beginSubcases()
.combine('wgSize', kWGSizes)
.combine('case', [...iterRange(kNumCases, x => x)])
)
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const testcase = kPredicateCases[t.params.predicate];
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];

const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage> inputs : array<u32>;
@group(0) @binding(1)
var<storage, read_write> outputs : array<u32>;
struct Metadata {
subgroup_size : array<u32, ${wgThreads}>,
subgroup_invocation_id : array<u32, ${wgThreads}>,
}
@group(0) @binding(2)
var<storage, read_write> metadata : Metadata;
@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
fn main(
@builtin(local_invocation_index) lid : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(subgroup_size) subgroupSize : u32,
) {
metadata.subgroup_size[lid] = subgroupSize;
// Record subgroup invocation id for this invocation.
metadata.subgroup_invocation_id[lid] = id;
// Record a generated subgroup id.
outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid);
if ${testcase.cond} {
outputs[lid] = select(0u, 1u, subgroupAny(bool(inputs[lid])));
} else {
return;
}
}`;

const includeCounter = false;
const inputData = generateInputData(t.params.case, wgThreads, includeCounter);

const uintsPerOutput = 2;
await runComputeTest(
t,
wgsl,
[t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
uintsPerOutput,
inputData,
(metadata: Uint32Array, output: Uint32Array) => {
return checkAny(metadata, output, wgThreads, inputData, testcase.filter);
}
);
});

g.test('fragment').unimplemented();
Original file line number Diff line number Diff line change
@@ -318,6 +318,4 @@ fn main(@builtin(subgroup_invocation_id) id : u32,
t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expect));
});

g.test('dynamically_uniform_id').unimplemented();

g.test('fragment').unimplemented();
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import { assert, iterRange } from '../../../../../../common/util/util.js';
import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js';
import { GPUTest } from '../../../../../gpu_test.js';
import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js';
import { FPInterval } from '../../../../../util/floating_point.js';
import { sparseScalarF16Range, sparseScalarF32Range } from '../../../../../util/math.js';
import { PRNG } from '../../../../../util/prng.js';

export class SubgroupTest extends TextureTestMixin(GPUTest) {}

export const kNumCases = 1000;
export const kStride = 128;

@@ -300,6 +302,8 @@ fn main(
t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen));
}

export const kDataSentinel = 999;

/**
* Runs compute shader subgroup test
*
@@ -346,14 +350,14 @@ export async function runComputeTest(

const outputUints = outputUintsPerElement * wgThreads;
const outputBuffer = t.makeBufferWithContents(
new Uint32Array([...iterRange(outputUints, x => 999)]),
new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]),
GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
);
t.trackForCleanup(outputBuffer);

const numMetadata = 2 * wgThreads;
const metadataBuffer = t.makeBufferWithContents(
new Uint32Array([...iterRange(numMetadata, x => 999)]),
new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]),
GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
);

0 comments on commit 9b30f7a

Please sign in to comment.