Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Execution tests for subgroupAny and subgroupAll #3924

Merged
merged 4 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/webgpu/listing_meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -1529,13 +1529,18 @@
"webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 },
"webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 },
"webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 },
"webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 },
"webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 },
"webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment:*": { "subcaseMS": 0.172 },
"webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 },
"webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 },
"webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment:*": { "subcaseMS": 0.227 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 },
"webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 },
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
export const description = `
Execution tests for subgroupAll.

Note: There is a lack of portability for non-uniform execution so these tests
restrict themselves to uniform control flow.
Note: There is no guaranteed mapping between subgroup_invocation_id and
local_invocation_index. Tests should avoid assuming there is.
`;

import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
import { keysOf } from '../../../../../../common/util/data_tables.js';
import { iterRange } from '../../../../../../common/util/util.js';
import { PRNG } from '../../../../../util/prng.js';

import {
kWGSizes,
kPredicateCases,
SubgroupTest,
runComputeTest,
kDataSentinel,
} from './subgroup_util.js';

export const g = makeTestGroup(SubgroupTest);

const kNumCases = 15;

/**
* Generate input data for testing.
*
* Data is generated in the following categories:
* Seed 0 generates all 0 data
* Seed 1 generates all 1 data
* Seeds 2-9 generates all 1s except for a zero randomly once per 32 elements
* Seeds 10+ generate all random data
* @param seed The seed for the PRNG
* @param num The number of data items to generate
* @param addCounter If true, treats the first index as an atomic counter
*/
function generateInputData(seed: number, num: number, addCounter: boolean): Uint32Array {
const prng = new PRNG(seed);

const bound = Math.min(num, 32);
const index = prng.uniformInt(bound);

return new Uint32Array([
...iterRange(num, x => {
if (addCounter && x === 0) {
// Counter should start at 1 to avoid clear value.
return 1;
}

if (seed === 0) {
return 0;
} else if (seed === 1) {
return 1;
} else if (seed < 10) {
const bounded = (addCounter ? x + 1 : x) % bound;
return bounded === index ? 0 : 1;
}
return prng.uniformInt(2);
}),
]);
}

/**
* Checks the result of a subgroupAll operation
*
* Since subgroup size depends on the pipeline compile, we calculate the expected
* results after execution. The shader generates a subgroup id and records it for
* each invocation. The check first calculates the expected result for each subgroup
* and then compares to the actual result for each invocation. The filter functor
* ensures only the correct invocations contribute to the calculation.
* @param metadata An array of uints:
* * first half containing subgroup sizes (from builtin value)
* * second half subgroup invocation id
* @param output An array of uints containing:
* * first half is the outputs of subgroupAll
* * second half is a generated subgroup id
* @param numInvs Number of invocations executed
* @param input The input data (equal size to output)
* @param filter A functor to filter active invocations
*/
function checkAll(
metadata: Uint32Array, // unused
output: Uint32Array,
numInvs: number,
input: Uint32Array,
filter: (id: number, size: number) => boolean
): Error | undefined {
// First, generate expected results.
const expected = new Map<number, number>();
for (let inv = 0; inv < numInvs; inv++) {
const size = metadata[inv];
const id = metadata[inv + numInvs];
if (!filter(id, size)) {
continue;
}
const subgroup_id = output[numInvs + inv];
let v = expected.get(subgroup_id) ?? 1;
v &= input[inv];
expected.set(subgroup_id, v);
}

// Second, check against actual results.
for (let inv = 0; inv < numInvs; inv++) {
const size = metadata[inv];
const id = metadata[inv + numInvs];
const res = output[inv];
if (filter(id, size)) {
const subgroup_id = output[numInvs + inv];
const expected_v = expected.get(subgroup_id) ?? 0;
if (expected_v !== res) {
return new Error(`Invocation ${inv}:
- expected: ${expected_v}
- got: ${res}`);
}
} else {
if (res !== kDataSentinel) {
return new Error(`Invocation ${inv} unexpected write:
- subgroup invocation id: ${id}
- subgroup size: ${size}`);
}
}
}

return undefined;
}

g.test('compute,all_active')
.desc(`Test compute subgroupAll`)
.params(u =>
u
.combine('wgSize', kWGSizes)
.beginSubcases()
.combine('case', [...iterRange(kNumCases, x => x)])
)
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];

const wgsl = `
enable subgroups;

@group(0) @binding(0)
var<storage> inputs : array<u32>;

@group(0) @binding(1)
var<storage, read_write> outputs : array<u32>;

struct Metadata {
subgroup_size: array<u32, ${wgThreads}>,
subgroup_invocation_id: array<u32, ${wgThreads}>,
}

@group(0) @binding(2)
var<storage, read_write> metadata : Metadata;

@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
fn main(
@builtin(local_invocation_index) lid : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(subgroup_size) subgroupSize : u32,
) {
metadata.subgroup_size[lid] = subgroupSize;

metadata.subgroup_invocation_id[lid] = id;

// Record a representative subgroup id.
outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid);

let res = select(0u, 1u, subgroupAll(bool(inputs[lid])));
outputs[lid] = res;
}`;

const includeCounter = false;
const inputData = generateInputData(t.params.case, wgThreads, includeCounter);

const uintsPerOutput = 2;
await runComputeTest(
t,
wgsl,
[t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
uintsPerOutput,
inputData,
(metadata: Uint32Array, output: Uint32Array) => {
return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => {
return true;
});
}
);
});

g.test('compute,split')
.desc('Test that only active invocation participate')
.params(u =>
u
.combine('predicate', keysOf(kPredicateCases))
.beginSubcases()
.combine('wgSize', kWGSizes)
.combine('case', [...iterRange(kNumCases, x => x)])
)
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const testcase = kPredicateCases[t.params.predicate];
const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2];

const wgsl = `
enable subgroups;

@group(0) @binding(0)
var<storage> inputs : array<u32>;

@group(0) @binding(1)
var<storage, read_write> outputs : array<u32>;

struct Metadata {
subgroup_size : array<u32, ${wgThreads}>,
subgroup_invocation_id : array<u32, ${wgThreads}>,
}

@group(0) @binding(2)
var<storage, read_write> metadata : Metadata;

@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]})
fn main(
@builtin(local_invocation_index) lid : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(subgroup_size) subgroupSize : u32,
) {
metadata.subgroup_size[lid] = subgroupSize;

// Record subgroup invocation id for this invocation.
metadata.subgroup_invocation_id[lid] = id;

// Record a generated subgroup id.
outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid);

if ${testcase.cond} {
outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid])));
} else {
return;
}
}`;

const includeCounter = false;
const inputData = generateInputData(t.params.case, wgThreads, includeCounter);

const uintsPerOutput = 2;
await runComputeTest(
t,
wgsl,
[t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]],
uintsPerOutput,
inputData,
(metadata: Uint32Array, output: Uint32Array) => {
return checkAll(metadata, output, wgThreads, inputData, testcase.filter);
}
);
});

g.test('fragment').unimplemented();
Loading
Loading