Skip to content

Commit

Permalink
Subgroups basic cts (gpuweb#3862)
Browse files Browse the repository at this point in the history
* Validation
  * enables
  * built-in values
  * built-in functions: broadcast and ballot
* Execution
  * built-in values
  * built-in functions: broadcast and ballot
  • Loading branch information
alan-baker authored Jul 31, 2024
1 parent a70b240 commit 8199588
Show file tree
Hide file tree
Showing 11 changed files with 1,604 additions and 1 deletion.
23 changes: 23 additions & 0 deletions src/webgpu/listing_meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -1523,6 +1523,15 @@
"webgpu:shader,execution,expression,call,builtin,step:f32:*": { "subcaseMS": 291.363 },
"webgpu:shader,execution,expression,call,builtin,storageBarrier:barrier:*": { "subcaseMS": 0.801 },
"webgpu:shader,execution,expression,call,builtin,storageBarrier:stage:*": { "subcaseMS": 2.402 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 },
"webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 },
"webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 },
"webgpu:shader,execution,expression,call,builtin,tan:abstract_float:*": { "subcaseMS": 17043.428 },
"webgpu:shader,execution,expression,call,builtin,tan:f16:*": { "subcaseMS": 116.157 },
"webgpu:shader,execution,expression,call,builtin,tan:f32:*": { "subcaseMS": 13.532 },
Expand Down Expand Up @@ -1842,12 +1851,16 @@
"webgpu:shader,execution,robust_access:linear_memory:*": { "subcaseMS": 5.293 },
"webgpu:shader,execution,robust_access_vertex:vertex_buffer_access:*": { "subcaseMS": 6.487 },
"webgpu:shader,execution,shader_io,compute_builtins:inputs:*": { "subcaseMS": 19.342 },
"webgpu:shader,execution,shader_io,compute_builtins:subgroup_invocation_id:*": { "subcaseMS": 217.700 },
"webgpu:shader,execution,shader_io,compute_builtins:subgroup_size:*": { "subcaseMS": 644.206 },
"webgpu:shader,execution,shader_io,fragment_builtins:inputs,front_facing:*": { "subcaseMS": 1.001 },
"webgpu:shader,execution,shader_io,fragment_builtins:inputs,interStage,centroid:*": { "subcaseMS": 1.001 },
"webgpu:shader,execution,shader_io,fragment_builtins:inputs,interStage:*": { "subcaseMS": 1.001 },
"webgpu:shader,execution,shader_io,fragment_builtins:inputs,position:*": { "subcaseMS": 1.001 },
"webgpu:shader,execution,shader_io,fragment_builtins:inputs,sample_index:*": { "subcaseMS": 1.001 },
"webgpu:shader,execution,shader_io,fragment_builtins:inputs,sample_mask:*": { "subcaseMS": 1.001 },
"webgpu:shader,execution,shader_io,fragment_builtins:subgroup_invocation_id:*": { "subcaseMS": 0.086 },
"webgpu:shader,execution,shader_io,fragment_builtins:subgroup_size:*": { "subcaseMS": 0.232 },
"webgpu:shader,execution,shader_io,shared_structs:shared_between_stages:*": { "subcaseMS": 9.601 },
"webgpu:shader,execution,shader_io,shared_structs:shared_with_buffer:*": { "subcaseMS": 20.701 },
"webgpu:shader,execution,shader_io,shared_structs:shared_with_non_entry_point_function:*": { "subcaseMS": 6.801 },
Expand Down Expand Up @@ -2259,6 +2272,16 @@
"webgpu:shader,validation,expression,call,builtin,step:args:*": { "subcaseMS": 1.000 },
"webgpu:shader,validation,expression,call,builtin,step:must_use:*": { "subcaseMS": 1.000 },
"webgpu:shader,validation,expression,call,builtin,step:values:*": { "subcaseMS": 1.000 },
"webgpu:shader,validation,expression,call,builtin,subgroupBallot:data_type:*": { "subcaseMS": 115.557 },
"webgpu:shader,validation,expression,call,builtin,subgroupBallot:early_eval:*": { "subcaseMS": 52.992 },
"webgpu:shader,validation,expression,call,builtin,subgroupBallot:return_type:*": { "subcaseMS": 22.381 },
"webgpu:shader,validation,expression,call,builtin,subgroupBallot:stage:*": { "subcaseMS": 3.712 },
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:data_type:*": { "subcaseMS": 97.991 },
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:early_eval:*": { "subcaseMS": 1.254 },
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_type:*": { "subcaseMS": 24.703 },
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:must_use:*": { "subcaseMS": 232.030 },
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:return_type:*": { "subcaseMS": 496.031 },
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:stage:*": { "subcaseMS": 3.715 },
"webgpu:shader,validation,expression,call,builtin,tan:args:*": { "subcaseMS": 43.560 },
"webgpu:shader,validation,expression,call,builtin,tan:must_use:*": { "subcaseMS": 5.401 },
"webgpu:shader,validation,expression,call,builtin,tan:values:*": { "subcaseMS": 0.350 },
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,339 @@
export const description = `
Execution tests for subgroupBallot
Note: There is a lack of portability for non-uniform execution so these tests
restrict themselves to uniform control flow or returning early.
Note: There is no guaranteed mapping between subgroup_invocation_id and
local_invocation_index. Tests should avoid assuming there is.
`;

import { makeTestGroup } from '../../../../../../common/framework/test_group.js';
import { keysOf } from '../../../../../../common/util/data_tables.js';
import { iterRange } from '../../../../../../common/util/util.js';
import { GPUTest } from '../../../../../gpu_test.js';

export const g = makeTestGroup(GPUTest);

// 128 is the maximum possible subgroup size.
const kInvocations = 128;

function getMask(size: number): bigint {
return (1n << BigInt(size)) - 1n;
}

function checkBallots(
data: Uint32Array,
subgroupSize: number,
filter: (id: number, s: number) => boolean,
expect: (s: number) => bigint,
allActive: boolean
): Error | undefined {
for (let i = 0; i < kInvocations; i++) {
const idx = i * 4;
let actual = 0n;
for (let j = 0; j < 4; j++) {
actual |= BigInt(data[idx + j]) << BigInt(32 * j);
}
let expectedResult = expect(subgroupSize);
const subgroupId = i % subgroupSize;
if (!allActive && !filter(subgroupId, subgroupSize)) {
expectedResult = 0n;
}
if (expectedResult !== actual) {
return new Error(
`Invocation ${i}, subgroup inv id ${i % subgroupSize}, size ${subgroupSize}
- expected: ${expectedResult.toString(16)}
- got: ${actual.toString(16)}`
);
}
}

return undefined;
}

async function runTest(
t: GPUTest,
wgsl: string,
filter: (id: number, s: number) => boolean,
expect: (s: number) => bigint,
allActive: boolean
) {
const sizeBuffer = t.makeBufferWithContents(
new Uint32Array([0]),
GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
);
t.trackForCleanup(sizeBuffer);

const outputNumInts = kInvocations * 4;
const outputBuffer = t.makeBufferWithContents(
new Uint32Array([...iterRange(outputNumInts, x => 0)]),
GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE
);
t.trackForCleanup(outputBuffer);

const pipeline = t.device.createComputePipeline({
layout: 'auto',
compute: {
module: t.device.createShaderModule({
code: wgsl,
}),
entryPoint: 'main',
},
});
const bg = t.device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: sizeBuffer,
},
},
{
binding: 1,
resource: {
buffer: outputBuffer,
},
},
],
});

const encoder = t.device.createCommandEncoder();
const pass = encoder.beginComputePass();
pass.setPipeline(pipeline);
pass.setBindGroup(0, bg);
pass.dispatchWorkgroups(1, 1, 1);
pass.end();
t.queue.submit([encoder.finish()]);

const sizeReadback = await t.readGPUBufferRangeTyped(sizeBuffer, {
srcByteOffset: 0,
type: Uint32Array,
typedLength: 1,
method: 'copy',
});
const subgroupSize = sizeReadback.data[0];

const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, {
srcByteOffset: 0,
type: Uint32Array,
typedLength: outputNumInts,
method: 'copy',
});
const output = outputReadback.data;

t.expectOK(checkBallots(output, subgroupSize, filter, expect, allActive));
}

const kCases = {
every_even: {
cond: `id % 2 == 0`,
filter: (id: number, size: number) => {
return id % 2 === 0;
},
expect: (subgroupSize: number) => {
const base = BigInt('0x55555555555555555555555555555555');
const mask = getMask(subgroupSize);
return base & mask;
},
},
every_odd: {
cond: `id % 2 == 1`,
filter: (id: number, size: number) => {
return id % 2 === 1;
},
expect: (subgroupSize: number) => {
const base = BigInt('0xAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA');
const mask = getMask(subgroupSize);
return base & mask;
},
},
lower_half: {
cond: `id < subgroupSize / 2`,
filter: (id: number, size: number) => {
return id < Math.floor(size / 2);
},
expect: (size: number) => {
return getMask(Math.floor(size / 2));
},
},
upper_half: {
cond: `id >= subgroupSize / 2`,
filter: (id: number, size: number) => {
return id >= Math.floor(size / 2);
},
expect: (size: number) => {
return getMask(Math.floor(size / 2)) << BigInt(Math.floor(size / 2));
},
},
first_two: {
cond: `id == 0 || id == 1`,
filter: (id: number) => {
return id === 0 || id === 1;
},
expect: (size: number) => {
return getMask(2);
},
},
};

g.test('compute,split')
.desc('Tests ballot in a split subgroup')
.params(u => u.combine('case', keysOf(kCases)))
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const testcase = kCases[t.params.case];
const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage, read_write> size : u32;
@group(0) @binding(1)
var<storage, read_write> output : array<vec4u>;
@compute @workgroup_size(${kInvocations})
fn main(@builtin(subgroup_size) subgroupSize : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(local_invocation_index) lid : u32) {
if (lid == 0) {
size = subgroupSize;
}
if ${testcase.cond} {
output[lid] = subgroupBallot(true);
} else {
return;
}
}`;

await runTest(t, wgsl, testcase.filter, testcase.expect, false);
});

g.test('fragment,split').unimplemented();

g.test('predicate')
.desc('Tests the predicate parameter')
.params(u => u.combine('case', keysOf(kCases)))
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const testcase = kCases[t.params.case];
const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage, read_write> size : u32;
@group(0) @binding(1)
var<storage, read_write> output : array<vec4u>;
@compute @workgroup_size(${kInvocations})
fn main(@builtin(subgroup_size) subgroupSize : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(local_invocation_index) lid : u32) {
if (lid == 0) {
size = subgroupSize;
}
let cond = ${testcase.cond};
let b = subgroupBallot(cond);
output[lid] = b;
}`;

await runTest(t, wgsl, testcase.filter, testcase.expect, true);
});

const kBothCases = {
empty: {
cond: `id < subgroupSize / 2`,
pred: `id >= subgroupSize / 2`,
filter: (id: number, size: number) => {
return id < Math.floor(size / 2);
},
expect: (size: number) => {
return 0n;
},
},
full: {
cond: `id < 128`,
pred: `lid < 128`,
filter: (id: number, size: number) => {
return true;
},
expect: (size: number) => {
return getMask(size);
},
},
one_in_four: {
cond: `id % 2 == 0`,
pred: `id % 4 == 0`,
filter: (id: number, size: number) => {
return id % 2 === 0;
},
expect: (size: number) => {
const base = BigInt('0x11111111111111111111111111111111');
const mask = getMask(size);
return base & mask;
},
},
middle_half: {
cond: `id >= subgroupSize / 4`,
pred: `id < 3 * (subgroupSize / 4)`,
filter: (id: number, size: number) => {
return id >= Math.floor(size / 4);
},
expect: (size: number) => {
return getMask(Math.floor(size / 2)) << BigInt(Math.floor(size / 4));
},
},
middle_half_every_other: {
cond: `(id >= subgroupSize / 4) && (id < 3 * (subgroupSize / 4))`,
pred: `id % 2 == 0`,
filter: (id: number, size: number) => {
return id >= Math.floor(size / 4) && id < 3 * Math.floor(size / 4);
},
expect: (size: number) => {
const base = BigInt('0x55555555555555555555555555555555');
const mask = getMask(Math.floor(size / 2)) << BigInt(Math.floor(size / 4));
return base & mask;
},
},
};

g.test('predicate_and_control_flow')
.desc('Test dynamic predicate and control flow together')
.params(u => u.combine('case', keysOf(kBothCases)))
.beforeAllSubcases(t => {
t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName);
})
.fn(async t => {
const testcase = kBothCases[t.params.case];
const wgsl = `
enable subgroups;
@group(0) @binding(0)
var<storage, read_write> size : u32;
@group(0) @binding(1)
var<storage, read_write> output : array<vec4u>;
@compute @workgroup_size(${kInvocations})
fn main(@builtin(subgroup_size) subgroupSize : u32,
@builtin(subgroup_invocation_id) id : u32,
@builtin(local_invocation_index) lid : u32) {
if (lid == 0) {
size = subgroupSize;
}
if ${testcase.cond} {
output[lid] = subgroupBallot(${testcase.pred});
} else {
return;
}
}`;

await runTest(t, wgsl, testcase.filter, testcase.expect, false);
});

g.test('fragment').unimplemented();
Loading

0 comments on commit 8199588

Please sign in to comment.