Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compat: Refactor fwidth/Fine/Coarse for 0 storage buffers. #4128

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ export function runDerivativeTest(
// We will populate a uniform buffer with these input values laid out sequentially:
// [ case_0_input_1, case_0_input_0, case_1_input_1, case_1_input_0, ...]
//
// The render pipeline will be launched once per pixel per pair of cases over
// a viewport size of (2, 2) with the viewport set to cover 1 pixel.
// Each 2x2 set of calls will exercise two test cases. Each of these
// draw calls will use a different instance index, which is forwarded to the
// fragment shader. Each invocation returns the result which is stored in
// a rgba32uint texture.
// The render pipeline will be launched once per pair of cases over a viewport
// size of (2, 2). Each 2x2 set of calls will exercise two test cases.
// Each of these draw calls will use a different instance index, which is
// forwarded to the fragment shader. Each invocation returns the result which
// is stored in a rgba32uint texture.
//
// Consider draw calls that test 4 cases (c_0, c_1, c_2, c_3).
//
Expand Down
283 changes: 156 additions & 127 deletions src/webgpu/shader/execution/expression/call/builtin/fwidth.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { assert } from '../../../../../../common/util/util.js';
import { GPUTest } from '../../../../../gpu_test.js';
import { anyOf } from '../../../../../util/compare.js';
import { Type, Value } from '../../../../../util/conversion.js';
import { FPInterval } from '../../../../../util/floating_point.js';
import { align } from '../../../../../util/math.js';
import { Case } from '../../case.js';
import { toComparator } from '../../expectation.js';

Expand All @@ -22,14 +24,11 @@ export function runFWidthTest(
) {
////////////////////////////////////////////////////////////////
// The four input values for a given case are distributed across the invocations in a quad.
// We will populate a storage buffer with these input values laid out sequentially:
// We will populate a uniform buffer with these input values laid out sequentially:
// [ case0_input0, case0_input1, case0_input2, case0_input3, ...]
//
// The render pipeline will be launched several times over a viewport size of (2, 2). Each draw
// call will execute a single quad (four fragment invocations), which will exercise one test case.
// Each of these draw calls will use a different instance index, which is forwarded to the
// fragment shader. Each invocation will determine its index into the storage buffer using its
// fragment position and the instance index for that draw call.
// The render pipeline renders to a 512x2 texture. In the fragment shader, each 2x2 block of texels
// is one test case. The results are the output from the fragment shader.
//
// Consider two draw calls that test 2 cases (c0, c1).
//
Expand All @@ -46,46 +45,56 @@ export function runFWidthTest(
}

// Determine the WGSL type to use in the shader, and the stride in bytes between values.
let valueStride = 4;
let wgslType = 'f32';
const valueStride = 16;
let conversionFromInput = 'input.x';
let conversionToOutput = `vec4f(v, 0, 0, 0)`;
if (vectorize) {
wgslType = `vec${vectorize}f`;
valueStride = vectorize * 4;
if (vectorize === 3) {
valueStride = 16;
switch (vectorize) {
case 2:
conversionFromInput = 'input.xy';
conversionToOutput = 'vec4f(v, 0, 0)';
break;
case 3:
conversionFromInput = 'input.xyz';
conversionToOutput = 'vec4f(v, 0)';
break;
case 4:
conversionFromInput = 'input';
conversionToOutput = 'v';
break;
}
}

const kUniformBufferSize = 16384; // min supported by compat mode.
const kNumCasesPerUniformBuffer = kUniformBufferSize / 64;

// Define a vertex shader that draws a triangle over the full viewport, and a fragment shader that
// calls the fwidth builtin with a value loaded from that fragment's index into the uniform
// buffer (determined using the quad index and fragment position, as described above).
const code = `
struct CaseInfo {
@builtin(position) position: vec4f,
@location(0) @interpolate(flat, either) quad_idx: u32,
}

@vertex
fn vert(@builtin(vertex_index) vertex_idx: u32,
@builtin(instance_index) instance_idx: u32) -> CaseInfo {
fn vert(@builtin(vertex_index) vertex_idx: u32) -> @builtin(position) vec4f {
const kVertices = array(
vec2f(-2, -2),
vec2f( 2, -2),
vec2f( 0, 2),
vec2f( 3, -1),
vec2f(-1, 3),
vec2f(-1, -1),
);
return CaseInfo(vec4(kVertices[vertex_idx], 0, 1), instance_idx);
return vec4(kVertices[vertex_idx], 0, 1);
}

@group(0) @binding(0) var<storage, read> inputs : array<${wgslType}>;
@group(0) @binding(1) var<storage, read_write> outputs : array<${wgslType}>;
@group(0) @binding(0) var<uniform> inputs : array<vec4f, ${kNumCasesPerUniformBuffer * 4}>;

@fragment
fn frag(info : CaseInfo) {
let inv_idx = u32(info.position.x) + u32(info.position.y)*2;
let index = info.quad_idx*4 + inv_idx;
fn frag(@builtin(position) position: vec4f) -> @location(0) vec4u {
let t = vec2u(position.xy);
let inv_idx = t.x % 2 + (t.y % 2) * 2;
let q = t / 2;
let quad_idx = q.y * 256 + q.x;
let index = quad_idx * 4 + inv_idx;
let input = inputs[index];
${non_uniform_discard ? 'if inv_idx == 0 { discard; }' : ''}
outputs[index] = ${builtin}(input);
let v = ${builtin}(${conversionFromInput});
return bitcast<vec4u>(${conversionToOutput});
}
`;

Expand All @@ -94,116 +103,136 @@ fn frag(info : CaseInfo) {
const pipeline = t.device.createRenderPipeline({
layout: 'auto',
vertex: { module },
fragment: { module, targets: [{ format: 'rgba8unorm', writeMask: 0 }] },
});

// Create storage buffers to hold the inputs and outputs.
const bufferSize = cases.length * 4 * valueStride;
const inputBuffer = t.createBufferTracked({
size: bufferSize,
usage: GPUBufferUsage.STORAGE,
mappedAtCreation: true,
});
const outputBuffer = t.createBufferTracked({
size: bufferSize,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
fragment: { module, targets: [{ format: 'rgba32uint' }] },
});

// Populate the input storage buffer with case input values.
const valuesData = new Uint8Array(inputBuffer.getMappedRange());
for (let i = 0; i < cases.length / vectorWidth; i++) {
for (let v = 0; v < vectorWidth; v++) {
const index = i * vectorWidth + v;
if (index >= cases.length) {
break;
}
const inputs = cases[index].input as ReadonlyArray<Value>;
for (let x = 0; x < 4; x++) {
inputs[x].copyTo(valuesData, (i * 4 + x) * valueStride + v * 4);
}
}
}
inputBuffer.unmap();

// Create a bind group for the storage buffers.
const group = t.device.createBindGroup({
entries: [
{ binding: 0, resource: { buffer: inputBuffer } },
{ binding: 1, resource: { buffer: outputBuffer } },
],
layout: pipeline.getBindGroupLayout(0),
});

// Create a texture to use as a color attachment.
// We only need this for launching the desired number of fragment invocations.
// Create a texture to use as a color attachment to receive the results.
const width = kNumCasesPerUniformBuffer * 2;
const height = 2;
// note: We could limit it to this size and increase height but kNumCasesPerUniformBuffer is limited to 256
// because we can't fit more into a single uniform buffer in compat.
assert(width < t.device.limits.maxTextureDimension2D);
const colorAttachment = t.createTextureTracked({
size: { width: 2, height: 2 },
format: 'rgba8unorm',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
size: [width, height],
format: 'rgba32uint',
usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
});
const bytesPerRow = align(width * 16, 256);

// Submit the render pass to the device.
const results = [];
const encoder = t.device.createCommandEncoder();
const pass = encoder.beginRenderPass({
colorAttachments: [
{
view: colorAttachment.createView(),
loadOp: 'clear',
storeOp: 'discard',
},
],
});
pass.setPipeline(pipeline);
pass.setBindGroup(0, group);
for (let quad = 0; quad < cases.length / vectorWidth; quad++) {
pass.draw(3, 1, undefined, quad);
for (let c = 0; c < cases.length; c += kNumCasesPerUniformBuffer) {
// Create uniform buffer to hold the inputs.
const inputBuffer = t.createBufferTracked({
size: kUniformBufferSize,
usage: GPUBufferUsage.UNIFORM,
mappedAtCreation: true,
});
const valuesData = new Uint8Array(inputBuffer.getMappedRange());

// Populate the input uniform buffer with case input values.
for (let i = 0; i < kNumCasesPerUniformBuffer / vectorWidth; i++) {
for (let v = 0; v < vectorWidth; v++) {
const index = c + i * vectorWidth + v;
if (index >= cases.length) {
break;
}
const inputs = cases[index].input as ReadonlyArray<Value>;
for (let x = 0; x < 4; x++) {
inputs[x].copyTo(valuesData, (i * 4 + x) * valueStride + v * 4);
}
}
}
inputBuffer.unmap();

// Create a bind group for the input buffer.
const group = t.device.createBindGroup({
entries: [{ binding: 0, resource: { buffer: inputBuffer } }],
layout: pipeline.getBindGroupLayout(0),
});

// Submit the render pass to the device.
const pass = encoder.beginRenderPass({
colorAttachments: [
{
view: colorAttachment.createView(),
loadOp: 'clear',
storeOp: 'store',
},
],
});
pass.setPipeline(pipeline);
pass.setBindGroup(0, group);
pass.draw(3);
pass.end();

// Create buffer to hold the outputs.
const outputBuffer = t.createBufferTracked({
size: bytesPerRow * height,
usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
});
results.push(outputBuffer);

// Copy the texture to the output buffer
encoder.copyTextureToBuffer(
{ texture: colorAttachment },
{ buffer: outputBuffer, bytesPerRow },
[colorAttachment.width, colorAttachment.height]
);
}
pass.end();
t.queue.submit([encoder.finish()]);

// Check the outputs match the expected results.
t.expectGPUBufferValuesPassCheck(
outputBuffer,
(outputData: Uint8Array) => {
for (let i = 0; i < cases.length / vectorWidth; i++) {
for (let v = 0; v < vectorWidth; v++) {
const index = i * vectorWidth + v;
if (index >= cases.length) {
break;
}
const c = cases[index];

for (let x = 0; x < 4; x++) {
if (non_uniform_discard && x === 0) {
continue;
results.forEach((outputBuffer, groupNdx) => {
// Check the outputs match the expected results.
t.expectGPUBufferValuesPassCheck(
outputBuffer,
(outputData: Uint8Array) => {
const base = groupNdx * kNumCasesPerUniformBuffer;
const numCases = Math.min(kNumCasesPerUniformBuffer, cases.length - base);
const numQuads = numCases / vectorWidth;
for (let i = 0; i < numQuads; i++) {
for (let v = 0; v < vectorWidth; v++) {
const caseNdx = base + i * vectorWidth + v;
if (caseNdx >= cases.length) {
break;
}

const index = (i * 4 + x) * valueStride + v * 4;
const result = Type.f32.read(outputData, index);

let expected = c.expected;
if (builtin.endsWith('Fine')) {
expected = toComparator((expected as FPInterval[])[x]);
} else {
expected = anyOf(...(expected as FPInterval[]));
}

const cmp = expected.compare(result);
if (!cmp.matched) {
return new Error(`
inputs: (${(c.input as Value[]).join(', ')})
expected: ${cmp.expected}

returned: ${result}`);
const c = cases[caseNdx];

for (let x = 0; x < 4; x++) {
if (non_uniform_discard && x === 0) {
continue;
}

const tx = x % 2;
const ty = (x / 2) | 0;
const index = ty * bytesPerRow + i * 32 + tx * 16 + v * 4;
const result = Type.f32.read(outputData, index);

let expected = c.expected;
if (builtin.endsWith('Fine')) {
expected = toComparator((expected as FPInterval[])[x]);
} else {
expected = anyOf(...(expected as FPInterval[]));
}

const cmp = expected.compare(result);
if (!cmp.matched) {
return new Error(`
caseNdx: ${caseNdx} v: ${v} x: ${x}
inputs: (${(c.input as Value[]).join(', ')})
expected: ${cmp.expected}

returned: ${result}`);
}
}
}
}
return undefined;
},
{
type: Uint8Array,
typedLength: outputBuffer.size,
}
return undefined;
},
{
type: Uint8Array,
typedLength: bufferSize,
}
);
);
});
}
Loading