From dedb476e7ab74a1cbb1a55b860d5388e1c7a59ea Mon Sep 17 00:00:00 2001 From: Greggman Date: Fri, 13 Dec 2024 19:30:41 -0800 Subject: [PATCH] Refactor derivative tests for 0 storage buffers. (#4091) --- .../expression/call/builtin/derivatives.ts | 145 ++++++++++-------- 1 file changed, 82 insertions(+), 63 deletions(-) diff --git a/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts b/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts index 45337731ff9b..b6c7d54669d3 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts @@ -1,5 +1,6 @@ import { GPUTest } from '../../../../../gpu_test.js'; import { Type, Value } from '../../../../../util/conversion.js'; +import { align } from '../../../../../util/math.js'; import { Case } from '../../case.js'; import { toComparator } from '../../expectation.js'; import { packScalarsToVector } from '../../expression.js'; @@ -29,16 +30,17 @@ export function runDerivativeTest( //////////////////////////////////////////////////////////////// // The two input values for a given case are distributed to two different invocations in a quad. - // We will populate a storage buffer with these input values laid out sequentially: + // We will populate a uniform buffer with these input values laid out sequentially: // [ case_0_input_1, case_0_input_0, case_1_input_1, case_1_input_0, ...] // - // The render pipeline will be launched several times over a viewport size of (2, 2). Each draw - // call will execute a single quad (four fragment invocation), which will exercise two test cases. - // Each of these draw calls will use a different instance index, which is forwarded to the - // fragment shader. Each invocation will determine its index into the storage buffer using its - // fragment position and the instance index for that draw call. 
+ // The render pipeline will be launched once per pixel per pair of cases over + // a viewport size of (2, 2) with the viewport set to cover 1 pixel. + // Each 2x2 set of calls will exercise two test cases. Each of these + // draw calls will use a different instance index, which is forwarded to the + // fragment shader. Each invocation returns the result which is stored in + // an rgba32uint texture. // - // Consider two draw calls that test 4 cases (c_0, c_1, c_2, c_3). + // Consider draw calls that test 4 cases (c_0, c_1, c_2, c_3). // // For derivatives along the 'x' direction, the mapping from fragment position to case input is: // Quad 0: | c_0_i_1 | c_0_i_0 | Quad 1: | c_2_i_1 | c_2_i_0 | @@ -54,13 +56,23 @@ export function runDerivativeTest( const dir = builtin[3]; // Determine the WGSL type to use in the shader, and the stride in bytes between values. - let valueStride = 4; - let wgslType = 'f32'; + const valueStride = 16; + let conversionFromInput = 'input.x'; + let conversionToOutput = `vec4f(v)`; if (vectorize) { - wgslType = `vec${vectorize}f`; - valueStride = vectorize * 4; - if (vectorize === 3) { - valueStride = 16; + switch (vectorize) { + case 2: + conversionFromInput = 'input.xy'; + conversionToOutput = 'vec4f(v, 0, 0)'; + break; + case 3: + conversionFromInput = 'input.xyz'; + conversionToOutput = 'vec4f(v, 0)'; + break; + case 4: + conversionFromInput = 'input'; + conversionToOutput = 'v'; + break; } } @@ -84,17 +96,17 @@ fn vert(@builtin(vertex_index) vertex_idx: u32, return CaseInfo(vec4(kVertices[vertex_idx], 0, 1), instance_idx); } -@group(0) @binding(0) var inputs : array<${wgslType}>; -@group(0) @binding(1) var outputs : array<${wgslType}>; +@group(0) @binding(0) var inputs : array; @fragment -fn frag(info : CaseInfo) { +fn frag(info : CaseInfo) -> @location(0) vec4u { let case_idx = u32(info.position.${dir === 'x' ? 
'y' : 'x'}); let inv_idx = u32(info.position.${dir}); let index = info.quad_idx*4 + case_idx*2 + inv_idx; let input = inputs[index]; ${non_uniform_discard ? 'if inv_idx == 0 { discard; }' : ''} - outputs[index] = ${builtin}(input); + let v = ${builtin}(${conversionFromInput}); + return bitcast(${conversionToOutput}); } `; @@ -103,22 +115,18 @@ fn frag(info : CaseInfo) { const pipeline = t.device.createRenderPipeline({ layout: 'auto', vertex: { module }, - fragment: { module, targets: [{ format: 'rgba8unorm', writeMask: 0 }] }, + fragment: { module, targets: [{ format: 'rgba32uint' }] }, }); // Create storage buffers to hold the inputs and outputs. const bufferSize = cases.length * 2 * valueStride; const inputBuffer = t.createBufferTracked({ size: bufferSize, - usage: GPUBufferUsage.STORAGE, + usage: GPUBufferUsage.UNIFORM, mappedAtCreation: true, }); - const outputBuffer = t.createBufferTracked({ - size: bufferSize, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - // Populate the input storage buffer with case input values. + // Populate the input uniform buffer with case input values. const valuesData = new Uint8Array(inputBuffer.getMappedRange()); for (let i = 0; i < cases.length; i++) { const inputs = cases[i].input as ReadonlyArray; @@ -129,61 +137,72 @@ fn frag(info : CaseInfo) { // Create a bind group for the storage buffers. const group = t.device.createBindGroup({ - entries: [ - { binding: 0, resource: { buffer: inputBuffer } }, - { binding: 1, resource: { buffer: outputBuffer } }, - ], + entries: [{ binding: 0, resource: { buffer: inputBuffer } }], layout: pipeline.getBindGroupLayout(0), }); - // Create a texture to use as a color attachment. - // We only need this for launching the desired number of fragment invocations. 
const colorAttachment = t.createTextureTracked({ size: { width: 2, height: 2 }, - format: 'rgba8unorm', - usage: GPUTextureUsage.RENDER_ATTACHMENT, + format: 'rgba32uint', + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, }); + const bytesPerRow = align(valueStride * colorAttachment.width, 256); // Submit the render pass to the device. + const results = []; const encoder = t.device.createCommandEncoder(); - const pass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: colorAttachment.createView(), - loadOp: 'clear', - storeOp: 'discard', - }, - ], - }); - pass.setPipeline(pipeline); - pass.setBindGroup(0, group); for (let quad = 0; quad < cases.length / 2; quad++) { - pass.draw(3, 1, undefined, quad); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: colorAttachment.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, group); + pass.draw(3, 1, 0, quad); + pass.end(); + const outputBuffer = t.createBufferTracked({ + size: bytesPerRow * colorAttachment.height, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + results.push(outputBuffer); + encoder.copyTextureToBuffer( + { texture: colorAttachment }, + { buffer: outputBuffer, bytesPerRow }, + [colorAttachment.width, colorAttachment.height] + ); } - pass.end(); + t.queue.submit([encoder.finish()]); // Check the outputs match the expected results. - t.expectGPUBufferValuesPassCheck( - outputBuffer, - (outputData: Uint8Array) => { - for (let i = 0; i < cases.length; i++) { - const c = cases[i]; - - // Both invocations involved in the derivative should get the same result. 
- for (let d = 0; d < 2; d++) { - if (non_uniform_discard && d === 0) { + results.forEach((outputBuffer, quadNdx) => { + t.expectGPUBufferValuesPassCheck( + outputBuffer, + (outputData: Uint8Array) => { + for (let i = 0; i < 4; ++i) { + const tx = i % 2; + const ty = (i / 2) | 0; + const [inputNdx, caseNdx] = dir === 'x' ? [tx, ty] : [ty, tx]; + const c = cases[quadNdx * 2 + caseNdx]; + + // Both invocations involved in the derivative should get the same result. + if (non_uniform_discard && inputNdx === 0) { continue; } - const index = (i * 2 + d) * valueStride; + const index = ty * bytesPerRow + tx * valueStride; const result = type.read(outputData, index); const cmp = toComparator(c.expected).compare(result); if (!cmp.matched) { // If this is a coarse derivative, the implementation is also allowed to calculate only // one of the two derivatives and return that result to all of the invocations. if (!builtin.endsWith('Fine')) { - const c0 = cases[i % 2 === 0 ? i + 1 : i - 1]; + const c0 = cases[inputNdx]; const cmp0 = toComparator(c0.expected).compare(result); if (!cmp0.matched) { return new Error(` @@ -204,12 +223,12 @@ fn frag(info : CaseInfo) { } } } + return undefined; + }, + { + type: Uint8Array, + typedLength: outputBuffer.size, } - return undefined; - }, - { - type: Uint8Array, - typedLength: bufferSize, - } - ); + ); + }); }