diff --git a/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts b/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts index b6c7d54669d..40f0adf279f 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/derivatives.ts @@ -33,12 +33,11 @@ export function runDerivativeTest( // We will populate a uniform buffer with these input values laid out sequentially: // [ case_0_input_1, case_0_input_0, case_1_input_1, case_1_input_0, ...] // - // The render pipeline will be launched once per pixel per pair of cases over - // a viewport size of (2, 2) with the viewport set to cover 1 pixel. - // Each 2x2 set of calls will will exercise two test cases. Each of these - // draw calls will use a different instance index, which is forwarded to the - // fragment shader. Each invocation returns the result which is stored in - // a rgba32uint texture. + // The render pipeline will be launched once per pair of cases over a viewport + // size of (2, 2). Each 2x2 set of calls will exercise two test cases. + // Each of these draw calls will use a different instance index, which is + // forwarded to the fragment shader. Each invocation returns the result which + // is stored in a rgba32uint texture. // // Consider draw calls that test 4 cases (c_0, c_1, c_2, c_3). 
// diff --git a/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts b/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts index d87f6b06c2d..23def979b84 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/fwidth.ts @@ -2,6 +2,7 @@ import { GPUTest } from '../../../../../gpu_test.js'; import { anyOf } from '../../../../../util/compare.js'; import { Type, Value } from '../../../../../util/conversion.js'; import { FPInterval } from '../../../../../util/floating_point.js'; +import { align } from '../../../../../util/math.js'; import { Case } from '../../case.js'; import { toComparator } from '../../expectation.js'; @@ -13,7 +14,7 @@ import { toComparator } from '../../expectation.js'; * @param non_uniform_discard if true, one of each pair of invocations will discard * @param vectorize if defined, the vector width to use (2, 3, or 4) */ -export function runFWidthTest( +export function runFWidthTestCases( t: GPUTest, cases: Case[], builtin: string, @@ -22,14 +23,13 @@ export function runFWidthTest( ) { //////////////////////////////////////////////////////////////// // The four input values for a given case are distributed to across the invocations in a quad. - // We will populate a storage buffer with these input values laid out sequentially: + // We will populate a uniform buffer with these input values laid out sequentially: // [ case0_input0, case0_input1, case0_input2, case0_input3, ...] // // The render pipeline will be launched several times over a viewport size of (2, 2). Each draw // call will execute a single quad (four fragment invocation), which will exercise one test case. // Each of these draw calls will use a different instance index, which is forwarded to the - // fragment shader. Each invocation will determine its index into the storage buffer using its - // fragment position and the instance index for that draw call. + // fragment shader. 
The results are the output from the fragment shader. // // Consider two draw calls that test 2 cases (c0, c1). // @@ -46,13 +46,23 @@ export function runFWidthTest( } // Determine the WGSL type to use in the shader, and the stride in bytes between values. - let valueStride = 4; - let wgslType = 'f32'; + const valueStride = 16; + let conversionFromInput = 'input.x'; + let conversionToOutput = `vec4f(v, 0, 0, 0)`; if (vectorize) { - wgslType = `vec${vectorize}f`; - valueStride = vectorize * 4; - if (vectorize === 3) { - valueStride = 16; + switch (vectorize) { + case 2: + conversionFromInput = 'input.xy'; + conversionToOutput = 'vec4f(v, 0, 0)'; + break; + case 3: + conversionFromInput = 'input.xyz'; + conversionToOutput = 'vec4f(v, 0)'; + break; + case 4: + conversionFromInput = 'input'; + conversionToOutput = 'v'; + break; } } @@ -76,16 +86,16 @@ fn vert(@builtin(vertex_index) vertex_idx: u32, return CaseInfo(vec4(kVertices[vertex_idx], 0, 1), instance_idx); } -@group(0) @binding(0) var inputs : array<${wgslType}>; -@group(0) @binding(1) var outputs : array<${wgslType}>; +@group(0) @binding(0) var inputs : array; @fragment -fn frag(info : CaseInfo) { +fn frag(info : CaseInfo) -> @location(0) vec4u { let inv_idx = u32(info.position.x) + u32(info.position.y)*2; let index = info.quad_idx*4 + inv_idx; let input = inputs[index]; ${non_uniform_discard ? 'if inv_idx == 0 { discard; }' : ''} - outputs[index] = ${builtin}(input); + let v = ${builtin}(${conversionFromInput}); + return bitcast(${conversionToOutput}); } `; @@ -94,22 +104,18 @@ fn frag(info : CaseInfo) { const pipeline = t.device.createRenderPipeline({ layout: 'auto', vertex: { module }, - fragment: { module, targets: [{ format: 'rgba8unorm', writeMask: 0 }] }, + fragment: { module, targets: [{ format: 'rgba32uint' }] }, }); // Create storage buffers to hold the inputs and outputs. 
const bufferSize = cases.length * 4 * valueStride; const inputBuffer = t.createBufferTracked({ size: bufferSize, - usage: GPUBufferUsage.STORAGE, + usage: GPUBufferUsage.UNIFORM, mappedAtCreation: true, }); - const outputBuffer = t.createBufferTracked({ - size: bufferSize, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - // Populate the input storage buffer with case input values. + // Populate the input uniform buffer with case input values. const valuesData = new Uint8Array(inputBuffer.getMappedRange()); for (let i = 0; i < cases.length / vectorWidth; i++) { for (let v = 0; v < vectorWidth; v++) { @@ -127,10 +133,7 @@ fn frag(info : CaseInfo) { // Create a bind group for the storage buffers. const group = t.device.createBindGroup({ - entries: [ - { binding: 0, resource: { buffer: inputBuffer } }, - { binding: 1, resource: { buffer: outputBuffer } }, - ], + entries: [{ binding: 0, resource: { buffer: inputBuffer } }], layout: pipeline.getBindGroupLayout(0), }); @@ -138,52 +141,67 @@ fn frag(info : CaseInfo) { // We only need this for launching the desired number of fragment invocations. const colorAttachment = t.createTextureTracked({ size: { width: 2, height: 2 }, - format: 'rgba8unorm', - usage: GPUTextureUsage.RENDER_ATTACHMENT, + format: 'rgba32uint', + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, }); + const bytesPerRow = align(valueStride * colorAttachment.width, 256); // Submit the render pass to the device. 
+ const results = []; const encoder = t.device.createCommandEncoder(); - const pass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: colorAttachment.createView(), - loadOp: 'clear', - storeOp: 'discard', - }, - ], - }); - pass.setPipeline(pipeline); - pass.setBindGroup(0, group); - for (let quad = 0; quad < cases.length / vectorWidth; quad++) { + for (let quad = 0; quad / vectorWidth; quad++) { + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: colorAttachment.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, group); pass.draw(3, 1, undefined, quad); + pass.end(); + const outputBuffer = t.createBufferTracked({ + size: bytesPerRow * colorAttachment.height, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + results.push(outputBuffer); + encoder.copyTextureToBuffer( + { texture: colorAttachment }, + { buffer: outputBuffer, bytesPerRow }, + [colorAttachment.width, colorAttachment.height] + ); } - pass.end(); t.queue.submit([encoder.finish()]); // Check the outputs match the expected results. 
- t.expectGPUBufferValuesPassCheck( - outputBuffer, - (outputData: Uint8Array) => { - for (let i = 0; i < cases.length / vectorWidth; i++) { - for (let v = 0; v < vectorWidth; v++) { - const index = i * vectorWidth + v; - if (index >= cases.length) { - break; - } - const c = cases[index]; - - for (let x = 0; x < 4; x++) { + results.forEach((outputBuffer, quadNdx) => { + t.expectGPUBufferValuesPassCheck( + outputBuffer, + (outputData: Uint8Array) => { + for (let i = 0; i < 4; ++i) { + const tx = i % 2; + const ty = (i / 2) | 0; + const x = tx + ty * 2; + for (let v = 0; v < vectorWidth; v++) { if (non_uniform_discard && x === 0) { continue; } - const index = (i * 4 + x) * valueStride + v * 4; + const caseNdx = quadNdx * 4 + x; + if (caseNdx >= cases.length) { + break; + } + + const c = cases[quadNdx * 4 + x]; + const index = ty * bytesPerRow + tx * valueStride + v * 4; const result = Type.f32.read(outputData, index); let expected = c.expected; if (builtin.endsWith('Fine')) { - expected = toComparator((expected as FPInterval[])[x]); + expected = toComparator((expected as FPInterval[])[v]); } else { expected = anyOf(...(expected as FPInterval[])); } @@ -191,19 +209,46 @@ fn frag(info : CaseInfo) { const cmp = expected.compare(result); if (!cmp.matched) { return new Error(` - inputs: (${(c.input as Value[]).join(', ')}) - expected: ${cmp.expected} + inputs: (${(c.input as Value[]).join(', ')}) + expected: ${cmp.expected} - returned: ${result}`); + returned: ${result}`); } } } + return undefined; + }, + { + type: Uint8Array, + typedLength: outputBuffer.size, } - return undefined; - }, - { - type: Uint8Array, - typedLength: bufferSize, - } - ); + ); + }); +} + +/** + * Run a test for a fwidth builtin function. 
+ * @param t the GPUTest + * @param cases list of test cases to run + * @param builtin the builtin function to test + * @param non_uniform_discard if true, one of each pair of invocations will discard + * @param vectorize if defined, the vector width to use (2, 3, or 4) + */ +export function runFWidthTest( + t: GPUTest, + cases: Case[], + builtin: string, + non_uniform_discard: boolean, + vectorize?: number +) { + const numCasesPerUniformBuffer = t.device.limits.maxUniformBufferBindingSize / 64; + for (let i = 0; i < cases.length; i += numCasesPerUniformBuffer) { + runFWidthTestCases( + t, + cases.slice(i, i + numCasesPerUniformBuffer), + builtin, + non_uniform_discard, + vectorize + ); + } }