diff --git a/membw.html b/membw.html index 40fb078..b4d6ae2 100644 --- a/membw.html +++ b/membw.html @@ -30,161 +30,163 @@ fail("Fatal error: Device does not support WebGPU."); } // [2**0, 2**9) - const workgroupSizes = [...Array(9).keys()].map((i) => 2 ** i); + const workgroupLimit = 1; // 9; + const workgroupSizes = [...Array(workgroupLimit).keys()].map( + (i) => 2 ** i + ); + // [2**0, 2**25) + const memsrcSizeLimit = 1; // 25; + const memsrcSizes = [...Array(memsrcSizeLimit).keys()].map((i) => 2 ** i); for (const workgroupSize of workgroupSizes) { - console.log("Creating new TimingHelper ..."); - const timingHelper = new TimingHelper(device); - console.log("... of workgroup size", workgroupSize); - - const memsrcSize = 2 ** 24; // (1M elements) - const itemsPerWorkgroup = memsrcSize / workgroupSize; - const dispatchGeometry = [itemsPerWorkgroup, 1]; - while ( - dispatchGeometry[0] > adapter.limits.maxComputeWorkgroupsPerDimension - ) { - dispatchGeometry[0] /= 2; - dispatchGeometry[1] *= 2; - } - console.log(`itemsPerWorkgroup: ${itemsPerWorkgroup} -workgroup size: ${workgroupSize} -maxComputeWGPerDim: ${adapter.limits.maxComputeWorkgroupsPerDimension} -dispatchGeometry: ${dispatchGeometry}`); - - const memsrc = new Uint32Array(memsrcSize); - for (let i = 0; i < memsrc.length; i++) { - memsrc[i] = i; - } + for (const memsrcSize of memsrcSizes) { + const timingHelper = new TimingHelper(device); + + const itemsPerWorkgroup = memsrcSize / workgroupSize; + const dispatchGeometry = [itemsPerWorkgroup, 1]; + while ( + dispatchGeometry[0] > + adapter.limits.maxComputeWorkgroupsPerDimension + ) { + dispatchGeometry[0] /= 2; + dispatchGeometry[1] *= 2; + } + console.log(`itemsPerWorkgroup: ${itemsPerWorkgroup} + workgroup size: ${workgroupSize} + maxComputeWGPerDim: ${adapter.limits.maxComputeWorkgroupsPerDimension} + dispatchGeometry: ${dispatchGeometry}`); + + const memsrc = new Uint32Array(memsrcSize); + for (let i = 0; i < memsrc.length; i++) { + memsrc[i] = i; + } + + const memcpyModule = device.createShaderModule({ + label: "copy large chunk of memory from memSrc to memDest", + code: /* wgsl */ ` + override wgSize: u32 = 1; + /* output */ + @group(0) @binding(0) var memDest: array; + /* input */ + @group(0) @binding(1) var memSrc: array; + + @compute @workgroup_size(wgSize) fn memcpyKernel( + @builtin(global_invocation_id) id: vec3u, + @builtin(num_workgroups) nwg: vec3u, + @builtin(workgroup_id) wgid: vec3u) { + let i = id.y * nwg.x * wgSize + id.x; + memDest[i] = memSrc[i] + 1; + } + `, + }); - const memcpyModule = device.createShaderModule({ - label: "copy large chunk of memory from memSrc to memDest", - code: /* wgsl */ ` - override wgSize: u32 = 1; - /* output */ - @group(0) @binding(0) var memDest: array; - /* input */ - @group(0) @binding(1) var memSrc: array; - - @compute @workgroup_size(wgSize) fn memcpyKernel( - @builtin(global_invocation_id) id: vec3u, - @builtin(num_workgroups) nwg: vec3u, - @builtin(workgroup_id) wgid: vec3u) { - let i = id.y * nwg.x * wgSize + id.x; - memDest[i] = memSrc[i] + 1; - } - `, - }); - - const memcpyPipeline = device.createComputePipeline({ - label: "memcpy compute pipeline", - layout: "auto", - compute: { - module: memcpyModule, - constants: { - wgSize: workgroupSize, + const memcpyPipeline = device.createComputePipeline({ + label: "memcpy compute pipeline", + layout: "auto", + compute: { + module: memcpyModule, + constants: { + wgSize: workgroupSize, + }, }, - }, - }); - - // create buffers on the GPU to hold data - // read-only inputs: - const memsrcBuffer = device.createBuffer({ - label: "memory source buffer", - size: memsrc.byteLength, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, - }); - device.queue.writeBuffer(memsrcBuffer, 0, memsrc); - - const memdestBuffer = device.createBuffer({ - label: "memory destination buffer", - size: memsrc.byteLength, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - - const mappableMemdstBuffer = device.createBuffer({ - label: "mappable memory destination buffer", - size: memsrc.byteLength, - usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, - }); - - /** Set up bindGroups per compute kernel to tell the shader which buffers to use */ - const memcpyBindGroup = device.createBindGroup({ - label: "bindGroup for memcpy kernel", - layout: memcpyPipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: memdestBuffer } }, - { binding: 1, resource: { buffer: memsrcBuffer } }, - ], - }); - - const encoder = device.createCommandEncoder({ - label: "memcpy encoder", - }); - - console.log("Initializing TimingHelper on beginComputePass ..."); - const memcpyPass = timingHelper.beginComputePass(encoder, { - label: "memcpy compute pass", - }); - console.log("... done"); - memcpyPass.setPipeline(memcpyPipeline); - memcpyPass.setBindGroup(0, memcpyBindGroup); - // TODO handle not evenly divisible by wgSize - console.log("Dispatching ..."); - memcpyPass.dispatchWorkgroups(...dispatchGeometry); - console.log("... done"); - memcpyPass.end(); - - // Encode a command to copy the results to a mappable buffer. - // this is (from, to) - encoder.copyBufferToBuffer( - memdestBuffer, - 0, - mappableMemdstBuffer, - 0, - mappableMemdstBuffer.size - ); - - // Finish encoding and submit the commands - const command_buffer = encoder.finish(); - console.log("Submitting ..."); - device.queue.submit([command_buffer]); - console.log("... done"); - - // Read the results - await mappableMemdstBuffer.mapAsync(GPUMapMode.READ); - const memdest = new Uint32Array( - mappableMemdstBuffer.getMappedRange().slice() - ); - mappableMemdstBuffer.unmap(); - console.log(`Memdest size: ${memdest.length}`); - let errors = 0; - for (let i = 0; i < memdest.length; i++) { - if (memsrc[i] + 1 != memdest[i]) { - if (errors < 5) { - console.log( - `Error ${errors}: i=${i}, src=${memsrc[i]}, dest=${memdest[i]}` - ); + }); + + // create buffers on the GPU to hold data + // read-only inputs: + const memsrcBuffer = device.createBuffer({ + label: "memory source buffer", + size: memsrc.byteLength, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, + }); + device.queue.writeBuffer(memsrcBuffer, 0, memsrc); + + const memdestBuffer = device.createBuffer({ + label: "memory destination buffer", + size: memsrc.byteLength, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const mappableMemdstBuffer = device.createBuffer({ + label: "mappable memory destination buffer", + size: memsrc.byteLength, + usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, + }); + + /** Set up bindGroups per compute kernel to tell the shader which buffers to use */ + const memcpyBindGroup = device.createBindGroup({ + label: "bindGroup for memcpy kernel", + layout: memcpyPipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: memdestBuffer } }, + { binding: 1, resource: { buffer: memsrcBuffer } }, + ], + }); + + const encoder = device.createCommandEncoder({ + label: "memcpy encoder", + }); + + const memcpyPass = timingHelper.beginComputePass(encoder, { + label: "memcpy compute pass", + }); + memcpyPass.setPipeline(memcpyPipeline); + memcpyPass.setBindGroup(0, memcpyBindGroup); + // TODO handle not evenly divisible by wgSize + memcpyPass.dispatchWorkgroups(...dispatchGeometry); + memcpyPass.end(); + + // Encode a command to copy the results to a mappable buffer. + // this is (from, to) + encoder.copyBufferToBuffer( + memdestBuffer, + 0, + mappableMemdstBuffer, + 0, + mappableMemdstBuffer.size + ); + + // Finish encoding and submit the commands + const command_buffer = encoder.finish(); + device.queue.submit([command_buffer]); + + // Read the results + await mappableMemdstBuffer.mapAsync(GPUMapMode.READ); + const memdest = new Uint32Array( + mappableMemdstBuffer.getMappedRange().slice() + ); + mappableMemdstBuffer.unmap(); + let errors = 0; + for (let i = 0; i < memdest.length; i++) { + if (memsrc[i] + 1 != memdest[i]) { + if (errors < 5) { + console.log( + `Error ${errors}: i=${i}, src=${memsrc[i]}, dest=${memdest[i]}` + ); + } + errors++; } - errors++; } - } - if (errors > 0) { - console.log(`Errors: ${errors}`); - } else { - console.log("No errors!"); - } + if (errors > 0) { + console.log(`Memdest size: ${memdest.length} | Errors: ${errors}`); + } else { + console.log(`Memdest size: ${memdest.length} | No errors!`); + } - timingHelper.getResult().then((ns) => { - console.log("Timing result", ns); - let bytesTransferred = 2 * memdest.byteLength; - console.log(`Transferred ${bytesTransferred} bytes`); - console.log(`Bandwidth = ${bytesTransferred / ns} GB/s`); - data.push({ - time: ns[0], - bytesTransferred: bytesTransferred, - bandwidth: bytesTransferred / ns[0], - workgroupSize: workgroupSize, + timingHelper.getResult().then((ns) => { + let bytesTransferred = 2 * memdest.byteLength; + console.log( + `Timing result: ${ns}; transferred ${bytesTransferred} bytes; bandwidth = ${ + bytesTransferred / ns + } GB/s` + ); + data.push({ + time: ns[0], + bytesTransferred: bytesTransferred, + memsrcSize: memsrcSize, + bandwidth: bytesTransferred / ns[0], + workgroupSize: workgroupSize, + }); }); - }); + } } console.log(data); @@ -194,7 +196,14 @@ } const plot = Plot.plot({ - marks: [Plot.lineY(data, { x: "workgroupSize", y: "bandwidth" })], + color: { legend: true }, + marks: [ + Plot.lineY(data, { + x: "memsrcSize", + y: "bandwidth", + stroke: "workgroupSize", + }), + ], }); const div = document.querySelector("#plot"); div.append(plot);