From 0ae837f598e658f1e72b8ff4300feb1ccaf32df1 Mon Sep 17 00:00:00 2001 From: John Owens Date: Wed, 6 Nov 2024 13:17:39 -0800 Subject: [PATCH] init checkin, appears to return 0s on M3 --- deno-timing-mre.js | 209 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 deno-timing-mre.js diff --git a/deno-timing-mre.js b/deno-timing-mre.js new file mode 100644 index 0000000..a9f28f5 --- /dev/null +++ b/deno-timing-mre.js @@ -0,0 +1,209 @@ +// begin TimingHelper code +function assert(cond, msg = "") { + if (!cond) { + throw new Error(msg); + } +} + +const adapter = await navigator.gpu?.requestAdapter(); +const canTimestamp = adapter.features.has("timestamp-query"); +const device = await adapter?.requestDevice({ + requiredFeatures: [...(canTimestamp ? ["timestamp-query"] : [])], +}); + +if (!device) { + fail("Fatal error: Device does not support WebGPU."); +} + +if (!canTimestamp) { + fail( + 'Fatal error: Device does not support WebGPU timestamp query (`adapter.features.has("timestamp-query")` is false).' + ); +} + +const workgroupSize = 64; +const memsrcSize = 2 ** 24; + +const workgroupCount = memsrcSize / workgroupSize; +const dispatchGeometry = [workgroupCount, 1]; +while (dispatchGeometry[0] > adapter.limits.maxComputeWorkgroupsPerDimension) { + dispatchGeometry[0] /= 2; + dispatchGeometry[1] *= 2; +} +console.log(`workgroup count: ${workgroupCount} + workgroup size: ${workgroupSize} + maxComputeWGPerDim: ${adapter.limits.maxComputeWorkgroupsPerDimension} + dispatchGeometry: ${dispatchGeometry}`); + +const memsrc = new Uint32Array(memsrcSize); +for (let i = 0; i < memsrc.length; i++) { + memsrc[i] = i; +} + +const memcpyModule = device.createShaderModule({ + label: "copy large chunk of memory from memSrc to memDest", + code: /* wgsl */ ` + /* output */ + @group(0) @binding(0) var memDest: array; + /* input */ + @group(0) @binding(1) var memSrc: array; + + @compute @workgroup_size(${workgroupSize}) fn memcpyKernel( + @builtin(global_invocation_id) id: vec3u, + @builtin(num_workgroups) nwg: vec3u, + @builtin(workgroup_id) wgid: vec3u) { + let i = id.y * nwg.x * ${workgroupSize} + id.x; + memDest[i] = memSrc[i] + 1; + } + `, +}); + +const memcpyPipeline = device.createComputePipeline({ + label: "memcpy compute pipeline", + layout: "auto", + compute: { + module: memcpyModule, + }, +}); + +// create buffers on the GPU to hold data +// read-only inputs: +const memsrcBuffer = device.createBuffer({ + label: "memory source buffer", + size: memsrc.byteLength, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST, +}); +device.queue.writeBuffer(memsrcBuffer, 0, memsrc); + +const memdestBuffer = device.createBuffer({ + label: "memory destination buffer", + size: memsrc.byteLength, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, +}); + +const mappableMemdstBuffer = device.createBuffer({ + label: "mappable memory destination buffer", + size: memsrc.byteLength, + usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, +}); + +/** Set up bindGroups per compute kernel to tell the shader which buffers to use */ +const memcpyBindGroup = device.createBindGroup({ + label: "bindGroup for memcpy kernel", + layout: memcpyPipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: memdestBuffer } }, + { binding: 1, resource: { buffer: memsrcBuffer } }, + ], +}); + +const querySet = device.createQuerySet({ + type: "timestamp", + count: 2, +}); + +const timestampWrites = { + querySet, + beginningOfPassWriteIndex: 0, // Write timestamp in index 0 when pass begins. + endOfPassWriteIndex: 1, // Write timestamp in index 1 when pass ends. +}; + +const encoder = device.createCommandEncoder({ + label: "memcpy encoder", +}); + +const resolveBuffer = device.createBuffer({ + size: 2 * 8, // querySet.count * 8, + usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC, +}); + +const resultBuffer = device.createBuffer({ + size: resolveBuffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, +}); + +const memcpyPass = encoder.beginComputePass(timestampWrites); +const oldMemcpyPassDescriptor = { + label: "memcpy compute pass", + timestampWrites: { + querySet, + beginningOfPassWriteIndex: 0, + endOfPassWriteIndex: 1, + }, +}; + +memcpyPass.setPipeline(memcpyPipeline); +memcpyPass.setBindGroup(0, memcpyBindGroup); +// TODO handle not evenly divisible by wgSize +memcpyPass.dispatchWorkgroups(...dispatchGeometry); +memcpyPass.end(); +encoder.resolveQuerySet(querySet, 0, 2 /*querySet.count*/, resolveBuffer, 0); + +// Encode a command to copy the results to a mappable buffer. +// this is (from, to) +encoder.copyBufferToBuffer( + memdestBuffer, + 0, + mappableMemdstBuffer, + 0, + mappableMemdstBuffer.size +); + +encoder.resolveQuerySet(querySet, 0, 2, resolveBuffer, 0); +if (resultBuffer.mapState === "unmapped") { + encoder.copyBufferToBuffer( + resolveBuffer, + 0, + resultBuffer, + 0, + resultBuffer.size + ); +} + +// Finish encoding and submit the commands +const command_buffer = encoder.finish(); +let ns; +device.queue.submit([command_buffer]); +if (canTimestamp && resultBuffer.mapState === "unmapped") { + resultBuffer.mapAsync(GPUMapMode.READ).then(() => { + const times = new BigInt64Array(resultBuffer.getMappedRange()); + ns = Number(times[1] - times[0]); + console.log(times[0], times[1], ns); + resultBuffer.unmap(); + }); +} +// kbr@ suggested the following line, which had no effect on M3 [23 Oct 2024] +// await device.queue.onSubmittedWorkDone(); + +// Read the results +await mappableMemdstBuffer.mapAsync(GPUMapMode.READ); +const memdest = new Uint32Array(mappableMemdstBuffer.getMappedRange().slice()); +mappableMemdstBuffer.unmap(); +let errors = 0; +for (let i = 0; i < memdest.length; i++) { + if (memsrc[i] + 1 != memdest[i]) { + if (errors < 5) { + console.log( + `Error ${errors}: i=${i}, src=${memsrc[i]}, dest=${memdest[i]}` + ); + } + errors++; + } +} +if (errors > 0) { + console.log(`Memdest size: ${memdest.length} | Errors: ${errors}`); +} else { + console.log(`Memdest size: ${memdest.length} | No errors!`); +} + +let bytesTransferred = 2 * memdest.byteLength; +console.log( + `Timing result: ${ns} ns; transferred ${bytesTransferred} bytes; bandwidth = ${ + bytesTransferred / ns + } GB/s` +); + +function fail(msg) { + // eslint-disable-next-line no-alert + alert(msg); +}