From 32ec9b1125e18ee5142c600e50be05256fb21ef6 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:28:40 -0700 Subject: [PATCH 01/10] Changed default values and added auto sort at start --- src/sample/bitonicSort/main.ts | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index cc8e5865..3b712b6d 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -39,7 +39,7 @@ interface SettingsInterface { 'Execute Sort Step': () => void; 'Log Elements': () => void; 'Complete Sort': () => void; - sortSpeed: number; + 'Sort Speed': number; } let init: SampleInit; @@ -48,19 +48,27 @@ SampleInitFactoryWebGPU( const maxWorkgroupsX = device.limits.maxComputeWorkgroupSizeX; const totalElementLengths = []; - for (let i = maxWorkgroupsX * 2; i >= 4; i /= 2) { + const maxElements = maxWorkgroupsX * 2; + for (let i = maxElements; i >= 4; i /= 2) { totalElementLengths.push(i); } + const defaultGridWidth = + Math.sqrt(maxElements) % 2 === 0 + ? Math.floor(Math.sqrt(maxElements)) + : Math.floor(Math.sqrt(maxElements / 2)); + + const defaultGridHeight = maxElements / defaultGridWidth; + const settings: SettingsInterface = { // number of cellElements. Must equal gridWidth * gridHeight and 'Total Threads' * 2 - 'Total Elements': 16, + 'Total Elements': maxElements, // width of screen in cells. - 'Grid Width': 4, + 'Grid Width': defaultGridWidth, // height of screen in cells - 'Grid Height': 4, + 'Grid Height': defaultGridHeight, // number of threads to execute in a workgroup ('Total Threads', 1, 1) - 'Total Threads': 16 / 2, + 'Total Threads': maxWorkgroupsX, // currently highlighted element hoveredElement: 0, // element the hoveredElement just swapped with, @@ -89,7 +97,7 @@ SampleInitFactoryWebGPU( 'Complete Sort': () => { return; }, - sortSpeed: 200, + 'Sort Speed': 50, }; // Initialize initial elements array @@ -98,7 +106,8 @@ SampleInitFactoryWebGPU( ); // Initialize elementsBuffer and elementsStagingBuffer - const elementsBufferSize = Float32Array.BYTES_PER_ELEMENT * 512; + const elementsBufferSize = + Float32Array.BYTES_PER_ELEMENT * totalElementLengths[0]; // Initialize input, output, staging buffers const elementsInputBuffer = device.createBuffer({ size: elementsBufferSize, @@ -288,7 +297,7 @@ SampleInitFactoryWebGPU( } settings.executeStep = true; setSwappedElement(); - }, settings.sortSpeed); + }, settings['Sort Speed']); }; // At top level, basic information about the number of elements sorted and the number of threads @@ -301,6 +310,7 @@ SampleInitFactoryWebGPU( // Folder with functions that control the execution of the sort const controlFolder = gui.addFolder('Sort Controls'); + controlFolder.add(settings, 'Sort Speed', 50, 1000).step(50); controlFolder.add(settings, 'Execute Sort Step').onChange(() => { endSortInterval(); settings.executeStep = true; @@ -380,6 +390,8 @@ SampleInitFactoryWebGPU( let highestBlockHeight = 2; + startSortInterval(); + async function frame() { if (!pageState.active) return; From acdc04025690827136871e13604ccc65dac7b41d Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Thu, 26 Oct 2023 15:03:25 -0700 Subject: [PATCH 02/10] Added stepIndex and totalStep to execution Information and allowed for dynamic updating of sort speed --- src/sample/bitonicSort/computeShader.ts | 14 +++++------ src/sample/bitonicSort/main.ts | 32 ++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/sample/bitonicSort/computeShader.ts b/src/sample/bitonicSort/computeShader.ts index 4011eb41..6c8a1924 100644 --- a/src/sample/bitonicSort/computeShader.ts +++ b/src/sample/bitonicSort/computeShader.ts @@ -19,7 +19,7 @@ struct Uniforms { var local_data: array; //Compare and swap values in local_data -fn compare_and_swap(idx_before: u32, idx_after: u32) { +fn local_compare_and_swap(idx_before: u32, idx_after: u32) { //idx_before should always be < idx_after if (local_data[idx_after] < local_data[idx_before]) { var temp: u32 = local_data[idx_before]; @@ -30,7 +30,7 @@ fn compare_and_swap(idx_before: u32, idx_after: u32) { } // thread_id goes from 0 to threadsPerWorkgroup -fn prepare_flip(thread_id: u32, block_height: u32) { +fn get_flip_indices(thread_id: u32, block_height: u32) { let q: u32 = ((2 * thread_id) / block_height) * block_height; let half_height = block_height / 2; var idx: vec2 = vec2( @@ -38,10 +38,10 @@ fn prepare_flip(thread_id: u32, block_height: u32) { ); idx.x += q; idx.y += q; - compare_and_swap(idx.x, idx.y); + local_compare_and_swap(idx.x, idx.y); } -fn prepare_disperse(thread_id: u32, block_height: u32) { +fn get_disperse_indices(thread_id: u32, block_height: u32) { var q: u32 = ((2 * thread_id) / block_height) * block_height; let half_height = block_height / 2; var idx: vec2 = vec2( @@ -49,7 +49,7 @@ fn prepare_disperse(thread_id: u32, block_height: u32) { ); idx.x += q; idx.y += q; - compare_and_swap(idx.x, idx.y); + local_compare_and_swap(idx.x, idx.y); } @group(0) @binding(0) var input_data: array; @@ -73,10 +73,10 @@ fn computeMain( switch uniforms.algo { case 1: { //Local Flip - prepare_flip(local_id.x, uniforms.blockHeight); + get_flip_indices(local_id.x, uniforms.blockHeight); } case 2: { //Local Disperse - prepare_disperse(local_id.x, uniforms.blockHeight); + get_disperse_indices(local_id.x, uniforms.blockHeight); } default: { diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index 3b712b6d..3fe1ae77 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -29,6 +29,8 @@ interface SettingsInterface { 'Total Threads': number; hoveredElement: number; swappedElement: number; + 'Step Index': number; + 'Total Steps': number; 'Prev Step': StepType; 'Next Step': StepType; 'Prev Swap Span': number; @@ -42,6 +44,11 @@ interface SettingsInterface { 'Sort Speed': number; } +const getNumSteps = (numElements: number) => { + const n = Math.log2(numElements); + return (n * (n + 1)) / 2; +}; + let init: SampleInit; SampleInitFactoryWebGPU( async ({ pageState, device, gui, presentationFormat, context, canvas }) => { @@ -73,6 +80,10 @@ SampleInitFactoryWebGPU( hoveredElement: 0, // element the hoveredElement just swapped with, swappedElement: 1, + // Index of current step + 'Step Index': 0, + // Total steps to sort current number of elements + 'Total Steps': getNumSteps(maxElements), // Previously executed step 'Prev Step': 'NONE', // Next step to execute @@ -186,6 +197,10 @@ SampleInitFactoryWebGPU( const resetExecutionInformation = () => { totalThreadsCell.setValue(settings['Total Elements'] / 2); + // Reset step Index and number of steps based on elements size + stepIndexCell.setValue(0); + totalStepsCell.setValue(getNumSteps(settings['Total Elements'])); + // Get new width and height of screen display in cells const newCellWidth = Math.sqrt(settings['Total Elements']) % 2 === 0 @@ -290,11 +305,17 @@ SampleInitFactoryWebGPU( } }; const startSortInterval = () => { + const currentIntervalSpeed = settings['Sort Speed']; completeSortIntervalID = setInterval(() => { if (settings['Next Step'] === 'NONE') { clearInterval(completeSortIntervalID); completeSortIntervalID = null; } + if (settings['Sort Speed'] !== currentIntervalSpeed) { + clearInterval(completeSortIntervalID); + completeSortIntervalID = null; + startSortInterval(); + } settings.executeStep = true; setSwappedElement(); }, settings['Sort Speed']); @@ -335,6 +356,14 @@ SampleInitFactoryWebGPU( // Additional Information about the execution state of the sort const executionInformationFolder = gui.addFolder('Execution Information'); + const stepIndexCell = executionInformationFolder.add( + settings, + 'Step Index' + ); + const totalStepsCell = executionInformationFolder.add( + settings, + 'Total Steps' + ); const prevStepCell = executionInformationFolder.add(settings, 'Prev Step'); const nextStepCell = executionInformationFolder.add(settings, 'Next Step'); const prevBlockHeightCell = executionInformationFolder.add( @@ -380,6 +409,7 @@ SampleInitFactoryWebGPU( }); // Deactivate interaction with select GUI elements + stepIndexCell.domElement.style.pointerEvents = 'none'; prevStepCell.domElement.style.pointerEvents = 'none'; prevBlockHeightCell.domElement.style.pointerEvents = 'none'; nextStepCell.domElement.style.pointerEvents = 'none'; @@ -440,7 +470,7 @@ SampleInitFactoryWebGPU( computePassEncoder.setBindGroup(0, computeBGDescript.bindGroups[0]); computePassEncoder.dispatchWorkgroups(1); computePassEncoder.end(); - + stepIndexCell.setValue(settings['Step Index'] + 1); prevStepCell.setValue(settings['Next Step']); prevBlockHeightCell.setValue(settings['Next Swap Span']); nextBlockHeightCell.setValue(settings['Next Swap Span'] / 2); From 33bce1554cca66f1c1e699341a251ebabb970102 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Thu, 26 Oct 2023 17:58:08 -0700 Subject: [PATCH 03/10] Scaffolding for global sort and renaming of controller elements from 'cell' to 'controller' to accord with Dat.gui's built in types --- src/sample/bitonicSort/bitonicDisplay.ts | 16 +- src/sample/bitonicSort/computeShader.ts | 34 ++-- src/sample/bitonicSort/main.ts | 195 +++++++++++++---------- src/sample/bitonicSort/utils.ts | 9 +- 4 files changed, 146 insertions(+), 108 deletions(-) diff --git a/src/sample/bitonicSort/bitonicDisplay.ts b/src/sample/bitonicSort/bitonicDisplay.ts index 210ef232..dcdd7cd8 100644 --- a/src/sample/bitonicSort/bitonicDisplay.ts +++ b/src/sample/bitonicSort/bitonicDisplay.ts @@ -1,6 +1,6 @@ import { - BindGroupsObjectsAndLayout, - createBindGroupDescriptor, + BindGroupCluster, + createBindGroupCluster, Base2DRendererClass, } from './utils'; @@ -19,14 +19,14 @@ export default class BitonicDisplayRenderer extends Base2DRendererClass { switchBindGroup: (name: string) => void; setArguments: (args: BitonicDisplayRenderArgs) => void; - computeBGDescript: BindGroupsObjectsAndLayout; + computeBGDescript: BindGroupCluster; constructor( device: GPUDevice, presentationFormat: GPUTextureFormat, renderPassDescriptor: GPURenderPassDescriptor, bindGroupNames: string[], - computeBGDescript: BindGroupsObjectsAndLayout, + computeBGDescript: BindGroupCluster, label: string ) { super(); @@ -38,7 +38,7 @@ export default class BitonicDisplayRenderer extends Base2DRendererClass { usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, }); - const bgDescript = createBindGroupDescriptor( + const bgCluster = createBindGroupCluster( [0], [GPUShaderStage.FRAGMENT], ['buffer'], @@ -48,19 +48,19 @@ export default class BitonicDisplayRenderer extends Base2DRendererClass { device ); - this.currentBindGroup = bgDescript.bindGroups[0]; + this.currentBindGroup = bgCluster.bindGroups[0]; this.currentBindGroupName = bindGroupNames[0]; this.bindGroupMap = {}; - bgDescript.bindGroups.forEach((bg, idx) => { + bgCluster.bindGroups.forEach((bg, idx) => { this.bindGroupMap[bindGroupNames[idx]] = bg; }); this.pipeline = super.create2DRenderPipeline( device, label, - [bgDescript.bindGroupLayout, this.computeBGDescript.bindGroupLayout], + [bgCluster.bindGroupLayout, this.computeBGDescript.bindGroupLayout], bitonicDisplay, presentationFormat ); diff --git a/src/sample/bitonicSort/computeShader.ts b/src/sample/bitonicSort/computeShader.ts index 6c8a1924..6b9e731d 100644 --- a/src/sample/bitonicSort/computeShader.ts +++ b/src/sample/bitonicSort/computeShader.ts @@ -18,7 +18,7 @@ struct Uniforms { var local_data: array; -//Compare and swap values in local_data +// Compare and swap values in local_data fn local_compare_and_swap(idx_before: u32, idx_after: u32) { //idx_before should always be < idx_after if (local_data[idx_after] < local_data[idx_before]) { @@ -30,7 +30,7 @@ fn local_compare_and_swap(idx_before: u32, idx_after: u32) { } // thread_id goes from 0 to threadsPerWorkgroup -fn get_flip_indices(thread_id: u32, block_height: u32) { +fn get_flip_indices(thread_id: u32, block_height: u32) -> vec2 { let q: u32 = ((2 * thread_id) / block_height) * block_height; let half_height = block_height / 2; var idx: vec2 = vec2( @@ -38,10 +38,10 @@ fn get_flip_indices(thread_id: u32, block_height: u32) { ); idx.x += q; idx.y += q; - local_compare_and_swap(idx.x, idx.y); + return idx; } -fn get_disperse_indices(thread_id: u32, block_height: u32) { +fn get_disperse_indices(thread_id: u32, block_height: u32) -> vec2 { var q: u32 = ((2 * thread_id) / block_height) * block_height; let half_height = block_height / 2; var idx: vec2 = vec2( @@ -49,18 +49,26 @@ fn get_disperse_indices(thread_id: u32, block_height: u32) { ); idx.x += q; idx.y += q; - local_compare_and_swap(idx.x, idx.y); + return idx; } @group(0) @binding(0) var input_data: array; @group(0) @binding(1) var output_data: array; @group(0) @binding(2) var uniforms: Uniforms; +fn global_compare_and_swap(idx_before: u32, idx_after: u32) { + if (input_data[idx_after] < input_data[idx_before]) { + output_data[idx_before] = input_data[idx_after]; + output_data[idx_after] = input_data[idx_before]; + } +} + // Our compute shader will execute specified # of threads or elements / 2 threads @compute @workgroup_size(${threadsPerWorkgroup}, 1, 1) fn computeMain( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) workgroup: vec3, ) { //Each thread will populate the workgroup data... (1 thread for every 2 elements) local_data[local_id.x * 2] = input_data[local_id.x * 2]; @@ -72,18 +80,24 @@ fn computeMain( var num_elements = uniforms.width * uniforms.height; switch uniforms.algo { - case 1: { //Local Flip - get_flip_indices(local_id.x, uniforms.blockHeight); + case 1: { // Local Flip + let idx = get_flip_indices(local_id.x, uniforms.blockHeight); + local_compare_and_swap(idx.x, idx.y); + } + case 2: { // Local Disperse + let idx = get_disperse_indices(local_id.x, uniforms.blockHeight); + local_compare_and_swap(idx.x, idx.y); } - case 2: { //Local Disperse - get_disperse_indices(local_id.x, uniforms.blockHeight); + case 4: { // Global Flip + let idx = get_flip_indices(local_id.x, uniforms.blockHeight); + global_compare_and_swap(idx.x, idx.y); } default: { } } - //Ensure that all threads have swapped their own regions of data + // Ensure that all threads have swapped their own regions of data workgroupBarrier(); //Repopulate global data with local data diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index 3fe1ae77..da130e0b 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -1,6 +1,5 @@ import { makeSample, SampleInit } from '../../components/SampleLayout'; -import { SampleInitFactoryWebGPU } from './utils'; -import { createBindGroupDescriptor } from './utils'; +import { createBindGroupCluster, SampleInitFactoryWebGPU } from './utils'; import BitonicDisplayRenderer from './bitonicDisplay'; import bitonicDisplay from './bitonicDisplay.frag.wgsl'; import { NaiveBitonicCompute } from './computeShader'; @@ -8,10 +7,11 @@ import fullscreenTexturedQuad from '../../shaders/fullscreenTexturedQuad.wgsl'; // Type of step that will be executed in our shader enum StepEnum { - NONE = 0, - FLIP_LOCAL = 1, - DISPERSE_LOCAL = 2, - FLIP_DISPERSE_LOCAL = 3, + NONE, + FLIP_LOCAL, + DISPERSE_LOCAL, + FLIP_DISPERSE_LOCAL, + FLIP_GLOBAL, } // String access to StepEnum @@ -19,7 +19,8 @@ type StepType = | 'NONE' | 'FLIP_LOCAL' | 'DISPERSE_LOCAL' - | 'FLIP_DISPERSE_LOCAL'; + | 'FLIP_DISPERSE_LOCAL' + | 'FLIP_GLOBAL'; // Gui settings object interface SettingsInterface { @@ -27,15 +28,15 @@ interface SettingsInterface { 'Grid Width': number; 'Grid Height': number; 'Total Threads': number; - hoveredElement: number; - swappedElement: number; + 'Hovered Cell': number; + 'Swapped Cell': number; 'Step Index': number; 'Total Steps': number; 'Prev Step': StepType; 'Next Step': StepType; 'Prev Swap Span': number; 'Next Swap Span': number; - workLoads: number; + 'Total Workgroups': number; executeStep: boolean; 'Randomize Values': () => void; 'Execute Sort Step': () => void; @@ -76,10 +77,10 @@ SampleInitFactoryWebGPU( 'Grid Height': defaultGridHeight, // number of threads to execute in a workgroup ('Total Threads', 1, 1) 'Total Threads': maxWorkgroupsX, - // currently highlighted element - hoveredElement: 0, - // element the hoveredElement just swapped with, - swappedElement: 1, + // Cell in element grid mouse element is hovering over + 'Hovered Cell': 0, + // element the hovered cell just swapped with, + 'Swapped Cell': 1, // Index of current step 'Step Index': 0, // Total steps to sort current number of elements @@ -92,8 +93,8 @@ SampleInitFactoryWebGPU( 'Prev Swap Span': 0, // Max thread span of next block 'Next Swap Span': 2, - // workloads to dispatch per frame, - workLoads: 1, + // Workgroups to dispatch per frame, + 'Total Workgroups': 1, // Whether we will dispatch a workload this frame executeStep: false, 'Randomize Values': () => { @@ -140,7 +141,7 @@ SampleInitFactoryWebGPU( usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, }); - const computeBGDescript = createBindGroupDescriptor( + const computeBGCluster = createBindGroupCluster( [0, 1, 2], [ GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, @@ -162,7 +163,7 @@ SampleInitFactoryWebGPU( let computePipeline = device.createComputePipeline({ layout: device.createPipelineLayout({ - bindGroupLayouts: [computeBGDescript.bindGroupLayout], + bindGroupLayouts: [computeBGCluster.bindGroupLayout], }), compute: { module: device.createShaderModule({ @@ -190,16 +191,25 @@ SampleInitFactoryWebGPU( presentationFormat, renderPassDescriptor, ['default'], - computeBGDescript, + computeBGCluster, 'BitonicDisplay' ); const resetExecutionInformation = () => { - totalThreadsCell.setValue(settings['Total Elements'] / 2); + // Total threads are either elements / 2 or maxWorkgroupsSizeX + totalThreadsController.setValue( + Math.min(settings['Total Elements'] / 2, maxWorkgroupsX) + ); + + // Dispatch a workgroup for every (Max threads * 2) elements + const workgroupsPerStep = + (settings['Total Elements'] - 1) / (maxWorkgroupsX * 2); + + totalWorkgroupsController.setValue(Math.ceil(workgroupsPerStep)); // Reset step Index and number of steps based on elements size - stepIndexCell.setValue(0); - totalStepsCell.setValue(getNumSteps(settings['Total Elements'])); + stepIndexController.setValue(0); + totalStepsController.setValue(getNumSteps(settings['Total Elements'])); // Get new width and height of screen display in cells const newCellWidth = @@ -207,16 +217,16 @@ SampleInitFactoryWebGPU( ? Math.floor(Math.sqrt(settings['Total Elements'])) : Math.floor(Math.sqrt(settings['Total Elements'] / 2)); const newCellHeight = settings['Total Elements'] / newCellWidth; - gridWidthCell.setValue(newCellWidth); - gridHeightCell.setValue(newCellHeight); + gridWidthController.setValue(newCellWidth); + gridHeightController.setValue(newCellHeight); // Set prevStep to None (restart) and next step to FLIP - prevStepCell.setValue('NONE'); - nextStepCell.setValue('FLIP_LOCAL'); + prevStepController.setValue('NONE'); + nextStepController.setValue('FLIP_LOCAL'); // Reset block heights - prevBlockHeightCell.setValue(0); - nextBlockHeightCell.setValue(2); + prevBlockHeightController.setValue(0); + nextBlockHeightController.setValue(2); highestBlockHeight = 2; }; @@ -245,7 +255,7 @@ SampleInitFactoryWebGPU( // Create new shader invocation with workgroupSize that reflects number of threads computePipeline = device.createComputePipeline({ layout: device.createPipelineLayout({ - bindGroupLayouts: [computeBGDescript.bindGroupLayout], + bindGroupLayouts: [computeBGCluster.bindGroupLayout], }), compute: { module: device.createShaderModule({ @@ -261,16 +271,16 @@ SampleInitFactoryWebGPU( randomizeElementArray(); - const setSwappedElement = () => { + const setSwappedCell = () => { let swappedIndex: number; switch (settings['Next Step']) { case 'FLIP_LOCAL': { const blockHeight = settings['Next Swap Span']; - const p2 = Math.floor(settings.hoveredElement / blockHeight) + 1; - const p3 = settings.hoveredElement % blockHeight; + const p2 = Math.floor(settings['Hovered Cell'] / blockHeight) + 1; + const p3 = settings['Hovered Cell'] % blockHeight; swappedIndex = blockHeight * p2 - p3 - 1; - swappedElementCell.setValue(swappedIndex); + swappedCellController.setValue(swappedIndex); } break; case 'DISPERSE_LOCAL': @@ -278,20 +288,20 @@ SampleInitFactoryWebGPU( const blockHeight = settings['Next Swap Span']; const halfHeight = blockHeight / 2; swappedIndex = - settings.hoveredElement % blockHeight < halfHeight - ? settings.hoveredElement + halfHeight - : settings.hoveredElement - halfHeight; - swappedElementCell.setValue(swappedIndex); + settings['Hovered Cell'] % blockHeight < halfHeight + ? settings['Hovered Cell'] + halfHeight + : settings['Hovered Cell'] - halfHeight; + swappedCellController.setValue(swappedIndex); } break; case 'NONE': { - swappedIndex = settings.hoveredElement; - swappedElementCell.setValue(swappedIndex); + swappedIndex = settings['Hovered Cell']; + swappedCellController.setValue(swappedIndex); } default: { - swappedIndex = settings.hoveredElement; - swappedElementCell.setValue(swappedIndex); + swappedIndex = settings['Hovered Cell']; + swappedCellController.setValue(swappedIndex); } break; } @@ -317,17 +327,28 @@ SampleInitFactoryWebGPU( startSortInterval(); } settings.executeStep = true; - setSwappedElement(); + setSwappedCell(); }, settings['Sort Speed']); }; - // At top level, basic information about the number of elements sorted and the number of threads - // deployed per workgroup. - gui.add(settings, 'Total Elements', totalElementLengths).onChange(() => { - endSortInterval(); - resizeElementArray(); - }); - const totalThreadsCell = gui.add(settings, 'Total Threads'); + // At top level, information about resources used to execute the compute shader + // i.e elements sorted, threads/invocations per workgroup, and workgroups dispatched + const computeResourcesFolder = gui.addFolder('Compute Resources'); + computeResourcesFolder + .add(settings, 'Total Elements', totalElementLengths) + .onChange(() => { + endSortInterval(); + resizeElementArray(); + }); + const totalThreadsController = computeResourcesFolder.add( + settings, + 'Total Threads' + ); + const totalWorkgroupsController = computeResourcesFolder.add( + settings, + 'Total Workgroups' + ); + computeResourcesFolder.open(); // Folder with functions that control the execution of the sort const controlFolder = gui.addFolder('Sort Controls'); @@ -347,40 +368,40 @@ SampleInitFactoryWebGPU( controlFolder.add(settings, 'Complete Sort').onChange(startSortInterval); controlFolder.open(); - // Folder with indexes of the hovered element - const hoverFolder = gui.addFolder('Hover Information'); - const hoveredElementCell = hoverFolder - .add(settings, 'hoveredElement') - .onChange(setSwappedElement); - const swappedElementCell = hoverFolder.add(settings, 'swappedElement'); + // Information about grid display + const gridFolder = gui.addFolder('Grid Information'); + const gridWidthController = gridFolder.add(settings, 'Grid Width'); + const gridHeightController = gridFolder.add(settings, 'Grid Height'); + const hoveredCellController = gridFolder + .add(settings, 'Hovered Cell') + .onChange(setSwappedCell); + const swappedCellController = gridFolder.add(settings, 'Swapped Cell'); // Additional Information about the execution state of the sort const executionInformationFolder = gui.addFolder('Execution Information'); - const stepIndexCell = executionInformationFolder.add( + const stepIndexController = executionInformationFolder.add( settings, 'Step Index' ); - const totalStepsCell = executionInformationFolder.add( + const totalStepsController = executionInformationFolder.add( settings, 'Total Steps' ); - const prevStepCell = executionInformationFolder.add(settings, 'Prev Step'); - const nextStepCell = executionInformationFolder.add(settings, 'Next Step'); - const prevBlockHeightCell = executionInformationFolder.add( + const prevStepController = executionInformationFolder.add( settings, - 'Prev Swap Span' + 'Prev Step' ); - const nextBlockHeightCell = executionInformationFolder.add( + const nextStepController = executionInformationFolder.add( settings, - 'Next Swap Span' + 'Next Step' ); - const gridWidthCell = executionInformationFolder.add( + const prevBlockHeightController = executionInformationFolder.add( settings, - 'Grid Width' + 'Prev Swap Span' ); - const gridHeightCell = executionInformationFolder.add( + const nextBlockHeightController = executionInformationFolder.add( settings, - 'Grid Height' + 'Next Swap Span' ); // Adjust styles of Function List Elements within GUI @@ -404,19 +425,19 @@ SampleInitFactoryWebGPU( const xIndex = Math.floor(event.offsetX / cellSize[0]); const yIndex = settings['Grid Height'] - 1 - Math.floor(event.offsetY / cellSize[1]); - hoveredElementCell.setValue(yIndex * settings['Grid Width'] + xIndex); - settings.hoveredElement = yIndex * settings['Grid Width'] + xIndex; + hoveredCellController.setValue(yIndex * settings['Grid Width'] + xIndex); + settings['Hovered Cell'] = yIndex * settings['Grid Width'] + xIndex; }); // Deactivate interaction with select GUI elements - stepIndexCell.domElement.style.pointerEvents = 'none'; - prevStepCell.domElement.style.pointerEvents = 'none'; - prevBlockHeightCell.domElement.style.pointerEvents = 'none'; - nextStepCell.domElement.style.pointerEvents = 'none'; - nextBlockHeightCell.domElement.style.pointerEvents = 'none'; - totalThreadsCell.domElement.style.pointerEvents = 'none'; - gridWidthCell.domElement.style.pointerEvents = 'none'; - gridHeightCell.domElement.style.pointerEvents = 'none'; + stepIndexController.domElement.style.pointerEvents = 'none'; + prevStepController.domElement.style.pointerEvents = 'none'; + prevBlockHeightController.domElement.style.pointerEvents = 'none'; + nextStepController.domElement.style.pointerEvents = 'none'; + nextBlockHeightController.domElement.style.pointerEvents = 'none'; + totalThreadsController.domElement.style.pointerEvents = 'none'; + gridWidthController.domElement.style.pointerEvents = 'none'; + gridHeightController.domElement.style.pointerEvents = 'none'; let highestBlockHeight = 2; @@ -467,27 +488,27 @@ SampleInitFactoryWebGPU( ) { const computePassEncoder = commandEncoder.beginComputePass(); computePassEncoder.setPipeline(computePipeline); - computePassEncoder.setBindGroup(0, computeBGDescript.bindGroups[0]); - computePassEncoder.dispatchWorkgroups(1); + computePassEncoder.setBindGroup(0, computeBGCluster.bindGroups[0]); + computePassEncoder.dispatchWorkgroups(settings['Total Workgroups']); computePassEncoder.end(); - stepIndexCell.setValue(settings['Step Index'] + 1); - prevStepCell.setValue(settings['Next Step']); - prevBlockHeightCell.setValue(settings['Next Swap Span']); - nextBlockHeightCell.setValue(settings['Next Swap Span'] / 2); + stepIndexController.setValue(settings['Step Index'] + 1); + prevStepController.setValue(settings['Next Step']); + prevBlockHeightController.setValue(settings['Next Swap Span']); + nextBlockHeightController.setValue(settings['Next Swap Span'] / 2); if (settings['Next Swap Span'] === 1) { highestBlockHeight *= 2; - nextStepCell.setValue( + nextStepController.setValue( highestBlockHeight === settings['Total Elements'] * 2 ? 'NONE' : 'FLIP_LOCAL' ); - nextBlockHeightCell.setValue( + nextBlockHeightController.setValue( highestBlockHeight === settings['Total Elements'] * 2 ? 0 : highestBlockHeight ); } else { - nextStepCell.setValue('DISPERSE_LOCAL'); + nextStepController.setValue('DISPERSE_LOCAL'); } commandEncoder.copyBufferToBuffer( elementsOutputBuffer, @@ -519,7 +540,7 @@ SampleInitFactoryWebGPU( const elementsOutput = new Uint32Array(elementsData); elementsStagingBuffer.unmap(); elements = elementsOutput; - setSwappedElement(); + setSwappedCell(); } settings.executeStep = false; requestAnimationFrame(frame); diff --git a/src/sample/bitonicSort/utils.ts b/src/sample/bitonicSort/utils.ts index fea2992f..287651a4 100644 --- a/src/sample/bitonicSort/utils.ts +++ b/src/sample/bitonicSort/utils.ts @@ -9,7 +9,10 @@ type BindGroupBindingLayout = | GPUStorageTextureBindingLayout | GPUExternalTextureBindingLayout; -export type BindGroupsObjectsAndLayout = { +// An object containing +// 1. A generated Bind Group Layout +// 2. An array of Bind Groups that accord to that layout +export type BindGroupCluster = { bindGroups: GPUBindGroup[]; bindGroupLayout: GPUBindGroupLayout; }; @@ -27,7 +30,7 @@ type ResourceTypeName = * @param {ResourceTypeName[]} resourceTypes - The resourceType at the corresponding index. * @returns {BindGroupsObjectsAndLayout} An object containing an array of bindGroups and the bindGroupLayout they implement. */ -export const createBindGroupDescriptor = ( +export const createBindGroupCluster = ( bindings: number[], visibilities: number[], resourceTypes: ResourceTypeName[], @@ -35,7 +38,7 @@ export const createBindGroupDescriptor = ( resources: GPUBindingResource[][], label: string, device: GPUDevice -): BindGroupsObjectsAndLayout => { +): BindGroupCluster => { const layoutEntries: GPUBindGroupLayoutEntry[] = []; for (let i = 0; i < bindings.length; i++) { const layoutEntry: any = {}; From 4b65446a48bc982a8a3d4468e58b6ec1793b20b4 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Thu, 26 Oct 2023 18:02:57 -0700 Subject: [PATCH 04/10] removed anys for greggman --- src/sample/bitonicSort/utils.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sample/bitonicSort/utils.ts b/src/sample/bitonicSort/utils.ts index 287651a4..8d6096fe 100644 --- a/src/sample/bitonicSort/utils.ts +++ b/src/sample/bitonicSort/utils.ts @@ -41,11 +41,11 @@ export const createBindGroupCluster = ( ): BindGroupCluster => { const layoutEntries: GPUBindGroupLayoutEntry[] = []; for (let i = 0; i < bindings.length; i++) { - const layoutEntry: any = {}; - layoutEntry.binding = bindings[i]; - layoutEntry.visibility = visibilities[i % visibilities.length]; - layoutEntry[resourceTypes[i]] = resourceLayouts[i]; - layoutEntries.push(layoutEntry); + layoutEntries.push({ + binding: bindings[i], + visibility: visibilities[i % visibilities.length], + [resourceTypes[i]]: resourceLayouts[i], + }); } const bindGroupLayout = device.createBindGroupLayout({ @@ -61,10 +61,10 @@ export const createBindGroupCluster = ( for (let i = 0; i < resources.length; i++) { const groupEntries: GPUBindGroupEntry[] = []; for (let j = 0; j < resources[0].length; j++) { - const groupEntry: any = {}; - groupEntry.binding = j; - groupEntry.resource = resources[i][j]; - groupEntries.push(groupEntry); + groupEntries.push({ + binding: j, + resource: resources[i][j], + }); } const newBindGroup = device.createBindGroup({ label: `${label}.bindGroup${i}`, From ebf16840bb3208c664e3daf9f964731c65ed9565 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Sun, 29 Oct 2023 17:00:45 -0700 Subject: [PATCH 05/10] Safety commit --- .../bitonicSort/bitonicDisplay.frag.wgsl | 3 +- src/sample/bitonicSort/computeShader.ts | 56 ++++++++++++------- src/sample/bitonicSort/main.ts | 31 +++++----- 3 files changed, 50 insertions(+), 40 deletions(-) diff --git a/src/sample/bitonicSort/bitonicDisplay.frag.wgsl b/src/sample/bitonicSort/bitonicDisplay.frag.wgsl index 3f4a17ea..a963e97c 100644 --- a/src/sample/bitonicSort/bitonicDisplay.frag.wgsl +++ b/src/sample/bitonicSort/bitonicDisplay.frag.wgsl @@ -1,6 +1,8 @@ struct Uniforms { width: f32, height: f32, + algo: u32, + blockHeight: u32, } struct VertexOutput { @@ -9,7 +11,6 @@ struct VertexOutput { } @group(0) @binding(0) var uniforms: Uniforms; -@group(1) @binding(0) var data: array; @fragment fn frag_main(input: VertexOutput) -> @location(0) vec4 { diff --git a/src/sample/bitonicSort/computeShader.ts b/src/sample/bitonicSort/computeShader.ts index 6b9e731d..f0f2e781 100644 --- a/src/sample/bitonicSort/computeShader.ts +++ b/src/sample/bitonicSort/computeShader.ts @@ -31,24 +31,26 @@ fn local_compare_and_swap(idx_before: u32, idx_after: u32) { // thread_id goes from 0 to threadsPerWorkgroup fn get_flip_indices(thread_id: u32, block_height: u32) -> vec2 { - let q: u32 = ((2 * thread_id) / block_height) * block_height; + // Caculate index offset (i.e move indices into correct block) + let block_offset: u32 = ((2 * thread_id) / block_height) * block_height; let half_height = block_height / 2; + // Calculate index spacing var idx: vec2 = vec2( thread_id % half_height, block_height - (thread_id % half_height) - 1, ); - idx.x += q; - idx.y += q; + idx.x += block_offset; + idx.y += block_offset; return idx; } fn get_disperse_indices(thread_id: u32, block_height: u32) -> vec2 { - var q: u32 = ((2 * thread_id) / block_height) * block_height; + var block_offset: u32 = ((2 * thread_id) / block_height) * block_height; let half_height = block_height / 2; var idx: vec2 = vec2( thread_id % half_height, (thread_id % half_height) + half_height ); - idx.x += q; - idx.y += q; + idx.x += block_offset; + idx.y += block_offset; return idx; } @@ -56,6 +58,7 @@ fn get_disperse_indices(thread_id: u32, block_height: u32) -> vec2 { @group(0) @binding(1) var output_data: array; @group(0) @binding(2) var uniforms: Uniforms; + fn global_compare_and_swap(idx_before: u32, idx_after: u32) { if (input_data[idx_after] < input_data[idx_before]) { output_data[idx_before] = input_data[idx_after]; @@ -63,35 +66,44 @@ fn global_compare_and_swap(idx_before: u32, idx_after: u32) { } } +// Constants/enum +const ALGO_NONE = 0; +const ALGO_LOCAL_FLIP = 1; +const ALGO_LOCAL_DISPERSE = 2; +const ALGO_GLOBAL_FLIP = 3; + // Our compute shader will execute specified # of threads or elements / 2 threads @compute @workgroup_size(${threadsPerWorkgroup}, 1, 1) fn computeMain( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, - @builtin(workgroup_id) workgroup: vec3, + @builtin(workgroup_id) workgroup_id: vec3, ) { - //Each thread will populate the workgroup data... (1 thread for every 2 elements) - local_data[local_id.x * 2] = input_data[local_id.x * 2]; - local_data[local_id.x * 2 + 1] = input_data[local_id.x * 2 + 1]; + + // If we will perform a local swap, then populate the local data + if (uniforms.algo <= 2) { + //Each thread will populate the workgroup data... (1 thread for every 2 elements) + local_data[global_id.x * 2] = input_data[global_id.x * 2]; + local_data[global_id.x * 2 + 1] = input_data[global_id.x * 2 + 1]; + } //...and wait for each other to finish their own bit of data population. workgroupBarrier(); - var num_elements = uniforms.width * uniforms.height; - switch uniforms.algo { case 1: { // Local Flip - let idx = get_flip_indices(local_id.x, uniforms.blockHeight); + let idx = get_flip_indices(global_id.x, uniforms.blockHeight); local_compare_and_swap(idx.x, idx.y); - } + } case 2: { // Local Disperse - let idx = get_disperse_indices(local_id.x, uniforms.blockHeight); + let idx = get_disperse_indices(global_id.x, uniforms.blockHeight); local_compare_and_swap(idx.x, idx.y); - } - case 4: { // Global Flip - let idx = get_flip_indices(local_id.x, uniforms.blockHeight); + } + case 3: { // Global Flip + let idx = get_flip_indices(global_id.x, uniforms.blockHeight); global_compare_and_swap(idx.x, idx.y); } + // case 4: { //Global Disperse default: { } @@ -100,9 +112,11 @@ fn computeMain( // Ensure that all threads have swapped their own regions of data workgroupBarrier(); - //Repopulate global data with local data - output_data[local_id.x * 2] = local_data[local_id.x * 2]; - output_data[local_id.x * 2 + 1] = local_data[local_id.x * 2 + 1]; + if (uniforms.algo <= ALGO_LOCAL_DISPERSE) { + //Repopulate global data with local data + output_data[local_id.x * 2] = local_data[local_id.x * 2]; + output_data[local_id.x * 2 + 1] = local_data[local_id.x * 2 + 1]; + } }`; }; diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index da130e0b..ed2d7da4 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -10,17 +10,11 @@ enum StepEnum { NONE, FLIP_LOCAL, DISPERSE_LOCAL, - FLIP_DISPERSE_LOCAL, FLIP_GLOBAL, } // String access to StepEnum -type StepType = - | 'NONE' - | 'FLIP_LOCAL' - | 'DISPERSE_LOCAL' - | 'FLIP_DISPERSE_LOCAL' - | 'FLIP_GLOBAL'; +type StepType = 'NONE' | 'FLIP_LOCAL' | 'DISPERSE_LOCAL' | 'FLIP_GLOBAL'; // Gui settings object interface SettingsInterface { @@ -94,7 +88,7 @@ SampleInitFactoryWebGPU( // Max thread span of next block 'Next Swap Span': 2, // Workgroups to dispatch per frame, - 'Total Workgroups': 1, + 'Total Workgroups': maxElements / (maxWorkgroupsX * 2), // Whether we will dispatch a workload this frame executeStep: false, 'Randomize Values': () => { @@ -275,6 +269,7 @@ SampleInitFactoryWebGPU( let swappedIndex: number; switch (settings['Next Step']) { case 'FLIP_LOCAL': + case 'FLIP_GLOBAL': { const blockHeight = settings['Next Swap Span']; const p2 = Math.floor(settings['Hovered Cell'] / blockHeight) + 1; @@ -497,16 +492,16 @@ SampleInitFactoryWebGPU( nextBlockHeightController.setValue(settings['Next Swap Span'] / 2); if (settings['Next Swap Span'] === 1) { highestBlockHeight *= 2; - nextStepController.setValue( - highestBlockHeight === settings['Total Elements'] * 2 - ? 'NONE' - : 'FLIP_LOCAL' - ); - nextBlockHeightController.setValue( - highestBlockHeight === settings['Total Elements'] * 2 - ? 0 - : highestBlockHeight - ); + if (highestBlockHeight === settings['Total Elements'] * 2) { + nextStepController.setValue('NONE'); + nextBlockHeightController.setValue(0); + } else if (highestBlockHeight > settings['Total Threads'] * 2) { + nextStepController.setValue('FLIP_GLOBAL'); + nextBlockHeightController.setValue(highestBlockHeight); + } else { + nextStepController.setValue('FLIP_LOCAL'); + nextBlockHeightController.setValue(highestBlockHeight); + } } else { nextStepController.setValue('DISPERSE_LOCAL'); } From 181164c6cc21dfa5b3bccbe5df639cda439039e4 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Sun, 29 Oct 2023 17:48:43 -0700 Subject: [PATCH 06/10] Adjusted bindGroups and uniform placement and added different visualization of swap regions --- .../bitonicSort/bitonicDisplay.frag.wgsl | 23 +++++++++++-- src/sample/bitonicSort/bitonicDisplay.ts | 33 ++++++------------- src/sample/bitonicSort/main.ts | 14 +++++--- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/src/sample/bitonicSort/bitonicDisplay.frag.wgsl b/src/sample/bitonicSort/bitonicDisplay.frag.wgsl index a963e97c..17842eab 100644 --- a/src/sample/bitonicSort/bitonicDisplay.frag.wgsl +++ b/src/sample/bitonicSort/bitonicDisplay.frag.wgsl @@ -1,16 +1,25 @@ -struct Uniforms { +struct ComputeUniforms { width: f32, height: f32, algo: u32, blockHeight: u32, } +struct FragmentUniforms { + // boolean, either 0 or 1 + highlight: u32, +} + struct VertexOutput { @builtin(position) Position: vec4, @location(0) fragUV: vec2 } -@group(0) @binding(0) var uniforms: Uniforms; +// Uniforms from compute shader +@group(0) @binding(0) var data: array; +@group(0) @binding(2) var uniforms: ComputeUniforms; +// Fragment shader uniforms +@group(1) @binding(0) var fragment_uniforms: FragmentUniforms; @fragment fn frag_main(input: VertexOutput) -> @location(0) vec4 { @@ -29,6 +38,16 @@ fn frag_main(input: VertexOutput) -> @location(0) vec4 { var subtracter = f32(colorChanger) / (uniforms.width * uniforms.height); + if (fragment_uniforms.highlight == 1) { + return select( + //If element is above halfHeight, highlight green + vec4(vec3(0.0, 1.0 - subtracter, 0.0).rgb, 1.0), + //If element is below halfheight, highlight red + vec4(vec3(1.0 - subtracter, 0.0, 0.0).rgb, 1.0), + elementIndex % uniforms.blockHeight < uniforms.blockHeight / 2 + ); + } + var color: vec3 = vec3f( 1.0 - subtracter ); diff --git a/src/sample/bitonicSort/bitonicDisplay.ts b/src/sample/bitonicSort/bitonicDisplay.ts index dcdd7cd8..b86f2905 100644 --- a/src/sample/bitonicSort/bitonicDisplay.ts +++ b/src/sample/bitonicSort/bitonicDisplay.ts @@ -1,14 +1,13 @@ import { BindGroupCluster, - createBindGroupCluster, Base2DRendererClass, + createBindGroupCluster, } from './utils'; import bitonicDisplay from './bitonicDisplay.frag.wgsl'; interface BitonicDisplayRenderArgs { - width: number; - height: number; + highlight: number; } export default class BitonicDisplayRenderer extends Base2DRendererClass { @@ -25,7 +24,6 @@ export default class BitonicDisplayRenderer extends Base2DRendererClass { device: GPUDevice, presentationFormat: GPUTextureFormat, renderPassDescriptor: GPURenderPassDescriptor, - bindGroupNames: string[], computeBGDescript: BindGroupCluster, label: string ) { @@ -34,7 +32,7 @@ export default class BitonicDisplayRenderer extends Base2DRendererClass { this.computeBGDescript = computeBGDescript; const uniformBuffer = device.createBuffer({ - size: Float32Array.BYTES_PER_ELEMENT * 2, + size: Uint32Array.BYTES_PER_ELEMENT, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, }); @@ -49,40 +47,29 @@ export default class BitonicDisplayRenderer extends Base2DRendererClass { ); this.currentBindGroup = bgCluster.bindGroups[0]; - this.currentBindGroupName = bindGroupNames[0]; - - this.bindGroupMap = {}; - - bgCluster.bindGroups.forEach((bg, idx) => { - this.bindGroupMap[bindGroupNames[idx]] = bg; - }); this.pipeline = super.create2DRenderPipeline( device, label, - [bgCluster.bindGroupLayout, this.computeBGDescript.bindGroupLayout], + [this.computeBGDescript.bindGroupLayout, bgCluster.bindGroupLayout], bitonicDisplay, presentationFormat ); - this.switchBindGroup = (name: string) => { - this.currentBindGroup = this.bindGroupMap[name]; - this.currentBindGroupName = name; - }; - this.setArguments = (args: BitonicDisplayRenderArgs) => { - super.setUniformArguments(device, uniformBuffer, args, [ - 'width', - 'height', - ]); + device.queue.writeBuffer( + uniformBuffer, + 0, + new Uint32Array([args.highlight]) + ); }; } startRun(commandEncoder: GPUCommandEncoder, args: BitonicDisplayRenderArgs) { this.setArguments(args); super.executeRun(commandEncoder, this.renderPassDescriptor, this.pipeline, [ - this.currentBindGroup, this.computeBGDescript.bindGroups[0], + this.currentBindGroup, ]); } } diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index ed2d7da4..262d6baf 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -16,6 +16,8 @@ enum StepEnum { // String access to StepEnum type StepType = 'NONE' | 'FLIP_LOCAL' | 'DISPERSE_LOCAL' | 'FLIP_GLOBAL'; +type DisplayType = 'Elements' | 'Swap Highlight'; + // Gui settings object interface SettingsInterface { 'Total Elements': number; @@ -31,6 +33,7 @@ interface SettingsInterface { 'Prev Swap Span': number; 'Next Swap Span': number; 'Total Workgroups': number; + 'Display Mode': DisplayType; executeStep: boolean; 'Randomize Values': () => void; 'Execute Sort Step': () => void; @@ -91,6 +94,7 @@ SampleInitFactoryWebGPU( 'Total Workgroups': maxElements / (maxWorkgroupsX * 2), // Whether we will dispatch a workload this frame executeStep: false, + 'Display Mode': 'Elements', 'Randomize Values': () => { return; }, @@ -140,7 +144,7 @@ SampleInitFactoryWebGPU( [ GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, GPUShaderStage.COMPUTE, - GPUShaderStage.COMPUTE, + GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, ], ['buffer', 'buffer', 'buffer'], [{ type: 'read-only-storage' }, { type: 'storage' }, { type: 'uniform' }], @@ -184,7 +188,6 @@ SampleInitFactoryWebGPU( device, presentationFormat, renderPassDescriptor, - ['default'], computeBGCluster, 'BitonicDisplay' ); @@ -365,6 +368,10 @@ SampleInitFactoryWebGPU( // Information about grid display const gridFolder = gui.addFolder('Grid Information'); + const displayModeController = gridFolder.add(settings, 'Display Mode', [ + 'Elements', + 'Swap Highlight', + ]); const gridWidthController = gridFolder.add(settings, 'Grid Width'); const gridHeightController = gridFolder.add(settings, 'Grid Height'); const hoveredCellController = gridFolder @@ -474,8 +481,7 @@ SampleInitFactoryWebGPU( const commandEncoder = device.createCommandEncoder(); bitonicDisplayRenderer.startRun(commandEncoder, { - width: settings['Grid Width'], - height: settings['Grid Height'], + highlight: settings['Display Mode'] === 'Elements' ? 0 : 1, }); if ( settings.executeStep && From fc405218494db230d70a25b89c22e8fef94b2379 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Sun, 29 Oct 2023 17:49:39 -0700 Subject: [PATCH 07/10] Adjusted bindGroups, adjusted uniforms, and added a new visualization of swap regions --- src/sample/bitonicSort/main.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index 262d6baf..e894b979 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -368,10 +368,7 @@ SampleInitFactoryWebGPU( // Information about grid display const gridFolder = gui.addFolder('Grid Information'); - const displayModeController = gridFolder.add(settings, 'Display Mode', [ - 'Elements', - 'Swap Highlight', - ]); + gridFolder.add(settings, 'Display Mode', ['Elements', 'Swap Highlight']); const gridWidthController = gridFolder.add(settings, 'Grid Width'); const gridHeightController = gridFolder.add(settings, 'Grid Height'); const hoveredCellController = gridFolder From 50a1bae2d851633437680550ff2632cbac7ab48f Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Sun, 29 Oct 2023 18:22:51 -0700 Subject: [PATCH 08/10] Finished adding updates --- src/sample/bitonicSort/computeShader.ts | 22 ++++++++++++++-------- src/sample/bitonicSort/main.ts | 24 ++++++++++++++++-------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/sample/bitonicSort/computeShader.ts b/src/sample/bitonicSort/computeShader.ts index f0f2e781..5b112245 100644 --- a/src/sample/bitonicSort/computeShader.ts +++ b/src/sample/bitonicSort/computeShader.ts @@ -80,11 +80,14 @@ fn computeMain( @builtin(workgroup_id) workgroup_id: vec3, ) { + let offset = ${threadsPerWorkgroup} * 2 * workgroup_id.x; // If we will perform a local swap, then populate the local data if (uniforms.algo <= 2) { - //Each thread will populate the workgroup data... (1 thread for every 2 elements) - local_data[global_id.x * 2] = input_data[global_id.x * 2]; - local_data[global_id.x * 2 + 1] = input_data[global_id.x * 2 + 1]; + // Assign range of input_data to local_data. + // Range cannot exceed maxWorkgroupsX * 2 + // Each thread will populate the workgroup data... (1 thread for every 2 elements) + local_data[local_id.x * 2] = input_data[offset + local_id.x * 2]; + local_data[local_id.x * 2 + 1] = input_data[offset + local_id.x * 2 + 1]; } //...and wait for each other to finish their own bit of data population. @@ -92,18 +95,21 @@ fn computeMain( switch uniforms.algo { case 1: { // Local Flip - let idx = get_flip_indices(global_id.x, uniforms.blockHeight); + let idx = get_flip_indices(local_id.x, uniforms.blockHeight); local_compare_and_swap(idx.x, idx.y); } case 2: { // Local Disperse - let idx = get_disperse_indices(global_id.x, uniforms.blockHeight); + let idx = get_disperse_indices(local_id.x, uniforms.blockHeight); local_compare_and_swap(idx.x, idx.y); } case 3: { // Global Flip let idx = get_flip_indices(global_id.x, uniforms.blockHeight); global_compare_and_swap(idx.x, idx.y); } - // case 4: { //Global Disperse + case 4: { + let idx = get_disperse_indices(global_id.x, uniforms.blockHeight); + global_compare_and_swap(idx.x, idx.y); + } default: { } @@ -114,8 +120,8 @@ fn computeMain( if (uniforms.algo <= ALGO_LOCAL_DISPERSE) { //Repopulate global data with local data - output_data[local_id.x * 2] = local_data[local_id.x * 2]; - output_data[local_id.x * 2 + 1] = local_data[local_id.x * 2 + 1]; + output_data[offset + local_id.x * 2] = local_data[local_id.x * 2]; + output_data[offset + local_id.x * 2 + 1] = local_data[local_id.x * 2 + 1]; } }`; diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index e894b979..b89d7368 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -11,10 +11,16 @@ enum StepEnum { FLIP_LOCAL, DISPERSE_LOCAL, FLIP_GLOBAL, + DISPERSE_GLOBAL, } // String access to StepEnum -type StepType = 'NONE' | 'FLIP_LOCAL' | 'DISPERSE_LOCAL' | 'FLIP_GLOBAL'; +type StepType = + | 'NONE' + | 'FLIP_LOCAL' + | 'DISPERSE_LOCAL' + | 'FLIP_GLOBAL' + | 'DISPERSE_GLOBAL'; type DisplayType = 'Elements' | 'Swap Highlight'; @@ -50,10 +56,10 @@ const getNumSteps = (numElements: number) => { let init: SampleInit; SampleInitFactoryWebGPU( async ({ pageState, device, gui, presentationFormat, context, canvas }) => { - const maxWorkgroupsX = device.limits.maxComputeWorkgroupSizeX; + const maxThreadsX = device.limits.maxComputeWorkgroupSizeX; const totalElementLengths = []; - const maxElements = maxWorkgroupsX * 2; + const maxElements = maxThreadsX * 32; for (let i = maxElements; i >= 4; i /= 2) { totalElementLengths.push(i); } @@ -73,7 +79,7 @@ SampleInitFactoryWebGPU( // height of screen in cells 'Grid Height': defaultGridHeight, // number of threads to execute in a workgroup ('Total Threads', 1, 1) - 'Total Threads': maxWorkgroupsX, + 'Total Threads': maxThreadsX, // Cell in element grid mouse element is hovering over 'Hovered Cell': 0, // element the hovered cell just swapped with, @@ -91,7 +97,7 @@ SampleInitFactoryWebGPU( // Max thread span of next block 'Next Swap Span': 2, // Workgroups to dispatch per frame, - 'Total Workgroups': maxElements / (maxWorkgroupsX * 2), + 'Total Workgroups': maxElements / (maxThreadsX * 2), // Whether we will dispatch a workload this frame executeStep: false, 'Display Mode': 'Elements', @@ -195,12 +201,12 @@ SampleInitFactoryWebGPU( const resetExecutionInformation = () => { // Total threads are either elements / 2 or maxWorkgroupsSizeX totalThreadsController.setValue( - Math.min(settings['Total Elements'] / 2, maxWorkgroupsX) + Math.min(settings['Total Elements'] / 2, maxThreadsX) ); // Dispatch a workgroup for every (Max threads * 2) elements const workgroupsPerStep = - (settings['Total Elements'] - 1) / (maxWorkgroupsX * 2); + (settings['Total Elements'] - 1) / (maxThreadsX * 2); totalWorkgroupsController.setValue(Math.ceil(workgroupsPerStep)); @@ -506,7 +512,9 @@ SampleInitFactoryWebGPU( nextBlockHeightController.setValue(highestBlockHeight); } } else { - nextStepController.setValue('DISPERSE_LOCAL'); + settings['Next Swap Span'] > settings['Total Threads'] * 2 + ? nextStepController.setValue('DISPERSE_GLOBAL') + : nextStepController.setValue('DISPERSE_LOCAL'); } commandEncoder.copyBufferToBuffer( elementsOutputBuffer, From 174db187a89065acddcf9336bab331c505afea28 Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Mon, 30 Oct 2023 18:11:52 -0700 Subject: [PATCH 09/10] Removed pointerEvents from non-interactive gui elements and added atomic account of number of swaps performed during a sort --- src/sample/bitonicSort/atomicToZero.wgsl | 7 ++ src/sample/bitonicSort/computeShader.ts | 13 ++-- src/sample/bitonicSort/main.ts | 95 ++++++++++++++++++++++-- 3 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 src/sample/bitonicSort/atomicToZero.wgsl diff --git a/src/sample/bitonicSort/atomicToZero.wgsl b/src/sample/bitonicSort/atomicToZero.wgsl new file mode 100644 index 00000000..23e7e4ee --- /dev/null +++ b/src/sample/bitonicSort/atomicToZero.wgsl @@ -0,0 +1,7 @@ +@group(0) @binding(3) var counter: atomic; + +@compute @workgroup_size(1, 1, 1) +fn atomicToZero() { + let counterValue = atomicLoad(&counter); + atomicSub(&counter, counterValue); +} \ No newline at end of file diff --git a/src/sample/bitonicSort/computeShader.ts b/src/sample/bitonicSort/computeShader.ts index 5b112245..e0ca6134 100644 --- a/src/sample/bitonicSort/computeShader.ts +++ b/src/sample/bitonicSort/computeShader.ts @@ -15,13 +15,19 @@ struct Uniforms { } // Create local workgroup data that can contain all elements - var local_data: array; +// Define groups (functions refer to this data) +@group(0) @binding(0) var input_data: array; +@group(0) @binding(1) var output_data: array; +@group(0) @binding(2) var uniforms: Uniforms; +@group(0) @binding(3) var counter: atomic; + // Compare and swap values in local_data fn local_compare_and_swap(idx_before: u32, idx_after: u32) { //idx_before should always be < idx_after if (local_data[idx_after] < local_data[idx_before]) { + atomicAdd(&counter, 1); var temp: u32 = local_data[idx_before]; local_data[idx_before] = local_data[idx_after]; local_data[idx_after] = temp; @@ -54,11 +60,6 @@ fn get_disperse_indices(thread_id: u32, block_height: u32) -> vec2 { return idx; } -@group(0) @binding(0) var input_data: array; -@group(0) @binding(1) var output_data: array; -@group(0) @binding(2) var uniforms: Uniforms; - - fn global_compare_and_swap(idx_before: u32, idx_after: u32) { if (input_data[idx_after] < input_data[idx_before]) { output_data[idx_before] = input_data[idx_after]; diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index b89d7368..6e0411c9 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -4,6 +4,7 @@ import BitonicDisplayRenderer from './bitonicDisplay'; import bitonicDisplay from './bitonicDisplay.frag.wgsl'; import { NaiveBitonicCompute } from './computeShader'; import fullscreenTexturedQuad from '../../shaders/fullscreenTexturedQuad.wgsl'; +import atomicToZero from './atomicToZero.wgsl'; // Type of step that will be executed in our shader enum StepEnum { @@ -40,6 +41,7 @@ interface SettingsInterface { 'Next Swap Span': number; 'Total Workgroups': number; 'Display Mode': DisplayType; + 'Total Swaps': number; executeStep: boolean; 'Randomize Values': () => void; 'Execute Sort Step': () => void; @@ -98,6 +100,8 @@ SampleInitFactoryWebGPU( 'Next Swap Span': 2, // Workgroups to dispatch per frame, 'Total Workgroups': maxElements / (maxThreadsX * 2), + // The number of swap operations executed over time + 'Total Swaps': 0, // Whether we will dispatch a workload this frame executeStep: false, 'Display Mode': 'Elements', @@ -138,6 +142,17 @@ SampleInitFactoryWebGPU( usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, }); + // Initialize atomic swap buffer on GPU and CPU. Counts number of swaps actually performed by + // compute shader (when value at index x is greater than value at index y) + const atomicSwapsOutputBuffer = device.createBuffer({ + size: Uint32Array.BYTES_PER_ELEMENT, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + const atomicSwapsStagingBuffer = device.createBuffer({ + size: Uint32Array.BYTES_PER_ELEMENT, + usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST, + }); + // Create uniform buffer for compute shader const computeUniformsBuffer = device.createBuffer({ // width, height, blockHeight, algo @@ -146,22 +161,29 @@ SampleInitFactoryWebGPU( }); const computeBGCluster = createBindGroupCluster( - [0, 1, 2], + [0, 1, 2, 3], [ GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, GPUShaderStage.COMPUTE, GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, + GPUShaderStage.COMPUTE, + ], + ['buffer', 'buffer', 'buffer', 'buffer'], + [ + { type: 'read-only-storage' }, + { type: 'storage' }, + { type: 'uniform' }, + { type: 'storage' }, ], - ['buffer', 'buffer', 'buffer'], - [{ type: 'read-only-storage' }, { type: 'storage' }, { type: 'uniform' }], [ [ { buffer: elementsInputBuffer }, { buffer: elementsOutputBuffer }, { buffer: computeUniformsBuffer }, + { buffer: atomicSwapsOutputBuffer }, ], ], - 'NaiveBitonicSort', + 'BitonicSort', device ); @@ -177,6 +199,19 @@ SampleInitFactoryWebGPU( }, }); + // Simple pipeline that zeros out an atomic value at group 0 binding 3 + const atomicToZeroComputePipeline = device.createComputePipeline({ + layout: device.createPipelineLayout({ + bindGroupLayouts: [computeBGCluster.bindGroupLayout], + }), + compute: { + module: device.createShaderModule({ + code: atomicToZero, + }), + entryPoint: 'atomicToZero', + }, + }); + // Create bitonic debug renderer const renderPassDescriptor: GPURenderPassDescriptor = { colorAttachments: [ @@ -230,6 +265,17 @@ SampleInitFactoryWebGPU( // Reset block heights prevBlockHeightController.setValue(0); nextBlockHeightController.setValue(2); + + // Reset Total Swaps by setting atomic value to 0 + const commandEncoder = device.createCommandEncoder(); + const computePassEncoder = commandEncoder.beginComputePass(); + computePassEncoder.setPipeline(atomicToZeroComputePipeline); + computePassEncoder.setBindGroup(0, computeBGCluster.bindGroups[0]); + computePassEncoder.dispatchWorkgroups(1); + computePassEncoder.end(); + device.queue.submit([commandEncoder.finish()]); + totalSwapsController.setValue(0); + highestBlockHeight = 2; }; @@ -400,6 +446,10 @@ SampleInitFactoryWebGPU( settings, 'Next Step' ); + const totalSwapsController = executionInformationFolder.add( + settings, + 'Total Swaps' + ); const prevBlockHeightController = executionInformationFolder.add( settings, 'Prev Swap Span' @@ -435,7 +485,11 @@ SampleInitFactoryWebGPU( }); // Deactivate interaction with select GUI elements + totalWorkgroupsController.domElement.style.pointerEvents = 'none'; + hoveredCellController.domElement.style.pointerEvents = 'none'; + swappedCellController.domElement.style.pointerEvents = 'none'; stepIndexController.domElement.style.pointerEvents = 'none'; + totalStepsController.domElement.style.pointerEvents = 'none'; prevStepController.domElement.style.pointerEvents = 'none'; prevBlockHeightController.domElement.style.pointerEvents = 'none'; nextStepController.domElement.style.pointerEvents = 'none'; @@ -516,6 +570,8 @@ SampleInitFactoryWebGPU( ? nextStepController.setValue('DISPERSE_GLOBAL') : nextStepController.setValue('DISPERSE_LOCAL'); } + + // Copy GPU accessible buffers to CPU accessible buffers commandEncoder.copyBufferToBuffer( elementsOutputBuffer, 0, @@ -523,6 +579,14 @@ SampleInitFactoryWebGPU( 0, elementsBufferSize ); + + commandEncoder.copyBufferToBuffer( + atomicSwapsOutputBuffer, + 0, + atomicSwapsStagingBuffer, + 0, + Uint32Array.BYTES_PER_ELEMENT + ); } device.queue.submit([commandEncoder.finish()]); @@ -537,14 +601,31 @@ SampleInitFactoryWebGPU( 0, elementsBufferSize ); + // Copy atomic swaps data to CPU + await atomicSwapsStagingBuffer.mapAsync( + GPUMapMode.READ, + 0, + Uint32Array.BYTES_PER_ELEMENT + ); + const copySwapsBuffer = atomicSwapsStagingBuffer.getMappedRange( + 0, + Uint32Array.BYTES_PER_ELEMENT + ); // Get correct range of data from CPU copy of GPU Data const elementsData = copyElementsBuffer.slice( 0, Uint32Array.BYTES_PER_ELEMENT * settings['Total Elements'] ); + const swapsData = copySwapsBuffer.slice( + 0, + Uint32Array.BYTES_PER_ELEMENT + ); // Extract data const elementsOutput = new Uint32Array(elementsData); + totalSwapsController.setValue(new Uint32Array(swapsData)[0]); elementsStagingBuffer.unmap(); + atomicSwapsStagingBuffer.unmap(); + // Elements output becomes elements input, swap accumulate elements = elementsOutput; setSwappedCell(); } @@ -578,7 +659,11 @@ const bitonicSortExample: () => JSX.Element = () => }, { name: './bitonicCompute.frag.wgsl', - contents: NaiveBitonicCompute(16), + contents: NaiveBitonicCompute(64), + }, + { + name: './atomicToZero.wgsl', + contents: atomicToZero, }, ], filename: __filename, From 1a4a0d15b31b5b6244e9c1824fd01ee2716a554b Mon Sep 17 00:00:00 2001 From: cmhhelgeson <62450112+cmhhelgeson@users.noreply.github.com> Date: Mon, 30 Oct 2023 18:40:28 -0700 Subject: [PATCH 10/10] Made totalSwaps non-interactive :( --- src/sample/bitonicSort/main.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sample/bitonicSort/main.ts b/src/sample/bitonicSort/main.ts index 6e0411c9..1ef69e40 100644 --- a/src/sample/bitonicSort/main.ts +++ b/src/sample/bitonicSort/main.ts @@ -497,6 +497,7 @@ SampleInitFactoryWebGPU( totalThreadsController.domElement.style.pointerEvents = 'none'; gridWidthController.domElement.style.pointerEvents = 'none'; gridHeightController.domElement.style.pointerEvents = 'none'; + totalSwapsController.domElement.style.pointerEvents = 'none'; let highestBlockHeight = 2;