From aafa50d3b0a5ebdccd0541c0706288c0a642795c Mon Sep 17 00:00:00 2001
From: Reese Levine
Date: Wed, 11 May 2022 13:39:14 -0700
Subject: [PATCH] Add comprehensive memory model tests (#1330)

* Add comprehensive coherence tests
* Add comprehensive barrier tests
* Add all weak memory tests
* Update runtimes
* Remove unneeded params
* Revert package-lock
* Add a few more rmw variants
---
 .../execution/memory_model/atomicity.spec.ts |   4 +-
 .../execution/memory_model/barrier.spec.ts   | 117 ++++-
 .../execution/memory_model/coherence.spec.ts | 440 +++++++++++++++++-
 .../memory_model/memory_model_setup.ts       |  47 +-
 .../execution/memory_model/weak.spec.ts      | 350 +++++++++++++-
 5 files changed, 912 insertions(+), 46 deletions(-)

diff --git a/src/webgpu/shader/execution/memory_model/atomicity.spec.ts b/src/webgpu/shader/execution/memory_model/atomicity.spec.ts
index e6218978a05b..371eee5f926f 100644
--- a/src/webgpu/shader/execution/memory_model/atomicity.spec.ts
+++ b/src/webgpu/shader/execution/memory_model/atomicity.spec.ts
@@ -38,8 +38,6 @@ const memoryModelTestParams: MemoryModelTestParams = {
   permuteSecond: 419,
   memStride: 4,
   aliasedMemory: false,
-  numMemLocations: 1,
-  numReadOutputs: 1,
   numBehaviors: 4,
 };

@@ -100,5 +98,5 @@ g.test('atomicity')
     testShader,
     resultShader
   );
-  await memModelTester.run(20, 3);
+  await memModelTester.run(10, 3);
 });
diff --git a/src/webgpu/shader/execution/memory_model/barrier.spec.ts b/src/webgpu/shader/execution/memory_model/barrier.spec.ts
index a042b5325264..6cda6c3e1920 100644
--- a/src/webgpu/shader/execution/memory_model/barrier.spec.ts
+++ b/src/webgpu/shader/execution/memory_model/barrier.spec.ts
@@ -39,8 +39,6 @@ const memoryModelTestParams: MemoryModelTestParams = {
   permuteSecond: 419,
   memStride: 4,
   aliasedMemory: false,
-  numMemLocations: 1,
-  numReadOutputs: 1,
   numBehaviors: 2,
 };

@@ -96,5 +94,118 @@ g.test('workgroup_barrier_store_load')
     testShader,
     resultShader
   );
-  await memModelTester.run(20, 1);
+  await memModelTester.run(15, 1);
+  });
+
+const storageMemoryBarrierLoadStoreTestCode = `
+  let r0 = test_locations.value[x_0];
+  storageBarrier();
+  test_locations.value[x_1] = 1u;
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+`;
+
+const workgroupMemoryBarrierLoadStoreTestCode = `
+  let r0 = wg_test_locations[x_0];
+  workgroupBarrier();
+  wg_test_locations[x_1] = 1u;
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+`;
+
+g.test('workgroup_barrier_load_store')
+  .desc(
+    `Checks whether the workgroup barrier properly synchronizes a non-atomic write and read on
+    separate threads in the same workgroup. Within a workgroup, the barrier should ensure that a
+    read sequenced before the barrier cannot observe a write performed by another invocation
+    after the barrier.
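+    In the result aggregation below, a read of 0 counts as the sequential behavior and a read of 1
+    as the disallowed weak behavior.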
+ ` + ) + .paramsSimple([ + { memType: MemoryType.NonAtomicStorageClass, _testCode: storageMemoryBarrierLoadStoreTestCode }, + { + memType: MemoryType.NonAtomicWorkgroupClass, + _testCode: workgroupMemoryBarrierLoadStoreTestCode, + }, + ]) + .fn(async t => { + const resultCode = ` + if (r0 == 0u) { + atomicAdd(&test_results.seq, 1u); + } else if (r0 == 1u) { + atomicAdd(&test_results.weak, 1u); + } + `; + const testShader = buildTestShader( + t.params._testCode, + t.params.memType, + TestType.IntraWorkgroup + ); + const resultShader = buildResultShader( + resultCode, + TestType.IntraWorkgroup, + ResultType.TwoBehavior + ); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + resultShader + ); + await memModelTester.run(12, 1); + }); + +const storageMemoryBarrierStoreStoreTestCode = ` + test_locations.value[x_0] = 1u; + storageBarrier(); + test_locations.value[x_1] = 2u; +`; + +const workgroupMemoryBarrierStoreStoreTestCode = ` + wg_test_locations[x_0] = 1u; + workgroupBarrier(); + wg_test_locations[x_1] = 2u; + workgroupBarrier(); + test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1] = wg_test_locations[x_1]; +`; + +g.test('workgroup_barrier_store_store') + .desc( + `Checks whether the workgroup barrier properly synchronizes non-atomic writes on + separate threads in the same workgroup. Within a workgroup, the barrier should force the value in memory + to be the result of the write after the barrier, not the write before. + ` + ) + .paramsSimple([ + { + memType: MemoryType.NonAtomicStorageClass, + _testCode: storageMemoryBarrierStoreStoreTestCode, + }, + { + memType: MemoryType.NonAtomicWorkgroupClass, + _testCode: workgroupMemoryBarrierStoreStoreTestCode, + }, + ]) + .fn(async t => { + const resultCode = ` + if (mem_x_0 == 2u) { + atomicAdd(&test_results.seq, 1u); + } else if (mem_x_0 == 1u) { + atomicAdd(&test_results.weak, 1u); + } + `; + const testShader = buildTestShader( + t.params._testCode, + t.params.memType, + TestType.IntraWorkgroup + ); + const resultShader = buildResultShader( + resultCode, + TestType.IntraWorkgroup, + ResultType.TwoBehavior + ); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + resultShader + ); + await memModelTester.run(10, 1); }); diff --git a/src/webgpu/shader/execution/memory_model/coherence.spec.ts b/src/webgpu/shader/execution/memory_model/coherence.spec.ts index 4117ec8d6141..5e802b5746fc 100644 --- a/src/webgpu/shader/execution/memory_model/coherence.spec.ts +++ b/src/webgpu/shader/execution/memory_model/coherence.spec.ts @@ -21,15 +21,15 @@ export const g = makeTestGroup(GPUTest); // A reasonable parameter set, determined heuristically. 
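+// For scale: memory_model_setup.ts sizes the test locations buffer as
+// workgroupSize * testingWorkgroups * numMemLocations * memStride words, i.e.
+// 256 * 39 * 2 * 1 = 19968 words (~78 KiB) with the values below.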
const memoryModelTestParams: MemoryModelTestParams = { workgroupSize: 256, - testingWorkgroups: 512, - maxWorkgroups: 1024, - shufflePct: 100, - barrierPct: 100, - memStressPct: 100, + testingWorkgroups: 39, + maxWorkgroups: 952, + shufflePct: 0, + barrierPct: 0, + memStressPct: 0, memStressIterations: 1024, memStressStoreFirstPct: 50, memStressStoreSecondPct: 50, - preStressPct: 100, + preStressPct: 0, preStressIterations: 1024, preStressStoreFirstPct: 50, preStressStoreSecondPct: 50, @@ -39,10 +39,8 @@ const memoryModelTestParams: MemoryModelTestParams = { stressStrategyBalancePct: 50, permuteFirst: 109, permuteSecond: 1, - memStride: 4, + memStride: 1, aliasedMemory: true, - numMemLocations: 1, - numReadOutputs: 2, numBehaviors: 4, }; @@ -54,6 +52,30 @@ const storageMemoryCorrTestCode = ` atomicStore(&results.value[id_1].r1, r1); `; +const workgroupStorageMemoryCorrTestCode = ` + atomicStore(&test_locations.value[x_0], 1u); + let r0 = atomicLoad(&test_locations.value[x_1]); + let r1 = atomicLoad(&test_locations.value[y_1]); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + +const storageMemoryCorrRMWTestCode = ` + atomicExchange(&test_locations.value[x_0], 1u); + let r0 = atomicLoad(&test_locations.value[x_1]); + let r1 = atomicAdd(&test_locations.value[y_1], 0u); + atomicStore(&results.value[id_1].r0, r0); + atomicStore(&results.value[id_1].r1, r1); +`; + +const workgroupStorageMemoryCorrRMWTestCode = ` + atomicExchange(&test_locations.value[x_0], 1u); + let r0 = atomicLoad(&test_locations.value[x_1]); + let r1 = atomicAdd(&test_locations.value[y_1], 0u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + const workgroupMemoryCorrTestCode = ` atomicStore(&wg_test_locations[x_0], 1u); let r0 = atomicLoad(&wg_test_locations[x_1]); @@ -62,6 +84,14 @@ const workgroupMemoryCorrTestCode = ` atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); `; +const workgroupMemoryCorrRMWTestCode = ` + atomicExchange(&wg_test_locations[x_0], 1u); + let r0 = atomicLoad(&wg_test_locations[x_1]); + let r1 = atomicAdd(&wg_test_locations[y_1], 0u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + g.test('corr') .desc( `Ensures two reads on one thread cannot observe an inconsistent view of a write on a second thread. 
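+    Concretely: if the first read (r0) observes the write of 1 but the second read (r1) still
+    returns 0, the two reads disagree about the write's visibility, which is counted as the weak
+    behavior.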
@@ -75,16 +105,34 @@ g.test('corr') testType: TestType.InterWorkgroup, _testCode: storageMemoryCorrTestCode, }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.InterWorkgroup, + _testCode: storageMemoryCorrRMWTestCode, + extraFlags: 'rmw_variant', + }, { memType: MemoryType.AtomicStorageClass, testType: TestType.IntraWorkgroup, - _testCode: storageMemoryCorrTestCode, + _testCode: workgroupStorageMemoryCorrTestCode, + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupStorageMemoryCorrRMWTestCode, + extraFlags: 'rmw_variant', }, { memType: MemoryType.AtomicWorkgroupClass, testType: TestType.IntraWorkgroup, _testCode: workgroupMemoryCorrTestCode, }, + { + memType: MemoryType.AtomicWorkgroupClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupMemoryCorrRMWTestCode, + extraFlags: 'rmw_variant', + }, ]) .fn(async t => { const resultCode = ` @@ -98,18 +146,380 @@ g.test('corr') atomicAdd(&test_results.weak, 1u); } `; + const testShader = buildTestShader(t.params._testCode, t.params.memType, t.params.testType); + const resultShader = buildResultShader(resultCode, t.params.testType, ResultType.FourBehavior); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + resultShader + ); + await memModelTester.run(60, 3); + }); +const storageMemoryCowwTestCode = ` + atomicStore(&test_locations.value[x_0], 1u); + atomicStore(&test_locations.value[y_0], 2u); +`; + +const storageMemoryCowwRMWTestCode = ` + atomicExchange(&test_locations.value[x_0], 1u); + atomicStore(&test_locations.value[y_0], 2u); +`; + +const workgroupMemoryCowwTestCode = ` + atomicStore(&wg_test_locations[x_0], 1u); + atomicStore(&wg_test_locations[y_0], 2u); + workgroupBarrier(); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_0], atomicLoad(&wg_test_locations[x_0])); +`; + +const workgroupMemoryCowwRMWTestCode = ` + atomicExchange(&wg_test_locations[x_0], 1u); + atomicStore(&wg_test_locations[y_0], 2u); + workgroupBarrier(); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_0], atomicLoad(&wg_test_locations[x_0])); +`; + +g.test('coww') + .desc( + `Ensures two writes on one thread do not lead to incoherent results. The thread first writes 1 to + some location x and then writes 2 to the same location. If the value in memory after the test finishes + is 1, then there has been a coherence violation. 
+ ` + ) + .paramsSimple([ + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.InterWorkgroup, + _testCode: storageMemoryCowwTestCode, + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.InterWorkgroup, + _testCode: storageMemoryCowwRMWTestCode, + extraFlags: 'rmw_variant', + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.IntraWorkgroup, + _testCode: storageMemoryCowwTestCode, + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.IntraWorkgroup, + _testCode: storageMemoryCowwRMWTestCode, + extraFlags: 'rmw_variant', + }, + { + memType: MemoryType.AtomicWorkgroupClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupMemoryCowwTestCode, + }, + { + memType: MemoryType.AtomicWorkgroupClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupMemoryCowwRMWTestCode, + extraFlags: 'rmw_variant', + }, + ]) + .fn(async t => { + const resultCode = ` + if (mem_x_0 == 2u) { + atomicAdd(&test_results.seq, 1u); + } else if (mem_x_0 == 1u) { + atomicAdd(&test_results.weak, 1u); + } + `; const testShader = buildTestShader(t.params._testCode, t.params.memType, t.params.testType); - const resultShader = buildResultShader( - resultCode, - TestType.InterWorkgroup, - ResultType.FourBehavior + const resultShader = buildResultShader(resultCode, t.params.testType, ResultType.TwoBehavior); + const params = { + ...memoryModelTestParams, + numBehaviors: 2, + }; + const memModelTester = new MemoryModelTester(t, params, testShader, resultShader); + await memModelTester.run(60, 1); + }); + +const storageMemoryCowrTestCode = ` + atomicStore(&test_locations.value[x_0], 1u); + let r0 = atomicLoad(&test_locations.value[y_0]); + atomicStore(&test_locations.value[x_1], 2u); + atomicStore(&results.value[id_0].r0, r0); +`; + +const workgroupStorageMemoryCowrTestCode = ` + atomicStore(&test_locations.value[x_0], 1u); + let r0 = atomicLoad(&test_locations.value[y_0]); + atomicStore(&test_locations.value[x_1], 2u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); +`; + +const storageMemoryCowrRMWTestCode = ` + atomicExchange(&test_locations.value[x_0], 1u); + let r0 = atomicAdd(&test_locations.value[y_0], 0u); + atomicExchange(&test_locations.value[x_1], 2u); + atomicStore(&results.value[id_0].r0, r0); +`; + +const workgroupStorageMemoryCowrRMWTestCode = ` + atomicExchange(&test_locations.value[x_0], 1u); + let r0 = atomicAdd(&test_locations.value[y_0], 0u); + atomicExchange(&test_locations.value[x_1], 2u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); +`; + +const workgroupMemoryCowrTestCode = ` + atomicStore(&wg_test_locations[x_0], 1u); + let r0 = atomicLoad(&wg_test_locations[y_0]); + atomicStore(&wg_test_locations[x_1], 2u); + workgroupBarrier(); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1], atomicLoad(&wg_test_locations[x_1])); +`; + +const workgroupMemoryCowrRMWTestCode = ` + atomicExchange(&wg_test_locations[x_0], 1u); + let r0 = atomicAdd(&wg_test_locations[y_0], 0u); + atomicExchange(&wg_test_locations[x_1], 2u); + workgroupBarrier(); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1], atomicLoad(&wg_test_locations[x_1])); +`; + +g.test('cowr') + .desc( + `The first thread 
first writes 1 to some location x and then reads x. The second thread writes 2 to x.
+    If the first thread reads the value 2 and the value in memory at the end of the test is 1, then the read
+    and write on the first thread have been reordered, a coherence violation.
+    `
+  )
+  .paramsSimple([
+    {
+      memType: MemoryType.AtomicStorageClass,
+      testType: TestType.InterWorkgroup,
+      _testCode: storageMemoryCowrTestCode,
+    },
+    {
+      memType: MemoryType.AtomicStorageClass,
+      testType: TestType.InterWorkgroup,
+      _testCode: storageMemoryCowrRMWTestCode,
+      extraFlags: 'rmw_variant',
+    },
+    {
+      memType: MemoryType.AtomicStorageClass,
+      testType: TestType.IntraWorkgroup,
+      _testCode: workgroupStorageMemoryCowrTestCode,
+    },
+    {
+      memType: MemoryType.AtomicStorageClass,
+      testType: TestType.IntraWorkgroup,
+      _testCode: workgroupStorageMemoryCowrRMWTestCode,
+      extraFlags: 'rmw_variant',
+    },
+    {
+      memType: MemoryType.AtomicWorkgroupClass,
+      testType: TestType.IntraWorkgroup,
+      _testCode: workgroupMemoryCowrTestCode,
+    },
+    {
+      memType: MemoryType.AtomicWorkgroupClass,
+      testType: TestType.IntraWorkgroup,
+      _testCode: workgroupMemoryCowrRMWTestCode,
+      extraFlags: 'rmw_variant',
+    },
+  ])
+  .fn(async t => {
+    const resultCode = `
+      if ((r0 == 1u && mem_x_0 == 2u)) {
+        atomicAdd(&test_results.seq0, 1u);
+      } else if ((r0 == 1u && mem_x_0 == 1u)) {
+        atomicAdd(&test_results.seq1, 1u);
+      } else if ((r0 == 2u && mem_x_0 == 2u)) {
+        atomicAdd(&test_results.interleaved, 1u);
+      } else if ((r0 == 2u && mem_x_0 == 1u)) {
+        atomicAdd(&test_results.weak, 1u);
+      }
+    `;
+    const testShader = buildTestShader(t.params._testCode, t.params.memType, t.params.testType);
+    const resultShader = buildResultShader(resultCode, t.params.testType, ResultType.FourBehavior);
+    const memModelTester = new MemoryModelTester(
+      t,
+      memoryModelTestParams,
+      testShader,
+      resultShader
+    );
+    await memModelTester.run(60, 3);
+  });
+
+const storageMemoryCorw1TestCode = `
+  let r0 = atomicLoad(&test_locations.value[x_0]);
+  atomicStore(&test_locations.value[y_0], 1u);
+  atomicStore(&results.value[id_0].r0, r0);
+`;
+
+const workgroupStorageMemoryCorw1TestCode = `
+  let r0 = atomicLoad(&test_locations.value[x_0]);
+  atomicStore(&test_locations.value[y_0], 1u);
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+`;
+
+const workgroupMemoryCorw1TestCode = `
+  let r0 = atomicLoad(&wg_test_locations[x_0]);
+  atomicStore(&wg_test_locations[y_0], 1u);
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+`;
+
+g.test('corw1')
+  .desc(
+    `One thread first reads from a memory location x and then writes 1 to x. If the read observes the subsequent
+    write, there has been a coherence violation.
+    `
+  )
+  .paramsSimple([
+    {
+      memType: MemoryType.AtomicStorageClass,
+      testType: TestType.InterWorkgroup,
+      _testCode: storageMemoryCorw1TestCode,
+    },
+    {
+      memType: MemoryType.AtomicStorageClass,
+      testType: TestType.IntraWorkgroup,
+      _testCode: workgroupStorageMemoryCorw1TestCode,
+    },
+    {
+      memType: MemoryType.AtomicWorkgroupClass,
+      testType: TestType.IntraWorkgroup,
+      _testCode: workgroupMemoryCorw1TestCode,
+    },
+  ])
+  .fn(async t => {
+    const resultCode = `
+      if (r0 == 0u) {
+        atomicAdd(&test_results.seq, 1u);
+      } else if (r0 == 1u) {
+        atomicAdd(&test_results.weak, 1u);
+      }
+    `;
+    const testShader = buildTestShader(t.params._testCode, t.params.memType, t.params.testType);
+    const resultShader = buildResultShader(resultCode, t.params.testType, ResultType.TwoBehavior);
+    const params = {
+      ...memoryModelTestParams,
+      numBehaviors: 2,
+    };
+    const memModelTester = new MemoryModelTester(t, params, testShader, resultShader);
+    await memModelTester.run(60, 1);
+  });
+
+const storageMemoryCorw2TestCode = `
+  let r0 = atomicLoad(&test_locations.value[x_0]);
+  atomicStore(&test_locations.value[y_0], 1u);
+  atomicStore(&test_locations.value[x_1], 2u);
+  atomicStore(&results.value[id_0].r0, r0);
+`;
+
+const workgroupStorageMemoryCorw2TestCode = `
+  let r0 = atomicLoad(&test_locations.value[x_0]);
+  atomicStore(&test_locations.value[y_0], 1u);
+  atomicStore(&test_locations.value[x_1], 2u);
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+`;
+
+const storageMemoryCorw2RMWTestCode = `
+  let r0 = atomicLoad(&test_locations.value[x_0]);
+  atomicStore(&test_locations.value[y_0], 1u);
+  atomicExchange(&test_locations.value[x_1], 2u);
+  atomicStore(&results.value[id_0].r0, r0);
+`;
+
+const workgroupStorageMemoryCorw2RMWTestCode = `
+  let r0 = atomicLoad(&test_locations.value[x_0]);
+  atomicStore(&test_locations.value[y_0], 1u);
+  atomicExchange(&test_locations.value[x_1], 2u);
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+`;
+
+const workgroupMemoryCorw2TestCode = `
+  let r0 = atomicLoad(&wg_test_locations[x_0]);
+  atomicStore(&wg_test_locations[y_0], 1u);
+  atomicStore(&wg_test_locations[x_1], 2u);
+  workgroupBarrier();
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+  atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1], atomicLoad(&wg_test_locations[x_1]));
+`;
+
+const workgroupMemoryCorw2RMWTestCode = `
+  let r0 = atomicLoad(&wg_test_locations[x_0]);
+  atomicStore(&wg_test_locations[y_0], 1u);
+  atomicExchange(&wg_test_locations[x_1], 2u);
+  workgroupBarrier();
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0);
+  atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1], atomicLoad(&wg_test_locations[x_1]));
+`;
+
+g.test('corw2')
+  .desc(
+    `The first thread reads from some memory location x, and then writes 1 to x. The second thread
+    writes 2 to x. If the first thread reads the value 2, but the value in memory after the test
+    completes is 2, then the write on the first thread must have been re-ordered before the read,
+    a coherence violation.
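+    The result shader below accordingly counts r0 == 2u together with a final value of 2u as the
+    weak behavior; the other three combinations correspond to sequential or interleaved executions.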
+ ` + ) + .paramsSimple([ + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.InterWorkgroup, + _testCode: storageMemoryCorw2TestCode, + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.InterWorkgroup, + _testCode: storageMemoryCorw2RMWTestCode, + extraFlags: 'rmw_variant', + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupStorageMemoryCorw2TestCode, + }, + { + memType: MemoryType.AtomicStorageClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupStorageMemoryCorw2RMWTestCode, + extraFlags: 'rmw_variant', + }, + { + memType: MemoryType.AtomicWorkgroupClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupMemoryCorw2TestCode, + }, + { + memType: MemoryType.AtomicWorkgroupClass, + testType: TestType.IntraWorkgroup, + _testCode: workgroupMemoryCorw2RMWTestCode, + extraFlags: 'rmw_variant', + }, + ]) + .fn(async t => { + const resultCode = ` + if ((r0 == 0u && mem_x_0 == 2u)) { + atomicAdd(&test_results.seq0, 1u); + } else if ((r0 == 2u && mem_x_0 == 1u)) { + atomicAdd(&test_results.seq1, 1u); + } else if ((r0 == 0u && mem_x_0 == 1u)) { + atomicAdd(&test_results.interleaved, 1u); + } else if ((r0 == 2u && mem_x_0 == 2u)) { + atomicAdd(&test_results.weak, 1u); + } + `; + const testShader = buildTestShader(t.params._testCode, t.params.memType, t.params.testType); + const resultShader = buildResultShader(resultCode, t.params.testType, ResultType.FourBehavior); const memModelTester = new MemoryModelTester( t, memoryModelTestParams, testShader, resultShader ); - await memModelTester.run(20, 3); + await memModelTester.run(60, 3); }); diff --git a/src/webgpu/shader/execution/memory_model/memory_model_setup.ts b/src/webgpu/shader/execution/memory_model/memory_model_setup.ts index f1f1c8915c7f..82f0f9d70d05 100644 --- a/src/webgpu/shader/execution/memory_model/memory_model_setup.ts +++ b/src/webgpu/shader/execution/memory_model/memory_model_setup.ts @@ -50,14 +50,16 @@ export type MemoryModelTestParams = { memStride: number; /** For tests that access one memory location, but use dynamic addresses to avoid compiler optimization, aliased memory should be set to true. */ aliasedMemory: boolean; - /** The number of memory locations accessed by this test. */ - numMemLocations: number; - /** The number of read outputs per test that need to be analyzed in the result aggregation shader. */ - numReadOutputs: number; /** The number of possible behaviors that a test can have. */ numBehaviors: number; }; +/** The number of memory locations accessed by a test. Currently, only tests with up to 2 memory locations are supported. */ +const numMemLocations = 2; + +/** The number of read outputs per test that need to be analyzed in the result aggregation shader. Currently, only tests with up to 2 read outputs are supported. */ +const numReadOutputs = 2; + /** Represents a device buffer and a utility buffer for resetting memory and copying parameters. */ type BufferWithSource = { /** Buffer used by shader code. 
*/ @@ -135,7 +137,7 @@ export class MemoryModelTester { // set up buffers const testingThreads = this.params.workgroupSize * this.params.testingWorkgroups; const testLocationsSize = - testingThreads * this.params.numMemLocations * this.params.memStride * bytesPerWord; + testingThreads * numMemLocations * this.params.memStride * bytesPerWord; const testLocationsBuffer: BufferWithSource = { deviceBuf: this.test.device.createBuffer({ size: testLocationsSize, @@ -148,7 +150,7 @@ export class MemoryModelTester { size: testLocationsSize, }; - const readResultsSize = testingThreads * this.params.numReadOutputs * bytesPerWord; + const readResultsSize = testingThreads * numReadOutputs * bytesPerWord; const readResultsBuffer: BufferWithSource = { deviceBuf: this.test.device.createBuffer({ size: readResultsSize, @@ -318,7 +320,9 @@ export class MemoryModelTester { /** * Run the test for the specified number of iterations. Checks the testResults buffer on the weakIndex; if - * this value is not 0 then the test has failed. + * this value is not 0 then the test has failed. The number of iterations is chosen per test so that the + * full set of tests meets some time budget while still being reasonably effective at uncovering issues. + * Currently, we aim for each test to complete in under one second. */ async run(iterations: number, weakIndex: number): Promise { for (let i = 0; i < iterations; i++) { @@ -822,6 +826,26 @@ const intraWorkgroupTestShaderCode = [ `, ].join('\n'); +/** + * Tests that operate on storage memory and communicate with invocations in the same workgroup must offset their locations + * relative to global memory. + */ +const storageIntraWorkgroupTestShaderCode = ` + let total_ids = workgroupXSize; + let id_0 = local_invocation_id[0]; + let id_1 = permute_id(local_invocation_id[0], stress_params.permute_first, workgroupXSize); + let x_0 = (shuffled_workgroup * workgroupXSize + id_0) * stress_params.mem_stride * 2u; + let y_0 = (shuffled_workgroup * workgroupXSize + permute_id(id_0, stress_params.permute_second, total_ids)) * stress_params.mem_stride * 2u + stress_params.location_offset; + let x_1 = (shuffled_workgroup * workgroupXSize + id_1) * stress_params.mem_stride * 2u; + let y_1 = (shuffled_workgroup * workgroupXSize + permute_id(id_1, stress_params.permute_second, total_ids)) * stress_params.mem_stride * 2u + stress_params.location_offset; + if (stress_params.pre_stress == 1u) { + do_stress(stress_params.pre_stress_iterations, stress_params.pre_stress_pattern, shuffled_workgroup); + } + if (stress_params.do_barrier == 1u) { + spin(workgroupXSize); + } +`; + /** All test shaders may perform stress with non-testing threads. 
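+   Whether an invocation performs stress is decided at runtime from stress_params.mem_stress (see
+   the footer code below), so one compiled shader covers both stressed and unstressed iterations.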
*/ const testShaderCommonFooter = ` } else if (stress_params.mem_stress == 1u) { @@ -959,12 +983,15 @@ export function buildTestShader( testType: TestType ): string { let memoryTypeCode; + let isStorageAS = false; switch (memoryType) { case MemoryType.AtomicStorageClass: memoryTypeCode = storageMemoryAtomicTestShaderCode; + isStorageAS = true; break; case MemoryType.NonAtomicStorageClass: memoryTypeCode = storageMemoryNonAtomicTestShaderCode; + isStorageAS = true; break; case MemoryType.AtomicWorkgroupClass: memoryTypeCode = workgroupMemoryAtomicTestShaderCode; @@ -978,7 +1005,11 @@ export function buildTestShader( testTypeCode = interWorkgroupTestShaderCode; break; case TestType.IntraWorkgroup: - testTypeCode = intraWorkgroupTestShaderCode; + if (isStorageAS) { + testTypeCode = storageIntraWorkgroupTestShaderCode; + } else { + testTypeCode = intraWorkgroupTestShaderCode; + } } return [memoryTypeCode, testTypeCode, testCode, testShaderCommonFooter].join('\n'); } diff --git a/src/webgpu/shader/execution/memory_model/weak.spec.ts b/src/webgpu/shader/execution/memory_model/weak.spec.ts index 3b7e237eaee7..68f86a7d0000 100644 --- a/src/webgpu/shader/execution/memory_model/weak.spec.ts +++ b/src/webgpu/shader/execution/memory_model/weak.spec.ts @@ -21,28 +21,26 @@ export const g = makeTestGroup(GPUTest); // A reasonable parameter set, determined heuristically. const memoryModelTestParams: MemoryModelTestParams = { workgroupSize: 256, - testingWorkgroups: 512, - maxWorkgroups: 1024, - shufflePct: 100, - barrierPct: 100, - memStressPct: 100, + testingWorkgroups: 739, + maxWorkgroups: 885, + shufflePct: 0, + barrierPct: 0, + memStressPct: 0, memStressIterations: 1024, memStressStoreFirstPct: 50, memStressStoreSecondPct: 50, preStressPct: 100, - preStressIterations: 1024, - preStressStoreFirstPct: 50, - preStressStoreSecondPct: 50, - scratchMemorySize: 2048, - stressLineSize: 64, - stressTargetLines: 2, - stressStrategyBalancePct: 50, + preStressIterations: 33, + preStressStoreFirstPct: 0, + preStressStoreSecondPct: 100, + scratchMemorySize: 1408, + stressLineSize: 4, + stressTargetLines: 11, + stressStrategyBalancePct: 0, permuteFirst: 109, permuteSecond: 419, - memStride: 4, + memStride: 2, aliasedMemory: false, - numMemLocations: 2, - numReadOutputs: 2, numBehaviors: 4, }; @@ -68,7 +66,7 @@ const storageMemoryMessagePassingTestCode = ` atomicStore(&results.value[shuffled_workgroup * u32(workgroupXSize) + id_1].r1, r1); `; -g.test('message_passing_workgroup_memory') +g.test('message_passing') .desc( `Checks whether two reads on one thread can observe two writes in another thread in a way that is inconsistent with sequential consistency. 
In the message passing litmus test, one @@ -109,5 +107,323 @@ g.test('message_passing_workgroup_memory') testShader, messagePassingResultShader ); - await memModelTester.run(20, 3); + await memModelTester.run(40, 3); + }); + +const workgroupMemoryStoreTestCode = ` + atomicStore(&wg_test_locations[x_0], 2u); + workgroupBarrier(); + atomicStore(&wg_test_locations[y_0], 1u); + let r0 = atomicLoad(&wg_test_locations[y_1]); + workgroupBarrier(); + atomicStore(&wg_test_locations[x_1], 1u); + workgroupBarrier(); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1], atomicLoad(&wg_test_locations[x_1])); +`; + +const storageMemoryStoreTestCode = ` + atomicStore(&test_locations.value[x_0], 2u); + storageBarrier(); + atomicStore(&test_locations.value[y_0], 1u); + let r0 = atomicLoad(&test_locations.value[y_1]); + storageBarrier(); + atomicStore(&test_locations.value[x_1], 1u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0); +`; + +g.test('store') + .desc( + `In the store litmus test, one thread writes 2 to some memory location x and then 1 to some memory location + y. A second thread reads the value of y and then writes 1 to x. If the read on the second thread returns 1, + but the value of x in memory after the test ends is 2, then there has been a re-ordering which is not allowed + when using WebGPU's barriers. + ` + ) + .paramsSimple([ + { memType: MemoryType.AtomicWorkgroupClass, _testCode: workgroupMemoryStoreTestCode }, + { memType: MemoryType.AtomicStorageClass, _testCode: storageMemoryStoreTestCode }, + ]) + .fn(async t => { + const testShader = buildTestShader( + t.params._testCode, + t.params.memType, + TestType.IntraWorkgroup + ); + const messagePassingResultShader = buildResultShader( + ` + if ((r0 == 1u && mem_x_0 == 1u)) { + atomicAdd(&test_results.seq0, 1u); + } else if ((r0 == 0u && mem_x_0 == 2u)) { + atomicAdd(&test_results.seq1, 1u); + } else if ((r0 == 0u && mem_x_0 == 1u)) { + atomicAdd(&test_results.interleaved, 1u); + } else if ((r0 == 1u && mem_x_0 == 2u)) { + atomicAdd(&test_results.weak, 1u); + } + `, + TestType.IntraWorkgroup, + ResultType.FourBehavior + ); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + messagePassingResultShader + ); + await memModelTester.run(40, 3); + }); + +const workgroupMemoryLoadBufferTestCode = ` + let r0 = atomicLoad(&wg_test_locations[y_0]); + workgroupBarrier(); + atomicStore(&wg_test_locations[x_0], 1u); + let r1 = atomicLoad(&wg_test_locations[x_1]); + workgroupBarrier(); + atomicStore(&wg_test_locations[y_1], 1u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + +const storageMemoryLoadBufferTestCode = ` + let r0 = atomicLoad(&test_locations.value[y_0]); + storageBarrier(); + atomicStore(&test_locations.value[x_0], 1u); + let r1 = atomicLoad(&test_locations.value[x_1]); + storageBarrier(); + atomicStore(&test_locations.value[y_1], 1u); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + +g.test('load_buffer') + .desc( + `In the load buffer litmus test, one thread reads from memory location y and then writes 1 to memory location x. + A second thread reads from x and then writes 1 to y. 
If both threads read the value 1, then the loads have been
+    buffered or re-ordered, which is not allowed when used in conjunction with WebGPU's barriers.
+    `
+  )
+  .paramsSimple([
+    { memType: MemoryType.AtomicWorkgroupClass, _testCode: workgroupMemoryLoadBufferTestCode },
+    { memType: MemoryType.AtomicStorageClass, _testCode: storageMemoryLoadBufferTestCode },
+  ])
+  .fn(async t => {
+    const testShader = buildTestShader(
+      t.params._testCode,
+      t.params.memType,
+      TestType.IntraWorkgroup
+    );
+    const messagePassingResultShader = buildResultShader(
+      `
+      if ((r0 == 1u && r1 == 0u)) {
+        atomicAdd(&test_results.seq0, 1u);
+      } else if ((r0 == 0u && r1 == 1u)) {
+        atomicAdd(&test_results.seq1, 1u);
+      } else if ((r0 == 0u && r1 == 0u)) {
+        atomicAdd(&test_results.interleaved, 1u);
+      } else if ((r0 == 1u && r1 == 1u)) {
+        atomicAdd(&test_results.weak, 1u);
+      }
+      `,
+      TestType.IntraWorkgroup,
+      ResultType.FourBehavior
+    );
+    const memModelTester = new MemoryModelTester(
+      t,
+      memoryModelTestParams,
+      testShader,
+      messagePassingResultShader
+    );
+    await memModelTester.run(40, 3);
+  });
+
+const workgroupMemoryReadTestCode = `
+  atomicStore(&wg_test_locations[x_0], 1u);
+  workgroupBarrier();
+  atomicExchange(&wg_test_locations[y_0], 1u);
+  atomicExchange(&wg_test_locations[y_1], 2u);
+  workgroupBarrier();
+  let r0 = atomicLoad(&wg_test_locations[x_1]);
+  workgroupBarrier();
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0);
+  atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + y_1], atomicLoad(&wg_test_locations[y_1]));
+`;
+
+const storageMemoryReadTestCode = `
+  atomicStore(&test_locations.value[x_0], 1u);
+  storageBarrier();
+  atomicExchange(&test_locations.value[y_0], 1u);
+  atomicExchange(&test_locations.value[y_1], 2u);
+  storageBarrier();
+  let r0 = atomicLoad(&test_locations.value[x_1]);
+  atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r0, r0);
+`;
+
+g.test('read')
+  .desc(
+    `In the read litmus test, one thread writes 1 to memory location x and then 1 to memory location y. A second thread
+    first writes 2 to y and then reads from x. If the value read by the second thread is 0 but the value in memory of y
+    after the test completes is 2, then there has been some re-ordering of instructions disallowed when using WebGPU's
+    barrier. Additionally, both writes to y are RMWs, so that the barrier forces the correct acquire/release memory ordering
+    synchronization.
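+    In the result shader below, r0 == 0u together with a final y value of 2u is therefore counted
+    as the weak behavior.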
+ ` + ) + .paramsSimple([ + { memType: MemoryType.AtomicWorkgroupClass, _testCode: workgroupMemoryReadTestCode }, + { memType: MemoryType.AtomicStorageClass, _testCode: storageMemoryReadTestCode }, + ]) + .fn(async t => { + const testShader = buildTestShader( + t.params._testCode, + t.params.memType, + TestType.IntraWorkgroup + ); + const messagePassingResultShader = buildResultShader( + ` + if ((r0 == 1u && mem_y_0 == 2u)) { + atomicAdd(&test_results.seq0, 1u); + } else if ((r0 == 0u && mem_y_0 == 1u)) { + atomicAdd(&test_results.seq1, 1u); + } else if ((r0 == 1u && mem_y_0 == 1u)) { + atomicAdd(&test_results.interleaved, 1u); + } else if ((r0 == 0u && mem_y_0 == 2u)) { + atomicAdd(&test_results.weak, 1u); + } + `, + TestType.IntraWorkgroup, + ResultType.FourBehavior + ); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + messagePassingResultShader + ); + await memModelTester.run(40, 3); + }); + +const workgroupMemoryStoreBufferTestCode = ` + atomicStore(&wg_test_locations[x_0], 1u); + workgroupBarrier(); + let r0 = atomicAdd(&wg_test_locations[y_0], 0u); + atomicExchange(&wg_test_locations[y_1], 1u); + workgroupBarrier(); + let r1 = atomicLoad(&wg_test_locations[x_1]); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + +const storageMemoryStoreBufferTestCode = ` + atomicStore(&test_locations.value[x_0], 1u); + storageBarrier(); + let r0 = atomicAdd(&test_locations.value[y_0], 0u); + atomicExchange(&test_locations.value[y_1], 1u); + storageBarrier(); + let r1 = atomicLoad(&test_locations.value[x_1]); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_0].r0, r0); + atomicStore(&results.value[shuffled_workgroup * workgroupXSize + id_1].r1, r1); +`; + +g.test('store_buffer') + .desc( + `In the store buffer litmus test, one thread writes 1 to memory location x and then reads from memory location + y. A second thread writes 1 to y and then reads from x. If both reads return 0, then stores have been buffered + or some other re-ordering has occurred that is disallowed by WebGPU's barriers. Additionally, both the read + and store to y are RMWs to achieve the necessary synchronization across threads. 
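+    Under any interleaving without re-ordering, at least one of the two reads observes the other
+    thread's write, so both reads returning 0 is counted as the weak behavior below.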
+ ` + ) + .paramsSimple([ + { memType: MemoryType.AtomicWorkgroupClass, _testCode: workgroupMemoryStoreBufferTestCode }, + { memType: MemoryType.AtomicStorageClass, _testCode: storageMemoryStoreBufferTestCode }, + ]) + .fn(async t => { + const testShader = buildTestShader( + t.params._testCode, + t.params.memType, + TestType.IntraWorkgroup + ); + const messagePassingResultShader = buildResultShader( + ` + if ((r0 == 1u && r1 == 0u)) { + atomicAdd(&test_results.seq0, 1u); + } else if ((r0 == 0u && r1 == 1u)) { + atomicAdd(&test_results.seq1, 1u); + } else if ((r0 == 1u && r1 == 1u)) { + atomicAdd(&test_results.interleaved, 1u); + } else if ((r0 == 0u && r1 == 0u)) { + atomicAdd(&test_results.weak, 1u); + } + `, + TestType.IntraWorkgroup, + ResultType.FourBehavior + ); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + messagePassingResultShader + ); + await memModelTester.run(40, 3); + }); + +const workgroupMemory2P2WTestCode = ` + atomicStore(&wg_test_locations[x_0], 2u); + workgroupBarrier(); + atomicExchange(&wg_test_locations[y_0], 1u); + atomicExchange(&wg_test_locations[y_1], 2u); + workgroupBarrier(); + atomicStore(&wg_test_locations[x_1], 1u); + workgroupBarrier(); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + x_1], atomicLoad(&wg_test_locations[x_1])); + atomicStore(&test_locations.value[shuffled_workgroup * workgroupXSize * stress_params.mem_stride * 2u + y_1], atomicLoad(&wg_test_locations[y_1])); +`; + +const storageMemory2P2WTestCode = ` + atomicStore(&test_locations.value[x_0], 2u); + storageBarrier(); + atomicExchange(&test_locations.value[y_0], 1u); + atomicExchange(&test_locations.value[y_1], 2u); + storageBarrier(); + atomicStore(&test_locations.value[x_1], 1u); +`; + +g.test('2_plus_2_write') + .desc( + `In the 2+2 write litmus test, one thread stores 2 to memory location x and then 1 to memory location y. + A second thread stores 2 to y and then 1 to x. If at the end of the test both memory locations are set to 2, + then some disallowed re-ordering has occurred. Both writes to y are RMWs to achieve the required synchronization. + ` + ) + .paramsSimple([ + { memType: MemoryType.AtomicWorkgroupClass, _testCode: workgroupMemory2P2WTestCode }, + { memType: MemoryType.AtomicStorageClass, _testCode: storageMemory2P2WTestCode }, + ]) + .fn(async t => { + const testShader = buildTestShader( + t.params._testCode, + t.params.memType, + TestType.IntraWorkgroup + ); + const messagePassingResultShader = buildResultShader( + ` + if ((mem_x_0 == 1u && mem_y_0 == 2u)) { + atomicAdd(&test_results.seq0, 1u); + } else if ((mem_x_0 == 2u && mem_y_0 == 1u)) { + atomicAdd(&test_results.seq1, 1u); + } else if ((mem_x_0 == 1u && mem_y_0 == 1u)) { + atomicAdd(&test_results.interleaved, 1u); + } else if ((mem_x_0 == 2u && mem_y_0 == 2u)) { + atomicAdd(&test_results.weak, 1u); + } + `, + TestType.IntraWorkgroup, + ResultType.FourBehavior + ); + const memModelTester = new MemoryModelTester( + t, + memoryModelTestParams, + testShader, + messagePassingResultShader + ); + await memModelTester.run(40, 3); });
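
--
For readers cross-checking the four-behavior buckets, here is a small host-side TypeScript sketch
(illustrative only; classifyLoadBuffer is not part of the harness) that mirrors the load_buffer
result shader above:

// Mirrors the load_buffer result aggregation: buckets one (r0, r1) pair per testing
// invocation into the behavior counters the GPU result shader increments atomically.
type Behavior = 'seq0' | 'seq1' | 'interleaved' | 'weak';

function classifyLoadBuffer(r0: number, r1: number): Behavior {
  if (r0 === 1 && r1 === 0) return 'seq0'; // thread 1 ran completely before thread 0
  if (r0 === 0 && r1 === 1) return 'seq1'; // thread 0 ran completely before thread 1
  if (r0 === 0 && r1 === 0) return 'interleaved'; // both loads executed before either store
  return 'weak'; // r0 == 1 && r1 == 1: each load observed the other thread's later store
}

// Example: classifyLoadBuffer(1, 1) === 'weak', the outcome run(40, 3) checks at weakIndex 3.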