Skip to content

Commit

Permalink
Enable WebGPU tests with descriptors (#294)
Browse files Browse the repository at this point in the history
Add GPUTest.asyncReinitDeviceWithDescriptor. Calling it
reinitializes `this.device` with a device created from the provided
descriptor (ensuring that `this.device` has not already been used).
  • Loading branch information
kainino0x authored Sep 25, 2020
1 parent ecf2a0f commit fe0cbfa
Show file tree
Hide file tree
Showing 3 changed files with 266 additions and 77 deletions.
248 changes: 185 additions & 63 deletions src/common/framework/gpu/device_pool.ts
Original file line number Diff line number Diff line change
@@ -1,56 +1,61 @@
import { assert, raceWithRejectOnTimeout, unreachable, assertReject } from '../util/util.js';
import { SkipTestCase } from '../fixture.js';
import {
assert,
raceWithRejectOnTimeout,
unreachable,
assertReject,
objectEquals,
} from '../util/util.js';

import { getGPU } from './implementation.js';

interface DeviceHolder {
acquired: boolean; // whether the device is currently in use by a test
device: GPUDevice;
lostReason?: string; // initially undefined; becomes set when the device is lost
/**
 * Handle for a device reserved from a DevicePool.
 *
 * DevicePool.reserve() resolves to a DeviceProvider and DevicePool.release() takes one back.
 * The GPUDevice itself is only obtained by calling acquire(); in the DeviceHolder
 * implementation in this file, that is also the point where the whole-test error scopes
 * are pushed.
 */
export interface DeviceProvider {
// Returns the GPUDevice backing this reservation.
acquire(): GPUDevice;
}

/**
 * Error type for test failures after which the device can still be reused.
 * DevicePool.release() discards the device (so a new one is created for the next test)
 * for any error that is not an instance of this class.
 */
class TestFailedButDeviceReusable extends Error {}
/**
 * Exported error type with no extra state.
 * NOTE(review): judging by the name, this presumably signals that a test hit an out-of-memory
 * error and the harness should attempt garbage collection; the throw/catch sites are not
 * visible in this part of the file - confirm before relying on that.
 */
export class TestOOMedShouldAttemptGC extends Error {}

const kPopErrorScopeTimeoutMS = 5000;

export class DevicePool {
failed: boolean = false; // if device init failed once, never try again
holder?: DeviceHolder = undefined; // undefined if "uninitialized" (not yet initialized, or lost)
/** Device with no descriptor. */
private defaultHolder: DeviceHolder | 'uninitialized' | 'failed' = 'uninitialized';
/** Devices with descriptors. */
private nonDefaultHolders = new DescriptorToDeviceMap();

async acquire(): Promise<GPUDevice> {
assert(!this.failed, 'WebGPU device previously failed to initialize; not retrying');

if (this.holder === undefined) {
/** Request a device from the pool. */
async reserve(desc?: GPUDeviceDescriptor): Promise<DeviceProvider> {
// Always attempt to initialize default device, to see if it succeeds.
if (this.defaultHolder === 'uninitialized') {
try {
this.holder = await DevicePool.makeHolder();
this.defaultHolder = await DeviceHolder.create();
} catch (ex) {
this.failed = true;
throw ex;
this.defaultHolder = 'failed';
}
}
assert(!this.holder.acquired, 'Device was in use on DevicePool.acquire');
this.holder.acquired = true;
assert(this.defaultHolder !== 'failed', 'WebGPU device failed to initialize; not retrying');

this.beginErrorScopes();
return this.holder.device;
let holder;
if (desc === undefined) {
holder = this.defaultHolder;
} else {
holder = await this.nonDefaultHolders.getOrInsert(desc, () => DeviceHolder.create(desc));
}

assert(holder.state === 'free', 'Device was in use on DevicePool.acquire');
holder.state = 'reserved';
return holder;
}

// When a test is done using a device, it's released back into the pool.
// This waits for error scopes, checks their results, and checks for various error conditions.
async release(device: GPUDevice): Promise<void> {
const holder = this.holder;
assert(holder !== undefined, 'trying to release a device while pool is uninitialized');
assert(holder.acquired, 'trying to release a device while already released');
assert(device === holder.device, 'Released device was the wrong device');
async release(holder: DeviceProvider): Promise<void> {
assert(this.defaultHolder instanceof DeviceHolder);
assert(holder instanceof DeviceHolder);

assert(holder.state !== 'free', 'trying to release a device while already released');

try {
// Time out if popErrorScope never completes. This could happen due to a browser bug - e.g.,
// as of this writing, on Chrome GPU process crash, popErrorScope just hangs.
await raceWithRejectOnTimeout(
this.endErrorScopes(),
kPopErrorScopeTimeoutMS,
'finalization popErrorScope timed out'
);
await holder.ensureRelease();

// (Hopefully if the device was lost, it has been reported by the time endErrorScopes()
// has finished (or timed out). If not, it could cause a finite number of extra test
Expand All @@ -64,66 +69,183 @@ export class DevicePool {
// Any error that isn't explicitly TestFailedButDeviceReusable forces a new device to be
// created for the next test.
if (!(ex instanceof TestFailedButDeviceReusable)) {
this.holder = undefined;
if (holder === this.defaultHolder) {
this.defaultHolder = 'uninitialized';
} else {
this.nonDefaultHolders.deleteByDevice(holder.device);
}
// TODO: device.destroy()
}
throw ex;
} finally {
// TODO: device.destroy()

// Mark the holder as free. (This only has an effect if the pool still has the holder.)
// This could be done at the top but is done here to guard against async-races during release.
holder.acquired = false;
holder.state = 'free';
}
}
}

/** One entry of DescriptorToDeviceMap: a device descriptor paired with the holder for it. */
interface DescriptorToDevice {
// Descriptor used as the lookup key; DescriptorToDeviceMap compares it with objectEquals().
key: GPUDeviceDescriptor;
// Holder stored for that descriptor.
value: DeviceHolder;
}

/**
 * Map from GPUDeviceDescriptor to DeviceHolder.
 *
 * Descriptors are compared structurally with objectEquals(), so every lookup is a linear scan.
 * Live entries are kept in least-recently-used order and capped at kMaxEntries. Descriptors for
 * which device creation failed are remembered so that they are never retried.
 */
class DescriptorToDeviceMap {
  /** Maximum number of entries kept before least-recently-used ones are dropped. */
  private static readonly kMaxEntries = 5;

  /** Descriptors for which create() has already failed once. */
  private unsupported: GPUDeviceDescriptor[] = [];
  // TODO: Do something like stringifyPublicParamsUniquely if searching this array gets too slow.
  /** Live entries. Set iteration order is insertion order, so the first item is the LRU one. */
  private items: Set<DescriptorToDevice> = new Set();

  /**
   * Deletes an item from the map by GPUDevice value.
   * Does nothing if no entry holds that device (e.g. it was already evicted).
   */
  deleteByDevice(device: GPUDevice): void {
    for (const item of this.items) {
      if (item.value.device === device) {
        this.items.delete(item);
        return;
      }
    }
  }

  /**
   * Gets a DeviceHolder from the map if it exists; otherwise, calls create() to create one,
   * inserts it, and returns it.
   *
   * @param key Descriptor to look up (compared structurally).
   * @param create Factory invoked only when no entry for `key` exists yet.
   * @returns The existing or newly created holder.
   * @throws SkipTestCase if devices with this descriptor are unsupported, i.e. create() rejected
   *   now or on an earlier call with an equal descriptor.
   */
  async getOrInsert(
    key: GPUDeviceDescriptor,
    create: () => Promise<DeviceHolder>
  ): Promise<DeviceHolder> {
    // Never retry unsupported configurations.
    if (this.unsupported.some(desc => objectEquals(key, desc))) {
      throw new SkipTestCase(`GPUDeviceDescriptor previously failed: ${JSON.stringify(key)}`);
    }

    // Search for an existing device with the same descriptor.
    for (const item of this.items) {
      if (objectEquals(key, item.key)) {
        // Move the item to the end of the set (most recently used).
        this.items.delete(item);
        this.items.add(item);
        return item.value;
      }
    }

    // No existing item was found; add a new one.
    let value: DeviceHolder;
    try {
      value = await create();
    } catch (ex) {
      // Any failure to create the device marks the descriptor as unsupported for good.
      this.unsupported.push(key);
      throw new SkipTestCase(
        `GPUDeviceDescriptor not supported: ${JSON.stringify(
          key
        )}\n${DescriptorToDeviceMap.failureReason(ex)}`
      );
    }
    this.insertAndCleanUp({ key, value });
    return value;
  }

  /**
   * Extracts a printable reason from a caught value without assuming its type.
   * Objects contribute their `message` (if any), thrown strings are used as-is, and anything
   * else yields an empty string.
   */
  private static failureReason(ex: unknown): string {
    if (typeof ex === 'string') {
      return ex;
    }
    if (typeof ex === 'object' && ex !== null) {
      const { message } = ex as { message?: unknown };
      return message === undefined || message === null ? '' : String(message);
    }
    return '';
  }

  /**
   * Insert an entry, then remove the least-recently-used items if there are too many.
   * Evicted holders are only dropped from the map; their devices are not destroyed here.
   */
  private insertAndCleanUp(kv: DescriptorToDevice): void {
    this.items.add(kv);

    if (this.items.size <= DescriptorToDeviceMap.kMaxEntries) {
      return;
    }
    // Iteration starts at the oldest entry; keep deleting until the cap is respected again.
    for (const oldest of this.items) {
      this.items.delete(oldest);
      if (this.items.size <= DescriptorToDeviceMap.kMaxEntries) {
        return;
      }
    }
  }
}

/**
 * DeviceHolder has three states:
 * - 'free': Free to be used for a new test.
 * - 'reserved': Reserved by a running test, but has not had error scopes created yet.
 * - 'acquired': Reserved by a running test, and has had error scopes created.
 *
 * Transitions: DevicePool.reserve() moves a holder from 'free' to 'reserved',
 * DeviceHolder.acquire() moves it from 'reserved' to 'acquired', and
 * DeviceHolder.ensureRelease() always leaves it 'free'.
 */
type DeviceHolderState = 'free' | 'reserved' | 'acquired';

/**
* Holds a GPUDevice and tracks its state (free/reserved/acquired) and handles device loss.
*/
class DeviceHolder implements DeviceProvider {
readonly device: GPUDevice;
state: DeviceHolderState = 'free';
lostReason?: string; // initially undefined; becomes set when the device is lost

// Gets a device and creates a DeviceHolder.
// If the device is lost, DeviceHolder.lostReason gets set.
private static async makeHolder(): Promise<DeviceHolder> {
static async create(descriptor?: GPUDeviceDescriptor): Promise<DeviceHolder> {
const gpu = getGPU();
const adapter = await gpu.requestAdapter();
assert(adapter !== null);
const device = await adapter.requestDevice();
assert(device !== null);

const holder: DeviceHolder = {
acquired: false,
device,
lostReason: undefined,
};
holder.device.lost.then(ev => {
holder.lostReason = ev.message;
assert(adapter !== null, 'requestAdapter returned null');
const device = await adapter.requestDevice(descriptor);
assert(device !== null, 'requestDevice returned null');

return new DeviceHolder(device);
}

private constructor(device: GPUDevice) {
this.device = device;
this.device.lost.then(ev => {
this.lostReason = ev.message;
});
return holder;
}

// Create error scopes that wrap the entire test.
private beginErrorScopes(): void {
assert(this.holder !== undefined);
this.holder.device.pushErrorScope('out-of-memory');
this.holder.device.pushErrorScope('validation');
acquire(): GPUDevice {
assert(this.state === 'reserved');
this.state = 'acquired';
this.device.pushErrorScope('out-of-memory');
this.device.pushErrorScope('validation');
return this.device;
}

async ensureRelease(): Promise<void> {
const kPopErrorScopeTimeoutMS = 5000;

assert(this.state !== 'free');
try {
if (this.state === 'acquired') {
// Time out if popErrorScope never completes. This could happen due to a browser bug - e.g.,
// as of this writing, on Chrome GPU process crash, popErrorScope just hangs.
await raceWithRejectOnTimeout(
this.release(),
kPopErrorScopeTimeoutMS,
'finalization popErrorScope timed out'
);
}
} finally {
this.state = 'free';
}
}

// End the whole-test error scopes. Check that there are no extra error scopes, and that no
// otherwise-uncaptured errors occurred during the test.
private async endErrorScopes(): Promise<void> {
assert(this.holder !== undefined);
private async release(): Promise<void> {
// End the whole-test error scopes. Check that there are no extra error scopes, and that no
// otherwise-uncaptured errors occurred during the test.
let gpuValidationError: GPUValidationError | GPUOutOfMemoryError | null;
let gpuOutOfMemoryError: GPUValidationError | GPUOutOfMemoryError | null;

try {
// May reject if the device was lost.
gpuValidationError = await this.holder.device.popErrorScope();
gpuOutOfMemoryError = await this.holder.device.popErrorScope();
gpuValidationError = await this.device.popErrorScope();
gpuOutOfMemoryError = await this.device.popErrorScope();
} catch (ex) {
assert(
this.holder.lostReason !== undefined,
"popErrorScope failed, but device.lost hasn't fired (yet)"
this.lostReason !== undefined,
'popErrorScope failed; should only happen if device has been lost'
);
throw ex;
}

await assertReject(
this.holder.device.popErrorScope(),
this.device.popErrorScope(),
'There was an extra error scope on the stack after a test'
);

Expand Down
41 changes: 41 additions & 0 deletions src/webgpu/examples.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Examples of writing CTS tests with various features.
Start here when looking for examples of basic framework usage.
`;

import { pbool } from '../common/framework/params_builder.js';
import { makeTestGroup } from '../common/framework/test_group.js';

import { GPUTest } from './gpu_test.js';
Expand Down Expand Up @@ -100,3 +101,43 @@ g.test('gpu,buffers').fn(async t => {
// Like shouldReject, it must be awaited.
t.expectContents(src, data);
});

// One of the following two tests should be skipped on most platforms.

g.test('gpu,with_texture_compression,bc')
  .params(pbool('textureCompressionBC'))
  .fn(async t => {
    const bcRequested = t.params.textureCompressionBC;

    // Only the subcase that wants BC support swaps in a device created with the extension.
    if (bcRequested) {
      await t.asyncReinitDeviceWithDescriptor({ extensions: ['texture-compression-bc'] });
    }

    // Attempts to create a small texture in a BC-compressed format on the test's device.
    const createBC1Texture = (): void => {
      t.device.createTexture({
        format: 'bc1-rgba-unorm',
        size: [4, 4, 1],
        usage: GPUTextureUsage.SAMPLED,
      });
    };

    // A validation error is expected exactly when the extension was not requested.
    t.expectGPUError('validation', createBC1Texture, !bcRequested);
  });

g.test('gpu,with_texture_compression,etc')
  .params(pbool('textureCompressionETC'))
  .fn(async t => {
    if (t.params.textureCompressionETC) {
      // The extension name is cast to GPUExtensionName
      // (presumably because the type declarations do not list it yet - confirm).
      const etcDescriptor = {
        extensions: ['texture-compression-etc' as GPUExtensionName],
      };
      await t.asyncReinitDeviceWithDescriptor(etcDescriptor);
    }

    // Bare property read, kept on purpose.
    // NOTE(review): presumably this makes the fixture hand out the device - confirm in gpu_test.
    t.device;
    // TODO: Should actually test createTexture with an ETC format here.
  });
Loading

0 comments on commit fe0cbfa

Please sign in to comment.