diff --git a/package-lock.json b/package-lock.json index 4837e5c70485..19429df6e283 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,7 +24,7 @@ "@types/w3c-image-capture": "^1.0.10", "@typescript-eslint/eslint-plugin": "^6.9.1", "@typescript-eslint/parser": "^6.9.1", - "@webgpu/types": "^0.1.43", + "@webgpu/types": "^0.1.46", "ansi-colors": "4.1.3", "babel-plugin-add-header-comment": "^1.0.3", "babel-plugin-const-enum": "^1.2.0", @@ -1539,9 +1539,9 @@ "dev": true }, "node_modules/@webgpu/types": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz", - "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==", + "version": "0.1.46", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.46.tgz", + "integrity": "sha512-2iogO6Zh0pTbKLGZuuGWEmJpF/fTABGs7G9wXxpn7s24XSJchSUIiMqIJHURi5zsMZRRTuXrV/3GLOkmOFjq5w==", "dev": true }, "node_modules/abbrev": { @@ -10076,9 +10076,9 @@ "dev": true }, "@webgpu/types": { - "version": "0.1.43", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.43.tgz", - "integrity": "sha512-HoP+d+m+Kuq8CsE63BZ3+BYBKAemrqbHUNrCalxrUju5XW+q/094Q3oeIa+2pTraEbO8ckJmGpibzyGT4OV4YQ==", + "version": "0.1.46", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.46.tgz", + "integrity": "sha512-2iogO6Zh0pTbKLGZuuGWEmJpF/fTABGs7G9wXxpn7s24XSJchSUIiMqIJHURi5zsMZRRTuXrV/3GLOkmOFjq5w==", "dev": true }, "abbrev": { diff --git a/package.json b/package.json index 9d311579c314..c82fe0f2cba5 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "@types/w3c-image-capture": "^1.0.10", "@typescript-eslint/eslint-plugin": "^6.9.1", "@typescript-eslint/parser": "^6.9.1", - "@webgpu/types": "^0.1.43", + "@webgpu/types": "^0.1.46", "ansi-colors": "4.1.3", "babel-plugin-add-header-comment": "^1.0.3", "babel-plugin-const-enum": "^1.2.0", diff --git a/src/common/internal/logging/test_case_recorder.ts 
b/src/common/internal/logging/test_case_recorder.ts index 78f625269e3d..eb03f4ea96f7 100644 --- a/src/common/internal/logging/test_case_recorder.ts +++ b/src/common/internal/logging/test_case_recorder.ts @@ -44,7 +44,7 @@ export class TestCaseRecorder { private startTime = -1; private logs: LogMessageWithStack[] = []; private logLinesAtCurrentSeverity = 0; - private debugging = false; + public debugging = false; constructor(result: LiveTestCaseResult, debugging: boolean) { this.result = result; diff --git a/src/common/internal/test_group.ts b/src/common/internal/test_group.ts index e1d0cde12d5c..aaaec9fe3f3a 100644 --- a/src/common/internal/test_group.ts +++ b/src/common/internal/test_group.ts @@ -620,7 +620,7 @@ class RunCaseSpecific implements RunCase { const subcasePrefix = 'subcase: ' + stringifyPublicParams(subParams); const subRec = new Proxy(rec, { get: (target, k: keyof TestCaseRecorder) => { - const prop = TestCaseRecorder.prototype[k]; + const prop = rec[k] ?? TestCaseRecorder.prototype[k]; if (typeof prop === 'function') { testHeartbeatCallback(); return function (...args: Parameters) { diff --git a/src/common/runtime/standalone.ts b/src/common/runtime/standalone.ts index 932c5668b587..0305031cc790 100644 --- a/src/common/runtime/standalone.ts +++ b/src/common/runtime/standalone.ts @@ -369,6 +369,9 @@ function makeSubtreeChildrenHTML( const runMySubtree = async () => { const results: SubtreeResult[] = []; for (const { runSubtree } of childFns) { + if (stopRequested) { + break; + } results.push(await runSubtree()); } return mergeSubtreeResults(...results); diff --git a/src/common/tools/dev_server.ts b/src/common/tools/dev_server.ts index 1d1313e4f51c..8d78855974d6 100644 --- a/src/common/tools/dev_server.ts +++ b/src/common/tools/dev_server.ts @@ -106,10 +106,6 @@ const app = express(); // Send Chrome Origin Trial tokens app.use((_req, res, next) => { - res.header('Origin-Trial', [ - // Token for http://localhost:8080 - 
'AvyDIV+RJoYs8fn3W6kIrBhWw0te0klraoz04mw/nPb8VTus3w5HCdy+vXqsSzomIH745CT6B5j1naHgWqt/tw8AAABJeyJvcmlnaW4iOiJodHRwOi8vbG9jYWxob3N0OjgwODAiLCJmZWF0dXJlIjoiV2ViR1BVIiwiZXhwaXJ5IjoxNjYzNzE4Mzk5fQ==', - ]); next(); }); diff --git a/src/common/util/navigator_gpu.ts b/src/common/util/navigator_gpu.ts index 6f3a423db39f..4e58797097ed 100644 --- a/src/common/util/navigator_gpu.ts +++ b/src/common/util/navigator_gpu.ts @@ -68,12 +68,11 @@ export function getGPU(recorder: TestCaseRecorder | null): GPU { ): Promise { const promise = oldFn.call(this, { ...defaultRequestAdapterOptions, ...options }); if (recorder) { - void promise.then(async adapter => { + void promise.then(adapter => { if (adapter) { - // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented. - const info = adapter.info || (await adapter.requestAdapterInfo()); - const infoString = `Adapter: ${info.vendor} / ${info.architecture} / ${info.device}`; - recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo: info }))); + const adapterInfo = adapter.info; + const infoString = `Adapter: ${adapterInfo.vendor} / ${adapterInfo.architecture} / ${adapterInfo.device}`; + recorder.debug(new ErrorWithExtra(infoString, () => ({ adapterInfo }))); } }); } diff --git a/src/resources/cache/hashes.json b/src/resources/cache/hashes.json index e0459422560f..bb92869fdf3e 100644 --- a/src/resources/cache/hashes.json +++ b/src/resources/cache/hashes.json @@ -1,112 +1,112 @@ { - "webgpu/shader/execution/binary/af_addition.bin": "338b5b67", - "webgpu/shader/execution/binary/af_logical.bin": "3b2aceb8", - "webgpu/shader/execution/binary/af_division.bin": "a77dc4c0", - "webgpu/shader/execution/binary/af_matrix_addition.bin": "136a7fbb", - "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "90f2c731", - "webgpu/shader/execution/binary/af_multiplication.bin": "35ba40b9", - "webgpu/shader/execution/binary/af_remainder.bin": "41582f85", - "webgpu/shader/execution/binary/af_subtraction.bin": "a41420b2", - 
"webgpu/shader/execution/binary/f16_addition.bin": "ef10ca66", - "webgpu/shader/execution/binary/f16_logical.bin": "4bf24ca5", - "webgpu/shader/execution/binary/f16_division.bin": "f826b6ba", - "webgpu/shader/execution/binary/f16_matrix_addition.bin": "a910ddb0", - "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "9458671c", - "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "36be05d3", - "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "8aa6a88a", - "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "38282a11", - "webgpu/shader/execution/binary/f16_multiplication.bin": "62f91819", - "webgpu/shader/execution/binary/f16_remainder.bin": "f829bb65", - "webgpu/shader/execution/binary/f16_subtraction.bin": "82d4e231", - "webgpu/shader/execution/binary/f32_addition.bin": "9b0a0c50", - "webgpu/shader/execution/binary/f32_logical.bin": "b75af25a", - "webgpu/shader/execution/binary/f32_division.bin": "f6d7832f", - "webgpu/shader/execution/binary/f32_matrix_addition.bin": "3317c75b", - "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "c6f990c8", - "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "b091a702", - "webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "2d12a16b", - "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "e1217524", - "webgpu/shader/execution/binary/f32_multiplication.bin": "19774fb3", - "webgpu/shader/execution/binary/f32_remainder.bin": "fd94bb9a", - "webgpu/shader/execution/binary/f32_subtraction.bin": "dba7cd7a", - "webgpu/shader/execution/binary/i32_arithmetic.bin": "e3b317e1", - "webgpu/shader/execution/binary/i32_comparison.bin": "63fa9be8", - "webgpu/shader/execution/binary/u32_arithmetic.bin": "e8b4008c", - "webgpu/shader/execution/binary/u32_comparison.bin": "d472fd61", - "webgpu/shader/execution/abs.bin": "631d932d", - "webgpu/shader/execution/acos.bin": "afcafcb1", - 
"webgpu/shader/execution/acosh.bin": "4b30eb95", - "webgpu/shader/execution/asin.bin": "c850c13d", - "webgpu/shader/execution/asinh.bin": "66a6acc0", - "webgpu/shader/execution/atan.bin": "2aabbb53", - "webgpu/shader/execution/atan2.bin": "82dd926a", - "webgpu/shader/execution/atanh.bin": "b98c937c", - "webgpu/shader/execution/bitcast.bin": "5daaee1b", - "webgpu/shader/execution/ceil.bin": "d0c32cf4", - "webgpu/shader/execution/clamp.bin": "4d1fc26a", - "webgpu/shader/execution/cos.bin": "dc837ae2", - "webgpu/shader/execution/cosh.bin": "d9e90580", - "webgpu/shader/execution/cross.bin": "ce7979f", - "webgpu/shader/execution/degrees.bin": "1436a196", - "webgpu/shader/execution/determinant.bin": "f36f1fa1", - "webgpu/shader/execution/distance.bin": "5103f8bd", - "webgpu/shader/execution/dot.bin": "4514172c", - "webgpu/shader/execution/exp.bin": "f41150bd", - "webgpu/shader/execution/exp2.bin": "19c494e", - "webgpu/shader/execution/faceForward.bin": "27b6e4a7", - "webgpu/shader/execution/floor.bin": "5bb5098b", - "webgpu/shader/execution/fma.bin": "daace9a4", - "webgpu/shader/execution/fract.bin": "be5f0334", - "webgpu/shader/execution/frexp.bin": "c9efaf7c", - "webgpu/shader/execution/inverseSqrt.bin": "8a50b907", - "webgpu/shader/execution/ldexp.bin": "cb4cea21", - "webgpu/shader/execution/length.bin": "a1b9fbeb", - "webgpu/shader/execution/log.bin": "9f2eb7c3", - "webgpu/shader/execution/log2.bin": "9ee7d861", - "webgpu/shader/execution/max.bin": "11e4608e", - "webgpu/shader/execution/min.bin": "7a084c44", - "webgpu/shader/execution/mix.bin": "7b892a4f", - "webgpu/shader/execution/modf.bin": "b3bf26d7", - "webgpu/shader/execution/normalize.bin": "18eba01d", - "webgpu/shader/execution/pack2x16float.bin": "82df446e", - "webgpu/shader/execution/pow.bin": "d3a05344", - "webgpu/shader/execution/quantizeToF16.bin": "7793770e", - "webgpu/shader/execution/radians.bin": "582c1f6b", - "webgpu/shader/execution/reflect.bin": "9161d6e5", - "webgpu/shader/execution/refract.bin": 
"817b59aa", - "webgpu/shader/execution/round.bin": "cb881aa2", - "webgpu/shader/execution/saturate.bin": "3716605e", - "webgpu/shader/execution/sign.bin": "549ac92f", - "webgpu/shader/execution/sin.bin": "5ec5bcb7", - "webgpu/shader/execution/sinh.bin": "62f6b736", - "webgpu/shader/execution/smoothstep.bin": "aa97768", - "webgpu/shader/execution/sqrt.bin": "d0a134ce", - "webgpu/shader/execution/step.bin": "b8035bb9", - "webgpu/shader/execution/tan.bin": "b34366cd", - "webgpu/shader/execution/tanh.bin": "8f5edddc", - "webgpu/shader/execution/transpose.bin": "1aa2de65", - "webgpu/shader/execution/trunc.bin": "cf43e3f7", - "webgpu/shader/execution/unpack2x16float.bin": "57ea7c02", - "webgpu/shader/execution/unpack2x16snorm.bin": "17fd3f86", - "webgpu/shader/execution/unpack2x16unorm.bin": "fc68bc4b", - "webgpu/shader/execution/unpack4x8snorm.bin": "fef504c1", - "webgpu/shader/execution/unpack4x8unorm.bin": "e8d8de93", - "webgpu/shader/execution/unary/af_arithmetic.bin": "14c0612a", - "webgpu/shader/execution/unary/af_assignment.bin": "3ad4afc", - "webgpu/shader/execution/unary/bool_conversion.bin": "15f7f3fb", - "webgpu/shader/execution/unary/f16_arithmetic.bin": "4a20db6d", - "webgpu/shader/execution/unary/f16_conversion.bin": "31f72f5a", - "webgpu/shader/execution/unary/f32_arithmetic.bin": "f1c311cb", - "webgpu/shader/execution/unary/f32_conversion.bin": "7539cdb3", - "webgpu/shader/execution/unary/i32_arithmetic.bin": "de945eec", - "webgpu/shader/execution/unary/i32_conversion.bin": "1728a03e", - "webgpu/shader/execution/unary/u32_conversion.bin": "9e6ca0ce", - "webgpu/shader/execution/unary/ai_assignment.bin": "1fd685a2", - "webgpu/shader/execution/binary/ai_arithmetic.bin": "90e651f4", - "webgpu/shader/execution/unary/ai_arithmetic.bin": "ba31d178", - "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "bc8b52ef", - "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "54edf6a2", - 
"webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "43b036b1", - "webgpu/shader/execution/derivatives.bin": "65c15fc3", - "webgpu/shader/execution/fwidth.bin": "cc91c875" + "webgpu/shader/execution/binary/af_addition.bin": "82c9422d", + "webgpu/shader/execution/binary/af_logical.bin": "fe21109a", + "webgpu/shader/execution/binary/af_division.bin": "606ed00d", + "webgpu/shader/execution/binary/af_matrix_addition.bin": "731d24fb", + "webgpu/shader/execution/binary/af_matrix_subtraction.bin": "ada2bd52", + "webgpu/shader/execution/binary/af_multiplication.bin": "cc2892a3", + "webgpu/shader/execution/binary/af_remainder.bin": "6ac1fa", + "webgpu/shader/execution/binary/af_subtraction.bin": "f6e12b22", + "webgpu/shader/execution/binary/f16_addition.bin": "4b1f652f", + "webgpu/shader/execution/binary/f16_logical.bin": "8c8f6ced", + "webgpu/shader/execution/binary/f16_division.bin": "79519150", + "webgpu/shader/execution/binary/f16_matrix_addition.bin": "1aca77a1", + "webgpu/shader/execution/binary/f16_matrix_matrix_multiplication.bin": "5416bc07", + "webgpu/shader/execution/binary/f16_matrix_scalar_multiplication.bin": "3c3b8d39", + "webgpu/shader/execution/binary/f16_matrix_subtraction.bin": "8f4be1ef", + "webgpu/shader/execution/binary/f16_matrix_vector_multiplication.bin": "a3a2c8fd", + "webgpu/shader/execution/binary/f16_multiplication.bin": "d6602d76", + "webgpu/shader/execution/binary/f16_remainder.bin": "693a0ace", + "webgpu/shader/execution/binary/f16_subtraction.bin": "81e29c16", + "webgpu/shader/execution/binary/f32_addition.bin": "64946a10", + "webgpu/shader/execution/binary/f32_logical.bin": "69c18e28", + "webgpu/shader/execution/binary/f32_division.bin": "3d6326d2", + "webgpu/shader/execution/binary/f32_matrix_addition.bin": "1b9a4a03", + "webgpu/shader/execution/binary/f32_matrix_matrix_multiplication.bin": "7339e7a4", + "webgpu/shader/execution/binary/f32_matrix_scalar_multiplication.bin": "428a8238", + 
"webgpu/shader/execution/binary/f32_matrix_subtraction.bin": "9651223a", + "webgpu/shader/execution/binary/f32_matrix_vector_multiplication.bin": "3f9688e9", + "webgpu/shader/execution/binary/f32_multiplication.bin": "9291e0f2", + "webgpu/shader/execution/binary/f32_remainder.bin": "719f1d40", + "webgpu/shader/execution/binary/f32_subtraction.bin": "59b6582e", + "webgpu/shader/execution/binary/i32_arithmetic.bin": "c81cf318", + "webgpu/shader/execution/binary/i32_comparison.bin": "21e05bd7", + "webgpu/shader/execution/binary/u32_arithmetic.bin": "eb905f8f", + "webgpu/shader/execution/binary/u32_comparison.bin": "665ed925", + "webgpu/shader/execution/abs.bin": "ec493974", + "webgpu/shader/execution/acos.bin": "3806122f", + "webgpu/shader/execution/acosh.bin": "bef89cc8", + "webgpu/shader/execution/asin.bin": "2e9a0a5b", + "webgpu/shader/execution/asinh.bin": "e5e056b8", + "webgpu/shader/execution/atan.bin": "d89481bf", + "webgpu/shader/execution/atan2.bin": "5320957a", + "webgpu/shader/execution/atanh.bin": "46226444", + "webgpu/shader/execution/bitcast.bin": "3c797668", + "webgpu/shader/execution/ceil.bin": "3c12c9a8", + "webgpu/shader/execution/clamp.bin": "dee74684", + "webgpu/shader/execution/cos.bin": "425c0bf3", + "webgpu/shader/execution/cosh.bin": "1664b602", + "webgpu/shader/execution/cross.bin": "1a4286f4", + "webgpu/shader/execution/degrees.bin": "50d340d8", + "webgpu/shader/execution/determinant.bin": "bb023e5", + "webgpu/shader/execution/distance.bin": "f881e9c4", + "webgpu/shader/execution/dot.bin": "570fea29", + "webgpu/shader/execution/exp.bin": "76843e6f", + "webgpu/shader/execution/exp2.bin": "75dfc3ba", + "webgpu/shader/execution/faceForward.bin": "6f5f6a3b", + "webgpu/shader/execution/floor.bin": "388bd73", + "webgpu/shader/execution/fma.bin": "7fe4d24b", + "webgpu/shader/execution/fract.bin": "f614e69f", + "webgpu/shader/execution/frexp.bin": "3c21ac64", + "webgpu/shader/execution/inverseSqrt.bin": "c41a3b91", + 
"webgpu/shader/execution/ldexp.bin": "82715024", + "webgpu/shader/execution/length.bin": "8b8e6a96", + "webgpu/shader/execution/log.bin": "febf86a5", + "webgpu/shader/execution/log2.bin": "8961d4f8", + "webgpu/shader/execution/max.bin": "10aee992", + "webgpu/shader/execution/min.bin": "aebcc063", + "webgpu/shader/execution/mix.bin": "968355d6", + "webgpu/shader/execution/modf.bin": "2ed2e487", + "webgpu/shader/execution/normalize.bin": "6e7113f9", + "webgpu/shader/execution/pack2x16float.bin": "33bb2862", + "webgpu/shader/execution/pow.bin": "83285164", + "webgpu/shader/execution/quantizeToF16.bin": "22eb970e", + "webgpu/shader/execution/radians.bin": "2653234f", + "webgpu/shader/execution/reflect.bin": "a3ee7789", + "webgpu/shader/execution/refract.bin": "77a43445", + "webgpu/shader/execution/round.bin": "421742d7", + "webgpu/shader/execution/saturate.bin": "2382307d", + "webgpu/shader/execution/sign.bin": "af1a4dfa", + "webgpu/shader/execution/sin.bin": "9ebf5419", + "webgpu/shader/execution/sinh.bin": "7ec22790", + "webgpu/shader/execution/smoothstep.bin": "9f12610c", + "webgpu/shader/execution/sqrt.bin": "11c3e405", + "webgpu/shader/execution/step.bin": "9363eaf0", + "webgpu/shader/execution/tan.bin": "adfe8d47", + "webgpu/shader/execution/tanh.bin": "76f2df9", + "webgpu/shader/execution/transpose.bin": "1c664829", + "webgpu/shader/execution/trunc.bin": "599ddfaf", + "webgpu/shader/execution/unpack2x16float.bin": "4995ab2c", + "webgpu/shader/execution/unpack2x16snorm.bin": "4d7e0eb5", + "webgpu/shader/execution/unpack2x16unorm.bin": "5bd397da", + "webgpu/shader/execution/unpack4x8snorm.bin": "9a4d38cb", + "webgpu/shader/execution/unpack4x8unorm.bin": "66308de3", + "webgpu/shader/execution/unary/af_arithmetic.bin": "c6efec3", + "webgpu/shader/execution/unary/af_assignment.bin": "92d5b3da", + "webgpu/shader/execution/unary/bool_conversion.bin": "1dd65f27", + "webgpu/shader/execution/unary/f16_arithmetic.bin": "d3e48584", + 
"webgpu/shader/execution/unary/f16_conversion.bin": "caf76d89", + "webgpu/shader/execution/unary/f32_arithmetic.bin": "20e7caca", + "webgpu/shader/execution/unary/f32_conversion.bin": "a1d49d40", + "webgpu/shader/execution/unary/i32_arithmetic.bin": "e83d060d", + "webgpu/shader/execution/unary/i32_conversion.bin": "25421e80", + "webgpu/shader/execution/unary/u32_conversion.bin": "bd8f17ae", + "webgpu/shader/execution/unary/ai_assignment.bin": "7be15439", + "webgpu/shader/execution/binary/ai_arithmetic.bin": "ccff3bc2", + "webgpu/shader/execution/unary/ai_arithmetic.bin": "840cb623", + "webgpu/shader/execution/binary/af_matrix_matrix_multiplication.bin": "50719535", + "webgpu/shader/execution/binary/af_matrix_scalar_multiplication.bin": "758077e0", + "webgpu/shader/execution/binary/af_matrix_vector_multiplication.bin": "32ebd17e", + "webgpu/shader/execution/derivatives.bin": "381b6836", + "webgpu/shader/execution/fwidth.bin": "4fda6558" } \ No newline at end of file diff --git a/src/resources/cache/webgpu/shader/execution/abs.bin b/src/resources/cache/webgpu/shader/execution/abs.bin index 4cba9b72dff4..373fef8f3b32 100644 Binary files a/src/resources/cache/webgpu/shader/execution/abs.bin and b/src/resources/cache/webgpu/shader/execution/abs.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/acos.bin b/src/resources/cache/webgpu/shader/execution/acos.bin index 2ecaaa389a4e..5e311531fef4 100644 Binary files a/src/resources/cache/webgpu/shader/execution/acos.bin and b/src/resources/cache/webgpu/shader/execution/acos.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/acosh.bin b/src/resources/cache/webgpu/shader/execution/acosh.bin index d48659f3c325..82a3857ebdc6 100644 Binary files a/src/resources/cache/webgpu/shader/execution/acosh.bin and b/src/resources/cache/webgpu/shader/execution/acosh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/asin.bin b/src/resources/cache/webgpu/shader/execution/asin.bin index 
b199953eaf4b..388de445c06b 100644 Binary files a/src/resources/cache/webgpu/shader/execution/asin.bin and b/src/resources/cache/webgpu/shader/execution/asin.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/asinh.bin b/src/resources/cache/webgpu/shader/execution/asinh.bin index b370c53b0179..120654f685c2 100644 Binary files a/src/resources/cache/webgpu/shader/execution/asinh.bin and b/src/resources/cache/webgpu/shader/execution/asinh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/atan.bin b/src/resources/cache/webgpu/shader/execution/atan.bin index 6ab0ba106a9e..e81af87e15e0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/atan.bin and b/src/resources/cache/webgpu/shader/execution/atan.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/atanh.bin b/src/resources/cache/webgpu/shader/execution/atanh.bin index e6a190b35df5..a7fee794094d 100644 Binary files a/src/resources/cache/webgpu/shader/execution/atanh.bin and b/src/resources/cache/webgpu/shader/execution/atanh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/bitcast.bin b/src/resources/cache/webgpu/shader/execution/bitcast.bin index ead299d5e78f..e743a092553e 100644 Binary files a/src/resources/cache/webgpu/shader/execution/bitcast.bin and b/src/resources/cache/webgpu/shader/execution/bitcast.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/ceil.bin b/src/resources/cache/webgpu/shader/execution/ceil.bin index 9b93ed416f64..02cf23324cdf 100644 Binary files a/src/resources/cache/webgpu/shader/execution/ceil.bin and b/src/resources/cache/webgpu/shader/execution/ceil.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/cos.bin b/src/resources/cache/webgpu/shader/execution/cos.bin index 4e34eff3f1b1..a5d8573c6257 100644 Binary files a/src/resources/cache/webgpu/shader/execution/cos.bin and b/src/resources/cache/webgpu/shader/execution/cos.bin differ diff --git 
a/src/resources/cache/webgpu/shader/execution/cosh.bin b/src/resources/cache/webgpu/shader/execution/cosh.bin index 5b30d2786c5e..25e8750cc7fc 100644 Binary files a/src/resources/cache/webgpu/shader/execution/cosh.bin and b/src/resources/cache/webgpu/shader/execution/cosh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/degrees.bin b/src/resources/cache/webgpu/shader/execution/degrees.bin index 662558d78aca..eb514cb48b4d 100644 Binary files a/src/resources/cache/webgpu/shader/execution/degrees.bin and b/src/resources/cache/webgpu/shader/execution/degrees.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/distance.bin b/src/resources/cache/webgpu/shader/execution/distance.bin index 23a4756a69eb..06d0d9a8fc2a 100644 Binary files a/src/resources/cache/webgpu/shader/execution/distance.bin and b/src/resources/cache/webgpu/shader/execution/distance.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/floor.bin b/src/resources/cache/webgpu/shader/execution/floor.bin index b5341907f8ef..a199d0db9ff0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/floor.bin and b/src/resources/cache/webgpu/shader/execution/floor.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/fract.bin b/src/resources/cache/webgpu/shader/execution/fract.bin index 7f09e8f60b23..bb80e873625f 100644 Binary files a/src/resources/cache/webgpu/shader/execution/fract.bin and b/src/resources/cache/webgpu/shader/execution/fract.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/frexp.bin b/src/resources/cache/webgpu/shader/execution/frexp.bin index 6811dfa29507..8f87d16a9c95 100644 Binary files a/src/resources/cache/webgpu/shader/execution/frexp.bin and b/src/resources/cache/webgpu/shader/execution/frexp.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/length.bin b/src/resources/cache/webgpu/shader/execution/length.bin index 3644d9b683ac..db42153edff7 100644 Binary files 
a/src/resources/cache/webgpu/shader/execution/length.bin and b/src/resources/cache/webgpu/shader/execution/length.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/log.bin b/src/resources/cache/webgpu/shader/execution/log.bin index ba591faad8a0..a5e62f8e02d8 100644 Binary files a/src/resources/cache/webgpu/shader/execution/log.bin and b/src/resources/cache/webgpu/shader/execution/log.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/log2.bin b/src/resources/cache/webgpu/shader/execution/log2.bin index 00641ce119cf..f19d77f41097 100644 Binary files a/src/resources/cache/webgpu/shader/execution/log2.bin and b/src/resources/cache/webgpu/shader/execution/log2.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/modf.bin b/src/resources/cache/webgpu/shader/execution/modf.bin index 363cc161fd72..74259a23a6f8 100644 Binary files a/src/resources/cache/webgpu/shader/execution/modf.bin and b/src/resources/cache/webgpu/shader/execution/modf.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin index e95227d36e50..a7b99a0a6cfd 100644 Binary files a/src/resources/cache/webgpu/shader/execution/pack2x16float.bin and b/src/resources/cache/webgpu/shader/execution/pack2x16float.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/pow.bin b/src/resources/cache/webgpu/shader/execution/pow.bin index 4f5faf3293fa..f66ec5ca2fbe 100644 Binary files a/src/resources/cache/webgpu/shader/execution/pow.bin and b/src/resources/cache/webgpu/shader/execution/pow.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin index 9e4308d5cd30..d6d75befc06b 100644 Binary files a/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin and b/src/resources/cache/webgpu/shader/execution/quantizeToF16.bin differ diff --git 
a/src/resources/cache/webgpu/shader/execution/radians.bin b/src/resources/cache/webgpu/shader/execution/radians.bin index f5285d108778..731e6be24ed5 100644 Binary files a/src/resources/cache/webgpu/shader/execution/radians.bin and b/src/resources/cache/webgpu/shader/execution/radians.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/round.bin b/src/resources/cache/webgpu/shader/execution/round.bin index c3b30b68f0a1..5ccab9e661c4 100644 Binary files a/src/resources/cache/webgpu/shader/execution/round.bin and b/src/resources/cache/webgpu/shader/execution/round.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/saturate.bin b/src/resources/cache/webgpu/shader/execution/saturate.bin index 2e1eb821a9e7..e7402f25af73 100644 Binary files a/src/resources/cache/webgpu/shader/execution/saturate.bin and b/src/resources/cache/webgpu/shader/execution/saturate.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sign.bin b/src/resources/cache/webgpu/shader/execution/sign.bin index 033f2e8158f6..576019c008ee 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sign.bin and b/src/resources/cache/webgpu/shader/execution/sign.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sin.bin b/src/resources/cache/webgpu/shader/execution/sin.bin index a2ca632008ff..bdbbfe2bd539 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sin.bin and b/src/resources/cache/webgpu/shader/execution/sin.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sinh.bin b/src/resources/cache/webgpu/shader/execution/sinh.bin index 1176cd472bf2..b4b051a226ae 100644 Binary files a/src/resources/cache/webgpu/shader/execution/sinh.bin and b/src/resources/cache/webgpu/shader/execution/sinh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/sqrt.bin b/src/resources/cache/webgpu/shader/execution/sqrt.bin index 6dd8088c0898..64a7db70d004 100644 Binary files 
a/src/resources/cache/webgpu/shader/execution/sqrt.bin and b/src/resources/cache/webgpu/shader/execution/sqrt.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/tan.bin b/src/resources/cache/webgpu/shader/execution/tan.bin index 572bee4df2a5..5af3e740d213 100644 Binary files a/src/resources/cache/webgpu/shader/execution/tan.bin and b/src/resources/cache/webgpu/shader/execution/tan.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/tanh.bin b/src/resources/cache/webgpu/shader/execution/tanh.bin index a13028b165f0..9687ff00235c 100644 Binary files a/src/resources/cache/webgpu/shader/execution/tanh.bin and b/src/resources/cache/webgpu/shader/execution/tanh.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/trunc.bin b/src/resources/cache/webgpu/shader/execution/trunc.bin index ba81e2ada427..e18bb52ed981 100644 Binary files a/src/resources/cache/webgpu/shader/execution/trunc.bin and b/src/resources/cache/webgpu/shader/execution/trunc.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin index 98a90ea45b9a..f28c275092f1 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/bool_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin index 14299da76670..c47b3d0afcb0 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f16_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin index ebc60029fa60..6e93bec14f76 100644 Binary files 
a/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_arithmetic.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin index 66b2bc73f889..55e1f5ed945f 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/f32_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin index 04841df60785..49969e9221ad 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/i32_conversion.bin differ diff --git a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin index 277ffc4d76b7..8dfc4e268561 100644 Binary files a/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin and b/src/resources/cache/webgpu/shader/execution/unary/u32_conversion.bin differ diff --git a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts index 3382dabc3720..c0613087a96a 100644 --- a/src/webgpu/api/operation/shader_module/compilation_info.spec.ts +++ b/src/webgpu/api/operation/shader_module/compilation_info.spec.ts @@ -3,7 +3,6 @@ ShaderModule CompilationInfo tests. 
`; import { makeTestGroup } from '../../../../common/framework/test_group.js'; -import { keysOf } from '../../../../common/util/data_tables.js'; import { assert } from '../../../../common/util/util.js'; import { GPUTest } from '../../../gpu_test.js'; @@ -79,66 +78,24 @@ const kInvalidShaderSources = [ const kAllShaderSources = [...kValidShaderSources, ...kInvalidShaderSources]; -// This is the source the sourcemap refers to. -const kOriginalSource = new Array(20) - .fill(0) - .map((_, i) => `original line ${i}`) - .join('\n'); - -const kSourceMaps: { [name: string]: undefined | object } = { - none: undefined, - empty: {}, - // A valid source map. It maps `unknown` on lines 4 and line 5 to - // `wasUnknown` from lines 20, 21 respectively - valid: { - version: 3, - sources: ['myCode'], - sourcesContent: [kOriginalSource], - names: ['myMain', 'wasUnknown'], - mappings: ';kBAYkCA,OACd;SAElB;gBAKOC;gBACAA', - }, - // not a valid sourcemap - invalid: { - version: -123, - notAnything: {}, - }, - // The correct format but this data is for lines 11,12 even - // though the source only has 5 or 6 lines - nonMatching: { - version: 3, - sources: ['myCode'], - sourcesContent: [kOriginalSource], - names: ['myMain'], - mappings: ';;;;;;;;;;kBAYkCA,OACd;SAElB', - }, -}; -const kSourceMapsKeys = keysOf(kSourceMaps); - g.test('getCompilationInfo_returns') .desc( ` Test that getCompilationInfo() can be called on any ShaderModule. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for both valid and invalid shader modules. - Test for shader modules containing only ASCII and those containing unicode characters. - Test that the compilation info for valid shader modules contains no errors. 
- Test that the compilation info for invalid shader modules contains at least one error.` ) - .params(u => - u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kAllShaderSources)) .fn(async t => { - const { _code, valid, sourceMapName } = t.params; + const { _code, valid } = t.params; const shaderModule = t.expectGPUError( 'validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }, !valid ); @@ -171,25 +128,15 @@ g.test('line_number_and_position') Test that line numbers reported by compilationInfo either point at an appropriate line and position or at 0:0, indicating an unknown position. - Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for invalid shader modules containing containing at least one error. - Test for shader modules containing only ASCII and those containing unicode characters.` ) - .params(u => - u - .combineWithParams(kInvalidShaderSources) - .beginSubcases() - .combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kInvalidShaderSources)) .fn(async t => { - const { _code, _errorLine, _errorLinePos, sourceMapName } = t.params; + const { _code, _errorLine, _errorLinePos } = t.params; const shaderModule = t.expectGPUError('validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }); const info = await shaderModule.getCompilationInfo(); @@ -232,24 +179,17 @@ g.test('offset_and_length') .desc( `Test that message offsets and lengths are valid and align with any reported lineNum and linePos. 
- Note: sourcemaps are not used in the WebGPU API. We are only testing that - browser that happen to use them don't fail or crash if the sourcemap is - bad or invalid. - - Test for valid and invalid shader modules. - Test for shader modules containing only ASCII and those containing unicode characters.` ) - .params(u => - u.combineWithParams(kAllShaderSources).beginSubcases().combine('sourceMapName', kSourceMapsKeys) - ) + .params(u => u.combineWithParams(kAllShaderSources)) .fn(async t => { - const { _code, valid, sourceMapName } = t.params; + const { _code, valid } = t.params; const shaderModule = t.expectGPUError( 'validation', () => { - const sourceMap = kSourceMaps[sourceMapName]; - return t.device.createShaderModule({ code: _code, ...(sourceMap && { sourceMap }) }); + return t.device.createShaderModule({ code: _code }); }, !valid ); diff --git a/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts new file mode 100644 index 000000000000..02ffd9e99db6 --- /dev/null +++ b/src/webgpu/api/validation/capability_checks/features/clip_distances.spec.ts @@ -0,0 +1,161 @@ +import { range } from '../../../../../common/util/util.js'; +import { align } from '../../../../util/math.js'; +import { kMaximumLimitBaseParams, makeLimitTestGroup } from '../limits/limit_utils.js'; + +function getPipelineDescriptorWithClipDistances( + device: GPUDevice, + interStageShaderVariables: number, + pointList: boolean, + clipDistances: number, + startLocation: number = 0 +): GPURenderPipelineDescriptor { + const vertexOutputVariables = + interStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + const maxVertexOutputVariables = + device.limits.maxInterStageShaderVariables - (pointList ? 
1 : 0) - align(clipDistances, 4) / 4; + + const varyings = ` + ${range( + vertexOutputVariables, + i => `@location(${i + startLocation}) v4_${i + startLocation}: vec4f,` + ).join('\n')} + `; + + const code = ` + // test value : ${interStageShaderVariables} + // maxInterStageShaderVariables : ${device.limits.maxInterStageShaderVariables} + // num variables in vertex shader : ${vertexOutputVariables}${ + pointList ? ' + point-list' : '' + }${ + clipDistances > 0 + ? ` + ${align(clipDistances, 4) / 4} (clip_distances[${clipDistances}])` + : '' + } + // maxInterStageVariables: : ${maxVertexOutputVariables} + // num used inter stage variables : ${vertexOutputVariables} + // vertex output start location : ${startLocation} + + enable clip_distances; + + struct VSOut { + @builtin(position) p: vec4f, + ${varyings} + ${ + clipDistances > 0 + ? `@builtin(clip_distances) clipDistances: array,` + : '' + } + } + struct FSIn { + ${varyings} + } + struct FSOut { + @location(0) color: vec4f, + } + @vertex fn vs() -> VSOut { + var o: VSOut; + o.p = vec4f(0); + return o; + } + @fragment fn fs(i: FSIn) -> FSOut { + var o: FSOut; + o.color = vec4f(0); + return o; + } + `; + const module = device.createShaderModule({ code }); + const pipelineDescriptor: GPURenderPipelineDescriptor = { + layout: 'auto', + primitive: { + topology: pointList ? 
'point-list' : 'triangle-list', + }, + vertex: { + module, + }, + fragment: { + module, + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, + }; + return pipelineDescriptor; +} + +const limit = 'maxInterStageShaderVariables'; +export const { g, description } = makeLimitTestGroup(limit); + +g.test('createRenderPipeline,at_over') + .desc(`Test using at and over ${limit} limit with clip_distances in createRenderPipeline(Async)`) + .params( + kMaximumLimitBaseParams + .combine('async', [false, true]) + .combine('pointList', [false, true]) + .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(async t => { + const { limitTest, testValueName, async, pointList, clipDistances } = t.params; + await t.testDeviceWithRequestedMaximumLimits( + limitTest, + testValueName, + async ({ device, testValue, shouldError }) => { + const pipelineDescriptor = getPipelineDescriptorWithClipDistances( + device, + testValue, + pointList, + clipDistances + ); + + await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError); + }, + undefined, + ['clip-distances'] + ); + }); + +g.test('createRenderPipeline,max_vertex_output_location') + .desc(`Test using clip_distances will limit the maximum value of vertex output location`) + .params(u => + u + .combine('pointList', [false, true]) + .combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8]) + .combine('startLocation', [0, 1, 2]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(async t => { + const { pointList, clipDistances, startLocation } = t.params; + + const maxInterStageShaderVariables = t.adapter.limits.maxInterStageShaderVariables; + const deviceInTest = await t.requestDeviceTracked(t.adapter, { + requiredFeatures: ['clip-distances'], + requiredLimits: { + maxInterStageShaderVariables: t.adapter.limits.maxInterStageShaderVariables, + }, + }); + const pipelineDescriptor = 
getPipelineDescriptorWithClipDistances( + deviceInTest, + maxInterStageShaderVariables, + pointList, + clipDistances, + startLocation + ); + const vertexOutputVariables = + maxInterStageShaderVariables - (pointList ? 1 : 0) - align(clipDistances, 4) / 4; + const maxLocationInTest = startLocation + vertexOutputVariables - 1; + const maxAllowedLocation = maxInterStageShaderVariables - 1 - align(clipDistances, 4) / 4; + const shouldError = maxLocationInTest > maxAllowedLocation; + + deviceInTest.pushErrorScope('validation'); + deviceInTest.createRenderPipeline(pipelineDescriptor); + const error = await deviceInTest.popErrorScope(); + t.expect(!!error === shouldError, `${error?.message || 'no error when one was expected'}`); + + deviceInTest.destroy(); + }); diff --git a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts index ea44b11c9148..14f1642cea9f 100644 --- a/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts +++ b/src/webgpu/api/validation/capability_checks/limits/limit_utils.ts @@ -535,11 +535,16 @@ export class LimitTestsImpl extends GPUTestBase { limitTest: MaximumLimitValueTest, testValueName: MaximumTestValue, fn: (inputs: MaximumLimitTestInputs) => void | Promise, - extraLimits?: LimitsRequest + extraLimits?: LimitsRequest, + extraFeatures: GPUFeatureName[] = [] ) { assert(!this._device); - const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit(limitTest, extraLimits); + const deviceAndLimits = await this._getDeviceWithRequestedMaximumLimit( + limitTest, + extraLimits, + extraFeatures + ); // If we request over the limit requestDevice will throw if (!deviceAndLimits) { return; diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts deleted file mode 100644 index 1963d9f28c6c..000000000000 --- 
a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderComponents.spec.ts +++ /dev/null @@ -1,153 +0,0 @@ -import { range } from '../../../../../common/util/util.js'; - -import { kMaximumLimitBaseParams, LimitsRequest, makeLimitTestGroup } from './limit_utils.js'; - -function getPipelineDescriptor( - device: GPUDevice, - testValue: number, - pointList: boolean, - frontFacing: boolean, - sampleIndex: boolean, - sampleMaskIn: boolean, - sampleMaskOut: boolean -): { pipelineDescriptor: GPURenderPipelineDescriptor; code: string } { - const success = testValue <= device.limits.maxInterStageShaderComponents; - - const maxVertexOutputComponents = - device.limits.maxInterStageShaderComponents - (pointList ? 1 : 0); - const maxFragmentInputComponents = - device.limits.maxInterStageShaderComponents - - (frontFacing ? 1 : 0) - - (sampleIndex ? 1 : 0) - - (sampleMaskIn ? 1 : 0); - const maxOutputComponents = Math.min(maxVertexOutputComponents, maxFragmentInputComponents); - const maxInterStageVariables = Math.floor(maxOutputComponents / 4); - const maxUserDefinedVertexComponents = Math.floor(maxVertexOutputComponents / 4) * 4; - const maxUserDefinedFragmentComponents = Math.floor(maxFragmentInputComponents / 4) * 4; - - const numInterStageVariables = success ? maxInterStageVariables : maxInterStageVariables + 1; - const numUserDefinedComponents = numInterStageVariables * 4; - - const varyings = ` - ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')} - `; - - const code = ` - // test value : ${testValue} - // maxInterStageShaderComponents : ${device.limits.maxInterStageShaderComponents} - // num components in vertex shader : ${numUserDefinedComponents}${ - pointList ? ' + point-list' : '' - } - // num components in fragment shader : ${numUserDefinedComponents}${ - frontFacing ? ' + front-facing' : '' - }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? 
' + sample_mask' : ''} - // maxUserDefinedVertexShaderOutputComponents : ${maxUserDefinedVertexComponents} - // maxUserDefinedFragmentShaderInputComponents : ${maxUserDefinedFragmentComponents} - // maxInterStageVariables: : ${maxInterStageVariables} - // num used inter stage variables : ${numInterStageVariables} - - struct VSOut { - @builtin(position) p: vec4f, - ${varyings} - } - struct FSIn { - ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''} - ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''} - ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''} - ${varyings} - } - struct FSOut { - @location(0) color: vec4f, - ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''} - } - @vertex fn vs() -> VSOut { - var o: VSOut; - o.p = vec4f(0); - return o; - } - @fragment fn fs(i: FSIn) -> FSOut { - var o: FSOut; - o.color = vec4f(0); - return o; - } - `; - const module = device.createShaderModule({ code }); - const pipelineDescriptor: GPURenderPipelineDescriptor = { - layout: 'auto', - primitive: { - topology: pointList ? 
'point-list' : 'triangle-list', - }, - vertex: { - module, - entryPoint: 'vs', - }, - fragment: { - module, - entryPoint: 'fs', - targets: [ - { - format: 'rgba8unorm', - }, - ], - }, - }; - return { pipelineDescriptor, code }; -} - -const limit = 'maxInterStageShaderComponents'; -export const { g, description } = makeLimitTestGroup(limit); - -g.test('createRenderPipeline,at_over') - .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`) - .params( - kMaximumLimitBaseParams - .combine('async', [false, true]) - .combine('pointList', [false, true]) - .combine('frontFacing', [false, true]) - .combine('sampleIndex', [false, true]) - .combine('sampleMaskIn', [false, true]) - .combine('sampleMaskOut', [false, true]) - ) - .beforeAllSubcases(t => { - if (t.isCompatibility) { - t.skipIf( - t.params.sampleMaskIn || t.params.sampleMaskOut, - 'sample_mask not supported in compatibility mode' - ); - t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode'); - } - }) - .fn(async t => { - const { - limitTest, - testValueName, - async, - pointList, - frontFacing, - sampleIndex, - sampleMaskIn, - sampleMaskOut, - } = t.params; - // Request the largest value of maxInterStageShaderVariables to allow the test using as many - // inter-stage shader components as possible without being limited by - // maxInterStageShaderVariables. 
- const extraLimits: LimitsRequest = { maxInterStageShaderVariables: 'adapterLimit' }; - await t.testDeviceWithRequestedMaximumLimits( - limitTest, - testValueName, - async ({ device, testValue, shouldError }) => { - const { pipelineDescriptor, code } = getPipelineDescriptor( - device, - testValue, - pointList, - frontFacing, - sampleIndex, - sampleMaskIn, - sampleMaskOut - ); - - await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError, code); - }, - extraLimits - ); - }); diff --git a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts index e54b7f7df178..5298e8c21587 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxInterStageShaderVariables.spec.ts @@ -1,26 +1,86 @@ +import { range } from '../../../../../common/util/util.js'; + import { kMaximumLimitBaseParams, makeLimitTestGroup } from './limit_utils.js'; -function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderPipelineDescriptor { +function getPipelineDescriptor( + device: GPUDevice, + testValue: number, + pointList: boolean, + frontFacing: boolean, + sampleIndex: boolean, + sampleMaskIn: boolean, + sampleMaskOut: boolean +): GPURenderPipelineDescriptor { + const vertexOutputVariables = testValue - (pointList ? 1 : 0); + const fragmentInputVariables = testValue - (frontFacing || sampleIndex || sampleMaskIn ? 1 : 0); + const numInterStageVariables = Math.min(vertexOutputVariables, fragmentInputVariables); + + const maxVertexOutputVariables = device.limits.maxInterStageShaderVariables - (pointList ? 1 : 0); + const maxFragmentInputVariables = + device.limits.maxInterStageShaderVariables - + (frontFacing || sampleIndex || sampleMaskIn ? 
1 : 0); + const maxInterStageVariables = Math.min(maxVertexOutputVariables, maxFragmentInputVariables); + + const varyings = ` + ${range(numInterStageVariables, i => `@location(${i}) v4_${i}: vec4f,`).join('\n')} + `; + const code = ` + // test value : ${testValue} + // maxInterStageShaderVariables : ${device.limits.maxInterStageShaderVariables} + // num variables in vertex shader : ${vertexOutputVariables}${pointList ? ' + point-list' : ''} + // num variables in fragment shader : ${fragmentInputVariables}${ + frontFacing ? ' + front-facing' : '' + }${sampleIndex ? ' + sample_index' : ''}${sampleMaskIn ? ' + sample_mask' : ''} + // maxInterStageVariables: : ${maxInterStageVariables} + // num used inter stage variables : ${numInterStageVariables} + struct VSOut { @builtin(position) p: vec4f, - @location(${testValue}) v: f32, + ${varyings} + } + struct FSIn { + ${frontFacing ? '@builtin(front_facing) frontFacing: bool,' : ''} + ${sampleIndex ? '@builtin(sample_index) sampleIndex: u32,' : ''} + ${sampleMaskIn ? '@builtin(sample_mask) sampleMask: u32,' : ''} + ${varyings} + } + struct FSOut { + @location(0) color: vec4f, + ${sampleMaskOut ? '@builtin(sample_mask) sampleMask: u32,' : ''} } @vertex fn vs() -> VSOut { var o: VSOut; o.p = vec4f(0); - o.v = 1.0; + return o; + } + @fragment fn fs(i: FSIn) -> FSOut { + var o: FSOut; + o.color = vec4f(0); return o; } `; const module = device.createShaderModule({ code }); - return { + const pipelineDescriptor: GPURenderPipelineDescriptor = { layout: 'auto', + primitive: { + topology: pointList ? 
'point-list' : 'triangle-list', + }, vertex: { module, entryPoint: 'vs', }, + fragment: { + module, + entryPoint: 'fs', + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, }; + return pipelineDescriptor; } const limit = 'maxInterStageShaderVariables'; @@ -28,15 +88,48 @@ export const { g, description } = makeLimitTestGroup(limit); g.test('createRenderPipeline,at_over') .desc(`Test using at and over ${limit} limit in createRenderPipeline(Async)`) - .params(kMaximumLimitBaseParams.combine('async', [false, true])) + .params( + kMaximumLimitBaseParams + .combine('async', [false, true]) + .combine('pointList', [false, true]) + .combine('frontFacing', [false, true]) + .combine('sampleIndex', [false, true]) + .combine('sampleMaskIn', [false, true]) + .combine('sampleMaskOut', [false, true]) + ) + .beforeAllSubcases(t => { + if (t.isCompatibility) { + t.skipIf( + t.params.sampleMaskIn || t.params.sampleMaskOut, + 'sample_mask not supported in compatibility mode' + ); + t.skipIf(t.params.sampleIndex, 'sample_index not supported in compatibility mode'); + } + }) .fn(async t => { - const { limitTest, testValueName, async } = t.params; + const { + limitTest, + testValueName, + async, + pointList, + frontFacing, + sampleIndex, + sampleMaskIn, + sampleMaskOut, + } = t.params; await t.testDeviceWithRequestedMaximumLimits( limitTest, testValueName, async ({ device, testValue, shouldError }) => { - const lastIndex = testValue - 1; - const pipelineDescriptor = getPipelineDescriptor(device, lastIndex); + const pipelineDescriptor = getPipelineDescriptor( + device, + testValue, + pointList, + frontFacing, + sampleIndex, + sampleMaskIn, + sampleMaskOut + ); await t.testCreateRenderPipeline(pipelineDescriptor, async, shouldError); } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts index 9e5aaa144bfa..b37cc9230931 100644 --- 
a/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexAttributes.spec.ts @@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, lastIndex: number): GPURenderP }, ], }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts index 0af5724f2a2a..be9c7ffd7f7b 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBufferArrayStride.spec.ts @@ -32,6 +32,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP }, ], }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts index 9a4108cb0c08..02701de0d1e9 100644 --- a/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts +++ b/src/webgpu/api/validation/capability_checks/limits/maxVertexBuffers.spec.ts @@ -19,6 +19,7 @@ function getPipelineDescriptor(device: GPUDevice, testValue: number): GPURenderP module, buffers, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; } diff --git a/src/webgpu/api/validation/layout_shader_compat.spec.ts b/src/webgpu/api/validation/layout_shader_compat.spec.ts index 2b5e609c55d6..5ee16510c77a 100644 --- a/src/webgpu/api/validation/layout_shader_compat.spec.ts +++ b/src/webgpu/api/validation/layout_shader_compat.spec.ts @@ -253,6 +253,7 @@ g.test('pipeline_layout_shader_exact_match') code: vertexShader, }), }, + depthStencil: { format: 'depth32float', 
depthWriteEnabled: true, depthCompare: 'always' }, }); break; } diff --git a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts index 1a8dec37464f..a3af6d675ac3 100644 --- a/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts +++ b/src/webgpu/api/validation/render_pipeline/inter_stage.spec.ts @@ -273,39 +273,29 @@ g.test('max_shader_variable_location') t.doCreateRenderPipelineTest(isAsync, location < maxInterStageShaderVariables, descriptor); }); -g.test('max_components_count,output') +g.test('max_variables_count,output') .desc( - `Tests that validation should fail when scalar components of all user-defined outputs > max vertex shader output components.` + `Tests that validation should fail when all user-defined outputs > max vertex shader output + variables.` ) .params(u => u.combine('isAsync', [false, true]).combineWithParams([ - // Number of user-defined output scalar components in test shader = - // Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4. 
- { numScalarDelta: 0, topology: 'triangle-list', _success: true }, - { numScalarDelta: 1, topology: 'triangle-list', _success: false }, - { numScalarDelta: 0, topology: 'point-list', _success: false }, - { numScalarDelta: -1, topology: 'point-list', _success: false }, - { numScalarDelta: -3, topology: 'point-list', _success: false }, - { numScalarDelta: -4, topology: 'point-list', _success: true }, + // Number of user-defined output variables in test shader = + // device.limits.maxInterStageShaderVariables + numVariablesDelta + { numVariablesDelta: 0, topology: 'triangle-list', _success: true }, + { numVariablesDelta: 1, topology: 'triangle-list', _success: false }, + { numVariablesDelta: 0, topology: 'point-list', _success: false }, + { numVariablesDelta: -1, topology: 'point-list', _success: true }, ] as const) ) .fn(t => { - const { isAsync, numScalarDelta, topology, _success } = t.params; + const { isAsync, numVariablesDelta, topology, _success } = t.params; - const numScalarComponents = t.device.limits.maxInterStageShaderComponents + numScalarDelta; - - const numVec4 = Math.floor(numScalarComponents / 4); - const numTrailingScalars = numScalarComponents % 4; + const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta; const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4`); const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4`); - if (numTrailingScalars > 0) { - const typeString = numTrailingScalars === 1 ? 
'f32' : `vec${numTrailingScalars}`; - outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`); - inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`); - } - const descriptor = t.getDescriptorWithStates( t.getVertexStateWithOutputs(outputs), t.getFragmentStateWithInputs(inputs) @@ -315,42 +305,32 @@ g.test('max_components_count,output') t.doCreateRenderPipelineTest(isAsync, _success, descriptor); }); -g.test('max_components_count,input') +g.test('max_variables_count,input') .desc( - `Tests that validation should fail when scalar components of all user-defined inputs > max vertex shader output components.` + `Tests that validation should fail when all user-defined inputs > max vertex shader output + variables.` ) .params(u => u.combine('isAsync', [false, true]).combineWithParams([ - // Number of user-defined input scalar components in test shader = - // Math.floor((device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4. - { numScalarDelta: 0, useExtraBuiltinInputs: false }, - { numScalarDelta: 1, useExtraBuiltinInputs: false }, - { numScalarDelta: 0, useExtraBuiltinInputs: true }, - { numScalarDelta: -3, useExtraBuiltinInputs: true }, - { numScalarDelta: -4, useExtraBuiltinInputs: true }, + // Number of user-defined output variables in test shader = + // device.limits.maxInterStageShaderVariables + numVariablesDelta + { numVariablesDelta: 0, useExtraBuiltinInputs: false }, + { numVariablesDelta: 1, useExtraBuiltinInputs: false }, + { numVariablesDelta: 0, useExtraBuiltinInputs: true }, + { numVariablesDelta: -1, useExtraBuiltinInputs: true }, ] as const) ) .fn(t => { - const { isAsync, numScalarDelta, useExtraBuiltinInputs } = t.params; + const { isAsync, numVariablesDelta, useExtraBuiltinInputs } = t.params; - const numScalarComponents = - Math.floor((t.device.limits.maxInterStageShaderComponents + numScalarDelta) / 4) * 4; - const numExtraComponents = useExtraBuiltinInputs ? (t.isCompatibility ? 
2 : 3) : 0; - const numUsedComponents = numScalarComponents + numExtraComponents; - const success = numUsedComponents <= t.device.limits.maxInterStageShaderComponents; - - const numVec4 = Math.floor(numScalarComponents / 4); - const numTrailingScalars = numScalarComponents % 4; + const numVec4 = t.device.limits.maxInterStageShaderVariables + numVariablesDelta; + const numExtraVariables = useExtraBuiltinInputs ? 1 : 0; + const numUsedVariables = numVec4 + numExtraVariables; + const success = numUsedVariables <= t.device.limits.maxInterStageShaderVariables; const outputs = range(numVec4, i => `@location(${i}) vout${i}: vec4`); const inputs = range(numVec4, i => `@location(${i}) fin${i}: vec4`); - if (numTrailingScalars > 0) { - const typeString = numTrailingScalars === 1 ? 'f32' : `vec${numTrailingScalars}`; - outputs.push(`@location(${numVec4}) vout${numVec4}: ${typeString}`); - inputs.push(`@location(${numVec4}) fin${numVec4}: ${typeString}`); - } - if (useExtraBuiltinInputs) { inputs.push('@builtin(front_facing) front_facing_in: bool'); if (!t.isCompatibility) { diff --git a/src/webgpu/api/validation/render_pipeline/misc.spec.ts b/src/webgpu/api/validation/render_pipeline/misc.spec.ts index 861eb4d24c7f..d10c7ca99985 100644 --- a/src/webgpu/api/validation/render_pipeline/misc.spec.ts +++ b/src/webgpu/api/validation/render_pipeline/misc.spec.ts @@ -36,7 +36,7 @@ g.test('no_attachment') g.test('vertex_state_only') .desc( `Tests creating vertex-state-only render pipeline. A vertex-only render pipeline has no fragment -state (and thus has no color state), and can be created with or without depth stencil state.` +state (and thus has no color state), and must have a depth-stencil state as an attachment is required.` ) .params(u => u @@ -76,7 +76,7 @@ state (and thus has no color state), and can be created with or without depth st targets: hasColor ? 
[{ format: 'rgba8unorm' }] : [], }); - t.doCreateRenderPipelineTest(isAsync, true, descriptor); + t.doCreateRenderPipelineTest(isAsync, depthStencilState !== undefined, descriptor); }); g.test('pipeline_layout,device_mismatch') diff --git a/src/webgpu/api/validation/shader_module/entry_point.spec.ts b/src/webgpu/api/validation/shader_module/entry_point.spec.ts index c956dc302144..67dbef1851e1 100644 --- a/src/webgpu/api/validation/shader_module/entry_point.spec.ts +++ b/src/webgpu/api/validation/shader_module/entry_point.spec.ts @@ -128,6 +128,7 @@ and check that the APIs only accept matching entryPoint. module: t.device.createShaderModule({ code }), entryPoint, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; let _success = true; if (shaderModuleStage !== 'vertex') { @@ -258,6 +259,7 @@ an undefined entryPoint is valid if there's an extra shader stage. }), entryPoint: undefined, }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }; const success = extraShaderModuleStage !== 'vertex'; diff --git a/src/webgpu/capability_info.ts b/src/webgpu/capability_info.ts index 24a103dfb4b1..62ac621dc66e 100644 --- a/src/webgpu/capability_info.ts +++ b/src/webgpu/capability_info.ts @@ -730,7 +730,6 @@ const [kLimitInfoKeys, kLimitInfoDefaults, kLimitInfoData] = 'maxBufferSize': [ , 268435456, 268435456, kMaxUnsignedLongLongValue], 'maxVertexAttributes': [ , 16, 16, ], 'maxVertexBufferArrayStride': [ , 2048, 2048, ], - 'maxInterStageShaderComponents': [ , 64, 60, ], 'maxInterStageShaderVariables': [ , 16, 15, ], 'maxColorAttachments': [ , 8, 4, ], @@ -818,19 +817,21 @@ export const kFeatureNameInfo: { readonly [k in GPUFeatureName]: {}; } = /* prettier-ignore */ { - 'bgra8unorm-storage': {}, - 'depth-clip-control': {}, - 'depth32float-stencil8': {}, - 'texture-compression-bc': {}, - 'texture-compression-etc2': {}, - 'texture-compression-astc': {}, - 'timestamp-query': {}, - 
'indirect-first-instance': {}, - 'shader-f16': {}, - 'rg11b10ufloat-renderable': {}, - 'float32-filterable': {}, - 'clip-distances': {}, - 'dual-source-blending': {}, + 'bgra8unorm-storage': {}, + 'depth-clip-control': {}, + 'depth32float-stencil8': {}, + 'texture-compression-bc': {}, + 'texture-compression-bc-sliced-3d': {}, + 'texture-compression-etc2': {}, + 'texture-compression-astc': {}, + 'texture-compression-astc-sliced-3d': {}, + 'timestamp-query': {}, + 'indirect-first-instance': {}, + 'shader-f16': {}, + 'rg11b10ufloat-renderable': {}, + 'float32-filterable': {}, + 'clip-distances': {}, + 'dual-source-blending': {}, }; /** List of all GPUFeatureName values. */ export const kFeatureNames = keysOf(kFeatureNameInfo); diff --git a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts index 805203870903..a74e8900904c 100644 --- a/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts +++ b/src/webgpu/compat/api/validation/render_pipeline/unsupported_wgsl.spec.ts @@ -232,6 +232,7 @@ g.test('unsupportedStorageTextureFormats,renderPipeline') t.doCreateRenderPipelineTest(async, isValid, { layout: 'auto', vertex: { module, entryPoint }, + depthStencil: { format: 'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); }); @@ -267,7 +268,7 @@ g.test('textureLoad_with_depth_textures,computePipeline') `, }); - const isValid = !t.isCompatibility; + const isValid = !t.isCompatibility || entryPoint === 'csWithoutDepthUsage'; t.doCreateComputePipelineTest(async, isValid, { layout: 'auto', compute: { module, entryPoint }, @@ -301,9 +302,10 @@ g.test('textureLoad_with_depth_textures,renderPipeline') `, }); - const isValid = !t.isCompatibility; + const isValid = !t.isCompatibility || entryPoint === 'vsWithoutDepthUsage'; t.doCreateRenderPipelineTest(async, isValid, { layout: 'auto', vertex: { module, entryPoint }, + depthStencil: { format: 
'depth32float', depthWriteEnabled: true, depthCompare: 'always' }, }); }); diff --git a/src/webgpu/listing_meta.json b/src/webgpu/listing_meta.json index 85fe0bdc6a8b..b17a8c772873 100644 --- a/src/webgpu/listing_meta.json +++ b/src/webgpu/listing_meta.json @@ -283,6 +283,7 @@ "webgpu:api,validation,capability_checks,features,texture_formats:texture_descriptor:*": { "subcaseMS": 3.830 }, "webgpu:api,validation,capability_checks,features,texture_formats:texture_descriptor_view_formats:*": { "subcaseMS": 5.734 }, "webgpu:api,validation,capability_checks,features,texture_formats:texture_view_descriptor:*": { "subcaseMS": 4.113 }, + "webgpu:api,validation,capability_checks,features,clip_distances:createRenderPipeline,at_over:*": { "subcaseMS": 13.7 }, "webgpu:api,validation,capability_checks,limits,maxBindGroups:createPipeline,at_over:*": { "subcaseMS": 10.990 }, "webgpu:api,validation,capability_checks,limits,maxBindGroups:createPipelineLayout,at_over:*": { "subcaseMS": 9.310 }, "webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,at_over:*": { "subcaseMS": 9.984 }, @@ -313,7 +314,6 @@ "webgpu:api,validation,capability_checks,limits,maxComputeWorkgroupsPerDimension:validate:*": { "subcaseMS": 138.900 }, "webgpu:api,validation,capability_checks,limits,maxDynamicStorageBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 15.680 }, "webgpu:api,validation,capability_checks,limits,maxDynamicUniformBuffersPerPipelineLayout:createBindGroupLayout,at_over:*": { "subcaseMS": 10.268 }, - "webgpu:api,validation,capability_checks,limits,maxInterStageShaderComponents:createRenderPipeline,at_over:*": { "subcaseMS": 12.916 }, "webgpu:api,validation,capability_checks,limits,maxInterStageShaderVariables:createRenderPipeline,at_over:*": { "subcaseMS": 13.700 }, "webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createBindGroupLayout,at_over:*": { "subcaseMS": 47.857 }, 
"webgpu:api,validation,capability_checks,limits,maxSampledTexturesPerShaderStage:createPipeline,at_over:*": { "subcaseMS": 45.611 }, @@ -728,9 +728,9 @@ "webgpu:api,validation,render_pipeline,inter_stage:location,mismatch:*": { "subcaseMS": 7.280 }, "webgpu:api,validation,render_pipeline,inter_stage:location,subset:*": { "subcaseMS": 1.250 }, "webgpu:api,validation,render_pipeline,inter_stage:location,superset:*": { "subcaseMS": 0.901 }, - "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,input:*": { "subcaseMS": 6.560 }, - "webgpu:api,validation,render_pipeline,inter_stage:max_components_count,output:*": { "subcaseMS": 8.426 }, "webgpu:api,validation,render_pipeline,inter_stage:max_shader_variable_location:*": { "subcaseMS": 11.050 }, + "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,input:*": { "subcaseMS": 6.560 }, + "webgpu:api,validation,render_pipeline,inter_stage:max_variables_count,output:*": { "subcaseMS": 8.426 }, "webgpu:api,validation,render_pipeline,inter_stage:type:*": { "subcaseMS": 6.170 }, "webgpu:api,validation,render_pipeline,misc:basic:*": { "subcaseMS": 0.901 }, "webgpu:api,validation,render_pipeline,misc:external_texture:*": { "subcaseMS": 35.189 }, @@ -862,7 +862,6 @@ "webgpu:compat,api,validation,encoding,programmable,pipeline_bind_group_compat:twoDifferentTextureViews,render_pass,used:*": { "subcaseMS": 0.000 }, "webgpu:compat,api,validation,render_pipeline,depth_stencil_state:depthBiasClamp:*": { "subcaseMS": 1.604 }, "webgpu:compat,api,validation,render_pipeline,fragment_state:colorState:*": { "subcaseMS": 32.604 }, - "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:interpolate:*": { "subcaseMS": 3.488 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_index:*": { "subcaseMS": 0.487 }, 
"webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:sample_mask:*": { "subcaseMS": 0.408 }, @@ -870,6 +869,7 @@ "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:textureLoad_with_depth_textures,renderPipeline:*": { "subcaseMS": 1.259 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,computePipeline:*": { "subcaseMS": 1.206 }, "webgpu:compat,api,validation,render_pipeline,unsupported_wgsl:unsupportedStorageTextureFormats,renderPipeline:*": { "subcaseMS": 1.206 }, + "webgpu:compat,api,validation,render_pipeline,vertex_state:maxVertexAttributesVertexIndexInstanceIndex:*": { "subcaseMS": 3.700 }, "webgpu:compat,api,validation,texture,createTexture:depthOrArrayLayers_incompatible_with_textureBindingViewDimension:*": { "subcaseMS": 12.712 }, "webgpu:compat,api,validation,texture,createTexture:format_reinterpretation:*": { "subcaseMS": 7.012 }, "webgpu:compat,api,validation,texture,createTexture:invalidTextureBindingViewDimension:*": { "subcaseMS": 6.022 }, @@ -1525,15 +1525,30 @@ "webgpu:shader,execution,expression,call,builtin,step:f32:*": { "subcaseMS": 291.363 }, "webgpu:shader,execution,expression,call,builtin,storageBarrier:barrier:*": { "subcaseMS": 0.801 }, "webgpu:shader,execution,expression,call,builtin,storageBarrier:stage:*": { "subcaseMS": 2.402 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:compute,split:*": { "subcaseMS": 2853.671 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:data_types:*": { "subcaseMS": 9216.247 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fp_accuracy:*": { "subcaseMS": 9952.350 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAdd:fragment:*": { "subcaseMS": 0.229 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,all_active:*": { "subcaseMS": 5162.414 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:compute,split:*": { "subcaseMS": 26610.627 }, + 
"webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,all_active:*": { "subcaseMS": 0.172 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAll:fragment,split:*": { "subcaseMS": 0.327 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,all_active:*": { "subcaseMS": 7028.394 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:compute,split:*": { "subcaseMS": 50.998 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,all_active:*": { "subcaseMS": 0.227 }, + "webgpu:shader,execution,expression,call,builtin,subgroupAny:fragment,split:*": { "subcaseMS": 0.309 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:compute,split:*": { "subcaseMS": 38.740 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment,split:*": { "subcaseMS": 0.331 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:fragment:*": { "subcaseMS": 0.059 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate:*": { "subcaseMS": 0.075 }, "webgpu:shader,execution,expression,call,builtin,subgroupBallot:predicate_and_control_flow:*": { "subcaseMS": 41.053 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:data_types:*": { "subcaseMS": 252.374 }, - "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:dynamically_uniform_id:*": { "subcaseMS": 0.211 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:fragment:*": { "subcaseMS": 0.108 }, "webgpu:shader,execution,expression,call,builtin,subgroupBroadcast:workgroup_uniform_load:*": { "subcaseMS": 109.832 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:compute,split:*": { "subcaseMS": 5034.263 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:data_types:*": { "subcaseMS": 11861.865 }, + "webgpu:shader,execution,expression,call,builtin,subgroupMul:fp_accuracy:*": { "subcaseMS": 35606.717 }, + 
"webgpu:shader,execution,expression,call,builtin,subgroupMul:fragment:*": { "subcaseMS": 0.263 }, "webgpu:shader,execution,expression,call,builtin,tan:abstract_float:*": { "subcaseMS": 17043.428 }, "webgpu:shader,execution,expression,call,builtin,tan:f16:*": { "subcaseMS": 116.157 }, "webgpu:shader,execution,expression,call,builtin,tan:f32:*": { "subcaseMS": 13.532 }, @@ -1554,8 +1569,8 @@ "webgpu:shader,execution,expression,call,builtin,textureGather:sampled_array_3d_coords:*": { "subcaseMS": 60.700 }, "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_2d_coords:*": { "subcaseMS": 291.301 }, "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:array_3d_coords:*": { "subcaseMS": 191.101 }, - "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_2d_coords:*": { "subcaseMS": 57.600 }, - "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_array_3d_coords:*": { "subcaseMS": 10.101 }, + "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_2d_coords:*": { "subcaseMS": 57.600 }, + "webgpu:shader,execution,expression,call,builtin,textureGatherCompare:sampled_3d_coords:*": { "subcaseMS": 10.101 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:arrayed:*": { "subcaseMS": 30.501 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:depth:*": { "subcaseMS": 3.200 }, "webgpu:shader,execution,expression,call,builtin,textureLoad:external:*": { "subcaseMS": 1.401 }, @@ -2005,6 +2020,11 @@ "webgpu:shader,validation,expression,binary,div_rem:scalar_vector:*": { "subcaseMS": 743.721 }, "webgpu:shader,validation,expression,binary,div_rem:scalar_vector_out_of_range:*": { "subcaseMS": 650.727 }, "webgpu:shader,validation,expression,binary,parse:all:*": { "subcaseMS": 527.287 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_array_count_on_rhs:*": { "subcaseMS": 4.309 }, + 
"webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_const:*": { "subcaseMS": 4.341 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_rhs_override:*": { "subcaseMS": 27.490 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:invalid_types:*": { "subcaseMS": 13.409 }, + "webgpu:shader,validation,expression,binary,short_circuiting_and_or:scalar_vector:*": { "subcaseMS": 397.769 }, "webgpu:shader,validation,expression,call,builtin,abs:parameters:*": { "subcaseMS": 10.133 }, "webgpu:shader,validation,expression,call,builtin,abs:values:*": { "subcaseMS": 0.391 }, "webgpu:shader,validation,expression,call,builtin,acos:integer_argument:*": { "subcaseMS": 1.512 }, @@ -2226,6 +2246,22 @@ "webgpu:shader,validation,expression,call,builtin,pow:invalid_argument:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,pow:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,pow:values:*": { "subcaseMS": 1.000 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:data_type:*": { "subcaseMS": 39.783 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:early_eval:*": { "subcaseMS": 63.825 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_constness:*": { "subcaseMS": 15.347 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:id_type:*": { "subcaseMS": 26.268 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:must_use:*": { "subcaseMS": 41.658 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups:*": { "subcaseMS": 42.565 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:requires_subgroups_f16:*": { "subcaseMS": 44.998 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:return_type:*": { "subcaseMS": 363.607 }, + "webgpu:shader,validation,expression,call,builtin,quadBroadcast:stage:*": { 
"subcaseMS": 3.050 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:data_type:*": { "subcaseMS": 89.379 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:early_eval:*": { "subcaseMS": 108.243 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:must_use:*": { "subcaseMS": 5.557 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups:*": { "subcaseMS": 113.624 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:requires_subgroups_f16:*": { "subcaseMS": 12.712 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:return_type:*": { "subcaseMS": 1424.551 }, + "webgpu:shader,validation,expression,call,builtin,quadSwap:stage:*": { "subcaseMS": 7.664 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:args:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,quantizeToF16:values:*": { "subcaseMS": 1.000 }, @@ -2278,16 +2314,73 @@ "webgpu:shader,validation,expression,call,builtin,step:args:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,step:must_use:*": { "subcaseMS": 1.000 }, "webgpu:shader,validation,expression,call,builtin,step:values:*": { "subcaseMS": 1.000 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:data_type:*": { "subcaseMS": 32.897 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:early_eval:*": { "subcaseMS": 101.800 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:invalid_types:*": { "subcaseMS": 95.889 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:must_use:*": { "subcaseMS": 62.933 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:return_type:*": { "subcaseMS": 363.546 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAdd:stage:*": { "subcaseMS": 3.536 }, + 
"webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:data_type:*": { "subcaseMS": 57.943 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:early_eval:*": { "subcaseMS": 173.714 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:must_use:*": { "subcaseMS": 4.592 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:requires_subgroups:*": { "subcaseMS": 73.866 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:return_type:*": { "subcaseMS": 39.388 }, + "webgpu:shader,validation,expression,call,builtin,subgroupAnyAll:stage:*": { "subcaseMS": 6.862 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:data_type:*": { "subcaseMS": 115.557 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:early_eval:*": { "subcaseMS": 52.992 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBallot:must_use:*": { "subcaseMS": 39.441 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBallot:requires_subgroups:*": { "subcaseMS": 36.819 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:return_type:*": { "subcaseMS": 22.381 }, "webgpu:shader,validation,expression,call,builtin,subgroupBallot:stage:*": { "subcaseMS": 3.712 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:data_type:*": { "subcaseMS": 94.072 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:early_eval:*": { "subcaseMS": 569.598 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:must_use:*": { "subcaseMS": 6.172 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:requires_subgroups:*": { "subcaseMS": 108.478 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:return_type:*": { "subcaseMS": 1430.736 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBitwise:stage:*": { "subcaseMS": 11.858 }, 
"webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:data_type:*": { "subcaseMS": 97.991 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:early_eval:*": { "subcaseMS": 1.254 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_constness:*": { "subcaseMS": 7.026 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:id_type:*": { "subcaseMS": 24.703 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:must_use:*": { "subcaseMS": 232.030 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups:*": { "subcaseMS": 47.231 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:requires_subgroups_f16:*": { "subcaseMS": 38.503 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:return_type:*": { "subcaseMS": 496.031 }, "webgpu:shader,validation,expression,call,builtin,subgroupBroadcast:stage:*": { "subcaseMS": 3.715 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:data_type:*": { "subcaseMS": 32.168 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:early_eval:*": { "subcaseMS": 57.922 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:must_use:*": { "subcaseMS": 36.296 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups:*": { "subcaseMS": 42.522 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:requires_subgroups_f16:*": { "subcaseMS": 47.111 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:return_type:*": { "subcaseMS": 402.558 }, + "webgpu:shader,validation,expression,call,builtin,subgroupBroadcastFirst:stage:*": { "subcaseMS": 2.869 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:data_type:*": { "subcaseMS": 72.441 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:early_eval:*": { 
"subcaseMS": 56.115 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:must_use:*": { "subcaseMS": 32.820 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:requires_subgroups:*": { "subcaseMS": 35.595 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:return_type:*": { "subcaseMS": 22.712 }, + "webgpu:shader,validation,expression,call,builtin,subgroupElect:stage:*": { "subcaseMS": 3.790 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:data_type:*": { "subcaseMS": 64.143 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:early_eval:*": { "subcaseMS": 551.671 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:must_use:*": { "subcaseMS": 4.403 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups:*": { "subcaseMS": 87.208 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:requires_subgroups_f16:*": { "subcaseMS": 25.190 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:return_type:*": { "subcaseMS": 911.454 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMinMax:stage:*": { "subcaseMS": 6.395 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:data_type:*": { "subcaseMS": 45.396 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:early_eval:*": { "subcaseMS": 56.571 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:invalid_types:*": { "subcaseMS": 91.040 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:must_use:*": { "subcaseMS": 39.041 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:return_type:*": { "subcaseMS": 549.172 }, + "webgpu:shader,validation,expression,call,builtin,subgroupMul:stage:*": { "subcaseMS": 4.489 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:data_type:*": { "subcaseMS": 115.093 }, + 
"webgpu:shader,validation,expression,call,builtin,subgroupShuffle:early_eval:*": { "subcaseMS": 110.489 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:must_use:*": { "subcaseMS": 7.628 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:param2_type:*": { "subcaseMS": 88.305 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups:*": { "subcaseMS": 102.779 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:requires_subgroups_f16:*": { "subcaseMS": 13.121 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:return_type:*": { "subcaseMS": 1930.309 }, + "webgpu:shader,validation,expression,call,builtin,subgroupShuffle:stage:*": { "subcaseMS": 9.527 }, "webgpu:shader,validation,expression,call,builtin,tan:args:*": { "subcaseMS": 43.560 }, "webgpu:shader,validation,expression,call,builtin,tan:must_use:*": { "subcaseMS": 5.401 }, "webgpu:shader,validation,expression,call,builtin,tan:values:*": { "subcaseMS": 0.350 }, diff --git a/src/webgpu/print_environment.spec.ts b/src/webgpu/print_environment.spec.ts index 9790c770cefa..f3ca67d3a2c0 100644 --- a/src/webgpu/print_environment.spec.ts +++ b/src/webgpu/print_environment.spec.ts @@ -35,9 +35,7 @@ NOTE: If your test runtime elides logs when tests pass, you won't see the prints in the logs. On non-WPT runtimes, it will also print to the console with console.log. WPT disallows console.log and doesn't support logs on passing tests, so this does nothing on WPT.` ) - .fn(async t => { - // MAINTENANCE_TODO: Remove requestAdapterInfo when info is implemented. 
- const adapterInfo = t.adapter.info || (await t.adapter.requestAdapterInfo()); + .fn(t => { const isCompatibilityMode = (t.adapter as unknown as { isCompatibilityMode?: boolean }) .isCompatibilityMode; @@ -51,7 +49,7 @@ WPT disallows console.log and doesn't support logs on passing tests, so this doe adapter: { isFallbackAdapter: t.adapter.isFallbackAdapter, isCompatibilityMode, - info: adapterInfo, + info: t.adapter.info, features: Array.from(t.adapter.features), limits: t.adapter.limits, }, diff --git a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts index 42d8d09ff569..f65bb951bf25 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/smoothstep.spec.ts @@ -7,11 +7,16 @@ T is S or vecN Returns the smooth Hermite interpolation between 0 and 1. Component-wise when T is a vector. For scalar T, the result is t * t * (3.0 - 2.0 * t), where t = clamp((x - low) / (high - low), 0.0, 1.0). + +If low >= high: +* It is a shader-creation error if low and high are const-expressions. +* It is a pipeline-creation error if low and high are override-expressions. `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { GPUTest } from '../../../../../gpu_test.js'; -import { Type } from '../../../../../util/conversion.js'; +import { ScalarValue, Type, Value } from '../../../../../util/conversion.js'; +import { Case } from '../../case.js'; import { allInputSources, onlyConstInputSource, run } from '../../expression.js'; import { abstractFloatBuiltin, builtin } from './builtin.js'; @@ -19,6 +24,13 @@ import { d } from './smoothstep.cache.js'; export const g = makeTestGroup(GPUTest); +// Returns true if `c` is valid for a const evaluation of smoothstep. 
+function validForConst(c: Case): boolean { + const low = (c.input as Value[])[0] as ScalarValue; + const high = (c.input as Value[])[1] as ScalarValue; + return low.value < high.value; +} + g.test('abstract_float') .specURL('https://www.w3.org/TR/WGSL/#float-builtin-functions') .desc(`abstract float tests`) @@ -28,7 +40,7 @@ g.test('abstract_float') .combine('vectorize', [undefined, 2, 3, 4] as const) ) .fn(async t => { - const cases = await d.get('abstract_const'); + const cases = (await d.get('abstract_const')).filter(c => validForConst(c)); await run( t, abstractFloatBuiltin('smoothstep'), @@ -47,7 +59,15 @@ g.test('f32') ) .fn(async t => { const cases = await d.get(t.params.inputSource === 'const' ? 'f32_const' : 'f32_non_const'); - await run(t, builtin('smoothstep'), [Type.f32, Type.f32, Type.f32], Type.f32, t.params, cases); + const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c)); + await run( + t, + builtin('smoothstep'), + [Type.f32, Type.f32, Type.f32], + Type.f32, + t.params, + validCases + ); }); g.test('f16') @@ -61,5 +81,13 @@ g.test('f16') }) .fn(async t => { const cases = await d.get(t.params.inputSource === 'const' ? 
'f16_const' : 'f16_non_const'); - await run(t, builtin('smoothstep'), [Type.f16, Type.f16, Type.f16], Type.f16, t.params, cases); + const validCases = cases.filter(c => t.params.inputSource !== 'const' || validForConst(c)); + await run( + t, + builtin('smoothstep'), + [Type.f16, Type.f16, Type.f16], + Type.f16, + t.params, + validCases + ); }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts new file mode 100644 index 000000000000..04792b2d98c1 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAdd.spec.ts @@ -0,0 +1,364 @@ +export const description = ` +Execution tests for subgroupAdd, subgroupExclusiveAdd, and subgroupInclusiveAdd + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { GPUTest } from '../../../../../gpu_test.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + numberToFloatBits, + floatBitsToNumber, + kFloat32Format, + kFloat16Format, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { FP } from '../../../../../util/floating_point.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(GPUTest); + +const kIdentity = 0; + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +const kOperations = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const; + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point addition. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. 
+These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + 'subgroupAdd', + t.params.type, + kIdentity, + t.params.type === 'f16' ? FP.f16.additionInterval : FP.f32.additionInterval + ); + }); + +/** + * Checks subgroup additions + * + * Expected results: + * - subgroupAdd: each invocation should have result equal to real subgroup size + * - subgroupExclusiveAdd: each invocation should have result equal to its subgroup invocation id + * - subgroupInclusiveAdd: each invocation should be equal to the result of subgroupExclusiveAdd plus the fill value + * @param metadata An array containing actual subgroup size per invocation followed by + * subgroup invocation id per invocation + * @param output An array of additions + * @param type The data type + * @param operation Type of addition + * @param expectedfillValue The original value used to fill the test array + */ +function checkAddition( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd', + expectedfillValue: number +): undefined | Error { + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarTy = scalarTypeOf(type); + const expectedOffset = operation === 'subgroupAdd' ? 
0 : metadata.length / 2; + for (let i = 0; i < metadata.length / 2; i++) { + let expected = metadata[i + expectedOffset]; + if (operation === 'subgroupInclusiveAdd') { + expected += expectedfillValue; + } + + for (let j = 0; j < numEles; j++) { + let idx = i * numEles + j; + const isOdd = idx & 0x1; + if (scalarTy === Type.f16) { + idx = Math.floor(idx / 2); + } + let val = output[idx]; + if (scalarTy === Type.f32) { + val = floatBitsToNumber(val, kFloat32Format); + } else if (scalarTy === Type.f16) { + if (isOdd) { + val = val >> 16; + } + val = floatBitsToNumber(val & 0xffff, kFloat16Format); + } + if (expected !== val) { + return new Error(`Invocation ${i}, component ${j}: incorrect result +- expected: ${expected} +- got: ${val}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc( + `Tests subgroup addition for valid data types + +Tests a simple addition of all 1 values. +Reductions expect result to be equal to actual subgroup size. +Exclusive scans expect result to be equal to subgroup invocation id. + +TODO: support vec3 types. 
+ ` + ) + .params(u => + u + .combine('type', keysOf(kDataTypes)) + .filter(t => { + const type = kDataTypes[t.type]; + if (type instanceof VectorType) { + return type.width !== 3; + } + return true; + }) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('operation', kOperations) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const type = kDataTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarType = scalarTypeOf(type); + let enables = 'enable subgroups;\n'; + if (type.requiresF16()) { + enables += 'enable f16;\nenable subgroups_f16;\n'; + } + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +${enables} + +@group(0) @binding(0) +var inputs : array<${type.toString()}>; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var size = countOneBits(ballot.x); + size += countOneBits(ballot.y); + size += countOneBits(ballot.z); + size += countOneBits(ballot.w); + metadata.subgroup_size[lid] = size; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + outputs[lid] = ${t.params.operation}(inputs[lid]); +}`; + const expectedFillValue = 1; + let fillValue = expectedFillValue; + let numUints = wgThreads * numEles; + if (scalarType === Type.f32) { + fillValue = numberToFloatBits(1, kFloat32Format); + } else if (scalarType === Type.f16) { + const f16 = numberToFloatBits(1, kFloat16Format); + fillValue = f16 | (f16 << 16); + numUints = Math.ceil(numUints / 2); + } + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUints, + new Uint32Array([...iterRange(numUints, x => fillValue)]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkAddition(metadata, output, type, t.params.operation, expectedFillValue); + } + ); + }); + +g.test('fragment').unimplemented(); + +/** + * Performs correctness checking for predicated additions + * + * Assumes the shader performs a predicated subgroup addition with the + * subgroup_invocation_id as the data. + * + * @param metadata An array containing subgroup sizes and subgroup invocation ids + * @param output An array containing the output results + * @param operation The type of addition + * @param filter A functor that mirrors the predication in the shader + */ +function checkPredicatedAddition( + metadata: Uint32Array, + output: Uint32Array, + operation: 'subgroupAdd' | 'subgroupExclusiveAdd' | 'subgroupInclusiveAdd', + filter: (id: number, size: number) => boolean +): Error | undefined { + for (let i = 0; i < output.length; i++) { + const size = metadata[i]; + const id = metadata[output.length + i]; + let expected = 0; + if (filter(id, size)) { + const bound = + operation === 'subgroupInclusiveAdd' ? id + 1 : operation === 'subgroupAdd' ? 
size : id; + for (let j = 0; j < bound; j++) { + if (filter(j, size)) { + expected += j; + } + } + } else { + expected = 999; + } + if (expected !== output[i]) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expected} +- got: ${output[i]}`); + } + } + return undefined; +} + +g.test('compute,split') + .desc('Tests that only active invocations contribute to the operation') + .params(u => + u + .combine('case', keysOf(kPredicateCases)) + .beginSubcases() + .combine('operation', kOperations) + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.case]; + const outputUintsPerElement = 1; + const inputData = new Uint32Array([0]); // no input data + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + _ = input[0]; + + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var subgroupSize = countOneBits(ballot.x); + subgroupSize += countOneBits(ballot.y); + subgroupSize += countOneBits(ballot.z); + subgroupSize += countOneBits(ballot.w); + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.operation}(id); + } else { + return; + } +}`; + + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + outputUintsPerElement, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkPredicatedAddition(metadata, output, t.params.operation, testcase.filter); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts new file mode 100644 index 000000000000..0aa461c4a578 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAll.spec.ts @@ -0,0 +1,390 @@ +export const description = ` +Execution tests for subgroupAll. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + kFramebufferSizes, + runComputeTest, + runFragmentTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; + +/** + * Generate input data for testing. 
+ * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generate all 1s except for a zero randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = x % bound; + return bounded === index ? 0 : 1; + } + return prng.uniformInt(2); + }), + ]); +} + +/** + * Checks the result of a subgroupAll operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. + * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAll + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAll( + metadata: Uint32Array, // subgroup sizes followed by subgroup invocation ids (passed to filter) + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results.
+ const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 1; + v &= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAll`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. 
+ outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. 
+ outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAll(bool(inputs[lid]))); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAll(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAll results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAll( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 1; + // Input is indexed by linearized texel position (row * width + col).
+ v &= input[row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests subgroupAll in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs); + + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, but avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + var subgroup_id = linear + 1; + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out possible helper invocations. 
+ let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(1u, inputs[linear], in_range); + + let res = select(0u, 1u, subgroupAll(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAll( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +// Using subgroup operations in control with fragment shaders +// quickly leads to unportable behavior. +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts new file mode 100644 index 000000000000..5d5b9de11420 --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupAny.spec.ts @@ -0,0 +1,390 @@ +export const description = ` +Execution tests for subgroupAny. + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. Tests should avoid assuming there is. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +import { + kWGSizes, + kPredicateCases, + SubgroupTest, + kDataSentinel, + runComputeTest, + runFragmentTest, + kFramebufferSizes, +} from './subgroup_util.js'; + +export const g = makeTestGroup(SubgroupTest); + +const kNumCases = 15; + +/** + * Generate input data for testing. + * + * Data is generated in the following categories: + * Seed 0 generates all 0 data + * Seed 1 generates all 1 data + * Seeds 2-9 generates all 0s except for a one randomly once per 32 elements + * Seeds 10+ generate all random data + * @param seed The seed for the PRNG + * @param num The number of data items to generate + */ +function generateInputData(seed: number, num: number): Uint32Array { + const prng = new PRNG(seed); + + const bound = Math.min(num, 32); + const index = prng.uniformInt(bound); + + return new Uint32Array([ + ...iterRange(num, x => { + if (seed === 0) { + return 0; + } else if (seed === 1) { + return 1; + } else if (seed < 10) { + const bounded = x % bound; + return bounded === index ? 1 : 0; + } + return prng.uniformInt(2); + }), + ]); +} + +/** + * Checks the result of a subgroupAny operation + * + * Since subgroup size depends on the pipeline compile, we calculate the expected + * results after execution. The shader generates a subgroup id and records it for + * each invocation. The check first calculates the expected result for each subgroup + * and then compares to the actual result for each invocation. The filter functor + * ensures only the correct invocations contribute to the calculation. 
+ * @param metadata An array of uints: + * * first half containing subgroup sizes (from builtin value) + * * second half subgroup invocation id + * @param output An array of uints containing: + * * first half is the outputs of subgroupAny + * * second half is a generated subgroup id + * @param numInvs Number of invocations executed + * @param input The input data (equal size to output) + * @param filter A functor to filter active invocations + */ +function checkAny( + metadata: Uint32Array, // subgroup sizes followed by subgroup invocation ids (passed to filter) + output: Uint32Array, + numInvs: number, + input: Uint32Array, + filter: (id: number, size: number) => boolean +): Error | undefined { + // First, generate expected results. + const expected = new Map(); + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + if (!filter(id, size)) { + continue; + } + const subgroup_id = output[numInvs + inv]; + let v = expected.get(subgroup_id) ?? 0; + v |= input[inv]; + expected.set(subgroup_id, v); + } + + // Second, check against actual results. + for (let inv = 0; inv < numInvs; inv++) { + const size = metadata[inv]; + const id = metadata[inv + numInvs]; + const res = output[inv]; + if (filter(id, size)) { + const subgroup_id = output[numInvs + inv]; + const expected_v = expected.get(subgroup_id) ??
0; + if (expected_v !== res) { + return new Error(`Invocation ${inv}: +- expected: ${expected_v} +- got: ${res}`); + } + } else { + if (res !== kDataSentinel) { + return new Error(`Invocation ${inv} unexpected write: +- subgroup invocation id: ${id} +- subgroup size: ${size}`); + } + } + } + + return undefined; +} + +g.test('compute,all_active') + .desc(`Test compute subgroupAny`) + .params(u => + u + .combine('wgSize', kWGSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size: array, + subgroup_invocation_id: array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + metadata.subgroup_invocation_id[lid] = id; + + // Record a representative subgroup id. 
+ outputs[lid + ${wgThreads}] = subgroupBroadcastFirst(lid); + + let res = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + outputs[lid] = res; +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, (id: number, size: number) => { + return true; + }); + } + ); + }); + +g.test('compute,split') + .desc('Test that only active invocation participate') + .params(u => + u + .combine('predicate', keysOf(kPredicateCases)) + .beginSubcases() + .combine('wgSize', kWGSizes) + .combine('case', [...iterRange(kNumCases, x => x)]) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.predicate]; + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) subgroupSize : u32, +) { + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. + metadata.subgroup_invocation_id[lid] = id; + + // Record a generated subgroup id. 
+ outputs[${wgThreads} + lid] = subgroupBroadcastFirst(lid); + + if ${testcase.cond} { + outputs[lid] = select(0u, 1u, subgroupAny(bool(inputs[lid]))); + } else { + return; + } +}`; + + const inputData = generateInputData(t.params.case, wgThreads); + + const uintsPerOutput = 2; + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + uintsPerOutput, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkAny(metadata, output, wgThreads, inputData, testcase.filter); + } + ); + }); + +/** + * Checks subgroupAny results from a fragment shader. + * + * @param data Framebuffer output + * * component 0 is result + * * component 1 is generated subgroup id + * @param input An array of input data + * @param format The framebuffer format + * @param width Framebuffer width + * @param height Framebuffer height + */ +function checkFragmentAny( + data: Uint32Array, + input: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // 256 minimum comes from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 1) / blockWidth / blockHeight / 4; + + // Iteration skips last row and column to avoid helper invocations because it is not + // guaranteed whether or not they participate in the subgroup operation. + const expected = new Map(); + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + return new Error(`Internal error: helper invocation at (${col}, ${row})`); + } + + let v = expected.get(subgroup_id) ?? 0; + // Input is indexed by linearized texel position (row * width + col).
+ v |= input[row * width + col]; + expected.set(subgroup_id, v); + } + } + + for (let row = 0; row < height - 1; row++) { + for (let col = 0; col < width - 1; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const res = data[offset]; + const subgroup_id = data[offset + 1]; + + if (subgroup_id === 0) { + // Inactive in the fragment. + continue; + } + + const expected_v = expected.get(subgroup_id) ?? 0; + if (expected_v !== res) { + return new Error(`Row ${row}, col ${col}: incorrect results: +- expected: ${expected_v} +- got: ${res}`); + } + } + } + + return undefined; +} + +g.test('fragment,all_active') + .desc('Tests subgroupAny in fragment shaders') + .params(u => + u + .combine('size', kFramebufferSizes) + .beginSubcases() + .combine('case', [...iterRange(kNumCases, x => x)]) + .combineWithParams([{ format: 'rg32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const numInputs = t.params.size[0] * t.params.size[1]; + const inputData = generateInputData(t.params.case, numInputs); + + const fsShader = ` +enable subgroups; + +@group(0) @binding(0) +var inputs : array; + +@fragment +fn main( + @builtin(position) pos : vec4f, +) -> @location(0) vec2u { + // Generate a subgroup id based on linearized position, but avoid 0. + let linear = u32(pos.x) + u32(pos.y) * ${t.params.size[0]}; + var subgroup_id = linear + 1; + subgroup_id = subgroupBroadcastFirst(subgroup_id); + + // Filter out possible helper invocations. 
+ let x_in_range = u32(pos.x) < (${t.params.size[0]} - 1); + let y_in_range = u32(pos.y) < (${t.params.size[1]} - 1); + let in_range = x_in_range && y_in_range; + let input = select(0u, inputs[linear], in_range); + + let res = select(0u, 1u, subgroupAny(bool(input))); + return vec2u(res, subgroup_id); +}`; + + await runFragmentTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + inputData, + (data: Uint32Array) => { + return checkFragmentAny( + data, + inputData, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +// Using subgroup operations in control with fragment shaders +// quickly leads to unportable behavior. +g.test('fragment,split').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts index b2fa9e46ec7a..75fe27e8cb5d 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupBroadcast.spec.ts @@ -318,6 +318,4 @@ fn main(@builtin(subgroup_invocation_id) id : u32, t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expect)); }); -g.test('dynamically_uniform_id').unimplemented(); - g.test('fragment').unimplemented(); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts new file mode 100644 index 000000000000..d45c023cd17a --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroupMul.spec.ts @@ -0,0 +1,387 @@ +export const description = ` +Execution tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul + +Note: There is a lack of portability for non-uniform execution so these tests +restrict themselves to uniform control flow. +Note: There is no guaranteed mapping between subgroup_invocation_id and +local_invocation_index. 
Tests should avoid assuming there is. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { iterRange } from '../../../../../../common/util/util.js'; +import { GPUTest } from '../../../../../gpu_test.js'; +import { + kConcreteNumericScalarsAndVectors, + Type, + VectorType, + numberToFloatBits, + floatBitsToNumber, + kFloat32Format, + kFloat16Format, + scalarTypeOf, +} from '../../../../../util/conversion.js'; +import { FP } from '../../../../../util/floating_point.js'; + +import { + kNumCases, + kStride, + kWGSizes, + kPredicateCases, + runAccuracyTest, + runComputeTest, +} from './subgroup_util.js'; + +export const g = makeTestGroup(GPUTest); + +const kIdentity = 1; + +const kDataTypes = objectsToRecord(kConcreteNumericScalarsAndVectors); + +const kOperations = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const; + +g.test('fp_accuracy') + .desc( + `Tests the accuracy of floating-point multiplication. + +The order of operations is implementation defined, most threads are filled with +the identity value and two receive random values. +Subgroup sizes are not known ahead of time so some cases may not perform any +interesting operations. The test biases towards checking subgroup sizes under 64. 
+These tests only check two values in order to reuse more of the existing infrastructure +and limit the number of permutations needed to calculate the final result.` + ) + .params(u => + u + .combine('case', [...iterRange(kNumCases, x => x)]) + .combine('type', ['f32', 'f16'] as const) + .combine('wgSize', [ + [kStride, 1, 1], + [kStride / 2, 2, 1], + ] as const) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + if (t.params.type === 'f16') { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + await runAccuracyTest( + t, + t.params.case, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + 'subgroupMul', + t.params.type, + kIdentity, + t.params.type === 'f16' ? FP.f16.multiplicationInterval : FP.f32.multiplicationInterval + ); + }); + +/** + * Checks subgroup multiplications. + * + * Expected results: + * - subgroupMul: each invocation should have result equal to 2 to the real subgroup size + * - subgroupExclusiveMul: each invocation should have result equal to 2 to its subgroup invocation id + * - subgroupInclusiveMul: each invocation should be equal to subgroupExclusiveMul result multiplied by the fill value + * @param metadata An array containing actual subgroup size per invocation followed by + * subgroup invocation id per invocation + * @param output An array of multiplications + * @param type The data type + * @param operation Type of multiplication + * @param expectedFillValue The original value used to fill the test array + */ +function checkMultiplication( + metadata: Uint32Array, + output: Uint32Array, + type: Type, + operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul', + expectedfillValue: number +): undefined | Error { + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarTy = scalarTypeOf(type); + const expectedOffset = 
operation === 'subgroupMul' ? 0 : metadata.length / 2; + for (let i = 0; i < metadata.length / 2; i++) { + let expected = Math.pow(2, metadata[i + expectedOffset]); + if (operation === 'subgroupInclusiveMul') { + expected *= expectedfillValue; + } + for (let j = 0; j < numEles; j++) { + let idx = i * numEles + j; + const isOdd = idx & 0x1; + if (scalarTy === Type.f16) { + idx = Math.floor(idx / 2); + } + let val = output[idx]; + if (scalarTy === Type.f32) { + val = floatBitsToNumber(val, kFloat32Format); + } else if (scalarTy === Type.f16) { + if (isOdd) { + val = val >> 16; + } + val = floatBitsToNumber(val & 0xffff, kFloat16Format); + } + if (expected !== val) { + return new Error(`Invocation ${i}, component ${j}: incorrect result +- expected: ${expected} +- got: ${val}`); + } + } + } + + return undefined; +} + +g.test('data_types') + .desc( + `Tests subgroup multiplication for valid data types + +Tests a simple multiplication of all 2 values. +Reductions expect result to be equal to actual subgroup size. +Exclusice scans expect result to be equal subgroup invocation id. + +TODO: support vec3 types. + ` + ) + .params(u => + u + .combine('type', keysOf(kDataTypes)) + .filter(t => { + const type = kDataTypes[t.type]; + if (type instanceof VectorType) { + return type.width !== 3; + } + return true; + }) + .beginSubcases() + // Workgroup sizes are kept < 16 to avoid overflows. + // Other tests cover that the full subgroup will contribute. 
+ .combine('wgSize', [ + [4, 1, 1], + [8, 1, 1], + [1, 4, 1], + [1, 8, 1], + [1, 1, 4], + [1, 1, 8], + [2, 2, 2], + [4, 2, 1], + [4, 1, 2], + [2, 4, 1], + [2, 1, 4], + [1, 4, 2], + [1, 2, 4], + [3, 3, 1], + [3, 1, 3], + [1, 3, 3], + ] as const) + .combine('operation', kOperations) + ) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['subgroups' as GPUFeatureName]; + const type = kDataTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(async t => { + const type = kDataTypes[t.params.type]; + let numEles = 1; + if (type instanceof VectorType) { + numEles = type.width; + } + const scalarType = scalarTypeOf(type); + let enables = 'enable subgroups;\n'; + if (type.requiresF16()) { + enables += 'enable f16;\nenable subgroups_f16;\n'; + } + + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +${enables} + +@group(0) @binding(0) +var inputs : array<${type.toString()}>; + +@group(0) @binding(1) +var outputs : array<${type.toString()}>; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var size = countOneBits(ballot.x); + size += countOneBits(ballot.y); + size += countOneBits(ballot.z); + size += countOneBits(ballot.w); + metadata.subgroup_size[lid] = size; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + outputs[lid] = ${t.params.operation}(inputs[lid]); +}`; + + const expectedfillValue = 2; + let fillValue = expectedfillValue; + let numUints = wgThreads * numEles; + if (scalarType === Type.f32) { + fillValue = numberToFloatBits(fillValue, kFloat32Format); + } else if (scalarType === Type.f16) { + const f16 = numberToFloatBits(fillValue, kFloat16Format); + fillValue = f16 | (f16 << 16); + numUints = Math.ceil(numUints / 2); + } + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + numUints, + new Uint32Array([...iterRange(numUints, x => fillValue)]), + (metadata: Uint32Array, output: Uint32Array) => { + return checkMultiplication(metadata, output, type, t.params.operation, expectedfillValue); + } + ); + }); + +g.test('fragment').unimplemented(); + +/** + * Performs correctness checking for predicated multiplications + * + * Assumes the shader performs a predicated subgroup multiplication with the + * subgroup_invocation_id as the data. + * + * @param metadata An array containing subgroup sizes and subgroup invocation ids + * @param output An array containing the output results + * @param operation The type of multiplication + * @param filter A functor that mirrors the predication in the shader + */ +function checkPredicatedMultiplication( + metadata: Uint32Array, + output: Uint32Array, + operation: 'subgroupMul' | 'subgroupExclusiveMul' | 'subgroupInclusiveMul', + filter: (id: number, size: number) => boolean +): Error | undefined { + for (let i = 0; i < output.length; i++) { + const size = metadata[i]; + const id = metadata[output.length + i]; + let expected = 1; + if (filter(id, size)) { + // This function replicates the behavior in the shader. + const valueModFun = function (id: number) { + return (id % 4) + 1; + }; + const bound = + operation === 'subgroupInclusiveMul' ? id + 1 : operation === 'subgroupMul' ? 
size : id; + for (let j = 0; j < bound; j++) { + if (filter(j, size)) { + expected *= valueModFun(j); + } + } + } else { + expected = 999; + } + if (expected !== output[i]) { + return new Error(`Invocation ${i}: incorrect result +- expected: ${expected} +- got: ${output[i]}`); + } + } + return undefined; +} + +g.test('compute,split') + .desc('Tests that only active invocations contribute to the operation') + .params(u => + u + .combine('case', keysOf(kPredicateCases)) + .beginSubcases() + .combine('operation', kOperations) + .combine('wgSize', kWGSizes) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const testcase = kPredicateCases[t.params.case]; + const outputUintsPerElement = 1; + const inputData = new Uint32Array([0]); // no input data + const wgThreads = t.params.wgSize[0] * t.params.wgSize[1] * t.params.wgSize[2]; + + const wgsl = ` +enable subgroups; + +@group(0) @binding(0) +var input : array; + +@group(0) @binding(1) +var outputs : array; + +struct Metadata { + subgroup_size : array, + subgroup_invocation_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${t.params.wgSize[0]}, ${t.params.wgSize[1]}, ${t.params.wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, + @builtin(subgroup_invocation_id) id : u32, +) { + _ = input[0]; + + // Record the actual subgroup size for this invocation. + // Note: subgroup_size builtin is always a power-of-2 and might be larger + // if the subgroup is not full. + let ballot = subgroupBallot(true); + var subgroupSize = countOneBits(ballot.x); + subgroupSize += countOneBits(ballot.y); + subgroupSize += countOneBits(ballot.z); + subgroupSize += countOneBits(ballot.w); + metadata.subgroup_size[lid] = subgroupSize; + + // Record subgroup invocation id for this invocation. 
+ metadata.subgroup_invocation_id[lid] = id; + + if ${testcase.cond} { + outputs[lid] = ${t.params.operation}((id % 4) + 1); + } else { + return; + } +}`; + + await runComputeTest( + t, + wgsl, + [t.params.wgSize[0], t.params.wgSize[1], t.params.wgSize[2]], + outputUintsPerElement, + inputData, + (metadata: Uint32Array, output: Uint32Array) => { + return checkPredicatedMultiplication(metadata, output, t.params.operation, testcase.filter); + } + ); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts new file mode 100644 index 000000000000..9d147de1968b --- /dev/null +++ b/src/webgpu/shader/execution/expression/call/builtin/subgroup_util.ts @@ -0,0 +1,554 @@ +import { assert, iterRange } from '../../../../../../common/util/util.js'; +import { Float16Array } from '../../../../../../external/petamoriken/float16/float16.js'; +import { kTextureFormatInfo } from '../../../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../../../gpu_test.js'; +import { FPInterval } from '../../../../../util/floating_point.js'; +import { sparseScalarF16Range, sparseScalarF32Range, align } from '../../../../../util/math.js'; +import { PRNG } from '../../../../../util/prng.js'; + +export class SubgroupTest extends TextureTestMixin(GPUTest) {} + +export const kNumCases = 1000; +export const kStride = 128; + +export const kWGSizes = [ + [4, 1, 1], + [8, 1, 1], + [16, 1, 1], + [32, 1, 1], + [64, 1, 1], + [128, 1, 1], + [256, 1, 1], + [1, 4, 1], + [1, 8, 1], + [1, 16, 1], + [1, 32, 1], + [1, 64, 1], + [1, 128, 1], + [1, 256, 1], + [1, 1, 4], + [1, 1, 8], + [1, 1, 16], + [1, 1, 32], + [1, 1, 64], + [3, 3, 3], + [4, 4, 4], + [16, 16, 1], + [16, 1, 16], + [1, 16, 16], + [15, 3, 3], + [3, 15, 3], + [3, 3, 15], +] as const; + +export const kPredicateCases = { + every_even: { + cond: `id % 2 == 0`, + filter: (id: number, size: number) => { + return id % 2 === 0; + }, + }, 
+ every_odd: { + cond: `id % 2 == 1`, + filter: (id: number, size: number) => { + return id % 2 === 1; + }, + }, + lower_half: { + cond: `id < subgroupSize / 2`, + filter: (id: number, size: number) => { + return id < Math.floor(size / 2); + }, + }, + upper_half: { + cond: `id >= subgroupSize / 2`, + filter: (id: number, size: number) => { + return id >= Math.floor(size / 2); + }, + }, + first_two: { + cond: `id == 0 || id == 1`, + filter: (id: number) => { + return id === 0 || id === 1; + }, + }, +}; + +/** + * Check the accuracy of the reduction operation. + * + * @param metadata An array containing subgroup ids for each invocation + * @param output An array containing the results of the reduction for each invocation + * @param indices An array of two values containing the indices of the interesting values in the input + * @param values An array of two values containing the interesting values in the input + * @param identity The identity for the operation + * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation + */ +function checkAccuracy( + metadata: Uint32Array, + output: Float32Array | Float16Array, + indices: number[], + values: number[], + identity: number, + intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval +): undefined | Error { + const subgroupIdIdx1 = metadata[indices[0]]; + const subgroupIdIdx2 = metadata[indices[1]]; + for (let i = 0; i < output.length; i++) { + const subgroupId = metadata[i]; + + const v1 = subgroupId === subgroupIdIdx1 ? values[0] : identity; + const v2 = subgroupId === subgroupIdIdx2 ? values[1] : identity; + const interval = intervalGen(v1, v2); + if (!interval.contains(output[i])) { + return new Error(`Invocation ${i}, subgroup id ${subgroupId}: incorrect result +- interval: ${interval.toString()} +- output: ${output[i]}`); + } + } + + return undefined; +} + +/** + * Run a floating-point accuracy subgroup test. 
+ * + * @param t The base test + * @param seed A seed for the PRNG + * @param wgSize An array for the workgroup size + * @param operation The subgroup operation + * @param type The type (f16 or f32) + * @param identity The identity for the operation + * @param intervalGen A functor to generate an appropriate FPInterval for a binary operation + */ +export async function runAccuracyTest( + t: GPUTest, + seed: number, + wgSize: number[], + operation: string, + type: 'f16' | 'f32', + identity: number, + intervalGen: (x: number | FPInterval, y: number | FPInterval) => FPInterval +) { + assert(seed < kNumCases); + const prng = new PRNG(seed); + + // Compatibility mode has lower workgroup limits. + const wgThreads = wgSize[0] * wgSize[1] * wgSize[2]; + const { + maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ, + } = t.device.limits; + t.skipIf( + maxComputeInvocationsPerWorkgroup < wgThreads || + maxComputeWorkgroupSizeX < wgSize[0] || + maxComputeWorkgroupSizeY < wgSize[1] || + maxComputeWorkgroupSizeZ < wgSize[2], + 'Workgroup size too large' + ); + + // Bias half the cases to lower indices since most subgroup sizes are <= 64. + let indexLimit = kStride; + if (seed < kNumCases / 4) { + indexLimit = 16; + } else if (seed < kNumCases / 2) { + indexLimit = 64; + } + + // Ensure two distinct indices are picked. + const idx1 = prng.uniformInt(indexLimit); + let idx2 = prng.uniformInt(indexLimit - 1); + if (idx1 === idx2) { + idx2++; + } + assert(idx2 < indexLimit); + + // Select two random values. + const range = type === 'f16' ? sparseScalarF16Range() : sparseScalarF32Range(); + const numVals = range.length; + const val1 = range[prng.uniformInt(numVals)]; + const val2 = range[prng.uniformInt(numVals)]; + + const extraEnables = type === 'f16' ? 
`enable f16;\nenable subgroups_f16;` : ``; + const wgsl = ` +enable subgroups; +${extraEnables} + +@group(0) @binding(0) +var inputs : array<${type}>; + +@group(0) @binding(1) +var outputs : array<${type}>; + +struct Metadata { + subgroup_id : array, +} + +@group(0) @binding(2) +var metadata : Metadata; + +@compute @workgroup_size(${wgSize[0]}, ${wgSize[1]}, ${wgSize[2]}) +fn main( + @builtin(local_invocation_index) lid : u32, +) { + metadata.subgroup_id[lid] = subgroupBroadcast(lid, 0); + outputs[lid] = ${operation}(inputs[lid]); +}`; + + const inputData = + type === 'f16' + ? new Float16Array([ + ...iterRange(kStride, x => { + if (x === idx1) return val1; + if (x === idx2) return val2; + return identity; + }), + ]) + : new Float32Array([ + ...iterRange(kStride, x => { + if (x === idx1) return val1; + if (x === idx2) return val2; + return identity; + }), + ]); + + const inputBuffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(inputBuffer); + + const outputBuffer = t.makeBufferWithContents( + new Float32Array([...iterRange(kStride, x => 0)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(outputBuffer); + + const numMetadata = kStride; + const metadataBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numMetadata, x => 0)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + entryPoint: 'main', + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: inputBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: outputBuffer, + }, + }, + { + binding: 2, + resource: { + buffer: metadataBuffer, + }, + }, + ], + }); + + const encoder = 
t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numMetadata, + method: 'copy', + }); + const metadata = metadataReadback.data; + + let output: Float16Array | Float32Array; + if (type === 'f16') { + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Float16Array, + typedLength: kStride, + method: 'copy', + }); + output = outputReadback.data; + } else { + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Float32Array, + typedLength: kStride, + method: 'copy', + }); + output = outputReadback.data; + } + + t.expectOK(checkAccuracy(metadata, output, [idx1, idx2], [val1, val2], identity, intervalGen)); +} + +export const kDataSentinel = 999; + +/** + * Runs compute shader subgroup test + * + * The test makes the following assumptions: + * * group(0) binding(0) is a storage buffer for input data + * * group(0) binding(1) is an output storage buffer for outputUintsPerElement * wgSize uints + * * group(0) binding(2) is an output storage buffer for 2 * wgSize uints + * + * @param t The base test + * @param wgsl The shader code + * @param outputUintsPerElement number of uints output per invocation + * @param inputData the input data + * @param checkFunction a functor that takes the output storage buffer data to check result validity + */ +export async function runComputeTest( + t: GPUTest, + wgsl: string, + wgSize: number[], + outputUintsPerElement: number, + inputData: Uint32Array, + checkFunction: (metadata: Uint32Array, output: Uint32Array) => Error | undefined +) { + // Compatibility mode has lower workgroup limits. 
+ const wgThreads = wgSize[0] * wgSize[1] * wgSize[2]; + const { + maxComputeInvocationsPerWorkgroup, + maxComputeWorkgroupSizeX, + maxComputeWorkgroupSizeY, + maxComputeWorkgroupSizeZ, + } = t.device.limits; + t.skipIf( + maxComputeInvocationsPerWorkgroup < wgThreads || + maxComputeWorkgroupSizeX < wgSize[0] || + maxComputeWorkgroupSizeY < wgSize[1] || + maxComputeWorkgroupSizeZ < wgSize[2], + 'Workgroup size too large' + ); + + const inputBuffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(inputBuffer); + + const outputUints = outputUintsPerElement * wgThreads; + const outputBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(outputUints, x => kDataSentinel)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + t.trackForCleanup(outputBuffer); + + const numMetadata = 2 * wgThreads; + const metadataBuffer = t.makeBufferWithContents( + new Uint32Array([...iterRange(numMetadata, x => kDataSentinel)]), + GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE + ); + + const pipeline = t.device.createComputePipeline({ + layout: 'auto', + compute: { + module: t.device.createShaderModule({ + code: wgsl, + }), + }, + }); + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer: inputBuffer, + }, + }, + { + binding: 1, + resource: { + buffer: outputBuffer, + }, + }, + { + binding: 2, + resource: { + buffer: metadataBuffer, + }, + }, + ], + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginComputePass(); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.dispatchWorkgroups(1, 1, 1); + pass.end(); + t.queue.submit([encoder.finish()]); + + const metadataReadback = await t.readGPUBufferRangeTyped(metadataBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: numMetadata, + 
method: 'copy', + }); + const metadata = metadataReadback.data; + + const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: outputUints, + method: 'copy', + }); + const output = outputReadback.data; + + t.expectOK(checkFunction(metadata, output)); +} + +// Minimum size is [3, 3]. +export const kFramebufferSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 3], + [3, 111], + [35, 3], + [3, 35], + [53, 13], + [13, 53], + [3, 3], +] as const; + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen triangle. + * Tests should avoid checking the last row or column to avoid helper + * invocations. Underlying APIs do not consistently guarantee whether + * helper invocations participate in subgroup operations. + * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is input data + * @param width The framebuffer width + * @param height The framebuffer height + * @param inputData The input data + * @param checker A functor to check the framebuffer values + */ +export async function runFragmentTest( + t: SubgroupTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + inputData: Uint32Array | Float32Array | Float16Array, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-2, 4), vec2(-2, -4), vec2(2, 0), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + assert(width >= 3, 'Minimum width is 3'); + assert(height >= 3, 'Minimum height is 3'); + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + 
module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + // 256 minimum arises from image copy requirements. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + inputData, + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); + + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3); + pass.end(); + t.queue.submit([encoder.finish()]); + + const copyBuffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(copyBuffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); +} diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts index 40b331efaba9..fddb65e1420f 100644 --- 
a/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureGather.spec.ts @@ -1,6 +1,8 @@ export const description = ` Execution tests for the 'textureGather' builtin function +- TODO: Test un-encodable formats. + A texture gather operation reads from a 2D, 2D array, cube, or cube array texture, computing a four-component vector as follows: * Find the four texels that would be used in a sampling operation with linear filtering, from mip level 0: - Use the specified coordinate, array index (when present), and offset (when present). @@ -23,11 +25,35 @@ A texture gather operation reads from a 2D, 2D array, cube, or cube array textur `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kCompressedTextureFormats, + kDepthStencilFormats, + kEncodableTextureFormats, +} from '../../../../../format_info.js'; + +import { + appendComponentTypeForFormatToTextureType, + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + isFillable, + kCubeSamplePointMethods, + kSamplePointMethods, + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +const kTestableColorFormats = [...kEncodableTextureFormats, ...kCompressedTextureFormats] as const; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -55,22 +81,78 @@ Parameters: Values outside of this range will result in a shader-creation error. 
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('samplePoints', kSamplePointMethods) + .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('offset', [false, true] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format); + }) + .fn(async t => { + const { format, C, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU, + addressModeV, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + offset, + component: true, + hashInputs: [format, C, samplePoints, addressModeU, addressModeV, minFilter, offset], + }).map(({ coords, component, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + component, + componentType: C === 'i32' ? 'i' : 'u', + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d', format); + const viewDescriptor = {}; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 T: i32, u32, f32 fn textureGather(component: C, t: texture_cube, s: sampler, coords: vec3) -> vec4 @@ -85,15 +167,75 @@ Parameters: * coords: The texture coordinates ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as 
const) - .combine('coords', generateCoordBoundaries(3)) + .combine('samplePoints', kCubeSamplePointMethods) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format); + }) + .fn(async t => { + const { format, C, samplePoints, addressMode, minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + addressModeW: addressMode, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + component: true, + textureBuiltin: 'textureGather', + hashInputs: [format, C, samplePoints, addressMode, minFilter], + }).map(({ coords, component }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = appendComponentTypeForFormatToTextureType('texture_cube', format); + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('sampled_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -122,17 +264,79 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('A', ['i32', 'u32'] as const) + .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('offset', [false, true] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format); + }) + .fn(async t => { + const { format, samplePoints, C, A, addressModeU, addressModeV, minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU, + addressModeV, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + component: true, + hashInputs: [format, samplePoints, C, A, addressModeU, addressModeV, minFilter, offset], + }).map(({ coords, component, arrayIndex, offset }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + offset, + }; + }); + const textureType = appendComponentTypeForFormatToTextureType('texture_2d_array', format); + const viewDescriptor = {}; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('sampled_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -140,8 +344,9 @@ g.test('sampled_array_3d_coords') ` C: i32, u32 T: i32, u32, f32 +A: i32, u32 -fn textureGather(component: C, t: texture_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureGather(component: C, t: texture_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * component: @@ -154,17 +359,79 @@ Parameters: * array_index: The 0-based texture array index ` ) - .paramsSubcasesOnly( - u => - u - .combine('T', ['f32-only', 'i32', 'u32'] as const) - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('format', kTestableColorFormats) + .filter(t => isFillable(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('C', ['i32', 'u32'] as const) + .combine('A', ['i32', 'u32'] as const) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + t.skipIfTextureViewDimensionNotSupported('cube-array'); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format); + }) + .fn(async t => { + const { format, C, A, samplePoints, 
addressMode, minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + addressModeW: addressMode, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + component: true, + textureBuiltin: 'textureGather', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, C, samplePoints, addressMode, minFilter], + }).map(({ coords, component, arrayIndex }) => { + return { + builtin: 'textureGather', + component, + componentType: C === 'i32' ? 'i' : 'u', + arrayIndex, + arrayIndexType: A === 'i32' ? 'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = appendComponentTypeForFormatToTextureType('texture_cube_array', format); + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('depth_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -185,13 +452,68 @@ Parameters: Values outside of this range will result in a shader-creation error. 
` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('offset', [false, true] as const) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, addressModeU, addressModeV, minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU, + addressModeV, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + offset, + hashInputs: [format, samplePoints, addressModeU, addressModeV, minFilter, offset], + }).map(({ coords, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') @@ -205,21 +527,79 @@ Parameters: * coords: The texture coordinates ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, addressMode, minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + addressModeW: addressMode, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGather', + hashInputs: [format, samplePoints, addressMode, minFilter], + }).map(({ coords, component }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGather(t: texture_depth_2d_array, s: sampler, coords: 
vec2, array_index: C) -> vec4 -fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: C, offset: vec2) -> vec4 +fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A) -> vec4 +fn textureGather(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, offset: vec2) -> vec4 Parameters: * t: The depth texture to read from @@ -234,23 +614,86 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('coords', generateCoordBoundaries(2)) - /* array_index not param'd as out-of-bounds is implementation specific */ - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('offset', [false, true] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice(t, t.params.minFilter, t.params.format); + }) + .fn(async t => { + const { format, samplePoints, A, addressModeU, addressModeV, minFilter, offset } = t.params; + + // We want at least 4 blocks or something wide enough for 3 mip levels. 
+ const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + mipLevelCount: 3, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU, + addressModeV, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGather', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + offset, + hashInputs: [format, samplePoints, A, addressModeU, addressModeV, minFilter, offset], + }).map(({ coords, arrayIndex, offset }) => { + return { + builtin: 'textureGather', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('depth_array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegather') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: C) -> vec4 +fn textureGather(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A) -> vec4 Parameters: * t: The depth texture to read from @@ -259,12 +702,73 @@ Parameters: * array_index: The 0-based texture array index ` ) - .paramsSubcasesOnly( - u => - u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('coords', generateCoordBoundaries(3)) - /* array_index not param'd as out-of-bounds is implementation specific */ + .params(u => + u + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, samplePoints, addressMode, minFilter } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + mipLevelCount: 3, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + addressModeW: addressMode, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGather', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, samplePoints, addressMode, minFilter], + }).map(({ coords, arrayIndex }) => { + return { + builtin: 'textureGather', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts index c743883ce849..89891738b0f6 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureGatherCompare.spec.ts @@ -17,20 +17,38 @@ A texture gather compare operation performs a depth comparison on four texels in `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; -import { GPUTest } from '../../../../../gpu_test.js'; +import { kCompareFunctions } from '../../../../../capability_info.js'; +import { + isDepthTextureFormat, + isEncodableTextureFormat, + kDepthStencilFormats, +} from '../../../../../format_info.js'; -import { generateCoordBoundaries, generateOffsets } from './utils.js'; +import { + checkCallResults, + chooseTextureSize, + createTextureWithRandomDataAndGetTexels, + doTextureCalls, + generateSamplePointsCube, + generateTextureBuiltinInputs2D, + kCubeSamplePointMethods, + kSamplePointMethods, + TextureCall, + vec2, + vec3, + WGSLTextureSampleTest, +} from './texture_utils.js'; -export const g = makeTestGroup(GPUTest); +export const g = makeTestGroup(WGSLTextureSampleTest); g.test('array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32) -> vec4 -fn 
textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: C, depth_ref: f32, offset: vec2) -> vec4 +fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32) -> vec4 +fn textureGatherCompare(t: texture_depth_2d_array, s: sampler_comparison, coords: vec2, array_index: A, depth_ref: f32, offset: vec2) -> vec4 Parameters: * t: The depth texture to read from @@ -46,24 +64,88 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4]) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('addressModeU', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('compare', kCompareFunctions) + .combine('offset', [false, true] as const) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureFormatNotSupported(t.params.format); + }) + .fn(async t => { + const { format, samplePoints, A, addressModeU, addressModeV, minFilter, compare, offset } = + t.params; + + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const depthOrArrayLayers = 4; + + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU, + addressModeV, + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGatherCompare', + sampler, + descriptor, + arrayIndex: { num: texture.depthOrArrayLayers, type: A }, + depthRef: true, + offset, + hashInputs: [format, samplePoints, A, addressModeU, addressModeV, minFilter, offset], + }).map(({ coords, arrayIndex, depthRef, offset }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d_array'; + const viewDescriptor = {}; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); g.test('array_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` -C: i32, u32 +A: i32, u32 -fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: C, depth_ref: f32) -> vec4 +fn textureGatherCompare(t: texture_depth_cube_array, s: sampler_comparison, coords: vec3, array_index: A, depth_ref: f32) -> vec4 Parameters: * t: The depth texture to read from @@ -73,17 +155,81 @@ Parameters: * depth_ref: The reference value to compare the sampled depth value against ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('C', ['i32', 'u32'] as const) - .combine('C_value', [-1, 0, 1, 2, 3, 4]) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('A', ['i32', 'u32'] as const) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .beforeAllSubcases(t => { + t.skipIfTextureViewDimensionNotSupported('cube-array'); + }) + .fn(async t => { + const { format, A, samplePoints, addressMode, minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube-array'; + const size = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + addressModeW: addressMode, + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; -g.test('sampled_array_2d_coords') + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + textureBuiltin: 'textureGatherCompare', + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + depthRef: true, + hashInputs: [format, samplePoints, addressMode, minFilter], + }).map(({ coords, depthRef, arrayIndex }) => { + return { + builtin: 'textureGatherCompare', + arrayIndex, + arrayIndexType: A === 'i32' ? 
'i' : 'u', + coordType: 'f', + coords, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube_array'; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); + +g.test('sampled_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` @@ -103,16 +249,72 @@ Parameters: Values outside of this range will result in a shader-creation error. ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(2)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) - .combine('offset', generateOffsets(2)) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. 
+ .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('C', ['i32', 'u32'] as const) + .combine('samplePoints', kSamplePointMethods) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('compare', kCompareFunctions) + .combine('offset', [false, true] as const) ) - .unimplemented(); + .fn(async t => { + const { format, C, samplePoints, addressMode, compare, minFilter, offset } = t.params; + + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 4, format }); + const descriptor: GPUTextureDescriptor = { + format, + size: { width, height }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateTextureBuiltinInputs2D(50, { + method: samplePoints, + textureBuiltin: 'textureGatherCompare', + sampler, + descriptor, + offset, + depthRef: true, + hashInputs: [format, C, samplePoints, addressMode, minFilter, compare, offset], + }).map(({ coords, depthRef, offset }) => { + return { + builtin: 'textureGatherCompare', + coordType: 'f', + coords, + depthRef, + offset, + }; + }); + const textureType = 'texture_depth_2d'; + const viewDescriptor = {}; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); -g.test('sampled_array_3d_coords') +g.test('sampled_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturegathercompare') .desc( ` @@ -125,10 +327,70 @@ Parameters: * depth_ref: The reference value to compare the sampled 
depth value against ` ) - .paramsSubcasesOnly(u => + .params(u => u - .combine('S', ['clamp-to-edge', 'repeat', 'mirror-repeat']) - .combine('coords', generateCoordBoundaries(3)) - .combine('depth_ref', [-1 /* smaller ref */, 0 /* equal ref */, 1 /* larger ref */] as const) + .combine('format', kDepthStencilFormats) + // filter out stencil only formats + .filter(t => isDepthTextureFormat(t.format)) + // MAINTENANCE_TODO: Remove when support for depth24plus, depth24plus-stencil8, and depth32float-stencil8 is added. + .filter(t => isEncodableTextureFormat(t.format)) + .combine('minFilter', ['nearest', 'linear'] as const) + .beginSubcases() + .combine('samplePoints', kCubeSamplePointMethods) + .combine('addressMode', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) + .combine('compare', kCompareFunctions) ) - .unimplemented(); + .fn(async t => { + const { format, samplePoints, addressMode, minFilter, compare } = t.params; + + const viewDimension: GPUTextureViewDimension = 'cube'; + const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const depthOrArrayLayers = 6; + + const descriptor: GPUTextureDescriptor = { + format, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), + size: { width, height, depthOrArrayLayers }, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, + }; + const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); + const sampler: GPUSamplerDescriptor = { + addressModeU: addressMode, + addressModeV: addressMode, + addressModeW: addressMode, + compare, + minFilter, + magFilter: minFilter, + mipmapFilter: minFilter, + }; + + const calls: TextureCall[] = generateSamplePointsCube(50, { + method: samplePoints, + sampler, + descriptor, + depthRef: true, + textureBuiltin: 'textureGatherCompare', + hashInputs: [format, samplePoints, addressMode, minFilter, compare], + }).map(({ coords, depthRef }) => { + return { + builtin: 
'textureGatherCompare', + coordType: 'f', + coords, + depthRef, + }; + }); + const viewDescriptor = { + dimension: viewDimension, + }; + const textureType = 'texture_depth_cube'; + const results = await doTextureCalls(t, texture, viewDescriptor, textureType, sampler, calls); + const res = await checkCallResults( + t, + { texels, descriptor, viewDescriptor }, + textureType, + sampler, + calls, + results + ); + t.expectOK(res); + }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts index 879817ec8ca3..2f42fffeb768 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureLoad.spec.ts @@ -167,8 +167,7 @@ Parameters: .params(u => u .combine('format', kTestableColorFormats) - // MAINTENANCE_TODO: Update createTextureFromTexelViews to support stencil8 and remove this filter. - .filter(t => t.format !== 'stencil8' && !isCompressedFloatTextureFormat(t.format)) + .filter(t => !isCompressedFloatTextureFormat(t.format)) .beginSubcases() .combine('samplePoints', kSamplePointMethods) .combine('C', ['i32', 'u32'] as const) @@ -188,10 +187,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - (canUseAsRenderTarget(format) ? 
GPUTextureUsage.RENDER_ATTACHMENT : 0), + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: maxMipLevelCount({ size }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -422,10 +418,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: maxMipLevelCount({ size }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -478,6 +471,9 @@ Parameters: .combine('C', ['i32', 'u32'] as const) .combine('L', ['i32', 'u32'] as const) ) + .beforeAllSubcases(t => + t.skipIf(typeof VideoFrame === 'undefined', 'VideoFrames are not supported') + ) .fn(async t => { const { samplePoints, C, L } = t.params; @@ -490,6 +486,7 @@ Parameters: size, usage: GPUTextureUsage.COPY_DST, }; + const { texels, videoFrame } = createVideoFrameWithRandomDataAndGetTexels(descriptor.size); const texture = t.device.importExternalTexture({ source: videoFrame }); diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts index ca7ae3d0655c..500376321444 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLayers.spec.ts @@ -51,13 +51,13 @@ Parameters .params(u => u .combine('texture_type', ['texture_2d_array', 'texture_cube_array'] as const) + .combine('view_type', ['full', 'partial'] as const) .beginSubcases() .combine('sampled_type', ['f32', 'i32', 'u32'] as const) - .combine('view_type', ['full', 'partial'] as const) ) .beforeAllSubcases(t => { t.skipIf( - t.isCompatibility && t.params.view === 'partial', + t.isCompatibility && t.params.view_type === 'partial', 
'compatibility mode does not support partial layer views' ); t.skipIf( @@ -110,12 +110,11 @@ Parameters .params(u => u .combine('texture_type', ['texture_depth_2d_array', 'texture_depth_cube_array'] as const) - .beginSubcases() .combine('view_type', ['full', 'partial'] as const) ) .beforeAllSubcases(t => { t.skipIf( - t.isCompatibility && t.params.view === 'partial', + t.isCompatibility && t.params.view_type === 'partial', 'compatibility mode does not support partial layer views' ); t.skipIf( @@ -184,14 +183,20 @@ Parameters .params(u => u .combineWithParams(TexelFormats) + .combine('view_type', ['full', 'partial'] as const) .beginSubcases() .combine('access_mode', ['read', 'write', 'read_write'] as const) .filter( t => t.access_mode !== 'read_write' || kTextureFormatInfo[t.format].color?.readWriteStorage ) - .combine('view_type', ['full', 'partial'] as const) ) - .beforeAllSubcases(t => t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format)) + .beforeAllSubcases(t => { + t.skipIf( + t.isCompatibility && t.params.view_type === 'partial', + 'compatibility mode does not support partial layer views' + ); + t.skipIfTextureFormatNotUsableAsStorageTexture(t.params.format); + }) .fn(t => { const { format, access_mode, view_type } = t.params; diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts index 5610701601cb..471a462504d4 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureNumLevels.spec.ts @@ -88,6 +88,7 @@ Parameters const texture = t.createTextureTracked({ format, dimension, + ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), usage: GPUTextureUsage.TEXTURE_BINDING, size: { width, @@ -157,6 +158,7 @@ Parameters const texture = t.createTextureTracked({ format: 'depth32float', dimension, + ...(t.isCompatibility && { 
textureBindingViewDimension: viewDimension }), usage: GPUTextureUsage.TEXTURE_BINDING, size: { width, diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts index 452c3b4df710..b670c44035f9 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleBaseClampToEdge.spec.ts @@ -61,6 +61,12 @@ Parameters: .combine('addressModeV', ['clamp-to-edge', 'repeat', 'mirror-repeat'] as const) .combine('minFilter', ['nearest', 'linear'] as const) ) + .beforeAllSubcases(t => + t.skipIf( + t.params.textureType === 'texture_external' && typeof VideoFrame === 'undefined', + 'VideoFrames are not supported' + ) + ) .fn(async t => { const { textureType, samplePoints, addressModeU, addressModeV, minFilter } = t.params; diff --git a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts index 729563553260..5c6e99eb9665 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/textureSampleLevel.spec.ts @@ -5,15 +5,6 @@ Must only be used in a fragment shader stage. Must only be invoked in uniform control flow. - TODO: Test un-encodable formats. -- TODO: set mipLevelCount to 3 for cubemaps. See MAINTENANCE_TODO below - - The issue is sampling a corner of a cubemap is undefined. We try to quantize coordinates - so we never get a corner but when sampling smaller mip levels that's more difficult unless we make the textures - larger. Larger is slower. - - Solution 1: Fix the quantization - Solution 2: special case checking cube corners. Expect some value between the color of the 3 corner texels. 
- `; import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; @@ -284,7 +275,7 @@ Parameters: .fn(async t => { const { format, viewDimension, samplePoints, addressMode, minFilter, offset } = t.params; - const [width, height] = chooseTextureSize({ minSize: 8, minBlocks: 2, format, viewDimension }); + const [width, height] = chooseTextureSize({ minSize: 32, minBlocks: 2, format, viewDimension }); const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); const descriptor: GPUTextureDescriptor = { @@ -293,8 +284,7 @@ Parameters: ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), size: { width, height, depthOrArrayLayers }, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, - // MAINTENANCE_TODO: make mipLevelCount always 3 - mipLevelCount: viewDimension === 'cube' ? 1 : 3, + mipLevelCount: 3, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { @@ -396,7 +386,7 @@ Parameters: const viewDimension: GPUTextureViewDimension = 'cube-array'; const size = chooseTextureSize({ - minSize: 8, + minSize: 32, minBlocks: 4, format, viewDimension, @@ -405,8 +395,7 @@ Parameters: format, size, usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, - // MAINTENANCE_TODO: Set this to 3. See above. 
- mipLevelCount: 1, + mipLevelCount: 3, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { @@ -423,8 +412,8 @@ Parameters: sampler, descriptor, mipLevel: { num: texture.mipLevelCount, type: 'f32' }, - arrayIndex: { num: texture.depthOrArrayLayers, type: A }, - hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter], + arrayIndex: { num: texture.depthOrArrayLayers / 6, type: A }, + hashInputs: [format, viewDimension, A, samplePoints, addressMode, minFilter], }).map(({ coords, mipLevel, arrayIndex }) => { return { builtin: 'textureSampleLevel', @@ -456,7 +445,7 @@ g.test('depth_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +L is i32 or u32 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2, level: L) -> f32 fn textureSampleLevel(t: texture_depth_2d, s: sampler, coords: vec2, level: L, offset: vec2) -> f32 @@ -504,10 +493,7 @@ Parameters: format, size: { width, height }, mipLevelCount: 3, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); const sampler: GPUSamplerDescriptor = { @@ -553,7 +539,8 @@ g.test('depth_array_2d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +A is i32 or u32 +L is i32 or u32 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, level: L) -> f32 fn textureSampleLevel(t: texture_depth_2d_array, s: sampler, coords: vec2, array_index: A, level: L, offset: vec2) -> f32 @@ -603,10 +590,7 @@ Parameters: format, size: { width, height }, mipLevelCount: 3, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: 
GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, ...(t.isCompatibility && { textureBindingViewDimension: '2d-array' }), }; const { texels, texture } = await createTextureWithRandomDataAndGetTexels(t, descriptor); @@ -656,7 +640,8 @@ g.test('depth_3d_coords') .specURL('https://www.w3.org/TR/WGSL/#texturesamplelevel') .desc( ` -C is i32 or u32 +L is i32 or u32 +A is i32 or u32 fn textureSampleLevel(t: texture_depth_cube, s: sampler, coords: vec3, level: L) -> f32 fn textureSampleLevel(t: texture_depth_cube_array, s: sampler, coords: vec3, array_index: A, level: L) -> f32 @@ -704,7 +689,7 @@ Parameters: const { format, viewDimension, samplePoints, A, L, addressMode, minFilter } = t.params; const size = chooseTextureSize({ - minSize: 8, + minSize: 32, minBlocks: 4, format, viewDimension, @@ -712,10 +697,7 @@ Parameters: const descriptor: GPUTextureDescriptor = { format, size, - usage: - GPUTextureUsage.COPY_DST | - GPUTextureUsage.TEXTURE_BINDING | - GPUTextureUsage.RENDER_ATTACHMENT, + usage: GPUTextureUsage.COPY_DST | GPUTextureUsage.TEXTURE_BINDING, mipLevelCount: 3, ...(t.isCompatibility && { textureBindingViewDimension: viewDimension }), }; @@ -733,8 +715,8 @@ Parameters: method: samplePoints, sampler, descriptor, - mipLevel: { num: texture.mipLevelCount, type: L }, - arrayIndex: A ? { num: texture.depthOrArrayLayers, type: A } : undefined, + mipLevel: { num: texture.mipLevelCount - 1, type: L }, + arrayIndex: A ? 
{ num: texture.depthOrArrayLayers / 6, type: A } : undefined, hashInputs: [format, viewDimension, samplePoints, addressMode, minFilter], }).map(({ coords, mipLevel, arrayIndex }) => { return { diff --git a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts index e997833a137f..f5e6b55dd23c 100644 --- a/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts +++ b/src/webgpu/shader/execution/expression/call/builtin/texture_utils.ts @@ -1,10 +1,11 @@ -import { keysOf } from '../../../../../../common/util/data_tables.js'; import { assert, range, unreachable } from '../../../../../../common/util/util.js'; import { EncodableTextureFormat, isCompressedFloatTextureFormat, isCompressedTextureFormat, isDepthOrStencilTextureFormat, + isDepthTextureFormat, + isStencilTextureFormat, kEncodableTextureFormats, kTextureFormatInfo, } from '../../../../../format_info.js'; @@ -77,16 +78,52 @@ export function getTextureTypeForTextureViewDimension(viewDimension: GPUTextureV } } +const is32Float = (format: GPUTextureFormat) => + format === 'r32float' || format === 'rg32float' || format === 'rgba32float'; + +/** + * Skips a subcase if the filter === 'linear' and the format is type + * 'unfilterable-float' and we cannot enable filtering. 
+ */ +export function skipIfNeedsFilteringAndIsUnfilterableOrSelectDevice( + t: GPUTestSubcaseBatchState, + filter: GPUFilterMode, + format: GPUTextureFormat +) { + const features = new Set(); + features.add(kTextureFormatInfo[format].feature); + + if (filter === 'linear') { + t.skipIf(isDepthTextureFormat(format), 'depth texture are unfilterable'); + + const type = kTextureFormatInfo[format].color?.type; + if (type === 'unfilterable-float') { + assert(is32Float(format)); + features.add('float32-filterable'); + } + } + + if (features.size > 0) { + t.selectDeviceOrSkipTestCase(Array.from(features)); + } +} + +/** + * Returns if a texture format can be filled with random data. + */ +export function isFillable(format: GPUTextureFormat) { + // We can't easily put random bytes into compressed textures if they are float formats + // since we want the range to be +/- 1000 and not +/- infinity or NaN. + return !isCompressedTextureFormat(format) || !format.endsWith('float'); +} + /** * Returns if a texture format can potentially be filtered and can be filled with random data. */ export function isPotentiallyFilterableAndFillable(format: GPUTextureFormat) { const type = kTextureFormatInfo[format].color?.type; const canPotentiallyFilter = type === 'float' || type === 'unfilterable-float'; - // We can't easily put random bytes into compressed textures if they are float formats - // since we want the range to be +/- 1000 and not +/- infinity or NaN. 
- const isFillable = !isCompressedTextureFormat(format) || !format.endsWith('float'); - return canPotentiallyFilter && isFillable; + return canPotentiallyFilter && isFillable(format); } /** @@ -105,6 +142,156 @@ export function skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable( } } +async function queryMipGradientValuesForDevice(t: GPUTest) { + const { device } = t; + const module = device.createShaderModule({ + code: ` + @group(0) @binding(0) var tex: texture_2d; + @group(0) @binding(1) var smp: sampler; + @group(0) @binding(2) var result: array; + + @vertex fn vs(@builtin(vertex_index) vNdx: u32) -> @builtin(position) vec4f { + let pos = array( + vec2f(-1, 3), + vec2f( 3, -1), + vec2f(-1, -1), + ); + return vec4f(pos[vNdx], 0, 1); + } + @fragment fn fs(@builtin(position) pos: vec4f) -> @location(0) vec4f { + let mipLevel = floor(pos.x) / ${kMipGradientSteps}; + result[u32(pos.x)] = textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r; + return vec4f(0); + } + `, + }); + + const pipeline = device.createRenderPipeline({ + layout: 'auto', + vertex: { module }, + fragment: { module, targets: [{ format: 'rgba8unorm' }] }, + }); + + const target = t.createTextureTracked({ + size: [kMipGradientSteps + 1, 1, 1], + format: 'rgba8unorm', + usage: GPUTextureUsage.RENDER_ATTACHMENT, + }); + + const texture = t.createTextureTracked({ + size: [2, 2, 1], + format: 'r8unorm', + usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, + mipLevelCount: 2, + }); + + device.queue.writeTexture( + { texture, mipLevel: 1 }, + new Uint8Array([255]), + { bytesPerRow: 1 }, + [1, 1] + ); + + const sampler = device.createSampler({ + minFilter: 'linear', + magFilter: 'linear', + mipmapFilter: 'linear', + }); + + const storageBuffer = t.createBufferTracked({ + size: 4 * (kMipGradientSteps + 1), + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + + const resultBuffer = t.createBufferTracked({ + size: storageBuffer.size, + usage: GPUBufferUsage.COPY_DST | 
GPUBufferUsage.MAP_READ, + }); + + const bindGroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: texture.createView() }, + { binding: 1, resource: sampler }, + { binding: 2, resource: { buffer: storageBuffer } }, + ], + }); + + const encoder = device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: target.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bindGroup); + pass.draw(3); + pass.end(); + encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); + device.queue.submit([encoder.finish()]); + + await resultBuffer.mapAsync(GPUMapMode.READ); + const weights = Array.from(new Float32Array(resultBuffer.getMappedRange())); + resultBuffer.unmap(); + + texture.destroy(); + storageBuffer.destroy(); + resultBuffer.destroy(); + + const showWeights = () => weights.map((v, i) => `${i.toString().padStart(2)}: ${v}`).join('\n'); + + // Validate the weights + assert(weights[0] === 0, `weight 0 expected 0 but was ${weights[0]}\n${showWeights()}`); + assert( + weights[kMipGradientSteps] === 1, + `top weight expected 1 but was ${weights[kMipGradientSteps]}\n${showWeights()}` + ); + assert( + Math.abs(weights[kMipGradientSteps / 2] - 0.5) < 0.0001, + `middle weight expected approximately 0.5 but was ${ + weights[kMipGradientSteps / 2] + }\n${showWeights()}` + ); + + // Note: for 16 steps, these are the AMD weights + // + // standard + // step mipLevel gpu AMD + // ---- -------- -------- ---------- + // 0: 0 0 0 + // 1: 0.0625 0.0625 0 + // 2: 0.125 0.125 0.03125 + // 3: 0.1875 0.1875 0.109375 + // 4: 0.25 0.25 0.1875 + // 5: 0.3125 0.3125 0.265625 + // 6: 0.375 0.375 0.34375 + // 7: 0.4375 0.4375 0.421875 + // 8: 0.5 0.5 0.5 + // 9: 0.5625 0.5625 0.578125 + // 10: 0.625 0.625 0.65625 + // 11: 0.6875 0.6875 0.734375 + // 12: 0.75 0.75 0.8125 + // 13: 0.8125 0.8125 0.890625 + 
// 14: 0.875 0.875 0.96875 + // 15: 0.9375 0.9375 1 + // 16: 1 1 1 + // + // notice step 1 is 0 and step 15 is 1. + // so we only check the 1 through 14. + for (let i = 1; i < kMipGradientSteps - 1; ++i) { + assert( + weights[i] < weights[i + 1], + `weight[${i}] was not less than < weight[${i + 1}]\n${showWeights()}` + ); + } + + s_deviceToMipGradientValues.set(device, weights); +} + /** * Gets the mip gradient values for the current device. * The issue is, different GPUs have different ways of mixing between mip levels. @@ -121,123 +308,28 @@ export function skipIfTextureFormatNotSupportedNotAvailableOrNotFilterable( * * There's an assumption that the gradient will be the same for all formats * and usages. + * + * Note: The code below has 2 maps. One device->Promise, the other device->weights + * device->weights is meant to be used synchronously by other code so we don't + * want to leave initMipGradientValuesForDevice until the weights have been read. + * But, multiple subcases will run because this function is async. So, subcase 1 + * runs, hits this init code, this code waits for the weights. Then, subcase 2 + * runs and hits this init code. The weights will not be in the device->weights map + * yet which is why we have the device->Promise map. This is so subcase 2 waits + * for subcase 1's "query the weights" step. Otherwise, all subcases would do the + * "get the weights" step separately. 
*/ const kMipGradientSteps = 16; +const s_deviceToMipGradientValuesPromise = new WeakMap>(); const s_deviceToMipGradientValues = new WeakMap(); async function initMipGradientValuesForDevice(t: GPUTest) { const { device } = t; - const weights = s_deviceToMipGradientValues.get(device); - if (!weights) { - const module = device.createShaderModule({ - code: ` - @group(0) @binding(0) var tex: texture_2d; - @group(0) @binding(1) var smp: sampler; - @group(0) @binding(2) var result: array; - - @compute @workgroup_size(1) fn cs(@builtin(global_invocation_id) id: vec3u) { - let mipLevel = f32(id.x) / ${kMipGradientSteps}; - result[id.x] = textureSampleLevel(tex, smp, vec2f(0.5), mipLevel).r; - } - `, - }); - - const pipeline = device.createComputePipeline({ - layout: 'auto', - compute: { module }, - }); - - const texture = t.createTextureTracked({ - size: [2, 2, 1], - format: 'r8unorm', - usage: GPUTextureUsage.TEXTURE_BINDING | GPUTextureUsage.COPY_DST, - mipLevelCount: 2, - }); - - device.queue.writeTexture( - { texture, mipLevel: 1 }, - new Uint8Array([255]), - { bytesPerRow: 1 }, - [1, 1] - ); - - const sampler = device.createSampler({ - minFilter: 'linear', - magFilter: 'linear', - mipmapFilter: 'linear', - }); - - const storageBuffer = t.createBufferTracked({ - size: 4 * (kMipGradientSteps + 1), - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - - const resultBuffer = t.createBufferTracked({ - size: storageBuffer.size, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); - - const bindGroup = device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: texture.createView() }, - { binding: 1, resource: sampler }, - { binding: 2, resource: { buffer: storageBuffer } }, - ], - }); - - const encoder = device.createCommandEncoder(); - const pass = encoder.beginComputePass(); - pass.setPipeline(pipeline); - pass.setBindGroup(0, bindGroup); - pass.dispatchWorkgroups(kMipGradientSteps + 1); - pass.end(); 
- encoder.copyBufferToBuffer(storageBuffer, 0, resultBuffer, 0, resultBuffer.size); - device.queue.submit([encoder.finish()]); - - await resultBuffer.mapAsync(GPUMapMode.READ); - const weights = Array.from(new Float32Array(resultBuffer.getMappedRange())); - resultBuffer.unmap(); - - texture.destroy(); - storageBuffer.destroy(); - resultBuffer.destroy(); - - // Validate the weights - assert(weights[0] === 0); - assert(weights[kMipGradientSteps] === 1); - assert(weights[kMipGradientSteps / 2] === 0.5); - - // Note: for 16 steps, these are the AMD weights - // - // standard - // step mipLevel gpu AMD - // ---- -------- -------- ---------- - // 0: 0 0 1 - // 1: 0.0625 0.0625 0 - // 2: 0.125 0.125 0.03125 - // 3: 0.1875 0.1875 0.109375 - // 4: 0.25 0.25 0.1875 - // 5: 0.3125 0.3125 0.265625 - // 6: 0.375 0.375 0.34375 - // 7: 0.4375 0.4375 0.421875 - // 8: 0.5 0.5 0.5 - // 9: 0.5625 0.5625 0.578125 - // 10: 0.625 0.625 0.65625 - // 11: 0.6875 0.6875 0.734375 - // 12: 0.75 0.75 0.8125 - // 13: 0.8125 0.8125 0.890625 - // 14: 0.875 0.875 0.96875 - // 15: 0.9375 0.9375 1 - // 16: 1 1 1 - // - // notice step 1 is 0 and step 15 is 1. - // so we only check the 1 through 14. 
- for (let i = 1; i < kMipGradientSteps - 1; ++i) { - assert(weights[i] < weights[i + 1]); - } - - s_deviceToMipGradientValues.set(device, weights); + let weightsP = s_deviceToMipGradientValuesPromise.get(device); + if (!weightsP) { + weightsP = queryMipGradientValuesForDevice(t); + s_deviceToMipGradientValuesPromise.set(device, weightsP); } + return await weightsP; } function getWeightForMipLevel(t: GPUTest, mipLevelCount: number, mipLevel: number) { @@ -457,33 +549,55 @@ export type Dimensionality = vec1 | vec2 | vec3; type TextureCallArgKeys = keyof TextureCallArgs; const kTextureCallArgNames: readonly TextureCallArgKeys[] = [ + 'component', 'coords', 'arrayIndex', 'sampleIndex', 'mipLevel', 'ddx', 'ddy', + 'depthRef', 'offset', ] as const; export interface TextureCallArgs { + component?: number; coords?: T; mipLevel?: number; arrayIndex?: number; sampleIndex?: number; + depthRef?: number; ddx?: T; ddy?: T; offset?: T; } +export type TextureBuiltin = + | 'textureGather' + | 'textureGatherCompare' + | 'textureLoad' + | 'textureSample' + | 'textureSampleBaseClampToEdge' + | 'textureSampleLevel'; + export interface TextureCall extends TextureCallArgs { - builtin: 'textureLoad' | 'textureSample' | 'textureSampleBaseClampToEdge' | 'textureSampleLevel'; + builtin: TextureBuiltin; coordType: 'f' | 'i' | 'u'; levelType?: 'i' | 'u' | 'f'; arrayIndexType?: 'i' | 'u'; sampleIndexType?: 'i' | 'u'; + componentType?: 'i' | 'u'; } +const isBuiltinComparison = (builtin: TextureBuiltin) => builtin === 'textureGatherCompare'; +const isBuiltinGather = (builtin: TextureBuiltin | undefined) => + builtin === 'textureGather' || builtin === 'textureGatherCompare'; +const builtinNeedsSampler = (builtin: TextureBuiltin) => + builtin.startsWith('textureSample') || builtin.startsWith('textureGather'); + +const isCubeViewDimension = (viewDescriptor?: GPUTextureViewDescriptor) => + viewDescriptor?.dimension === 'cube' || viewDescriptor?.dimension === 'cube-array'; + const s_u32 = new 
Uint32Array(1); const s_f32 = new Float32Array(s_u32.buffer); const s_i32 = new Int32Array(s_u32.buffer); @@ -512,6 +626,9 @@ function getCallArgType( switch (argName) { case 'coords': return call.coordType; + case 'component': + assert(call.componentType !== undefined); + return call.componentType; case 'mipLevel': assert(call.levelType !== undefined); return call.levelType; @@ -521,6 +638,7 @@ function getCallArgType( case 'sampleIndex': assert(call.sampleIndexType !== undefined); return call.sampleIndexType; + case 'depthRef': case 'ddx': case 'ddy': return 'f'; @@ -634,6 +752,37 @@ function zeroValuePerTexelComponent(components: TexelComponent[]) { return out; } +const kSamplerFns: Record boolean> = { + never: (ref: number, v: number) => false, + less: (ref: number, v: number) => ref < v, + equal: (ref: number, v: number) => ref === v, + 'less-equal': (ref: number, v: number) => ref <= v, + greater: (ref: number, v: number) => ref > v, + 'not-equal': (ref: number, v: number) => ref !== v, + 'greater-equal': (ref: number, v: number) => ref >= v, + always: (ref: number, v: number) => true, +} as const; + +function applyCompare( + call: TextureCall, + sampler: GPUSamplerDescriptor | undefined, + components: TexelComponent[], + src: PerTexelComponent +): PerTexelComponent { + if (isBuiltinComparison(call.builtin)) { + assert(sampler !== undefined); + assert(call.depthRef !== undefined); + const out: PerTexelComponent = {}; + const compareFn = kSamplerFns[sampler.compare!]; + for (const component of components) { + out[component] = compareFn(call.depthRef, src[component]!) ? 1 : 0; + } + return out; + } else { + return src; + } +} + /** * Returns the expect value for a WGSL builtin texture function for a single * mip level @@ -661,10 +810,7 @@ export function softwareTextureReadMipLevel( sampler?.addressModeW ?? 
'clamp-to-edge', ]; - const isCube = - texture.viewDescriptor.dimension === 'cube' || - texture.viewDescriptor.dimension === 'cube-array'; - + const isCube = isCubeViewDimension(texture.viewDescriptor); const arrayIndexMult = isCube ? 6 : 1; const numLayers = textureSize[2] / arrayIndexMult; assert(numLayers % 1 === 0); @@ -684,6 +830,8 @@ export function softwareTextureReadMipLevel( }; switch (call.builtin) { + case 'textureGather': + case 'textureGatherCompare': case 'textureSample': case 'textureSampleBaseClampToEdge': case 'textureSampleLevel': { @@ -714,7 +862,7 @@ export function softwareTextureReadMipLevel( const samples: { at: number[]; weight: number }[] = []; - const filter = sampler?.minFilter ?? 'nearest'; + const filter = isBuiltinGather(call.builtin) ? 'linear' : sampler?.minFilter ?? 'nearest'; switch (filter) { case 'linear': { // 'p0' is the lower texel for 'at' @@ -733,10 +881,11 @@ export function softwareTextureReadMipLevel( samples.push({ at: p1, weight: p1W[0] }); break; case 2: { - samples.push({ at: p0, weight: p0W[0] * p0W[1] }); - samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] }); + // Note: These are ordered to match textureGather samples.push({ at: [p0[0], p1[1]], weight: p0W[0] * p1W[1] }); samples.push({ at: p1, weight: p1W[0] * p1W[1] }); + samples.push({ at: [p1[0], p0[1]], weight: p1W[0] * p0W[1] }); + samples.push({ at: p0, weight: p0W[0] * p0W[1] }); break; } case 3: { @@ -746,10 +895,11 @@ export function softwareTextureReadMipLevel( // the slice they'll be wrapped by wrapFaceCoordToCubeFaceAtEdgeBoundaries // below. 
if (isCube) { - samples.push({ at: p0, weight: p0W[0] * p0W[1] }); - samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] }); + // Note: These are ordered to match textureGather samples.push({ at: [p0[0], p1[1], p0[2]], weight: p0W[0] * p1W[1] }); samples.push({ at: p1, weight: p1W[0] * p1W[1] }); + samples.push({ at: [p1[0], p0[1], p0[2]], weight: p1W[0] * p0W[1] }); + samples.push({ at: p0, weight: p0W[0] * p0W[1] }); const ndx = getUnusedCubeCornerSampleIndex(textureSize[0], coords as vec3); if (ndx >= 0) { // # Issues with corners of cubemaps @@ -783,7 +933,16 @@ export function softwareTextureReadMipLevel( // I'm not sure what "average the values of the three available samples" // means. To me that would be (a+b+c)/3 or in other words, set all the // weights to 0.33333 but that's not what the M1 is doing. - unreachable('corners of cubemaps are not testable'); + // + // We could check that, given the 3 texels at the corner, if all 3 texels + // are the same value then the result must be the same value. Otherwise, + // the result must be between the 3 values. For now, the code that + // chooses test coordinates avoids corners. This has the restriction + // that the smallest mip level be at least 4x4 so there are some non + // corners to choose from. + unreachable( + `corners of cubemaps are not testable:\n ${describeTextureCall(call)}` + ); } } else { const p = [p0, p1]; @@ -813,16 +972,33 @@ export function softwareTextureReadMipLevel( unreachable(); } + if (isBuiltinGather(call.builtin)) { + const componentNdx = call.component ?? 0; + assert(componentNdx >= 0 && componentNdx < 4); + assert(samples.length === 4); + const component = kRGBAComponents[componentNdx]; + const out: PerTexelComponent = {}; + samples.forEach((sample, i) => { + const c = isCube + ? 
wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3) + : applyAddressModesToCoords(addressMode, textureSize, sample.at); + const v = load(c); + const postV = applyCompare(call, sampler, rep.componentOrder, v); + const rgba = convertPerTexelComponentToResultFormat(postV, format); + out[kRGBAComponents[i]] = rgba[component]; + }); + return out; + } + const out: PerTexelComponent = {}; - const ss = []; for (const sample of samples) { const c = isCube ? wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize[0], sample.at as vec3) : applyAddressModesToCoords(addressMode, textureSize, sample.at); const v = load(c); - ss.push(v); + const postV = applyCompare(call, sampler, rep.componentOrder, v); for (const component of rep.componentOrder) { - out[component] = (out[component] ?? 0) + v[component]! * sample.weight; + out[component] = (out[component] ?? 0) + postV[component]! * sample.weight; } } @@ -856,7 +1032,8 @@ export function softwareTextureReadLevel( return softwareTextureReadMipLevel(call, texture, sampler, mipLevel); } - switch (sampler.mipmapFilter) { + const effectiveMipmapFilter = isBuiltinGather(call.builtin) ? 'nearest' : sampler.mipmapFilter; + switch (effectiveMipmapFilter) { case 'linear': { const clampedMipLevel = clamp(mipLevel, { min: 0, max: maxLevel }); const baseMipLevel = Math.floor(clampedMipLevel); @@ -1090,6 +1267,50 @@ function texelsApproximatelyEqual( return true; } +// If it's `textureGather` then we need to convert all values to one component. +// In other words, imagine the format is rg11b10ufloat. If it was +// `textureSample` we'd have `r11, g11, b10, a=1` but for `textureGather` +// +// component = 0 => `r11, r11, r11, r11` +// component = 1 => `g11, g11, g11, g11` +// component = 2 => `b10, b10, b10, b10` +// +// etc..., each from a different texel +// +// The Texel utils don't handle this. 
So if `component = 2` we take each value, +// copy it to the `B` component, run it through the texel utils so it returns +// the correct ULP for a 10bit float (not an 11 bit float). Then copy it back to +// the channel it came from. +function getULPFromZeroForComponents( + rgba: PerTexelComponent, + format: EncodableTextureFormat, + builtin: TextureBuiltin, + componentNdx?: number +): PerTexelComponent { + const rep = kTexelRepresentationInfo[format]; + if (isBuiltinGather(builtin)) { + const out: PerTexelComponent = {}; + const component = kRGBAComponents[componentNdx ?? 0]; + const temp: PerTexelComponent = { R: 0, G: 0, B: 0, A: 1 }; + for (const comp of kRGBAComponents) { + temp[component] = rgba[comp]; + const texel = convertResultFormatToTexelViewFormat(temp, format); + const ulp = convertPerTexelComponentToResultFormat( + rep.bitsToULPFromZero(rep.numberToBits(texel)), + format + ); + out[comp] = ulp[component]; + } + return out; + } else { + const texel = convertResultFormatToTexelViewFormat(rgba, format); + return convertPerTexelComponentToResultFormat( + rep.bitsToULPFromZero(rep.numberToBits(texel)), + format + ); + } +} + /** * Checks the result of each call matches the expected result. */ @@ -1099,11 +1320,10 @@ export async function checkCallResults( textureType: string, sampler: GPUSamplerDescriptor | undefined, calls: TextureCall[], - results: PerTexelComponent[] + results: Awaited>> ) { const errs: string[] = []; const format = texture.texels[0].format; - const rep = kTexelRepresentationInfo[format]; const size = reifyExtent3D(texture.descriptor.size); const maxFractionalDiff = sampler?.minFilter === 'linear' || @@ -1112,11 +1332,21 @@ export async function checkCallResults( ? 
getMaxFractionalDiffForTextureFormat(texture.descriptor.format) : 0; - for (let callIdx = 0; callIdx < calls.length && errs.length === 0; callIdx++) { + for (let callIdx = 0; callIdx < calls.length; callIdx++) { const call = calls[callIdx]; - const gotRGBA = results[callIdx]; + const gotRGBA = results.results[callIdx]; const expectRGBA = softwareTextureReadLevel(t, call, texture, sampler, call.mipLevel ?? 0); + // The spec says depth and stencil have implementation defined values for G, B, and A + // so if this is `textureGather` and component > 0 then there's nothing to check. + if ( + isDepthOrStencilTextureFormat(format) && + isBuiltinGather(call.builtin) && + call.component! > 0 + ) { + continue; + } + if (texelsApproximatelyEqual(gotRGBA, expectRGBA, format, maxFractionalDiff)) { continue; } @@ -1125,33 +1355,86 @@ export async function checkCallResults( continue; } - const got = convertResultFormatToTexelViewFormat(gotRGBA, format); - const expect = convertResultFormatToTexelViewFormat(expectRGBA, format); - const gULP = rep.bitsToULPFromZero(rep.numberToBits(got)); - const eULP = rep.bitsToULPFromZero(rep.numberToBits(expect)); - for (const component of rep.componentOrder) { - const g = got[component]!; - const e = expect[component]!; + const gULP = getULPFromZeroForComponents(gotRGBA, format, call.builtin, call.component); + const eULP = getULPFromZeroForComponents(expectRGBA, format, call.builtin, call.component); + + // from the spec: https://gpuweb.github.io/gpuweb/#reading-depth-stencil + // depth and stencil values are D, ?, ?, ? + const rgbaComponentsToCheck = + isBuiltinGather(call.builtin) || !isDepthOrStencilTextureFormat(format) + ? kRGBAComponents + : kRComponent; + + let bad = false; + const diffs = rgbaComponentsToCheck.map(component => { + const g = gotRGBA[component]!; + const e = expectRGBA[component]!; const absDiff = Math.abs(g - e); const ulpDiff = Math.abs(gULP[component]! 
- eULP[component]!); - const relDiff = absDiff / Math.max(Math.abs(g), Math.abs(e)); + assert(!Number.isNaN(ulpDiff)); + const maxAbs = Math.max(Math.abs(g), Math.abs(e)); + const relDiff = maxAbs > 0 ? absDiff / maxAbs : 0; if (ulpDiff > 3 && absDiff > maxFractionalDiff) { - const desc = describeTextureCall(call); - errs.push(`component was not as expected: + bad = true; + } + return { absDiff, relDiff, ulpDiff }; + }); + + const isFloatType = (format: GPUTextureFormat) => { + const info = kTextureFormatInfo[format]; + return info.color?.type === 'float' || info.depth?.type === 'depth'; + }; + const fix5 = (n: number) => (isFloatType(format) ? n.toFixed(5) : n.toString()); + const fix5v = (arr: number[]) => arr.map(v => fix5(v)).join(', '); + const rgbaToArray = (p: PerTexelComponent): number[] => + rgbaComponentsToCheck.map(component => p[component]!); + + if (bad) { + const desc = describeTextureCall(call); + errs.push(`result was not as expected: size: [${size.width}, ${size.height}, ${size.depthOrArrayLayers}] mipCount: ${texture.descriptor.mipLevelCount ?? 1} - call: ${desc} // #${callIdx} - component: ${component} - got: ${g} - expected: ${e} - abs diff: ${absDiff.toFixed(4)} - rel diff: ${(relDiff * 100).toFixed(2)}% - ulp diff: ${ulpDiff} + call: ${desc} // #${callIdx}`); + if (isCubeViewDimension(texture.viewDescriptor)) { + const coord = convertCubeCoordToNormalized3DTextureCoord(call.coords as vec3); + const faceNdx = Math.floor(coord[2] * 6); + errs.push(` : as 3D texture coord: (${coord[0]}, ${coord[1]}, ${coord[2]})`); + for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) { + const mipSize = virtualMipSize( + texture.descriptor.dimension ?? 
'2d', + texture.descriptor.size, + mipLevel + ); + const t = coord.slice(0, 2).map((v, i) => (v * mipSize[i]).toFixed(3)); + errs.push( + ` : as texel coord mip level[${mipLevel}]: (${t[0]}, ${t[1]}), face: ${faceNdx}(${kFaceNames[faceNdx]})` + ); + } + } else { + for (let mipLevel = 0; mipLevel < (texture.descriptor.mipLevelCount ?? 1); ++mipLevel) { + const mipSize = virtualMipSize( + texture.descriptor.dimension ?? '2d', + texture.descriptor.size, + mipLevel + ); + const t = call.coords!.map((v, i) => (v * mipSize[i]).toFixed(3)); + errs.push(` : as texel coord @ mip level[${mipLevel}]: (${t.join(', ')})`); + } + } + errs.push(`\ + got: ${fix5v(rgbaToArray(gotRGBA))} + expected: ${fix5v(rgbaToArray(expectRGBA))} + max diff: ${maxFractionalDiff} + abs diffs: ${fix5v(diffs.map(({ absDiff }) => absDiff))} + rel diffs: ${diffs.map(({ relDiff }) => `${(relDiff * 100).toFixed(2)}%`).join(', ')} + ulp diffs: ${diffs.map(({ ulpDiff }) => ulpDiff).join(', ')} `); - if (sampler) { + + if (sampler) { + if (t.rec.debugging) { const expectedSamplePoints = [ 'expected:', - ...(await identifySamplePoints(texture, (texels: TexelView[]) => { + ...(await identifySamplePoints(texture, call, (texels: TexelView[]) => { return Promise.resolve( softwareTextureReadLevel( t, @@ -1169,13 +1452,9 @@ export async function checkCallResults( ]; const gotSamplePoints = [ 'got:', - ...(await identifySamplePoints(texture, async (texels: TexelView[]) => { - const gpuTexture = createTextureFromTexelViews(t, texels, texture.descriptor); - const result = ( - await doTextureCalls(t, gpuTexture, texture.viewDescriptor, textureType, sampler, [ - call, - ]) - )[0]; + ...(await identifySamplePoints(texture, call, async (texels: TexelView[]) => { + const gpuTexture = createTextureFromTexelViewsLocal(t, texels, texture.descriptor); + const result = (await results.run(gpuTexture))[callIdx]; gpuTexture.destroy(); return result; })), @@ -1184,9 +1463,16 @@ export async function checkCallResults( 
errs.push(layoutTwoColumns(expectedSamplePoints, gotSamplePoints).join('\n')); errs.push('', ''); } - } - } - } + } // if (sampler) + + // Don't report the other errors. There are 50 sample points per subcase and + // 50-100 subcases so the log would get enormous if all 50 fail. One + // report per subcase is enough. + break; + } // if (bad) + } // for callIdx + + results.destroy(); return errs.length > 0 ? new Error(errs.join('\n')) : undefined; } @@ -1763,6 +2049,19 @@ export async function readTextureToTexelViews( return texelViews; } +function createTextureFromTexelViewsLocal( + t: GPUTest, + texelViews: TexelView[], + desc: Omit +): GPUTexture { + const modifiedDescriptor = { ...desc }; + // If it's a depth or stencil texture we need to render to it to fill it with data. + if (isDepthOrStencilTextureFormat(texelViews[0].format)) { + modifiedDescriptor.usage = desc.usage | GPUTextureUsage.RENDER_ATTACHMENT; + } + return createTextureFromTexelViews(t, texelViews, modifiedDescriptor); +} + /** * Fills a texture with random data and returns that data as * an array of TexelView. @@ -1791,14 +2090,14 @@ export async function createTextureWithRandomDataAndGetTexels( return { texture, texels }; } else { const texels = createRandomTexelViewMipmap(descriptor); - const texture = createTextureFromTexelViews(t, texels, descriptor); + const texture = createTextureFromTexelViewsLocal(t, texels, descriptor); return { texture, texels }; } } function valueIfAllComponentsAreEqual( c: PerTexelComponent, - componentOrder: TexelComponent[] + componentOrder: readonly TexelComponent[] ) { const s = new Set(componentOrder.map(component => c[component]!)); return s.size === 1 ? 
s.values().next().value : undefined; @@ -1893,12 +2192,13 @@ const kFaceNames = ['+x', '-x', '+y', '-y', '+z', '-z'] as const; * a: at: [7, 1], weights: [R: 0.75000] * b: at: [7, 2], weights: [R: 0.25000] */ -async function identifySamplePoints( +async function identifySamplePoints( texture: Texture, + call: TextureCall, run: (texels: TexelView[]) => Promise> ) { const info = texture.descriptor; - const isCube = texture.viewDescriptor.dimension === 'cube'; + const isCube = isCubeViewDimension(texture.viewDescriptor); const mipLevelCount = texture.descriptor.mipLevelCount ?? 1; const mipLevelSize = range(mipLevelCount, mipLevel => virtualMipSize(texture.descriptor.dimension ?? '2d', texture.descriptor.size, mipLevel) @@ -1934,6 +2234,11 @@ async function identifySamplePoints( ) as EncodableTextureFormat; const rep = kTexelRepresentationInfo[format]; + const components = isBuiltinGather(call.builtin) ? kRGBAComponents : rep.componentOrder; + const convertResultAsAppropriate = isBuiltinGather(call.builtin) + ? (v: T) => v + : convertResultFormatToTexelViewFormat; + // Identify all the texels that are sampled, and their weights. const sampledTexelWeights = new Map>(); const unclassifiedStack = [new Set(range(numTexels, v => v))]; @@ -1951,8 +2256,8 @@ async function identifySamplePoints( unclassifiedStack.push(setB); } - // See if any of the texels in setA were sampled. - const results = convertResultFormatToTexelViewFormat( + // See if any of the texels in setA were sampled. + const results = convertResultAsAppropriate( await run( range(mipLevelCount, mipLevel => TexelView.fromTexelsAsColors( @@ -1978,7 +2283,7 @@ async function identifySamplePoints( ), format ); - if (rep.componentOrder.some(c => results[c] !== 0)) { + if (components.some(c => results[c] !== 0)) { // One or more texels of setA were sampled. if (setA.size === 1) { // We identified a specific texel was sampled. 
@@ -2040,13 +2345,20 @@ async function identifySamplePoints( for (let layer = 0; layer < depthOrArrayLayers; ++layer) { const layerEntries = level[layer]; - if (!layerEntries) { - continue; - } const orderedTexelIndices: number[] = []; lines.push(''); - lines.push(`layer: ${layer}${isCube ? ` (${kFaceNames[layer]})` : ''}`); + const unSampled = layerEntries ? '' : 'un-sampled'; + if (isCube) { + const face = kFaceNames[layer % 6]; + lines.push(`layer: ${layer}, cube-layer: ${(layer / 6) | 0} (${face}) ${unSampled}`); + } else { + lines.push(`layer: ${unSampled}`); + } + + if (!layerEntries) { + continue; + } { let line = ' '; @@ -2099,11 +2411,11 @@ async function identifySamplePoints( const weights = layerEntries.get(texelIdx)!; const y = Math.floor(texelIdx / texelsPerRow); const x = texelIdx % texelsPerRow; - const singleWeight = valueIfAllComponentsAreEqual(weights, rep.componentOrder); + const singleWeight = valueIfAllComponentsAreEqual(weights, components); const w = singleWeight !== undefined ? `weight: ${fix5(singleWeight)}` - : `weights: [${rep.componentOrder.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`; + : `weights: [${components.map(c => `${c}: ${fix5(weights[c]!)}`).join(', ')}]`; const coord = `${pad2(x)}, ${pad2(y)}, ${pad2(layer)}`; lines.push(`${letter(idCount + i)}: mip(${mipLevel}) at: [${coord}], ${w}`); }); @@ -2163,7 +2475,9 @@ export function chooseTextureSize({ const width = align(Math.max(minSize, blockWidth * minBlocks), blockWidth); const height = align(Math.max(minSize, blockHeight * minBlocks), blockHeight); if (viewDimension === 'cube' || viewDimension === 'cube-array') { - const size = lcm(width, height); + const blockLCM = lcm(blockWidth, blockHeight); + const largest = Math.max(width, height); + const size = align(largest, blockLCM); return [size, size, viewDimension === 'cube-array' ? 
24 : 6]; } const depthOrArrayLayers = getDepthOrArrayLayersForViewDimension(viewDimension); @@ -2177,11 +2491,14 @@ export const kCubeSamplePointMethods = ['cube-edges', 'texel-centre', 'spiral'] export type CubeSamplePointMethods = (typeof kSamplePointMethods)[number]; type TextureBuiltinInputArgs = { + textureBuiltin?: TextureBuiltin; descriptor: GPUTextureDescriptor; sampler?: GPUSamplerDescriptor; mipLevel?: RangeDef; sampleIndex?: RangeDef; arrayIndex?: RangeDef; + component?: boolean; + depthRef?: boolean; offset?: boolean; hashInputs: (number | string | boolean)[]; }; @@ -2201,7 +2518,15 @@ function generateTextureBuiltinInputsImpl( radius?: number; loops?: number; }) -): { coords: T; mipLevel: number; sampleIndex?: number; arrayIndex?: number; offset?: T }[] { +): { + coords: T; + mipLevel: number; + sampleIndex?: number; + arrayIndex?: number; + offset?: T; + component?: number; + depthRef?: number; +}[] { const { method, descriptor } = args; const dimension = descriptor.dimension ?? '2d'; const mipLevelCount = descriptor.mipLevelCount ?? 1; @@ -2234,14 +2559,15 @@ function generateTextureBuiltinInputsImpl( typeof v === 'string' ? sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 1 : 0) : v ); const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { - const range = num + type === 'u32' ? 1 : 2; + const range = num + (type === 'u32' ? 1 : 2); const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1); return type === 'f32' ? number : Math.floor(number); }; - const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => { + // Generates the same values per coord instead of using all the extra `_hashInputs`. 
+ const makeIntHashValueRepeatable = (min: number, max: number, ...hashInputs: number[]) => { const range = max - min; - return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range); + return min + Math.floor((hashU32(...hashInputs) / 0x1_0000_0000) * range); }; // Samplers across devices use different methods to interpolate. @@ -2253,7 +2579,77 @@ function generateTextureBuiltinInputsImpl( // Linux, AMD Radeon Pro WX 3200: 256 // MacOS, M1 Mac: 256 const kSubdivisionsPerTexel = 4; - const nearest = !args.sampler || args.sampler.minFilter === 'nearest'; + + // When filtering is nearest then we want to avoid edges of texels + // + // U + // | + // +---+---+---+---+---+---+---+---+ + // | | A | B | | | | | | + // +---+---+---+---+---+---+---+---+ + // + // Above, coordinate U could sample either A or B + // + // U + // | + // +---+---+---+---+---+---+---+---+ + // | | A | B | C | | | | | + // +---+---+---+---+---+---+---+---+ + // + // For textureGather we want to avoid texel centers + // as for coordinate U could either gather A,B or B,C. + + const avoidEdgeCase = + !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin); + const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? 
kSubdivisionsPerTexel / 2 : 0; + + // textureGather issues for 2d/3d textures + // + // If addressModeU is repeat, then on an 8x1 texture, u = 0.01 or u = 0.99 + // would gather these texels + // + // +---+---+---+---+---+---+---+---+ + // | * | | | | | | | * | + // +---+---+---+---+---+---+---+---+ + // + // If addressModeU is clamp-to-edge or mirror-repeat, + // then on an 8x1 texture, u = 0.01 would gather this texel + // + // +---+---+---+---+---+---+---+---+ + // | * | | | | | | | | + // +---+---+---+---+---+---+---+---+ + // + // and 0.99 would gather this texel + // + // +---+---+---+---+---+---+---+---+ + // | | | | | | | | * | + // +---+---+---+---+---+---+---+---+ + // + // This means that if addressMode is not `repeat`, we + // need to avoid the edge of the texture. + // + // Note: we don't have these specific issues with cube maps + // as they ignore addressMode + const euclideanModulo = (n: number, m: number) => ((n % m) + m) % m; + const addressMode: GPUAddressMode[] = + args.textureBuiltin === 'textureSampleBaseClampToEdge' + ? ['clamp-to-edge', 'clamp-to-edge', 'clamp-to-edge'] + : [ + args.sampler?.addressModeU ?? 'clamp-to-edge', + args.sampler?.addressModeV ?? 'clamp-to-edge', + args.sampler?.addressModeW ?? 'clamp-to-edge', + ]; + const avoidTextureEdge = (axis: number, textureDimensionUnits: number, v: number) => { + assert(isBuiltinGather(args.textureBuiltin)); + if (addressMode[axis] === 'repeat') { + return v; + } + const inside = euclideanModulo(v, textureDimensionUnits); + const outside = v - inside; + return outside + clamp(inside, { min: 1, max: textureDimensionUnits - 1 }); + }; + + const numComponents = isDepthOrStencilTextureFormat(descriptor.format) ? 1 : 4; return coords.map((c, i) => { const mipLevel = args.mipLevel ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 
'nearest') @@ -2265,11 +2661,13 @@ function generateTextureBuiltinInputsImpl( const coords = c.map((v, i) => { // Quantize to kSubdivisionsPerPixel const v1 = Math.floor(v * q[i]); - // If it's nearest and we're on the edge of a texel then move us off the edge - // since the edge could choose one texel or another in nearest mode - const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1; + // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge + // since the edge could choose one texel or another. + const isTexelEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder; + const v2 = isTexelEdgeCase && avoidEdgeCase ? v1 + 1 : v1; + const v3 = isBuiltinGather(args.textureBuiltin) ? avoidTextureEdge(i, q[i], v2) : v2; // Convert back to texture coords - return v2 / q[i]; + return v3 / q[i]; }) as T; return { @@ -2277,15 +2675,28 @@ function generateTextureBuiltinInputsImpl( mipLevel, sampleIndex: args.sampleIndex ? makeRangeValue(args.sampleIndex, i, 1) : undefined, arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined, + depthRef: args.depthRef ? makeRangeValue({ num: 1, type: 'f32' }, i, 5) : undefined, offset: args.offset - ? (coords.map((_, j) => makeIntHashValue(-8, 8, i, 3 + j)) as T) + ? (coords.map((_, j) => makeIntHashValueRepeatable(-8, 8, i, 3 + j)) as T) : undefined, + component: args.component ? makeIntHashValueRepeatable(0, numComponents, i, 4) : undefined, }; }); } +/** + * When mipmapFilter === 'nearest' we need to stay away from 0.5 + * because the GPU could decide to choose one mip or the other. 
+ * + * Some example transition values, the value at which the GPU chooses + * mip level 1 over mip level 0: + * + * M1 Mac: 0.515381 + * Intel Mac: 0.49999 + * AMD Mac: 0.5 + */ const kMipEpsilon = 0.02; -function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUFilterMode) { +function quantizeMipLevel(mipLevel: number, mipmapFilter: GPUMipmapFilterMode) { if (mipmapFilter === 'linear') { return mipLevel; } @@ -2395,135 +2806,35 @@ export function convertNormalized3DTexCoordToCubeCoord(uvLayer: vec3) { } /** + * Wrap a texel based face coord across cube faces + * * We have a face texture in texels coord where U/V choose a texel and W chooses the face. * If U/V are outside the size of the texture then, when normalized and converted * to a cube map coordinate, they'll end up pointing to a different face. * * addressMode is effectively ignored for cube * - * +-----------+ - * |0->u | - * |↓ | - * |v +y | - * | (2) | - * | | - * +-----------+-----------+-----------+-----------+ - * |0->u |0->u |0->u |0->u | - * |↓ |↓ |↓ |↓ | - * |v -x |v +z |v +x |v -z | - * | (1) | (4) | (0) | (5) | - * | | | | | - * +-----------+-----------+-----------+-----------+ - * |0->u | - * |↓ | - * |v -y | - * | (3) | - * | | - * +-----------+ + * By converting from a texel based coord to a normalized coord and then to a cube map coord, + * if the texel was outside of the face, the cube map coord will end up pointing to a different + * face. 
We then convert back cube coord -> normalized face coord -> texel based coord */ -const kFaceConversions = { - u: (textureSize: number, faceCoord: vec3) => faceCoord[0], - v: (textureSize: number, faceCoord: vec3) => faceCoord[1], - 'u+t': (textureSize: number, faceCoord: vec3) => faceCoord[0] + textureSize, - 'u-t': (textureSize: number, faceCoord: vec3) => faceCoord[0] - textureSize, - 'v+t': (textureSize: number, faceCoord: vec3) => faceCoord[1] + textureSize, - 'v-t': (textureSize: number, faceCoord: vec3) => faceCoord[1] - textureSize, - 't-v': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1], - '1+u': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[0], - '1+v': (textureSize: number, faceCoord: vec3) => 1 + faceCoord[1], - '-v-1': (textureSize: number, faceCoord: vec3) => -faceCoord[1] - 1, - 't-u-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[0] - 1, - 't-v-1': (textureSize: number, faceCoord: vec3) => textureSize - faceCoord[1] - 1, - '2t-u-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[0] - 1, - '2t-v-1': (textureSize: number, faceCoord: vec3) => textureSize * 2 - faceCoord[1] - 1, -} as const; -const kFaceConversionEnums = keysOf(kFaceConversions); -type FaceCoordConversion = (typeof kFaceConversionEnums)[number]; - -// For Each face -// face to go if u < 0 -// face to go if u >= textureSize -// face to go if v < 0 -// face to go if v >= textureSize -const kFaceToFaceRemap: { to: number; u: FaceCoordConversion; v: FaceCoordConversion }[][] = [ - // 0 - [ - /* -u */ { to: 4, u: 'u+t', v: 'v' }, - /* +u */ { to: 5, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 'v+t', v: 't-u-1' }, - /* +v */ { to: 3, u: '2t-v-1', v: 'u' }, - ], - // 1 - [ - /* -u */ { to: 5, u: 'u+t', v: 'v' }, - /* +u */ { to: 4, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: '-v-1', v: 'u' }, // -1->0, -2->1 -3->2 - /* +v */ { to: 3, u: 't-v', v: 't-u-1' }, - ], - // 2 - [ - /* -u */ { to: 1, u: 'v', v: '1+u' }, - /* +u 
*/ { to: 0, u: 't-v-1', v: 'u-t' }, - /* -v */ { to: 5, u: 't-u-1', v: '-v-1' }, - /* +v */ { to: 4, u: 'u', v: 'v-t' }, - ], - // 3 - [ - /* -u */ { to: 1, u: 't-v-1', v: 'u+t' }, - /* +u */ { to: 0, u: 'v', v: '2t-u-1' }, - /* -v */ { to: 4, u: 'u', v: 'v+t' }, - /* +v */ { to: 5, u: 't-u-1', v: '2t-v-1' }, - ], - // 4 - [ - /* -u */ { to: 1, u: 'u+t', v: 'v' }, - /* +u */ { to: 0, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 'u', v: 'v+t' }, - /* +v */ { to: 3, u: 'u', v: 'v-t' }, - ], - // 5 - [ - /* -u */ { to: 0, u: 'u+t', v: 'v' }, - /* +u */ { to: 1, u: 'u-t', v: 'v' }, - /* -v */ { to: 2, u: 't-u-1', v: '1+v' }, - /* +v */ { to: 3, u: 't-u-1', v: '2t-v-1' }, - ], -]; - -function getFaceWrapIndex(textureSize: number, faceCoord: vec3) { - if (faceCoord[0] < 0) { - return 0; - } - if (faceCoord[0] >= textureSize) { - return 1; - } - if (faceCoord[1] < 0) { - return 2; - } - if (faceCoord[1] >= textureSize) { - return 3; - } - return -1; -} - -function applyFaceWrap(textureSize: number, faceCoord: vec3): vec3 { - const ndx = getFaceWrapIndex(textureSize, faceCoord); - if (ndx < 0) { - return faceCoord; - } - const { to, u, v } = kFaceToFaceRemap[faceCoord[2]][ndx]; - return [ - kFaceConversions[u](textureSize, faceCoord), - kFaceConversions[v](textureSize, faceCoord), - to, +function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) { + // convert texel based face coord to normalized 2d-array coord + const nc0: vec3 = [ + (faceCoord[0] + 0.5) / textureSize, + (faceCoord[1] + 0.5) / textureSize, + (faceCoord[2] + 0.5) / 6, + ]; + const cc = convertNormalized3DTexCoordToCubeCoord(nc0); + const nc1 = convertCubeCoordToNormalized3DTextureCoord(cc); + // convert normalized 2d-array coord back texel based face coord + const fc = [ + Math.floor(nc1[0] * textureSize), + Math.floor(nc1[1] * textureSize), + Math.floor(nc1[2] * 6), ]; -} -function wrapFaceCoordToCubeFaceAtEdgeBoundaries(textureSize: number, faceCoord: vec3) { - // If we're off 
both edges we need to wrap twice, once for each edge. - const faceCoord1 = applyFaceWrap(textureSize, faceCoord); - const faceCoord2 = applyFaceWrap(textureSize, faceCoord1); - return faceCoord2; + return fc; } function applyAddressModesToCoords( @@ -2570,6 +2881,8 @@ export function generateSamplePointsCube( mipLevel: number; arrayIndex?: number; offset?: undefined; + component?: number; + depthRef?: number; }[] { const { method, descriptor } = args; const mipLevelCount = descriptor.mipLevelCount ?? 1; @@ -2610,20 +2923,38 @@ export function generateSamplePointsCube( /* prettier-ignore */ coords.push( // between edges - [-1.01, -1.02, 0], - [ 1.01, -1.02, 0], - [-1.01, 1.02, 0], - [ 1.01, 1.02, 0], - - [-1.01, 0, -1.02], - [ 1.01, 0, -1.02], - [-1.01, 0, 1.02], - [ 1.01, 0, 1.02], - - [-1.01, -1.02, 0], - [ 1.01, -1.02, 0], - [-1.01, 1.02, 0], - [ 1.01, 1.02, 0], + // +x + [ 1 , -1.01, 0 ], // wrap -y + [ 1 , +1.01, 0 ], // wrap +y + [ 1 , 0 , -1.01 ], // wrap -z + [ 1 , 0 , +1.01 ], // wrap +z + // -x + [ -1 , -1.01, 0 ], // wrap -y + [ -1 , +1.01, 0 ], // wrap +y + [ -1 , 0 , -1.01 ], // wrap -z + [ -1 , 0 , +1.01 ], // wrap +z + + // +y + [ -1.01, 1 , 0 ], // wrap -x + [ +1.01, 1 , 0 ], // wrap +x + [ 0 , 1 , -1.01 ], // wrap -z + [ 0 , 1 , +1.01 ], // wrap +z + // -y + [ -1.01, -1 , 0 ], // wrap -x + [ +1.01, -1 , 0 ], // wrap +x + [ 0 , -1 , -1.01 ], // wrap -z + [ 0 , -1 , +1.01 ], // wrap +z + + // +z + [ -1.01, 0 , 1 ], // wrap -x + [ +1.01, 0 , 1 ], // wrap +x + [ 0 , -1.01, 1 ], // wrap -y + [ 0 , +1.01, 1 ], // wrap +y + // -z + [ -1.01, 0 , -1 ], // wrap -x + [ +1.01, 0 , -1 ], // wrap +x + [ 0 , -1.01, -1 ], // wrap -y + [ 0 , +1.01, -1 ], // wrap +y // corners (see comment "Issues with corners of cubemaps") // for why these are commented out. @@ -2644,11 +2975,15 @@ export function generateSamplePointsCube( typeof v === 'string' ? sumOfCharCodesOfString(v) : typeof v === 'boolean' ? (v ? 
1 : 0) : v ); const makeRangeValue = ({ num, type }: RangeDef, ...hashInputs: number[]) => { - const range = num + type === 'u32' ? 1 : 2; + const range = num + (type === 'u32' ? 1 : 2); const number = (hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range - (type === 'u32' ? 0 : 1); return type === 'f32' ? number : Math.floor(number); }; + const makeIntHashValue = (min: number, max: number, ...hashInputs: number[]) => { + const range = max - min; + return min + Math.floor((hashU32(..._hashInputs, ...hashInputs) / 0x1_0000_0000) * range); + }; // Samplers across devices use different methods to interpolate. // Quantizing the texture coordinates seems to hit coords that produce @@ -2658,12 +2993,102 @@ export function generateSamplePointsCube( // Win 11, NVidia 2070 Super: 16 // Linux, AMD Radeon Pro WX 3200: 256 // MacOS, M1 Mac: 256 + // + // Note: When doing `textureGather...` we can't use texel centers + // because which 4 pixels will be gathered jumps if we're slightly under + // or slightly over the center + // + // Similarly, if we're using 'nearest' filtering then we don't want texel + // edges for the same reason. + // + // Also note that for textureGather. The way it works for cube maps is to + // first convert from cube map coordinate to a 2D texture coordinate and + // a face. Then, choose 4 texels just like normal 2D texture coordinates. + // If one of the 4 texels is outside the current face, wrap it to the correct + // face. + // + // An issue this brings up though. Imagine a 2D texture with addressMode = 'repeat' + // + // 2d texture (same texture repeated to show 'repeat') + // ┌───┬───┬───┐ ┌───┬───┬───┐ + // │ │ │ │ │ │ │ │ + // ├───┼───┼───┤ ├───┼───┼───┤ + // │ │ │ a│ │c │ │ │ + // ├───┼───┼───┤ ├───┼───┼───┤ + // │ │ │ b│ │d │ │ │ + // └───┴───┴───┘ └───┴───┴───┘ + // + // Assume the texture coordinate is at the bottom right corner of a. + // Then textureGather will grab c, d, b, a (no idea why that order). 
+ // but think of it as top-right, bottom-right, bottom-left, top-left. + // Similarly, if the texture coordinate is at the top left of d it + // will select the same 4 texels. + // + // But, in the case of a cubemap, each face is in different direction + // relative to the face next to it. + // + // +-----------+ + // |0->u | + // |↓ | + // |v +y | + // | (2) | + // | | + // +-----------+-----------+-----------+-----------+ + // |0->u |0->u |0->u |0->u | + // |↓ |↓ |↓ |↓ | + // |v -x |v +z |v +x |v -z | + // | (1) | (4) | (0) | (5) | + // | | | | | + // +-----------+-----------+-----------+-----------+ + // |0->u | + // |↓ | + // |v -y | + // | (3) | + // | | + // +-----------+ + // + // As an example, imagine going from the +y to the +x face. + // See diagram above, the right edge of the +y face wraps + // to the top edge of the +x face. + // + // +---+---+ + // | a|c | + // ┌───┬───┬───┐ ┌───┬───┬───┐ + // │ │ │ │ │ b│d │ │ + // ├───┼───┼───┤---+ ├───┼───┼───┤ + // │ │ │ a│ c | │ │ │ │ + // ├───┼───┼───┤---+ ├───┼───┼───┤ + // │ │ │ b│ d | │ │ │ │ + // └───┴───┴───┘---+ └───┴───┴───┘ + // +y face +x face + // + // If the texture coordinate is in the bottom right corner of a, + // the rectangle of texels we read are a,b,c,d and, if we the + // texture coordinate is in the top left corner of d we also + // read a,b,c,d according to the 2 diagrams above. + // + // But, notice that when reading from the POV of +y vs +x, + // which actual a,b,c,d texels are different. + // + // From the POV of face +x: a,b are in face +x and c,d are in face +y + // From the POV of face +y: a,c are in face +x and b,d are in face +y + // + // This is all the long way of saying that if we're on the edge of a cube + // face we could get drastically different results because the orientation + // of the rectangle of the 4 texels we use, rotates. So, we need to avoid + // any values too close to the edge just in case our math is different than + // the GPU's. 
+ // const kSubdivisionsPerTexel = 4; - const nearest = !args.sampler || args.sampler.minFilter === 'nearest'; + const avoidEdgeCase = + !args.sampler || args.sampler.minFilter === 'nearest' || isBuiltinGather(args.textureBuiltin); + const edgeRemainder = isBuiltinGather(args.textureBuiltin) ? kSubdivisionsPerTexel / 2 : 0; return coords.map((c, i) => { - const mipLevel = args.mipLevel ? makeRangeValue(args.mipLevel, i) : 0; + const mipLevel = args.mipLevel + ? quantizeMipLevel(makeRangeValue(args.mipLevel, i), args.sampler?.mipmapFilter ?? 'nearest') + : 0; const clampedMipLevel = clamp(mipLevel, { min: 0, max: mipLevelCount - 1 }); - const mipSize = virtualMipSize('2d', size, clampedMipLevel); + const mipSize = virtualMipSize('2d', size, Math.ceil(clampedMipLevel)); const q = [ mipSize[0] * kSubdivisionsPerTexel, mipSize[0] * kSubdivisionsPerTexel, @@ -2683,17 +3108,21 @@ export function generateSamplePointsCube( const quantizedUVW = uvw.map((v, i) => { // Quantize to kSubdivisionsPerPixel const v1 = Math.floor(v * q[i]); - // If it's nearest and we're on the edge of a texel then move us off the edge - // since the edge could choose one texel or another in nearest mode - const v2 = nearest && v1 % kSubdivisionsPerTexel === 0 ? v1 + 1 : v1; - // Convert back to texture coords - return v2 / q[i]; + // If it's nearest or textureGather and we're on the edge of a texel then move us off the edge + // since the edge could choose one texel or another. + const isEdgeCase = Math.abs(v1 % kSubdivisionsPerTexel) === edgeRemainder; + const v2 = isEdgeCase && avoidEdgeCase ? v1 + 1 : v1; + // Convert back to texture coords slightly off + return (v2 + 1 / 16) / q[i]; }) as vec3; + const coords = convertNormalized3DTexCoordToCubeCoord(quantizedUVW); return { coords, mipLevel, arrayIndex: args.arrayIndex ? makeRangeValue(args.arrayIndex, i, 2) : undefined, + depthRef: args.depthRef ? makeRangeValue({ num: 1, type: 'f32' }, i, 5) : undefined, + component: args.component ? 
makeIntHashValue(0, 4, i, 4) : undefined, }; }); } @@ -2751,8 +3180,8 @@ function binKey(call: TextureCall): string { for (const name of kTextureCallArgNames) { const value = call[name]; if (value !== undefined) { - if (name === 'offset') { - // offset must be a constant expression + if (name === 'offset' || name === 'component') { + // offset and component must be constant expressions keys.push(`${name}: ${wgslExpr(value)}`); } else { keys.push(`${name}: ${wgslTypeFor(value, call.coordType)}`); @@ -2763,12 +3192,19 @@ function binKey(call: TextureCall): string { } function buildBinnedCalls(calls: TextureCall[]) { - const args: string[] = ['T']; // All texture builtins take the texture as the first argument + const args: string[] = []; const fields: string[] = []; const data: number[] = []; - const prototype = calls[0]; - if (prototype.builtin.startsWith('textureSample')) { + + if (isBuiltinGather(prototype.builtin) && prototype['componentType']) { + args.push(`/* component */ ${wgslExpr(prototype['component']!)}`); + } + + // All texture builtins take a Texture + args.push('T'); + + if (builtinNeedsSampler(prototype.builtin)) { // textureSample*() builtins take a sampler as the second argument args.push('S'); } @@ -2778,6 +3214,8 @@ function buildBinnedCalls(calls: TextureCall[]) { if (value !== undefined) { if (name === 'offset') { args.push(`/* offset */ ${wgslExpr(value)}`); + } else if (name === 'component') { + // was handled above } else { const type = name === 'mipLevel' @@ -2786,6 +3224,8 @@ function buildBinnedCalls(calls: TextureCall[]) { ? prototype.arrayIndexType! : name === 'sampleIndex' ? prototype.sampleIndexType! + : name === 'depthRef' + ? 
'f' : prototype.coordType; args.push(`args.${name}`); fields.push(`@align(16) ${name} : ${wgslTypeFor(value, type)}`); @@ -2800,7 +3240,7 @@ function buildBinnedCalls(calls: TextureCall[]) { (prototype[name] === undefined) === (value === undefined), 'texture calls are not binned correctly' ); - if (value !== undefined && name !== 'offset') { + if (value !== undefined && name !== 'offset' && name !== 'component') { const type = getCallArgType(call, name); const bitcastToU32 = kBitCastFunctions[type]; if (value instanceof Array) { @@ -2840,13 +3280,17 @@ function binCalls(calls: TextureCall[]): number[][] } export function describeTextureCall(call: TextureCall): string { - const args: string[] = ['texture: T']; - if (call.builtin.startsWith('textureSample')) { + const args: string[] = []; + if (isBuiltinGather(call.builtin) && call.componentType) { + args.push(`component: ${wgslExprFor(call.component!, call.componentType)}`); + } + args.push('texture: T'); + if (builtinNeedsSampler(call.builtin)) { args.push('sampler: S'); } for (const name of kTextureCallArgNames) { const value = call[name]; - if (value !== undefined) { + if (value !== undefined && name !== 'component') { if (name === 'coords') { args.push(`${name}: ${wgslExprFor(value, call.coordType)}`); } else if (name === 'mipLevel') { @@ -2855,6 +3299,8 @@ export function describeTextureCall(call: TextureCall< args.push(`${name}: ${wgslExprFor(value, call.arrayIndexType!)}`); } else if (name === 'sampleIndex') { args.push(`${name}: ${wgslExprFor(value, call.sampleIndexType!)}`); + } else if (name === 'depthRef') { + args.push(`${name}: ${wgslExprFor(value, 'f')}`); } else { args.push(`${name}: ${wgslExpr(value)}`); } @@ -2876,6 +3322,18 @@ const s_deviceToPipelines = new WeakMap( t: GPUTest, @@ -2885,6 +3343,21 @@ export async function doTextureCalls( sampler: GPUSamplerDescriptor | undefined, calls: TextureCall[] ) { + const { + format, + dimension, + depthOrArrayLayers, + sampleCount, + }: { + format: 
GPUTextureFormat; + dimension: GPUTextureDimension; + depthOrArrayLayers: number; + sampleCount: number; + } = + gpuTexture instanceof GPUExternalTexture + ? { format: 'rgba8unorm', dimension: '2d', depthOrArrayLayers: 1, sampleCount: 1 } + : gpuTexture; + let structs = ''; let body = ''; let dataFields = ''; @@ -2917,14 +3390,20 @@ export async function doTextureCalls( }); t.device.queue.writeBuffer(dataBuffer, 0, new Uint32Array(data)); - const { resultType, resultFormat, componentType } = - gpuTexture instanceof GPUExternalTexture - ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const) - : textureType.includes('depth') - ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const) - : getTextureFormatTypeInfo(gpuTexture.format); + const builtin = calls[0].builtin; + const isCompare = isBuiltinComparison(builtin); + + const { resultType, resultFormat, componentType } = isBuiltinGather(builtin) + ? getTextureFormatTypeInfo(format) + : gpuTexture instanceof GPUExternalTexture + ? ({ resultType: 'vec4f', resultFormat: 'rgba32float', componentType: 'f32' } as const) + : textureType.includes('depth') + ? ({ resultType: 'f32', resultFormat: 'rgba32float', componentType: 'f32' } as const) + : getTextureFormatTypeInfo(format); const returnType = `vec4<${componentType}>`; + const samplerType = isCompare ? 'sampler_comparison' : 'sampler'; + const rtWidth = 256; const renderTarget = t.createTextureTracked({ format: resultFormat, @@ -2949,7 +3428,7 @@ fn vs_main(@builtin(vertex_index) vertex_index : u32) -> @builtin(position) vec4 } @group(0) @binding(0) var T : ${textureType}; -${sampler ? '@group(0) @binding(1) var S : sampler' : ''}; +${sampler ? `@group(0) @binding(1) var S : ${samplerType}` : ''}; @group(0) @binding(2) var data : Data; @fragment @@ -2964,13 +3443,98 @@ ${body} const pipelines = s_deviceToPipelines.get(t.device) ?? 
new Map(); s_deviceToPipelines.set(t.device, pipelines); - const id = `${renderTarget.format}:${code}`; + // unfilterable-float textures can only be used with manually created bindGroupLayouts + // since the default 'auto' layout requires filterable textures/samplers. + // So, if we don't need filtering, don't request a filtering sampler. If we require + // filtering then check if the format is 32float format and if float32-filterable + // is enabled. + const info = kTextureFormatInfo[format ?? 'rgba8unorm']; + const isFiltering = + !!sampler && + (sampler.minFilter === 'linear' || + sampler.magFilter === 'linear' || + sampler.mipmapFilter === 'linear'); + let sampleType: GPUTextureSampleType = textureType.startsWith('texture_depth') + ? 'depth' + : isDepthTextureFormat(format) + ? 'unfilterable-float' + : isStencilTextureFormat(format) + ? 'uint' + : info.color?.type ?? 'float'; + if (isFiltering && sampleType === 'unfilterable-float') { + assert(is32Float(format)); + assert(t.device.features.has('float32-filterable')); + sampleType = 'float'; + } + if (sampleCount > 1 && sampleType === 'float') { + sampleType = 'unfilterable-float'; + } + + const entries: GPUBindGroupLayoutEntry[] = [ + { + binding: 2, + visibility: GPUShaderStage.FRAGMENT, + buffer: { + type: 'read-only-storage', + }, + }, + ]; + + const viewDimension = effectiveViewDimensionForDimension( + viewDescriptor.dimension, + dimension, + depthOrArrayLayers + ); + + if (textureType.includes('storage')) { + entries.push({ + binding: 0, + visibility: GPUShaderStage.FRAGMENT, + storageTexture: { + access: 'read-only', + viewDimension, + format, + }, + }); + } else if (gpuTexture instanceof GPUExternalTexture) { + entries.push({ + binding: 0, + visibility: GPUShaderStage.FRAGMENT, + externalTexture: {}, + }); + } else { + entries.push({ + binding: 0, + visibility: GPUShaderStage.FRAGMENT, + texture: { + sampleType, + viewDimension, + multisampled: sampleCount > 1, + }, + }); + } + + if (sampler) { + 
entries.push({ + binding: 1, + visibility: GPUShaderStage.FRAGMENT, + sampler: { + type: isCompare ? 'comparison' : isFiltering ? 'filtering' : 'non-filtering', + }, + }); + } + + const id = `${renderTarget.format}:${JSON.stringify(entries)}:${code}`; let pipeline = pipelines.get(id); if (!pipeline) { const shaderModule = t.device.createShaderModule({ code }); + const bindGroupLayout = t.device.createBindGroupLayout({ entries }); + const layout = t.device.createPipelineLayout({ + bindGroupLayouts: [bindGroupLayout], + }); - pipeline = await t.device.createRenderPipelineAsync({ - layout: 'auto', + pipeline = t.device.createRenderPipeline({ + layout, vertex: { module: shaderModule }, fragment: { module: shaderModule, @@ -2984,75 +3548,88 @@ ${body} const gpuSampler = sampler ? t.device.createSampler(sampler) : undefined; - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { - binding: 0, - resource: - gpuTexture instanceof GPUExternalTexture - ? gpuTexture - : gpuTexture.createView(viewDescriptor), - }, - ...(sampler ? [{ binding: 1, resource: gpuSampler! }] : []), - { binding: 2, resource: { buffer: dataBuffer } }, - ], - }); + const run = async (gpuTexture: GPUTexture | GPUExternalTexture) => { + const bindGroup = t.device.createBindGroup({ + layout: pipeline!.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: + gpuTexture instanceof GPUExternalTexture + ? gpuTexture + : gpuTexture.createView(viewDescriptor), + }, + ...(sampler ? [{ binding: 1, resource: gpuSampler! 
}] : []), + { binding: 2, resource: { buffer: dataBuffer } }, + ], + }); - const bytesPerRow = align(16 * renderTarget.width, 256); - const resultBuffer = t.createBufferTracked({ - size: renderTarget.height * bytesPerRow, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - }); - const encoder = t.device.createCommandEncoder(); + const bytesPerRow = align(16 * renderTarget.width, 256); + const resultBuffer = t.createBufferTracked({ + size: renderTarget.height * bytesPerRow, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, + }); - const renderPass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: renderTarget.createView(), - loadOp: 'clear', - storeOp: 'store', - }, - ], - }); + const encoder = t.device.createCommandEncoder(); - renderPass.setPipeline(pipeline); - renderPass.setBindGroup(0, bindGroup); - renderPass.draw(4); - renderPass.end(); - encoder.copyTextureToBuffer( - { texture: renderTarget }, - { buffer: resultBuffer, bytesPerRow }, - { width: renderTarget.width, height: renderTarget.height } - ); - t.device.queue.submit([encoder.finish()]); + const renderPass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: renderTarget.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); - await resultBuffer.mapAsync(GPUMapMode.READ); + renderPass.setPipeline(pipeline!); + renderPass.setBindGroup(0, bindGroup); + renderPass.draw(4); + renderPass.end(); + encoder.copyTextureToBuffer( + { texture: renderTarget }, + { buffer: resultBuffer, bytesPerRow }, + { width: renderTarget.width, height: renderTarget.height } + ); + t.device.queue.submit([encoder.finish()]); - const view = TexelView.fromTextureDataByReference( - renderTarget.format as EncodableTextureFormat, - new Uint8Array(resultBuffer.getMappedRange()), - { - bytesPerRow, - rowsPerImage: renderTarget.height, - subrectOrigin: [0, 0, 0], - subrectSize: [renderTarget.width, renderTarget.height], - } - ); + await resultBuffer.mapAsync(GPUMapMode.READ); - let 
outIdx = 0; - const out = new Array>(calls.length); - for (const bin of binned) { - for (const callIdx of bin) { - const x = outIdx % rtWidth; - const y = Math.floor(outIdx / rtWidth); - out[callIdx] = view.color({ x, y, z: 0 }); - outIdx++; + const view = TexelView.fromTextureDataByReference( + renderTarget.format as EncodableTextureFormat, + new Uint8Array(resultBuffer.getMappedRange()), + { + bytesPerRow, + rowsPerImage: renderTarget.height, + subrectOrigin: [0, 0, 0], + subrectSize: [renderTarget.width, renderTarget.height], + } + ); + + let outIdx = 0; + const out = new Array>(calls.length); + for (const bin of binned) { + for (const callIdx of bin) { + const x = outIdx % rtWidth; + const y = Math.floor(outIdx / rtWidth); + out[callIdx] = view.color({ x, y, z: 0 }); + outIdx++; + } } - } - renderTarget.destroy(); - resultBuffer.destroy(); + resultBuffer.destroy(); - return out; + return out; + }; + + const results = await run(gpuTexture); + + return { + run, + results, + destroy() { + dataBuffer.destroy(); + renderTarget.destroy(); + }, + }; } diff --git a/src/webgpu/shader/execution/robust_access_vertex.spec.ts b/src/webgpu/shader/execution/robust_access_vertex.spec.ts index d5792de1185f..91933aa2048f 100644 --- a/src/webgpu/shader/execution/robust_access_vertex.spec.ts +++ b/src/webgpu/shader/execution/robust_access_vertex.spec.ts @@ -63,6 +63,10 @@ import { makeTestGroup } from '../../../common/framework/test_group.js'; import { assert } from '../../../common/util/util.js'; import { GPUTest, TextureTestMixin } from '../../gpu_test.js'; +// This is a tolerance that should be less strict than oneULP(X) of a f32 where X is any arbitraryValues or 0. +// Given that in GLSL compat highp float can < 32 bit. 
+const kFloatTolerance = 0.000001; + // Encapsulates a draw call (either indexed or non-indexed) class DrawCall { private test: GPUTest; @@ -265,11 +269,15 @@ const typeInfoMap: { [k: string]: VertexInfo } = { sizeInBytes: 12, validationFunc: 'return valid(v.x) && valid(v.y) && valid(v.z);', }, + // It is valid to return (0, 0, 0, X) for an OOB access. (X can be anything) + // https://gpuweb.github.io/gpuweb/#security-shader float32x4: { wgslType: 'vec4', sizeInBytes: 16, validationFunc: `return (valid(v.x) && valid(v.y) && valid(v.z) && valid(v.w)) || - (v.x == 0.0 && v.y == 0.0 && v.z == 0.0 && (v.w == 0.0 || v.w == 1.0));`, + (abs(v.x - 0.0) <= ${kFloatTolerance} && + abs(v.y - 0.0) <= ${kFloatTolerance} && + abs(v.z - 0.0) <= ${kFloatTolerance});`, }, }; @@ -363,7 +371,7 @@ class F extends TextureTestMixin(GPUTest) { ${layoutStr} fn valid(f : f32) -> bool { - return ${validValues.map(v => `f == ${v}.0`).join(' || ')}; + return ${validValues.map(v => `abs(f - ${v}.0) <= ${kFloatTolerance}`).join(' || ')}; } fn validationFunc(v : ${typeInfo.wgslType}) -> bool { diff --git a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts index ffd58976fc88..7a6aa8901e28 100644 --- a/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts +++ b/src/webgpu/shader/execution/shader_io/fragment_builtins.spec.ts @@ -20,14 +20,17 @@ is evaluated per-fragment or per-sample. 
With @interpolate(, sample) or usage of import { makeTestGroup } from '../../../../common/framework/test_group.js'; import { ErrorWithExtra, assert, range, unreachable } from '../../../../common/util/util.js'; import { InterpolationSampling, InterpolationType } from '../../../constants.js'; -import { GPUTest } from '../../../gpu_test.js'; +import { kTextureFormatInfo } from '../../../format_info.js'; +import { GPUTest, TextureTestMixin } from '../../../gpu_test.js'; import { getProvokingVertexForFlatInterpolationEitherSampling } from '../../../inter_stage.js'; import { getMultisampleFragmentOffsets } from '../../../multisample_info.js'; -import { dotProduct, subtractVectors } from '../../../util/math.js'; +import { dotProduct, subtractVectors, align } from '../../../util/math.js'; import { TexelView } from '../../../util/texture/texel_view.js'; import { findFailedPixels } from '../../../util/texture/texture_ok.js'; -export const g = makeTestGroup(GPUTest); +class FragmentBuiltinTest extends TextureTestMixin(GPUTest) {} + +export const g = makeTestGroup(FragmentBuiltinTest); const s_deviceToPipelineMap = new WeakMap< GPUDevice, @@ -589,7 +592,7 @@ async function renderFragmentShaderInputsTo4TexturesAndReadbackValues( struct FragmentIn { @builtin(position) position: vec4f, - @location(0) @interpolate(${interpolate}) interpolatedValue: vec4f, +@location(0) @interpolate(${interpolate}) interpolatedValue: vec4f, ${fragInCode} }; @@ -1424,6 +1427,385 @@ g.test('inputs,sample_mask') ); }); -g.test('subgroup_size').unimplemented(); +const kSizes = [ + [15, 15], + [16, 16], + [17, 17], + [19, 13], + [13, 10], + [111, 2], + [2, 111], + [35, 2], + [2, 35], + [53, 13], + [13, 53], +] as const; + +/** + * @returns The population count of input. 
+ * + * @param input Treated as an unsigned 32-bit integer + */ +function popcount(input: number): number { + let n = input; + n = n - ((n >> 1) & 0x55555555); + n = (n & 0x33333333) + ((n >> 2) & 0x33333333); + return (((n + (n >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24; +} + +/** + * Checks subgroup_size builtin value consistency. + * + * The builtin subgroup_size is not assumed to be uniform in fragment shaders. + * Therefore, this function checks the value is a power of two within the device + * limits and that the ballot size is less than the stated size. + * @param data An array of vec4u that contains (per texel): + * * builtin value + * * ballot size + * * comparison to other invocations + * * 0 + * @param format The texture format for data + * @param min The minimum subgroup size from the device + * @param max The maximum subgroup size from the device + * @param width The width of the framebuffer + * @param height The height of the framebuffer + */ +function checkSubgroupSizeConsistency( + data: Uint32Array, + format: GPUTextureFormat, + min: number, + max: number, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + // Image copies require bytesPerRow to be a multiple of 256. + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 
1) / blockWidth / blockHeight / 4; + + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const builtinSize = data[offset]; + const ballotSize = data[offset + 1]; + const comparison = data[offset + 2]; + if (builtinSize === 0) { + continue; + } + + if (popcount(builtinSize) !== 1) { + return new Error(`Subgroup size '${builtinSize}' is not a power of two`); + } + + if (builtinSize < min) { + return new Error(`Subgroup size '${builtinSize}' is less than minimum '${min}'`); + } + if (max < builtinSize) { + return new Error(`Subgroup size '${builtinSize}' is greater than maximum '${max}'`); + } + + if (builtinSize < ballotSize) { + return new Error(`Inconsistent subgroup ballot size +- icoord: (${row}, ${col}) +- expected: ${builtinSize} +- got: ${ballotSize}`); + } + + if (comparison !== 1) { + return new Error(`Not all invocations in subgroup have same view of the size +- icoord: (${row}, ${col})`); + } + } + } + + return undefined; +} + +/** + * Runs a subgroup builtin test for fragment shaders + * + * This test draws a full screen in 2 separate draw calls (half screen each). + * Results are checked for each draw. 
+ * @param t The base test + * @param format The framebuffer format + * @param fsShader The fragment shader with the following interface: + * Location 0 output is framebuffer with format + * Group 0 binding 0 is a u32 sized data + * @param width The framebuffer width + * @param height The framebuffer height + * @param checker A functor to check the framebuffer values + */ +async function runSubgroupTest( + t: FragmentBuiltinTest, + format: GPUTextureFormat, + fsShader: string, + width: number, + height: number, + checker: (data: Uint32Array) => Error | undefined +) { + const vsShader = ` +@vertex +fn vsMain(@builtin(vertex_index) index : u32) -> @builtin(position) vec4f { + const vertices = array( + vec2(-1, -1), vec2(-1, 1), vec2( 1, 1), + vec2(-1, -1), vec2( 1, -1), vec2( 1, 1), + ); + return vec4f(vec2f(vertices[index]), 0, 1); +}`; + + const pipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module: t.device.createShaderModule({ code: vsShader }), + }, + fragment: { + module: t.device.createShaderModule({ code: fsShader }), + targets: [{ format }], + }, + primitive: { + topology: 'triangle-list', + }, + }); + + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + assert(bytesPerBlock !== undefined); + + const blocksPerRow = width / blockWidth; + const blocksPerColumn = height / blockHeight; + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 
1), 256); + const byteLength = bytesPerRow * blocksPerColumn; + const uintLength = byteLength / 4; + + const buffer = t.makeBufferWithContents( + new Uint32Array([1]), + GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST + ); + + const bg = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { + binding: 0, + resource: { + buffer, + }, + }, + ], + }); -g.test('subgroup_invocation_id').unimplemented(); + for (let i = 0; i < 2; i++) { + const framebuffer = t.createTextureTracked({ + size: [width, height], + usage: + GPUTextureUsage.COPY_SRC | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.RENDER_ATTACHMENT | + GPUTextureUsage.TEXTURE_BINDING, + format, + }); + + const encoder = t.device.createCommandEncoder(); + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: framebuffer.createView(), + loadOp: 'clear', + storeOp: 'store', + }, + ], + }); + pass.setPipeline(pipeline); + pass.setBindGroup(0, bg); + pass.draw(3, 1, i); + pass.end(); + t.queue.submit([encoder.finish()]); + + const buffer = t.copyWholeTextureToNewBufferSimple(framebuffer, 0); + const readback = await t.readGPUBufferRangeTyped(buffer, { + srcByteOffset: 0, + type: Uint32Array, + typedLength: uintLength, + method: 'copy', + }); + const data: Uint32Array = readback.data; + + t.expectOK(checker(data)); + } +} + +g.test('subgroup_size') + .desc('Tests subgroup_size values') + .params(u => + u + .combine('size', kSizes) + .beginSubcases() + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + interface SubgroupLimits extends GPUSupportedLimits { + minSubgroupSize: number; + maxSubgroupSize: number; + } + const { minSubgroupSize, maxSubgroupSize } = t.device.limits as SubgroupLimits; + + const fsShader = ` +enable subgroups; + +const width = ${t.params.size[0]}; +const height = ${t.params.size[1]}; + +@group(0) @binding(0) var 
for_layout : u32; + +@fragment +fn fsMain( + @builtin(position) pos : vec4f, + @builtin(subgroup_size) sg_size : u32, +) -> @location(0) vec4u { + _ = for_layout; + + let ballot = countOneBits(subgroupBallot(true)); + let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w; + + // Do all invocations in the subgroup see the same subgroup size? + let firstSize = subgroupBroadcast(sg_size, 0); + let compareBallot = countOneBits(subgroupBallot(firstSize == sg_size)); + let compareSize = compareBallot.x + compareBallot.y + compareBallot.z + compareBallot.w; + let sameSize = select(0u, 1u, compareSize == ballotSize); + + return vec4u(sg_size, ballotSize, sameSize, 0); +}`; + + await runSubgroupTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + (data: Uint32Array) => { + return checkSubgroupSizeConsistency( + data, + t.params.format, + minSubgroupSize, + maxSubgroupSize, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); + +/** + * Checks subgroup_invocation_id value consistency + * + * Very little uniformity is expected for subgroup_invocation_id. + * This function checks that all ids are less than the subgroup size + * and no id is repeated. + * @param data An array of vec4u that contains (per texel): + * * subgroup_invocation_id + * * ballot size + * * non-zero ID unique to each subgroup + * * 0 + * @param format The texture format of data + * @param width The width of the framebuffer + * @param height The height of the framebuffer + */ +function checkSubgroupInvocationIdConsistency( + data: Uint32Array, + format: GPUTextureFormat, + width: number, + height: number +): Error | undefined { + const { blockWidth, blockHeight, bytesPerBlock } = kTextureFormatInfo[format]; + const blocksPerRow = width / blockWidth; + const bytesPerRow = align(blocksPerRow * (bytesPerBlock ?? 1), 256); + const uintsPerRow = bytesPerRow / 4; + const uintsPerTexel = (bytesPerBlock ?? 
1) / blockWidth / blockHeight / 4; + + const mappings = new Map(); + for (let row = 0; row < height; row++) { + for (let col = 0; col < width; col++) { + const offset = uintsPerRow * row + col * uintsPerTexel; + const id = data[offset]; + const size = data[offset + 1]; + const repId = data[offset + 2]; + + if (repId === 0) { + continue; + } + + if (size < id) { + return new Error( + `Invocation id '${id}' is greater than subgroup size '${size}' for (${row}, ${col})` + ); + } + + let v = mappings.get(repId) ?? 0n; + const mask = 1n << BigInt(id); + if ((mask & v) !== 0n) { + return new Error(`Multiple invocations with id '${id}' in subgroup '${repId}'`); + } + v |= mask; + mappings.set(repId, v); + } + } + + return undefined; +} + +g.test('subgroup_invocation_id') + .desc('Tests subgroup_invocation_id built-in value') + .params(u => + u + .combine('size', kSizes) + .beginSubcases() + .combineWithParams([{ format: 'rgba32uint' }] as const) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(async t => { + const fsShader = ` +enable subgroups; + +const width = ${t.params.size[0]}; +const height = ${t.params.size[1]}; + +@group(0) @binding(0) var counter : atomic; + +@fragment +fn fsMain( + @builtin(position) pos : vec4f, + @builtin(subgroup_invocation_id) id : u32, + @builtin(subgroup_size) sg_size : u32, +) -> @location(0) vec4u { + let ballot = countOneBits(subgroupBallot(true)); + let ballotSize = ballot.x + ballot.y + ballot.z + ballot.w; + + // Generate representative id for this subgroup. 
+ var repId = atomicAdd(&counter, 1); + repId = subgroupBroadcast(repId, 0); + + return vec4u(id, ballotSize, repId, 0); +}`; + + await runSubgroupTest( + t, + t.params.format, + fsShader, + t.params.size[0], + t.params.size[1], + (data: Uint32Array) => { + return checkSubgroupInvocationIdConsistency( + data, + t.params.format, + t.params.size[0], + t.params.size[1] + ); + } + ); + }); diff --git a/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts new file mode 100644 index 000000000000..baf5c98326a8 --- /dev/null +++ b/src/webgpu/shader/execution/shader_io/vertex_builtins.spec.ts @@ -0,0 +1,150 @@ +export const description = `Test vertex shader builtin variables + +* test builtin(clip_distances) +`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { GPUTest, TextureTestMixin } from '../../../gpu_test.js'; + +class VertexBuiltinTest extends TextureTestMixin(GPUTest) {} + +export const g = makeTestGroup(VertexBuiltinTest); + +g.test('outputs,clip_distances') + .desc( + ` + Test vertex shader builtin(clip_distances) values. + + In the tests, we draw a square with two triangles (top-right and bottom left), whose vertices + have different clip distances values. (Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0) + 1. The clip distances values of the pixels in the top-left region should be less than 0 so these + pixels will all be invisible + 2. The clip distances values of the pixels on the top-right-to-bottom-left diagonal line should + be equal to 0 + 3. 
The clip distances values of the pixels in the bottom-right region should be greater than 0 + + -1 - - - - - 0 + | \\ x x + | \\ x x x + | \\ x x x + | x x\\ x x + | x x x x\\ x + 0 x x x x x 1 + ` + ) + .params(u => u.combine('clipDistances', [1, 2, 3, 4, 5, 6, 7, 8] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('clip-distances'); + }) + .fn(t => { + const { clipDistances } = t.params; + + // Draw two triangles (top-right and bottom left) into Red, whose vertices have different clip + // distances values. (Top Left: -1, Bottom Right: 1 Top Right & Bottom Left: 0) + const code = ` + enable clip_distances; + const kClipDistancesSize = ${clipDistances}; + struct VertexOutputs { + @builtin(position) position : vec4f, + @builtin(clip_distances) clipDistances : array, + } + @vertex + fn vsMain(@builtin(vertex_index) vertexIndex : u32) -> VertexOutputs { + var posAndClipDistances = array( + vec3f(-1.0, 1.0, -1.0), + vec3f( 1.0, -1.0, 1.0), + vec3f( 1.0, 1.0, 0.0), + vec3f(-1.0, -1.0, 0.0), + vec3f( 1.0, -1.0, 1.0), + vec3f(-1.0, 1.0, -1.0)); + var vertexOutput : VertexOutputs; + vertexOutput.position = vec4f(posAndClipDistances[vertexIndex].xy, 0.0, 1.0); + vertexOutput.clipDistances[kClipDistancesSize - 1] = posAndClipDistances[vertexIndex].z; + return vertexOutput; + } + @fragment + fn fsMain() -> @location(0) vec4f { + return vec4f(1.0, 0.0, 0.0, 1.0); + }`; + const module = t.device.createShaderModule({ code }); + const renderPipeline = t.device.createRenderPipeline({ + layout: 'auto', + vertex: { + module, + }, + fragment: { + module, + targets: [ + { + format: 'rgba8unorm', + }, + ], + }, + }); + + const kSize = 7; + const outputTexture = t.createTextureTracked({ + format: 'rgba8unorm', + size: [kSize, kSize, 1] as const, + usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, + }); + + // Clear outputTexture to Green + const commandEncoder = t.device.createCommandEncoder(); + const renderPassEncoder = 
commandEncoder.beginRenderPass({ + colorAttachments: [ + { + view: outputTexture.createView(), + loadOp: 'clear', + clearValue: { r: 0.0, g: 1.0, b: 0.0, a: 1.0 }, + storeOp: 'store', + }, + ], + }); + renderPassEncoder.setPipeline(renderPipeline); + renderPassEncoder.draw(6); + renderPassEncoder.end(); + + const kBytesPerRow = 256; + const kBytesPerPixel = 4; + const outputDataSize = kBytesPerRow * (kSize - 1) + kSize * kBytesPerPixel; + const outputBuffer = t.createBufferTracked({ + size: outputDataSize, + usage: GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST, + }); + + commandEncoder.copyTextureToBuffer( + { + texture: outputTexture, + }, + { + buffer: outputBuffer, + bytesPerRow: kBytesPerRow, + rowsPerImage: kSize, + }, + [kSize, kSize, 1] + ); + t.queue.submit([commandEncoder.finish()]); + + // The top-left part should be Green and the bottom-right part should be Red + const expectedData = new Uint8Array(outputDataSize); + for (let y = 0; y < kSize; ++y) { + const baseOffset = kBytesPerRow * y; + for (let x = 0; x < kSize; ++x) { + const lastRed = kSize - y - 1; + for (let i = 0; i < lastRed; ++i) { + expectedData[baseOffset + i * 4] = 0; + expectedData[baseOffset + i * 4 + 1] = 255; + expectedData[baseOffset + i * 4 + 2] = 0; + expectedData[baseOffset + i * 4 + 3] = 255; + } + for (let j = lastRed; j < kSize; ++j) { + expectedData[baseOffset + j * 4] = 255; + expectedData[baseOffset + j * 4 + 1] = 0; + expectedData[baseOffset + j * 4 + 2] = 0; + expectedData[baseOffset + j * 4 + 3] = 255; + } + } + } + t.expectGPUBufferValuesEqual(outputBuffer, expectedData); + }); diff --git a/src/webgpu/shader/execution/statement/phony.spec.ts b/src/webgpu/shader/execution/statement/phony.spec.ts index 1f28d040f2d8..309d8848523d 100644 --- a/src/webgpu/shader/execution/statement/phony.spec.ts +++ b/src/webgpu/shader/execution/statement/phony.spec.ts @@ -88,6 +88,10 @@ const kTests = { src: `_ = put(42i);`, values: [42, 0], }, + call_in_subexpr: { + src: `_ = put(42i) + 
1;`, + values: [42, 0], + }, nested_call: { src: `_ = put(put(42)+1);`, values: [42, 43, 0], diff --git a/src/webgpu/shader/validation/decl/var.spec.ts b/src/webgpu/shader/validation/decl/var.spec.ts index f9e15bd6e2a6..1abf8bcf4fdb 100644 --- a/src/webgpu/shader/validation/decl/var.spec.ts +++ b/src/webgpu/shader/validation/decl/var.spec.ts @@ -749,7 +749,8 @@ g.test('var_access_mode_bad_other_template_contents') .fn(t => { const prog = `@group(0) @binding(0) var<${t.params.prefix}${t.params.accessMode}${t.params.suffix}> x: i32;`; - const ok = t.params.prefix === 'storage,' && t.params.suffix === ''; + const ok = + t.params.prefix === 'storage,' && (t.params.suffix === '' || t.params.suffix === ','); t.expectCompileResult(ok, prog); }); diff --git a/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts new file mode 100644 index 000000000000..30f521e54944 --- /dev/null +++ b/src/webgpu/shader/validation/expression/binary/short_circuiting_and_or.spec.ts @@ -0,0 +1,264 @@ +export const description = ` +Validation tests for short-circuiting && and || expressions. +`; + +import { makeTestGroup } from '../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../common/util/data_tables.js'; +import { + kAllScalarsAndVectors, + ScalarType, + scalarTypeOf, + Type, +} from '../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +// A list of scalar and vector types. +const kScalarAndVectorTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('scalar_vector') + .desc( + ` + Validates that scalar and vector short-circuiting operators are only accepted for scalar booleans. 
+ ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('lhs', keysOf(kScalarAndVectorTypes)) + .combine( + 'rhs', + // Skip vec3 and vec4 on the RHS to keep the number of subcases down. + keysOf(kScalarAndVectorTypes).filter( + value => !(value.startsWith('vec3') || value.startsWith('vec4')) + ) + ) + .beginSubcases() + ) + .beforeAllSubcases(t => { + if ( + scalarTypeOf(kScalarAndVectorTypes[t.params.lhs]) === Type.f16 || + scalarTypeOf(kScalarAndVectorTypes[t.params.rhs]) === Type.f16 + ) { + t.selectDeviceOrSkipTestCase('shader-f16'); + } + }) + .fn(t => { + const lhs = kScalarAndVectorTypes[t.params.lhs]; + const rhs = kScalarAndVectorTypes[t.params.rhs]; + const lhsElement = scalarTypeOf(lhs); + const rhsElement = scalarTypeOf(rhs); + const hasF16 = lhsElement === Type.f16 || rhsElement === Type.f16; + const code = ` +${hasF16 ? 'enable f16;' : ''} +const lhs = ${lhs.create(0).wgsl()}; +const rhs = ${rhs.create(0).wgsl()}; +const foo = lhs ${t.params.op} rhs; +`; + + // Determine if the types are compatible. + let valid = false; + if (lhs instanceof ScalarType && rhs instanceof ScalarType) { + valid = lhsElement === Type.bool && rhsElement === Type.bool; + } + + t.expectCompileResult(valid, code); + }); + +interface InvalidTypeConfig { + // An expression that produces a value of the target type. + expr: string; + // A function that converts an expression of the target type into a valid boolean operand. 
+ control: (x: string) => string; +} +const kInvalidTypes: Record = { + mat2x2f: { + expr: 'm', + control: e => `bool(${e}[0][0])`, + }, + + array: { + expr: 'arr', + control: e => `${e}[0]`, + }, + + ptr: { + expr: '(&b)', + control: e => `*${e}`, + }, + + atomic: { + expr: 'a', + control: e => `bool(atomicLoad(&${e}))`, + }, + + texture: { + expr: 't', + control: e => `bool(textureLoad(${e}, vec2(), 0).x)`, + }, + + sampler: { + expr: 's', + control: e => `bool(textureSampleLevel(t, ${e}, vec2(), 0).x)`, + }, + + struct: { + expr: 'str', + control: e => `${e}.b`, + }, +}; + +g.test('invalid_types') + .desc( + ` + Validates that short-circuiting expressions are never accepted for non-scalar and non-vector types. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('type', keysOf(kInvalidTypes)) + .combine('control', [true, false]) + .beginSubcases() + ) + .fn(t => { + const type = kInvalidTypes[t.params.type]; + const expr = t.params.control ? type.control(type.expr) : type.expr; + const code = ` +@group(0) @binding(0) var t : texture_2d; +@group(0) @binding(1) var s : sampler; +@group(0) @binding(2) var a : atomic; + +struct S { b : bool } + +var b : bool; +var m : mat2x2f; +var arr : array; +var str : S; + +@compute @workgroup_size(1) +fn main() { + let foo = ${expr} ${t.params.op} ${expr}; +} +`; + + t.expectCompileResult(t.params.control, code); + }); + +// A map from operator to the value of the LHS that will cause short-circuiting. +const kLhsForShortCircuit: Record = { + '&&': false, + '||': true, +}; + +// A list of expressions that are invalid unless guarded by a short-circuiting expression. 
+const kInvalidRhsExpressions: Record = { + overflow: 'i32(1< + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidRhsExpressions)) + .combine('short_circuit', [true, false]) + .beginSubcases() + ) + .fn(t => { + let lhs = kLhsForShortCircuit[t.params.op]; + if (!t.params.short_circuit) { + lhs = !lhs; + } + const code = ` +const thirty_one = 31u; +const zero_i32 = 0i; +const one_f32 = 1.0f; + +@compute @workgroup_size(1) +fn main() { + let foo = ${lhs} ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]}; +} +`; + + t.expectCompileResult(t.params.short_circuit, code); + }); + +g.test('invalid_rhs_override') + .desc( + ` + Validates that a short-circuiting expression with an override-expression LHS guards the evaluation of its RHS expression. + ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidRhsExpressions)) + .combine('short_circuit', [true, false]) + .beginSubcases() + ) + .fn(t => { + let lhs = kLhsForShortCircuit[t.params.op]; + if (!t.params.short_circuit) { + lhs = !lhs; + } + const code = ` +override cond : bool; +override zero_i32 = 0i; +override one_f32 = 1.0f; +override thirty_one = 31u; +override foo = cond ${t.params.op} ${kInvalidRhsExpressions[t.params.rhs]}; +`; + + const constants: Record = {}; + constants['cond'] = lhs ? 1 : 0; + t.expectPipelineResult({ + expectedResult: t.params.short_circuit, + code, + constants, + reference: ['foo'], + }); + }); + +// A list of expressions that are invalid unless guarded by a short-circuiting expression. +// The control case will use `value = 10`, the failure case will use `value = 1`. +const kInvalidArrayCounts: Record = { + negative: 'value - 2', + sqrt_neg1: 'u32(sqrt(value - 2))', + nested: '10 + array()[0]', +}; + +g.test('invalid_array_count_on_rhs') + .desc( + ` + Validates that an invalid array count expression is not guarded by a short-circuiting expression. 
+ ` + ) + .params(u => + u + .combine('op', ['&&', '||']) + .combine('rhs', keysOf(kInvalidArrayCounts)) + .combine('control', [true, false]) + .beginSubcases() + ) + .fn(t => { + const lhs = t.params.op === '&&' ? 'false' : 'true'; + const code = ` +const value = ${t.params.control ? '10' : '1'}; + +@compute @workgroup_size(1) +fn main() { + let foo = ${lhs} ${t.params.op} array()[0]; +} +`; + + t.expectCompileResult(t.params.control, code); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts index 1ac752a3bfa9..ff0114097f90 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/clamp.spec.ts @@ -127,6 +127,8 @@ Validates that low <= high. const scalar = scalarTypeOf(ty); return scalar !== Type.abstractInt && scalar !== Type.abstractFloat; }) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { const ty = kValuesTypes[t.params.type]; @@ -176,7 +178,10 @@ fn foo() { const shader_error = error && t.params.lowStage === 'constant' && t.params.highStage === 'constant'; const pipeline_error = - error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime'; + t.params.in_shader && + error && + t.params.lowStage !== 'runtime' && + t.params.highStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -187,6 +192,7 @@ fn foo() { code: wgsl, constants, reference: ['o_low', 'o_high'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts index 80fe7ccaca5e..32abc477ee8f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/extractBits.spec.ts @@ -98,6 +98,8 @@ Validates that count and offset must be smaller than the size of the primitive. { offset: 0, count: 33 }, { offset: 1, count: 33 }, ] as const) + // in_shader: Is the function call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .fn(t => { let offsetArg = ''; @@ -138,7 +140,10 @@ fn foo() { const shader_error = error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant'; const pipeline_error = - error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime'; + t.params.in_shader && + error && + t.params.offsetStage !== 'runtime' && + t.params.countStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -149,6 +154,7 @@ fn foo() { code: wgsl, constants, reference: ['o_offset', 'o_count'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts index 57644ad36fb4..b302bfd14677 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/insertBits.spec.ts @@ -119,6 +119,8 @@ Validates that count and offset must be smaller than the size of the primitive. { offset: 0, count: 33 }, { offset: 1, count: 33 }, ] as const) + // in_shader: Is the function call statically accessed by the entry point? 
+ .combine('in_shader', [false, true] as const) ) .fn(t => { let offsetArg = ''; @@ -160,7 +162,10 @@ fn foo() { const shader_error = error && t.params.offsetStage === 'constant' && t.params.countStage === 'constant'; const pipeline_error = - error && t.params.offsetStage !== 'runtime' && t.params.countStage !== 'runtime'; + t.params.in_shader && + error && + t.params.offsetStage !== 'runtime' && + t.params.countStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -171,6 +176,7 @@ fn foo() { code: wgsl, constants, reference: ['o_offset', 'o_count'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts index 826354d1ff08..55a702d71f0a 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/ldexp.spec.ts @@ -143,6 +143,8 @@ g.test('partial_values') cases.push({ value: bias + 2 }); return cases; }) + // in_shader: Is the functino call statically accessed by the entry point? + .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { const ty = kValidArgumentTypesA[t.params.typeA]; @@ -179,7 +181,7 @@ fn foo() { const bias = biasForType(scalarTypeOf(tyA)); const error = t.params.value > bias + 1; const shader_error = error && t.params.stage === 'constant'; - const pipeline_error = error && t.params.stage === 'override'; + const pipeline_error = t.params.in_shader && error && t.params.stage === 'override'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -189,6 +191,7 @@ fn foo() { code: wgsl, constants, reference: ['o_b'], + statements: t.params.in_shader ? 
['foo();'] : [], }); } }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts index 28e1d9cdc61b..bed18020632d 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/normalize.spec.ts @@ -12,7 +12,13 @@ import { scalarTypeOf, ScalarType, } from '../../../../../util/conversion.js'; -import { QuantizeFunc, quantizeToF16, quantizeToF32 } from '../../../../../util/math.js'; +import { + QuantizeFunc, + quantizeToF16, + quantizeToF32, + isSubnormalNumberF16, + isSubnormalNumberF32, +} from '../../../../../util/math.js'; import { ShaderValidationTest } from '../../../shader_validation_test.js'; import { @@ -37,6 +43,17 @@ function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc { } } +function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean { + switch (type) { + case Type.f32: + return isSubnormalNumberF32; + case Type.f16: + return isSubnormalNumberF16; + default: + return (v: number) => false; + } +} + g.test('values') .desc( ` @@ -73,6 +90,11 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec expectedResult = false; } + // We skip tests with values that would involve subnormal computations in + // order to avoid defining a specific behavior (flush to zero). 
+ const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType); + t.skipIf(isSubnormalFn(vv) || isSubnormalFn(dp) || isSubnormalFn(len)); + validateConstOrOverrideBuiltinEval( t, builtin, diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts new file mode 100644 index 000000000000..6988f17b9ede --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/quadBroadcast.spec.ts @@ -0,0 +1,286 @@ +export const description = ` +Validation tests for quadBroadcast +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + isConvertible, + Type, + elementTypeOf, + kAllScalarsAndVectors, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = quadBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 
'enable subgroups_f16;' : ''} +fn foo() { + _ = quadBroadcast(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +const kStages: Record = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = quadBroadcast(0, 0); +}`, + override: ` +enable subgroups; +override o = quadBroadcast(0, 0);`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = quadBroadcast(0, 0); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}quadBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = quadBroadcast(${type.create(0).wgsl()}, 0); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || 
retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = quadBroadcast(${dataType.create(0).wgsl()}, 0); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('id_type') + .desc('Validates id parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + const wgsl = ` +enable subgroups; +@compute @workgroup_size(1) +fn main() { + _ = quadBroadcast(0, ${type.create(0).wgsl()}); +}`; + + const expect = isConvertible(type, Type.u32) || isConvertible(type, Type.i32); + t.expectCompileResult(expect, wgsl); + }); + +const kIdCases = { + const_decl: { + code: 'const_decl', + valid: true, + }, + const_literal: { + code: '0', + valid: true, + }, + const_expr: { + code: 'const_decl + 2', + valid: true, + }, + let_decl: { + code: 'let_decl', + valid: false, + }, + override_decl: { + code: 'override_decl', + valid: false, + }, + var_func_decl: { + code: 'var_func_decl', + valid: false, + }, + var_priv_decl: { + code: 'var_priv_decl', + valid: false, + }, +}; + +g.test('id_constness') + .desc('Validates that id must be a const-expression') + .params(u => u.combine('value', keysOf(kIdCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +override override_decl : u32; +var var_priv_decl : u32; +fn foo() { + var var_func_decl : u32; + let let_decl = var_func_decl; + const const_decl = 0u; + _ = quadBroadcast(0, ${kIdCases[t.params.value].code}); +}`; + + t.expectCompileResult(kIdCases[t.params.value].valid, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + 
.params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = quadBroadcast(0, 0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts new file mode 100644 index 000000000000..3812ba057ed6 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/quadSwap.spec.ts @@ -0,0 +1,227 @@ +export const description = ` +Validation tests for quadSwapX, quadSwapY, and quadSwapDiagonal. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['quadSwapX', 'quadSwapY', 'quadSwapDiagonal'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn 
main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. + const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts index 51cf9553785c..387340f80e9f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/refract.spec.ts @@ -2,10 +2,21 @@ const builtin = 'refract'; export const description = ` Validation tests for the ${builtin}() builtin. 
`; - import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; -import { Type, kConvertableToFloatVectors, scalarTypeOf } from '../../../../../util/conversion.js'; +import { + Type, + kConvertableToFloatVectors, + scalarTypeOf, + ScalarType, +} from '../../../../../util/conversion.js'; +import { + QuantizeFunc, + quantizeToF16, + quantizeToF32, + isSubnormalNumberF16, + isSubnormalNumberF32, +} from '../../../../../util/math.js'; import { ShaderValidationTest } from '../../../shader_validation_test.js'; import { @@ -20,6 +31,28 @@ export const g = makeTestGroup(ShaderValidationTest); const kValidArgumentTypes = objectsToRecord(kConvertableToFloatVectors); +function quantizeFunctionForScalarType(type: ScalarType): QuantizeFunc { + switch (type) { + case Type.f32: + return quantizeToF32; + case Type.f16: + return quantizeToF16; + default: + return (v: number) => v; + } +} + +function isSubnormalFunctionForScalarType(type: ScalarType): (v: number) => boolean { + switch (type) { + case Type.f32: + return isSubnormalNumberF32; + case Type.f16: + return isSubnormalNumberF16; + default: + return (v: number) => false; + } +} + g.test('values') .desc( ` @@ -64,6 +97,17 @@ where a the calculations result in a non-representable value for the given type. const c2_one_minus_b_dot_a_2 = vCheck.checkedResult(c2 * one_minus_b_dot_a_2); const k = vCheck.checkedResult(1.0 - c2_one_minus_b_dot_a_2); + const quantizeFn = quantizeFunctionForScalarType(scalarType); + const isSubnormalFn = isSubnormalFunctionForScalarType(scalarType); + // We skip tests with values that would involve subnormal computations in + // order to avoid defining a specific behavior (flush to zero). 
+ t.skipIf( + isSubnormalFn(quantizeFn(b_dot_a)) || + isSubnormalFn(quantizeFn(b_dot_a_2)) || + isSubnormalFn(quantizeFn(c2)) || + isSubnormalFn(quantizeFn(k)) + ); + if (k >= 0) { // If the k is near zero it may fail on some implementations which implement sqrt as // 1/inversesqrt, so skip the test. diff --git a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts index 5a5a28fc7362..2879055ab216 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/smoothstep.spec.ts @@ -51,16 +51,15 @@ Validates that constant evaluation and override evaluation of ${builtin}() rejec .fn(t => { const type = kValuesTypes[t.params.type]; - // We expect to fail if low >= high as it results in a DBZ - const expectedResult = t.params.value1 >= t.params.value2; + // We expect to fail if low >= high. + const expectedResult = t.params.value1 < t.params.value2; validateConstOrOverrideBuiltinEval( t, builtin, expectedResult, [type.create(t.params.value1), type.create(t.params.value2), type.create(0)], - t.params.stage, - /* returnType */ concreteTypeOf(type, [Type.f32]) + t.params.stage ); }); @@ -81,6 +80,8 @@ g.test('partial_eval_errors') .beginSubcases() .expand('low', u => [0, 10]) .expand('high', u => [0, 10]) + // in_shader: Is the function call statically accessed by the entry point? 
+ .combine('in_shader', [false, true] as const) ) .beforeAllSubcases(t => { if (scalarTypeOf(kValuesTypes[t.params.type]) === Type.f16) { @@ -130,7 +131,10 @@ fn foo() { const shader_error = error && t.params.lowStage === 'constant' && t.params.highStage === 'constant'; const pipeline_error = - error && t.params.lowStage !== 'runtime' && t.params.highStage !== 'runtime'; + t.params.in_shader && + error && + t.params.lowStage !== 'runtime' && + t.params.highStage !== 'runtime'; t.expectCompileResult(!shader_error, wgsl); if (!shader_error) { const constants: Record = {}; @@ -141,6 +145,7 @@ fn foo() { code: wgsl, constants, reference: ['o_low', 'o_high'], + statements: t.params.in_shader ? ['foo();'] : [], }); } }); @@ -159,10 +164,11 @@ Validates that scalar and vector arguments are rejected by ${builtin}() if not f }) .fn(t => { const type = kArgumentTypes[t.params.type]; + const expectedResult = isConvertibleToFloatType(elementTypeOf(type)); validateConstOrOverrideBuiltinEval( t, builtin, - /* expectedResult */ isConvertibleToFloatType(elementTypeOf(type)), + expectedResult, [type.create(0), type.create(1), type.create(2)], 'constant', /* returnType */ concreteTypeOf(type, [Type.f32]) @@ -344,7 +350,7 @@ g.test('early_eval_errors') t, builtin, /* expectedResult */ t.params.low < t.params.high, - [f32(0), f32(t.params.low), f32(t.params.high)], + [f32(t.params.low), f32(t.params.high), f32(0)], t.params.stage ); }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts new file mode 100644 index 000000000000..4f2a2af52197 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAdd.spec.ts @@ -0,0 +1,235 @@ +export const description = ` +Validation tests for subgroupAdd and subgroupExclusiveAdd +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from 
'../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kBuiltins = ['subgroupAdd', 'subgroupExclusiveAdd', 'subgroupInclusiveAdd'] as const; + +const kStages: Record string> = { + constant: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${builtin}(0); +}`; + }, + override: (builtin: string) => { + return ` +enable subgroups; +override o = ${builtin}(0);`; + }, + runtime: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${builtin}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.builtin); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => + u + .combine('must_use', [true, false] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.builtin}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => + u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.builtin}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates data parameter type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + 
const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => + u + .combine('stage', ['compute', 'fragment', 'vertex'] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.builtin}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); + +const kInvalidTypeCases: Record = { + array_u32: `array(1u,2u,3u)`, + array_f32: `array()`, + struct_s: `S()`, + struct_t: `T(1, 1)`, + ptr_func: `&func_var`, + ptr_priv: `&priv_var`, + frexp_ret: `frexp(0)`, +}; + +g.test('invalid_types') + .desc('Tests that invalid non-plain types are rejected') + .params(u => + u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const val = kInvalidTypeCases[t.params.case]; + const wgsl = ` +enable subgroups; + +struct S { + x : u32 +} + +struct T { + a : f32, + 
b : u32, +} + +var priv_var : f32; +fn foo() { + var func_var : vec4u; + _ = ${t.params.builtin}(${val}); +}`; + + t.expectCompileResult(false, wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts new file mode 100644 index 000000000000..eaee33e62cff --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupAnyAll.spec.ts @@ -0,0 +1,186 @@ +export const description = ` +Validation tests for subgroupAny and subgroupAll. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupAny', 'subgroupAll'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(true); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(true); +}`; + }, + override: (op: string) => { + return ` +enable subgroups +override o = select(0, 1, ${op}(true));`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(true); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.op}(false); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('type', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.type]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${type.toString()} = ${t.params.op}(true); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', 
['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(true); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts index afbe33e93c56..5f53847be25c 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBallot.spec.ts @@ -9,6 +9,22 @@ import { ShaderValidationTest } from '../../../shader_validation_test.js'; export const g = makeTestGroup(ShaderValidationTest); +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = subgroupBallot(true); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + const kStages: Record = { constant: ` enable subgroups; @@ -38,6 +54,23 @@ g.test('early_eval') t.expectCompileResult(t.params.stage === 'runtime', code); }); +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}subgroupBallot(true); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); g.test('data_type') @@ -69,7 +102,7 @@ fn main() { }); g.test('return_type') - .desc('Validates data parameter type') + .desc('Validates return type') .params(u => u.combine('type', keysOf(kArgumentTypes)).filter(t => { const type = kArgumentTypes[t.type]; diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts new file mode 100644 index 000000000000..ca0dfb6fd719 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBitwise.spec.ts @@ -0,0 +1,204 @@ +export const description = ` +Validation tests for subgroupAnd, subgroupOr, and subgroupXor. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupAnd', 'subgroupOr', 'subgroupXor'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); 
+ }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? '_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + const expect = isConvertible(eleType, Type.u32) || isConvertible(eleType, Type.i32); + t.expectCompileResult(expect, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable 
subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. + const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && (eleRetType === Type.i32 || eleRetType === Type.u32); + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts index a71b145092c8..fd76cd419b7f 100644 --- a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcast.spec.ts @@ -14,6 +14,44 @@ import { ShaderValidationTest } from 
'../../../shader_validation_test.js'; export const g = makeTestGroup(ShaderValidationTest); +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupBroadcast(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups_f16 feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = subgroupBroadcast(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); const kStages: Record<string, string> = { @@ -156,6 +194,58 @@ fn main() { t.expectCompileResult(expect, wgsl); }); +const kIdCases = { + const_decl: { + code: 'const_decl', + valid: true, + }, + const_literal: { + code: '0', + valid: true, + }, + const_expr: { + code: 'const_decl + 2', + valid: true, + }, + let_decl: { + code: 'let_decl', + valid: false, + }, + override_decl: { + code: 'override_decl', + valid: false, + }, + var_func_decl: { + code: 'var_func_decl', + valid: false, + }, + var_priv_decl: { + code: 'var_priv_decl', + valid: false, + }, +}; + +g.test('id_constness') + .desc('Validates that id must be a const-expression') + .params(u => u.combine('value', keysOf(kIdCases))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const 
wgsl = ` +enable subgroups; +override override_decl : u32; +var<private> var_priv_decl : u32; +fn foo() { + var var_func_decl : u32; + let let_decl = var_func_decl; + const const_decl = 0u; + _ = subgroupBroadcast(0, ${kIdCases[t.params.value].code}); +}`; + + t.expectCompileResult(kIdCases[t.params.value].valid, wgsl); + }); + g.test('stage') .desc('Validates it is only usable in correct stage') .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts new file mode 100644 index 000000000000..4525b6b97ef8 --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupBroadcastFirst.spec.ts @@ -0,0 +1,210 @@ +export const description = ` +Validation tests for subgroupBroadcastFirst +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = subgroupBroadcastFirst(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups_f16 feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = subgroupBroadcastFirst(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +const kStages: Record<string, string> = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = subgroupBroadcastFirst(0); +}`, + override: ` +enable subgroups; +override o = subgroupBroadcastFirst(0);`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = subgroupBroadcastFirst(0); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}subgroupBroadcastFirst(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kArgumentTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = subgroupBroadcastFirst(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if 
(dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = subgroupBroadcastFirst(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = subgroupBroadcastFirst(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts new file mode 100644 index 000000000000..5637860c59ce --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupElect.spec.ts @@ -0,0 +1,175 @@ +export const description = ` +Validation tests for subgroupElect. 
+`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 'enable subgroups;' : ''} +fn foo() { + _ = subgroupElect(); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, string> = { + constant: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = subgroupElect(); +}`, + override: ` +enable subgroups; +override o = select(0, 1, subgroupElect());`, + runtime: ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = subgroupElect(); +}`, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages))) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage]; + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}subgroupElect(); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates there are no valid data parameters') + .params(u => u.combine('type', keysOf(kTypes))) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = subgroupElect(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(false, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u.combine('type', keysOf(kTypes)).filter(t => { + const type = kTypes[t.type]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${type.toString()} = subgroupElect(); +}`; + + t.expectCompileResult(type === Type.bool, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const)) + 
.beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = subgroupElect(); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts new file mode 100644 index 000000000000..84c1860019ee --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMinMax.spec.ts @@ -0,0 +1,227 @@ +export const description = ` +Validation tests for subgroupMin and subgroupMax. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = ['subgroupMin', 'subgroupMax'] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups_f16 feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = ${op}(0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.op}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn 
main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. + const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts new file mode 100644 index 000000000000..0b50d4c9df2d --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupMul.spec.ts @@ -0,0 +1,235 @@ +export const description = ` +Validation tests for subgroupMul, subgroupExclusiveMul, and subgroupInclusiveMul +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } 
from '../../../../../../common/util/data_tables.js'; +import { Type, elementTypeOf, kAllScalarsAndVectors } from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kBuiltins = ['subgroupMul', 'subgroupExclusiveMul', 'subgroupInclusiveMul'] as const; + +const kStages: Record<string, (builtin: string) => string> = { + constant: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${builtin}(0); +}`; + }, + override: (builtin: string) => { + return ` +enable subgroups; +override o = ${builtin}(0);`; + }, + runtime: (builtin: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${builtin}(0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).beginSubcases().combine('builtin', kBuiltins)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.builtin); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => + u + .combine('must_use', [true, false] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.builtin}(0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kArgumentTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => + u.combine('type', keysOf(kArgumentTypes)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kArgumentTypes[t.params.type]; + if (type.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kArgumentTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.builtin}(${type.create(0).wgsl()}); +}`; + + t.expectCompileResult(elementTypeOf(type) !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('dataType', keysOf(kArgumentTypes)) + .combine('retType', keysOf(kArgumentTypes)) + .filter(t => { + const retType = kArgumentTypes[t.retType]; + const retEleTy = elementTypeOf(retType); + const dataType = kArgumentTypes[t.dataType]; + const dataEleTy = elementTypeOf(dataType); + return ( + retEleTy !== Type.abstractInt && + retEleTy !== Type.abstractFloat && + dataEleTy !== Type.abstractInt && + dataEleTy !== Type.abstractFloat + ); + }) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + if (dataType.requiresF16() || retType.requiresF16()) { + features.push('subgroups-f16' as GPUFeatureName); + features.push('shader-f16'); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + 
const dataType = kArgumentTypes[t.params.dataType]; + const retType = kArgumentTypes[t.params.retType]; + let enables = `enable subgroups;\n`; + if (dataType.requiresF16() || retType.requiresF16()) { + enables += `enable subgroups_f16;\nenable f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + let res : ${retType.toString()} = ${t.params.builtin}(${dataType.create(0).wgsl()}); +}`; + + const expect = elementTypeOf(dataType) !== Type.bool && dataType === retType; + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('Validates it is only usable in correct stage') + .params(u => + u + .combine('stage', ['compute', 'fragment', 'vertex'] as const) + .beginSubcases() + .combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.builtin}(0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); + +const kInvalidTypeCases: Record = { + array_u32: `array(1u,2u,3u)`, + array_f32: `array()`, + struct_s: `S()`, + struct_t: `T(1, 1)`, + ptr_func: `&func_var`, + ptr_priv: `&priv_var`, + frexp_ret: `frexp(0)`, +}; + +g.test('invalid_types') + .desc('Tests that invalid non-plain types are rejected') + .params(u => + u.combine('case', keysOf(kInvalidTypeCases)).beginSubcases().combine('builtin', kBuiltins) + ) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const val = kInvalidTypeCases[t.params.case]; + const wgsl = ` +enable subgroups; + +struct S { + x : u32 +} + +struct T { + a : f32, + 
b : u32, +} + +var priv_var : f32; +fn foo() { + var func_var : vec4u; + _ = ${t.params.builtin}(${val}); +}`; + + t.expectCompileResult(false, wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts new file mode 100644 index 000000000000..62ffb5af36dd --- /dev/null +++ b/src/webgpu/shader/validation/expression/call/builtin/subgroupShuffle.spec.ts @@ -0,0 +1,262 @@ +export const description = ` +Validation tests for subgroupShuffle, subgroupShuffleXor, subgroupShuffleUp, and subgroupShuffleDown. +`; + +import { makeTestGroup } from '../../../../../../common/framework/test_group.js'; +import { keysOf, objectsToRecord } from '../../../../../../common/util/data_tables.js'; +import { + Type, + elementTypeOf, + kAllScalarsAndVectors, + isConvertible, +} from '../../../../../util/conversion.js'; +import { ShaderValidationTest } from '../../../shader_validation_test.js'; + +export const g = makeTestGroup(ShaderValidationTest); + +const kOps = [ + 'subgroupShuffle', + 'subgroupShuffleXor', + 'subgroupShuffleUp', + 'subgroupShuffleDown', +] as const; + +g.test('requires_subgroups') + .desc('Validates that the subgroups feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +${t.params.enable ? 
'enable subgroups;' : ''} +fn foo() { + _ = ${t.params.op}(0, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +g.test('requires_subgroups_f16') + .desc('Validates that the subgroups_f16 feature is required') + .params(u => u.combine('enable', [false, true] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + const features: GPUFeatureName[] = ['shader-f16', 'subgroups' as GPUFeatureName]; + if (t.params.enable) { + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const wgsl = ` +enable f16; +enable subgroups; +${t.params.enable ? 'enable subgroups_f16;' : ''} +fn foo() { + _ = ${t.params.op}(0h, 0); +}`; + + t.expectCompileResult(t.params.enable, wgsl); + }); + +const kStages: Record<string, (op: string) => string> = { + constant: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + const x = ${op}(0, 0); +}`; + }, + override: (op: string) => { + return ` +enable subgroups; +override o = ${op}(0, 0);`; + }, + runtime: (op: string) => { + return ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + let x = ${op}(0, 0); +}`; + }, +}; + +g.test('early_eval') + .desc('Ensures the builtin is not able to be compile time evaluated') + .params(u => u.combine('stage', keysOf(kStages)).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const code = kStages[t.params.stage](t.params.op); + t.expectCompileResult(t.params.stage === 'runtime', code); + }); + +g.test('must_use') + .desc('Tests that the builtin has the @must_use attribute') + .params(u => u.combine('must_use', [true, false] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const wgsl = ` +enable subgroups; +@compute @workgroup_size(16) +fn main() { + ${t.params.must_use ? 
'_ = ' : ''}${t.params.op}(0, 0); +}`; + + t.expectCompileResult(t.params.must_use, wgsl); + }); + +const kTypes = objectsToRecord(kAllScalarsAndVectors); + +g.test('data_type') + .desc('Validates data parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(${type.create(0).wgsl()}, 0); +}`; + + const eleType = elementTypeOf(type); + t.expectCompileResult(eleType !== Type.bool, wgsl); + }); + +g.test('return_type') + .desc('Validates return type') + .params(u => + u + .combine('retType', keysOf(kTypes)) + .filter(t => { + const type = kTypes[t.retType]; + const eleType = elementTypeOf(type); + return eleType !== Type.abstractInt && eleType !== Type.abstractFloat; + }) + .combine('op', kOps) + .combine('paramType', keysOf(kTypes)) + ) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + if (retType.requiresF16() || paramType.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const retType = kTypes[t.params.retType]; + const paramType = kTypes[t.params.paramType]; + let enables = `enable subgroups;\n`; + if (retType.requiresF16() || paramType.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) 
+fn main() { + let res : ${retType.toString()} = ${t.params.op}(${paramType.create(0).wgsl()}, 0); +}`; + + // Can't just use isConvertible since functions must concretize the parameter + // type before examining the whole statement. + const eleParamType = elementTypeOf(paramType); + const eleRetType = elementTypeOf(retType); + let expect = paramType === retType && eleRetType !== Type.bool; + if (eleParamType === Type.abstractInt) { + expect = eleRetType === Type.i32 && isConvertible(paramType, retType); + } else if (eleParamType === Type.abstractFloat) { + expect = eleRetType === Type.f32 && isConvertible(paramType, retType); + } + t.expectCompileResult(expect, wgsl); + }); + +g.test('param2_type') + .desc('Validates shuffle parameter type') + .params(u => u.combine('type', keysOf(kTypes)).combine('op', kOps)) + .beforeAllSubcases(t => { + const features = ['subgroups' as GPUFeatureName]; + const type = kTypes[t.params.type]; + if (type.requiresF16()) { + features.push('shader-f16'); + features.push('subgroups-f16' as GPUFeatureName); + } + t.selectDeviceOrSkipTestCase(features); + }) + .fn(t => { + const type = kTypes[t.params.type]; + let enables = `enable subgroups;\n`; + if (type.requiresF16()) { + enables += `enable f16;\nenable subgroups_f16;`; + } + const wgsl = ` +${enables} +@compute @workgroup_size(1) +fn main() { + _ = ${t.params.op}(0, ${type.create(0).wgsl()}); +}`; + + const expect = + isConvertible(type, Type.u32) || (type === Type.i32 && t.params.op === 'subgroupShuffle'); + t.expectCompileResult(expect, wgsl); + }); + +g.test('stage') + .desc('validates builtin is only usable in the correct stages') + .params(u => u.combine('stage', ['compute', 'fragment', 'vertex'] as const).combine('op', kOps)) + .beforeAllSubcases(t => { + t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + }) + .fn(t => { + const compute = ` +@compute @workgroup_size(1) +fn main() { + foo(); +}`; + + const fragment = ` +@fragment +fn main() { + foo(); +}`; + + const 
vertex = ` +@vertex +fn main() -> @builtin(position) vec4f { + foo(); + return vec4f(); +}`; + + const entry = { compute, fragment, vertex }[t.params.stage]; + const wgsl = ` +enable subgroups; +fn foo() { + _ = ${t.params.op}(0, 0); +} + +${entry} +`; + + t.expectCompileResult(t.params.stage !== 'vertex', wgsl); + }); diff --git a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts index 85bed5228482..d162ba3286b9 100644 --- a/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts +++ b/src/webgpu/shader/validation/expression/matrix/add_sub.spec.ts @@ -275,7 +275,7 @@ g.test('underflow_f16') let rhs = `mat${t.params.c}x${t.params.r}h(`; for (let i = 0; i < t.params.c; i++) { for (let k = 0; k < t.params.r; k++) { - lhs += `${kValue.f32.negative.min / 2},`; + lhs += `${kValue.f16.negative.min / 2},`; rhs += `${t.params.rhs},`; } } diff --git a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts index e76e40265e09..a3a5d368dc2d 100644 --- a/src/webgpu/shader/validation/expression/matrix/mul.spec.ts +++ b/src/webgpu/shader/validation/expression/matrix/mul.spec.ts @@ -631,7 +631,7 @@ g.test('overflow_mat_f16_internal') for (let i = 0; i < t.params.c; i++) { for (let k = 0; k < t.params.r; k++) { lhs += `${t.params.lhs},`; - rhs += `1`; + rhs += `1,`; } } rhs += ')'; diff --git a/src/webgpu/shader/validation/extension/clip_distances.spec.ts b/src/webgpu/shader/validation/extension/clip_distances.spec.ts new file mode 100644 index 000000000000..88957d8e8e62 --- /dev/null +++ b/src/webgpu/shader/validation/extension/clip_distances.spec.ts @@ -0,0 +1,43 @@ +export const description = ` +Validation tests for the clip_distances extension +`; + +import { makeTestGroup } from '../../../../common/framework/test_group.js'; +import { ShaderValidationTest } from '../shader_validation_test.js'; + +export const g = 
makeTestGroup(ShaderValidationTest); + +g.test('use_clip_distances_requires_extension_enabled') + .desc( + `Checks that the clip_distances built-in variable is only allowed with the WGSL extension + clip_distances enabled in shader and the WebGPU extension clip-distances supported on the + device.` + ) + .params(u => + u.combine('requireExtension', [true, false]).combine('enableExtension', [true, false]) + ) + .beforeAllSubcases(t => { + if (t.params.requireExtension) { + t.selectDeviceOrSkipTestCase({ requiredFeatures: ['clip-distances'] }); + } + }) + .fn(t => { + const { requireExtension, enableExtension } = t.params; + + t.expectCompileResult( + requireExtension && enableExtension, + ` + ${enableExtension ? 'enable clip_distances;' : ''} + struct VertexOut { + @builtin(clip_distances) my_clip_distances : array, + @builtin(position) my_position : vec4f, + } + @vertex fn main() -> VertexOut { + var output : VertexOut; + output.my_clip_distances[0] = 1.0; + output.my_position = vec4f(0.0, 0.0, 0.0, 1.0); + return output; + } + ` + ); + }); diff --git a/src/webgpu/shader/validation/parse/identifiers.spec.ts b/src/webgpu/shader/validation/parse/identifiers.spec.ts index 0dd429d0a72c..4a7ec70120ff 100644 --- a/src/webgpu/shader/validation/parse/identifiers.spec.ts +++ b/src/webgpu/shader/validation/parse/identifiers.spec.ts @@ -199,6 +199,8 @@ const kInvalidIdentifiers = new Set([ 'noexcept', 'noinline', 'nointerpolation', + 'non_coherent', + 'noncoherent', 'noperspective', 'null', 'nullptr', diff --git a/src/webgpu/shader/validation/shader_io/builtins.spec.ts b/src/webgpu/shader/validation/shader_io/builtins.spec.ts index 85a30fa0ec60..3d01f8f23a3e 100644 --- a/src/webgpu/shader/validation/shader_io/builtins.spec.ts +++ b/src/webgpu/shader/validation/shader_io/builtins.spec.ts @@ -10,7 +10,7 @@ export const g = makeTestGroup(ShaderValidationTest); // List of all built-in variables and their stage, in|out usage, and type. 
// Taken from table in Section 15: -// https://www.w3.org/TR/2021/WD-WGSL-20211013/#builtin-variables +// https://www.w3.org/TR/WGSL/#builtin-inputs-outputs export const kBuiltins = [ { name: 'vertex_index', stage: 'vertex', io: 'in', type: 'u32' }, { name: 'instance_index', stage: 'vertex', io: 'in', type: 'u32' }, @@ -30,6 +30,14 @@ export const kBuiltins = [ { name: 'subgroup_size', stage: 'compute', io: 'in', type: 'u32' }, { name: 'subgroup_invocation_id', stage: 'fragment', io: 'in', type: 'u32' }, { name: 'subgroup_size', stage: 'fragment', io: 'in', type: 'u32' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, + { name: 'clip_distances', stage: 'vertex', io: 'out', type: 'array' }, ] as const; // List of types to test against. 
@@ -64,7 +72,15 @@ const kTestTypes = [ 'array', 'array', 'array', + 'array', + 'array', + 'array', 'array', + 'array', + 'array', + 'array', + 'array', + 'array', 'MyStruct', ] as const; @@ -87,7 +103,16 @@ g.test('stage_inout') ); if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } + t.skipIf( + t.params.name !== 'position' && + t.params.target_stage === 'vertex' && + t.params.target_io === 'out' && + !t.params.use_struct, + 'missing @builtin(position) in the vertex output when the vertex output is not a struct' + ); }) .fn(t => { const code = generateShader({ @@ -117,9 +142,9 @@ g.test('type') .params(u => u .combineWithParams(kBuiltins) + .combine('use_struct', [true, false] as const) .beginSubcases() .combine('target_type', kTestTypes) - .combine('use_struct', [true, false] as const) ) .beforeAllSubcases(t => { t.skipIf( @@ -128,7 +153,16 @@ g.test('type') ); if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if (t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } + t.skipIf( + t.params.name !== 'position' && + t.params.stage === 'vertex' && + t.params.io === 'out' && + !t.params.use_struct, + 'missing @builtin(position) in the vertex output' + ); }) .fn(t => { let code = ''; @@ -297,14 +331,30 @@ g.test('reuse_builtin_name') u .combineWithParams(kBuiltins) .combine('use', ['alias', 'struct', 'function', 'module-var', 'function-var']) + .combine('enable_extension', [true, false]) + .unless( + t => t.enable_extension && !(t.name.includes('subgroup') || t.name === 'clip_distances') + ) ) .beforeAllSubcases(t => { + if (!t.params.enable_extension) { + return; + } if (t.params.name.includes('subgroup')) { t.selectDeviceOrSkipTestCase('subgroups' as GPUFeatureName); + } else if 
(t.params.name === 'clip_distances') { + t.selectDeviceOrSkipTestCase('clip-distances' as GPUFeatureName); } }) .fn(t => { let code = ''; + if (t.params.enable_extension) { + if (t.params.name.includes('subgroup')) { + code += 'enable subgroups;\n'; + } else if (t.params.name === 'clip_distances') { + code += 'enable clip_distances;\n'; + } + } if (t.params.use === 'alias') { code += `alias ${t.params.name} = i32;`; } else if (t.params.use === `struct`) { diff --git a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts index 933093e16f0f..b716093144b0 100644 --- a/src/webgpu/shader/validation/shader_io/interpolate.spec.ts +++ b/src/webgpu/shader/validation/shader_io/interpolate.spec.ts @@ -9,15 +9,11 @@ import { generateShader } from './util.js'; export const g = makeTestGroup(ShaderValidationTest); // List of valid interpolation attributes. -const kValidCompatInterpolationAttributes = new Set([ +const kValidInterpolationAttributes = new Set([ '', - '@interpolate(flat, either)', '@interpolate(perspective)', '@interpolate(perspective, center)', '@interpolate(perspective, centroid)', -]); -const kValidInterpolationAttributes = new Set([ - ...kValidCompatInterpolationAttributes, '@interpolate(flat)', '@interpolate(flat, first)', '@interpolate(flat, either)', @@ -83,10 +79,7 @@ g.test('type_and_sampling') io: t.params.io, use_struct: t.params.use_struct, }); - const validInterpolationAttributes = t.isCompatibility - ? kValidCompatInterpolationAttributes - : kValidInterpolationAttributes; - t.expectCompileResult(validInterpolationAttributes.has(interpolate), code); + t.expectCompileResult(kValidInterpolationAttributes.has(interpolate), code); }); g.test('require_location') @@ -140,9 +133,7 @@ g.test('integral_types') use_struct: t.params.use_struct, }); - const expectSuccess = t.isCompatibility - ? 
t.params.attribute === '@interpolate(flat, either)' - : t.params.attribute.startsWith('@interpolate(flat'); + const expectSuccess = t.params.attribute.startsWith('@interpolate(flat'); t.expectCompileResult(expectSuccess, code); }); @@ -160,7 +151,7 @@ g.test('duplicate') t.expectCompileResult(t.params.attr === '', code); }); -const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPass?: boolean } } = { +const kValidationTests: { [key: string]: { src: string; pass: boolean } } = { valid: { src: `@interpolate(perspective)`, pass: true, @@ -172,7 +163,6 @@ const kValidationTests: { [key: string]: { src: string; pass: boolean; compatPas trailing_comma_one_arg: { src: `@interpolate(flat,)`, pass: true, - compatPass: false, }, trailing_comma_two_arg: { src: `@interpolate(perspective, center,)`, @@ -230,9 +220,6 @@ g.test('interpolation_validation') @builtin(position) vec4 { return vec4f(0); }`; - const expectSuccess = - kValidationTests[t.params.attr].pass && - (t.isCompatibility ? kValidationTests[t.params.attr].compatPass ?? true : true); - + const expectSuccess = kValidationTests[t.params.attr].pass; t.expectCompileResult(expectSuccess, code); }); diff --git a/src/webgpu/shader/validation/shader_io/util.ts b/src/webgpu/shader/validation/shader_io/util.ts index d115d79328b4..b71fd2aab42a 100644 --- a/src/webgpu/shader/validation/shader_io/util.ts +++ b/src/webgpu/shader/validation/shader_io/util.ts @@ -27,6 +27,9 @@ export function generateShader({ if (attribute.includes('subgroup')) { code += 'enable subgroups;\n'; } + if (attribute.includes('clip_distances')) { + code += 'enable clip_distances;\n'; + } if (use_struct) { // Generate a struct that wraps the entry point IO variable. 
diff --git a/src/webgpu/shader/validation/shader_validation_test.ts b/src/webgpu/shader/validation/shader_validation_test.ts index 6a4cae331766..5db47bd586ba 100644 --- a/src/webgpu/shader/validation/shader_validation_test.ts +++ b/src/webgpu/shader/validation/shader_validation_test.ts @@ -119,9 +119,14 @@ export class ShaderValidationTest extends GPUTest { constants?: Record; // List of additional module-scope variable the entrypoint needs to reference reference?: string[]; + // List of additional statements to insert in the entry point. + statements?: string[]; }) { const phonies: Array = []; + if (args.statements !== undefined) { + phonies.push(...args.statements); + } if (args.constants !== undefined) { phonies.push(...keysOf(args.constants).map(c => `_ = ${c};`)); } diff --git a/src/webgpu/shader/validation/types/textures.spec.ts b/src/webgpu/shader/validation/types/textures.spec.ts index 7b8f1748c113..f619877e2bc1 100644 --- a/src/webgpu/shader/validation/types/textures.spec.ts +++ b/src/webgpu/shader/validation/types/textures.spec.ts @@ -120,7 +120,7 @@ Besides, the shader compilation should always pass regardless of whether the for const { format, access, comma } = t.params; // bgra8unorm is considered a valid storage format at shader compilation stage const isFormatValid = - isTextureFormatUsableAsStorageFormat(format, t.isCompatibility) || format === 'bgra8unorm'; + isTextureFormatUsableAsStorageFormat(format, false) || format === 'bgra8unorm'; const isAccessValid = kAccessModes.includes(access); const wgsl = `@group(0) @binding(0) var tex: texture_storage_2d<${format}, ${access}${comma}>;`; t.expectCompileResult(isFormatValid && isAccessValid, wgsl); diff --git a/src/webgpu/util/math.ts b/src/webgpu/util/math.ts index 20d7818df65d..d5ca2b41320e 100644 --- a/src/webgpu/util/math.ts +++ b/src/webgpu/util/math.ts @@ -961,6 +961,17 @@ export function scalarF32Range( counts.neg_norm = counts.neg_norm === undefined ? 
counts.pos_norm : counts.neg_norm; counts.neg_sub = counts.neg_sub === undefined ? counts.pos_sub : counts.neg_sub; + let special_pos: number[] = []; + // The first interior point for 'pos_norm' is at 3. Because we have two special values we start allowing these + // special values as soon as they will fit as interior values. + if (counts.pos_norm >= 4) { + special_pos = [ + // Largest float as signed integer + 0x4effffff, + // Largest float as unsigned integer + 0x4f7fffff, + ]; + } // Generating bit fields first and then converting to f32, so that the spread across the possible f32 values is more // even. Generating against the bounds of f32 values directly results in the values being extremely biased towards the // extremes, since they are so much larger. @@ -980,7 +991,14 @@ export function scalarF32Range( kBit.f32.positive.subnormal.max, counts.pos_sub ), - ...linearRange(kBit.f32.positive.min, kBit.f32.positive.max, counts.pos_norm), + ...[ + ...linearRange( + kBit.f32.positive.min, + kBit.f32.positive.max, + counts.pos_norm - special_pos.length + ), + ...special_pos, + ].sort((n1, n2) => n1 - n2), ].map(Math.trunc); return bit_fields.map(reinterpretU32AsF32); } diff --git a/src/webgpu/util/texture.ts b/src/webgpu/util/texture.ts index badce71baa34..20e99fdfad4d 100644 --- a/src/webgpu/util/texture.ts +++ b/src/webgpu/util/texture.ts @@ -17,6 +17,7 @@ const kLoadValueFromStorageInfo: Partial<{ texelType: string; unpackWGSL: string; useFragDepth?: boolean; + discardWithStencil?: boolean; }; }> = { r8unorm: { @@ -233,17 +234,27 @@ const kLoadValueFromStorageInfo: Partial<{ `, useFragDepth: true, }, + stencil8: { + storageType: 'u32', + texelType: 'vec4u', + unpackWGSL: ` + return vec4u(unpack4xU8(src[byteOffset / 4])[byteOffset % 4], 123, 123, 123) + `, + discardWithStencil: true, + }, }; function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { const info = kLoadValueFromStorageInfo[format]; assert(!!info); - const { storageType, texelType, 
unpackWGSL, useFragDepth } = info; + const { storageType, texelType, unpackWGSL, useFragDepth, discardWithStencil } = info; const [depthDecl, depthCode] = useFragDepth ? ['@builtin(frag_depth) d: f32,', 'fs.d = fs.v[0];'] : ['', '']; + const stencilCode = discardWithStencil ? 'if ((fs.v.r & vin.stencilMask) == 0) { discard; }' : ''; + return ` struct Uniforms { numTexelRows: u32, @@ -255,9 +266,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { struct VSOutput { @builtin(position) pos: vec4f, @location(0) @interpolate(flat, either) sampleIndex: u32, + @location(1) @interpolate(flat, either) stencilMask: u32, }; - @vertex fn vs(@builtin(vertex_index) vNdx: u32) -> VSOutput { + @vertex fn vs(@builtin(vertex_index) vNdx: u32, @builtin(instance_index) iNdx: u32) -> VSOutput { let points = array( vec2f(0, 0), vec2f(1, 0), vec2f(0, 1), vec2f(1, 1), ); @@ -266,7 +278,10 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { let rowOffset = f32(sampleRow) / numSampleRows; let rowMult = 1.0 / numSampleRows; let p = (points[vNdx % 4] * vec2f(1, rowMult) + vec2f(0, rowOffset)) * 2.0 - 1.0; - return VSOutput(vec4f(p, 0, 1), uni.sampleCount - sampleRow % uni.sampleCount - 1); + return VSOutput( + vec4f(p, 0, 1), + uni.sampleCount - sampleRow % uni.sampleCount - 1, + 1u << iNdx); } @group(0) @binding(0) var uni: Uniforms; @@ -289,6 +304,7 @@ function getCopyBufferToTextureViaRenderCode(format: GPUTextureFormat) { var fs: FSOutput; fs.v = unpack(byteOffset); ${depthCode} + ${stencilCode} return fs; } `; @@ -312,114 +328,158 @@ function copyBufferToTextureViaRender( const msInfo = kLoadValueFromStorageInfo[format]; assert(!!msInfo); - const { useFragDepth } = msInfo; + const { useFragDepth, discardWithStencil } = msInfo; const { device } = t; - const code = getCopyBufferToTextureViaRenderCode(format); - const id = JSON.stringify({ format, useFragDepth, sampleCount, code }); - const pipelines = - 
s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map(); - s_copyBufferToTextureViaRenderPipelines.set(device, pipelines); - let pipeline = pipelines.get(id); - if (!pipeline) { - const module = device.createShaderModule({ code }); - pipeline = device.createRenderPipeline({ - layout: 'auto', - vertex: { module }, - ...(useFragDepth - ? { - fragment: { - module, - targets: [], - }, - depthStencil: { - depthWriteEnabled: true, - depthCompare: 'always', - format, - }, - } - : { - fragment: { - module, - targets: [{ format }], - }, - }), - primitive: { - topology: 'triangle-strip', - }, - ...(sampleCount > 1 && { multisample: { count: sampleCount } }), + const numBlits = discardWithStencil ? 8 : 1; + for (let blitCount = 0; blitCount < numBlits; ++blitCount) { + const code = getCopyBufferToTextureViaRenderCode(format); + const stencilWriteMask = 1 << blitCount; + const id = JSON.stringify({ + format, + useFragDepth, + stencilWriteMask, + discardWithStencil, + sampleCount, + code, }); - pipelines.set(id, pipeline); - } + const pipelines = + s_copyBufferToTextureViaRenderPipelines.get(device) ?? new Map(); + s_copyBufferToTextureViaRenderPipelines.set(device, pipelines); + let pipeline = pipelines.get(id); + if (!pipeline) { + const module = device.createShaderModule({ code }); + pipeline = device.createRenderPipeline({ + label: `blitCopyFor-${format}`, + layout: 'auto', + vertex: { module }, + ...(discardWithStencil + ? { + fragment: { + module, + targets: [], + }, + depthStencil: { + depthWriteEnabled: false, + depthCompare: 'always', + format, + stencilWriteMask, + stencilFront: { + passOp: 'replace', + }, + }, + } + : useFragDepth + ? 
{ + fragment: { + module, + targets: [], + }, + depthStencil: { + depthWriteEnabled: true, + depthCompare: 'always', + format, + }, + } + : { + fragment: { + module, + targets: [{ format }], + }, + }), + primitive: { + topology: 'triangle-strip', + }, + ...(sampleCount > 1 && { multisample: { count: sampleCount } }), + }); + pipelines.set(id, pipeline); + } - const info = kTextureFormatInfo[format]; - const uniforms = new Uint32Array([ - copySize.height, // numTexelRows: u32, - source.bytesPerRow!, // bytesPerRow: u32, - info.bytesPerBlock!, // bytesPerSample: u32, - dest.texture.sampleCount, // sampleCount: u32, - ]); - const uniformBuffer = t.makeBufferWithContents( - uniforms, - GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM - ); - const storageBuffer = t.createBufferTracked({ - size: source.buffer.size, - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, - }); - encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size); - const baseMipLevel = dest.mipLevel; - for (let l = 0; l < copySize.depthOrArrayLayers; ++l) { - const baseArrayLayer = origin.z + l; - const mipLevelCount = 1; - const arrayLayerCount = 1; - const pass = encoder.beginRenderPass( - useFragDepth - ? 
{ - colorAttachments: [], - depthStencilAttachment: { - view: dest.texture.createView({ - baseMipLevel, - baseArrayLayer, - mipLevelCount, - arrayLayerCount, - }), - depthClearValue: 0, - depthLoadOp: 'clear', - depthStoreOp: 'store', - }, - } - : { - colorAttachments: [ - { + const info = kTextureFormatInfo[format]; + const uniforms = new Uint32Array([ + copySize.height, // numTexelRows: u32, + source.bytesPerRow!, // bytesPerRow: u32, + info.bytesPerBlock!, // bytesPerSample: u32, + dest.texture.sampleCount, // sampleCount: u32, + ]); + const uniformBuffer = t.makeBufferWithContents( + uniforms, + GPUBufferUsage.COPY_DST | GPUBufferUsage.UNIFORM + ); + const storageBuffer = t.createBufferTracked({ + size: source.buffer.size, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.STORAGE, + }); + encoder.copyBufferToBuffer(source.buffer, 0, storageBuffer, 0, storageBuffer.size); + const baseMipLevel = dest.mipLevel; + for (let l = 0; l < copySize.depthOrArrayLayers; ++l) { + const baseArrayLayer = origin.z + l; + const mipLevelCount = 1; + const arrayLayerCount = 1; + const pass = encoder.beginRenderPass( + discardWithStencil + ? { + colorAttachments: [], + depthStencilAttachment: { view: dest.texture.createView({ baseMipLevel, baseArrayLayer, mipLevelCount, arrayLayerCount, }), - loadOp: 'clear', - storeOp: 'store', + stencilClearValue: 0, + stencilLoadOp: 'load', + stencilStoreOp: 'store', }, - ], - } - ); - pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1); - pass.setPipeline(pipeline); + } + : useFragDepth + ? 
{ + colorAttachments: [], + depthStencilAttachment: { + view: dest.texture.createView({ + baseMipLevel, + baseArrayLayer, + mipLevelCount, + arrayLayerCount, + }), + depthClearValue: 0, + depthLoadOp: 'clear', + depthStoreOp: 'store', + }, + } + : { + colorAttachments: [ + { + view: dest.texture.createView({ + baseMipLevel, + baseArrayLayer, + mipLevelCount, + arrayLayerCount, + }), + loadOp: 'clear', + storeOp: 'store', + }, + ], + } + ); + pass.setViewport(origin.x, origin.y, copySize.width, copySize.height, 0, 1); + pass.setPipeline(pipeline); - const offset = - (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l; - const bindGroup = device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: uniformBuffer } }, - { binding: 1, resource: { buffer: storageBuffer, offset } }, - ], - }); + const offset = + (source.offset ?? 0) + (source.bytesPerRow ?? 0) * (source.rowsPerImage ?? 0) * l; + const bindGroup = device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: uniformBuffer } }, + { binding: 1, resource: { buffer: storageBuffer, offset } }, + ], + }); - pass.setBindGroup(0, bindGroup); - pass.draw(4 * copySize.height * dest.texture.sampleCount); - pass.end(); + pass.setBindGroup(0, bindGroup); + pass.setStencilReference(0xff); + pass.draw(4 * copySize.height * dest.texture.sampleCount, 1, 0, blitCount); + pass.end(); + } } } diff --git a/src/webgpu/util/texture/base.ts b/src/webgpu/util/texture/base.ts index c5c6aaf20579..0cf6b7387318 100644 --- a/src/webgpu/util/texture/base.ts +++ b/src/webgpu/util/texture/base.ts @@ -255,6 +255,7 @@ export function reifyTextureViewDescriptor( format, dimension, aspect, + usage: texture.usage, baseMipLevel, mipLevelCount, baseArrayLayer, diff --git a/standalone/index.html b/standalone/index.html index d087d6584cd9..5c1daa89fcf8 100644 --- a/standalone/index.html +++ 
b/standalone/index.html @@ -13,8 +13,10 @@ - - + + + +