wgsl: Speed up ldexp execution tests (#3101)
Reduces the number of cases by using sparse instead of full ranges,
since a cartesian product of the input values is taken when
generating cases.
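
For scale, the case count is the product of the two range lengths, so shrinking both ranges cuts the total multiplicatively. A minimal sketch of the generation pattern, using made-up range sizes rather than the actual lengths returned by fullF32Range()/sparseF32Range():

    // Illustrative only: stand-ins for the range helpers, with hypothetical sizes.
    const fullF32: number[] = Array.from({ length: 1000 }, (_, i) => i);
    const fullI32: number[] = Array.from({ length: 500 }, (_, i) => i);
    const sparseF32: number[] = Array.from({ length: 100 }, (_, i) => i);
    const sparseI32: number[] = Array.from({ length: 50 }, (_, i) => i);

    // Case generation pairs every e1 with every e2, so the counts multiply.
    const fullCases = fullF32.flatMap(e1 => fullI32.map(e2 => [e1, e2] as const));
    const sparseCases = sparseF32.flatMap(e1 => sparseI32.map(e2 => [e1, e2] as const));

    console.log(fullCases.length);   // 500000
    console.log(sparseCases.length); // 5000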

Optimizes two quantization functions that had not been updated to
re-use their TypedArray. Creating and then immediately destroying
TypedArrays is a kind of hotspot we have encountered in other areas
of the code base.
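
The reuse pattern the commit switches to looks roughly like the following sketch; the per-call variant is shown only to illustrate the allocation pattern being avoided and is not the exact code that was removed:

    // Per-call allocation: a fresh TypedArray is created and immediately
    // discarded on every call, which becomes allocation/GC overhead in hot loops.
    function quantizePerCall(num: number): number {
      const data = new Int32Array(1);
      data[0] = num; // assignment coerces to a 32-bit signed integer
      return data[0];
    }

    // Reused allocation: the working buffer is created once at module scope,
    // and each call only writes into it.
    const quantizeData = new Int32Array(new ArrayBuffer(4));
    function quantizeReused(num: number): number {
      quantizeData[0] = num;
      return quantizeData[0];
    }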
zoddicus authored Oct 26, 2023
1 parent ba9e5d6 commit aa7ab4f
Showing 2 changed files with 17 additions and 10 deletions.
@@ -20,10 +20,10 @@ import { i32, TypeF32, TypeF16, TypeI32 } from '../../../../../util/conversion.js'
 import { FP } from '../../../../../util/floating_point.js';
 import {
   biasedRange,
-  fullF32Range,
-  fullF16Range,
-  fullI32Range,
   quantizeToI32,
+  sparseF32Range,
+  sparseI32Range,
+  sparseF16Range,
 } from '../../../../../util/math.js';
 import { makeCaseCache } from '../../case_cache.js';
 import { allInputSources, Case, run } from '../../expression.js';
@@ -63,20 +63,20 @@ const makeCase = (trait: 'f32' | 'f16', e1: number, e2: number): Case => {
 
 export const d = makeCaseCache('ldexp', {
   f32_non_const: () => {
-    return fullF32Range().flatMap(e1 => fullI32Range().map(e2 => makeCase('f32', e1, e2)));
+    return sparseF32Range().flatMap(e1 => sparseI32Range().map(e2 => makeCase('f32', e1, e2)));
   },
   f32_const: () => {
-    return fullF32Range().flatMap(e1 =>
+    return sparseF32Range().flatMap(e1 =>
       biasedRange(-bias.f32 - 10, bias.f32 + 1, 10).flatMap(e2 =>
         FP.f32.isFinite(e1 * 2 ** quantizeToI32(e2)) ? makeCase('f32', e1, e2) : []
       )
     );
   },
   f16_non_const: () => {
-    return fullF16Range().flatMap(e1 => fullI32Range().map(e2 => makeCase('f16', e1, e2)));
+    return sparseF16Range().flatMap(e1 => sparseI32Range().map(e2 => makeCase('f16', e1, e2)));
   },
   f16_const: () => {
-    return fullF16Range().flatMap(e1 =>
+    return sparseF16Range().flatMap(e1 =>
       biasedRange(-bias.f16 - 10, bias.f16 + 1, 10).flatMap(e2 =>
         FP.f16.isFinite(e1 * 2 ** quantizeToI32(e2)) ? makeCase('f16', e1, e2) : []
       )
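
For reference, ldexp(e1, e2) evaluates e1 * 2^e2, which is why the const-input generators above drop any (e1, e2) pair whose result would not be finite in the target format. A rough restatement of the f32 filter, under the assumption that FP.f32.isFinite simply checks representability as a finite f32 value:

    // Illustrative restatement of the f32 const-case filter; F32_MAX is the
    // largest finite f32 value.
    const F32_MAX = 3.4028234663852886e38;

    function keepF32Case(e1: number, e2: number): boolean {
      const result = e1 * 2 ** Math.trunc(e2); // quantizeToI32 truncates toward zero for in-range values
      return Number.isFinite(result) && Math.abs(result) <= F32_MAX;
    }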
13 changes: 10 additions & 3 deletions src/webgpu/util/math.ts
@@ -7,7 +7,6 @@ import {
 } from '../../external/petamoriken/float16/float16.js';
 
 import { kBit, kValue } from './constants.js';
-import { i32, u32 } from './conversion.js';
 import {
   reinterpretF64AsU64,
   reinterpretU64AsF64,
@@ -2031,14 +2030,22 @@ export function quantizeToF16(num: number): number {
   return quantizeToF16Data[0];
 }
 
+/** Statically allocate working data, so it doesn't need per-call creation */
+const quantizeToI32Data = new Int32Array(new ArrayBuffer(4));
+
 /** @returns the closest 32-bit signed integer value to the input */
 export function quantizeToI32(num: number): number {
-  return i32(num).value as number;
+  quantizeToI32Data[0] = num;
+  return quantizeToI32Data[0];
 }
 
+/** Statically allocate working data, so it doesn't need per-call creation */
+const quantizeToU32Data = new Uint32Array(new ArrayBuffer(4));
+
 /** @returns the closest 32-bit signed integer value to the input */
 export function quantizeToU32(num: number): number {
-  return u32(num).value as number;
+  quantizeToU32Data[0] = num;
+  return quantizeToU32Data[0];
 }
 
 /** @returns whether the number is an integer and a power of two */
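
As a behavioral note, writing a JavaScript number into an Int32Array or Uint32Array element coerces it with ToInt32/ToUint32 semantics, i.e. truncation toward zero for in-range fractional values, which is what the reworked functions now rely on. A self-contained check with illustrative values:

    // Demonstrates the coercion the new quantizeToI32/quantizeToU32 depend on.
    const i32Buf = new Int32Array(1);
    i32Buf[0] = 2.9;
    console.log(i32Buf[0]); // 2  (truncated toward zero)
    i32Buf[0] = -2.9;
    console.log(i32Buf[0]); // -2

    const u32Buf = new Uint32Array(1);
    u32Buf[0] = 7.5;
    console.log(u32Buf[0]); // 7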
