wgsl: Speed up ldexp execution tests (#3101)
Reduces the number of cases by using sparse instead of full ranges,
since a cartesian product of the input values is taken when
generating cases.
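
For scale, the case count is the product of the two range lengths, so shrinking both ranges cuts the total multiplicatively. A minimal sketch of the generation pattern, using made-up range sizes rather than the actual lengths returned by fullF32Range()/sparseF32Range():

    // Illustrative only: stand-ins for the range helpers, with hypothetical sizes.
    const fullF32: number[] = Array.from({ length: 1000 }, (_, i) => i);
    const fullI32: number[] = Array.from({ length: 500 }, (_, i) => i);
    const sparseF32: number[] = Array.from({ length: 100 }, (_, i) => i);
    const sparseI32: number[] = Array.from({ length: 50 }, (_, i) => i);

    // Case generation pairs every e1 with every e2, so the counts multiply.
    const fullCases = fullF32.flatMap(e1 => fullI32.map(e2 => [e1, e2] as const));
    const sparseCases = sparseF32.flatMap(e1 => sparseI32.map(e2 => [e1, e2] as const));

    console.log(fullCases.length);   // 500000
    console.log(sparseCases.length); // 5000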

Optimizes two quantization functions that had not been updated to
re-use their TypedArray. Creating and then immediately destroying
TypedArrays is a kind of hotspot we have encountered in other areas
of the code base.
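
The reuse pattern the commit switches to looks roughly like the following sketch; the per-call variant is shown only to illustrate the allocation pattern being avoided and is not the exact code that was removed:

    // Per-call allocation: a fresh TypedArray is created and immediately
    // discarded on every call, which becomes allocation/GC overhead in hot loops.
    function quantizePerCall(num: number): number {
      const data = new Int32Array(1);
      data[0] = num; // assignment coerces to a 32-bit signed integer
      return data[0];
    }

    // Reused allocation: the working buffer is created once at module scope,
    // and each call only writes into it.
    const quantizeData = new Int32Array(new ArrayBuffer(4));
    function quantizeReused(num: number): number {
      quantizeData[0] = num;
      return quantizeData[0];
    }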
zoddicus authored Oct 26, 2023
1 parent ba9e5d6 commit aa7ab4f
Showing 2 changed files with 17 additions and 10 deletions.
@@ -20,10 +20,10 @@ import { i32, TypeF32, TypeF16, TypeI32 } from '../../../../../util/conversion.js'
 import { FP } from '../../../../../util/floating_point.js';
 import {
   biasedRange,
-  fullF32Range,
-  fullF16Range,
-  fullI32Range,
   quantizeToI32,
+  sparseF32Range,
+  sparseI32Range,
+  sparseF16Range,
 } from '../../../../../util/math.js';
 import { makeCaseCache } from '../../case_cache.js';
 import { allInputSources, Case, run } from '../../expression.js';
@@ -63,20 +63,20 @@ const makeCase = (trait: 'f32' | 'f16', e1: number, e2: number): Case => {
 
 export const d = makeCaseCache('ldexp', {
   f32_non_const: () => {
-    return fullF32Range().flatMap(e1 => fullI32Range().map(e2 => makeCase('f32', e1, e2)));
+    return sparseF32Range().flatMap(e1 => sparseI32Range().map(e2 => makeCase('f32', e1, e2)));
   },
   f32_const: () => {
-    return fullF32Range().flatMap(e1 =>
+    return sparseF32Range().flatMap(e1 =>
       biasedRange(-bias.f32 - 10, bias.f32 + 1, 10).flatMap(e2 =>
         FP.f32.isFinite(e1 * 2 ** quantizeToI32(e2)) ? makeCase('f32', e1, e2) : []
       )
     );
   },
   f16_non_const: () => {
-    return fullF16Range().flatMap(e1 => fullI32Range().map(e2 => makeCase('f16', e1, e2)));
+    return sparseF16Range().flatMap(e1 => sparseI32Range().map(e2 => makeCase('f16', e1, e2)));
   },
   f16_const: () => {
-    return fullF16Range().flatMap(e1 =>
+    return sparseF16Range().flatMap(e1 =>
       biasedRange(-bias.f16 - 10, bias.f16 + 1, 10).flatMap(e2 =>
         FP.f16.isFinite(e1 * 2 ** quantizeToI32(e2)) ? makeCase('f16', e1, e2) : []
       )
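
For reference, ldexp(e1, e2) evaluates e1 * 2^e2, which is why the const-input generators above drop any (e1, e2) pair whose result would not be finite in the target format. A rough restatement of the f32 filter, under the assumption that FP.f32.isFinite simply checks representability as a finite f32 value:

    // Illustrative restatement of the f32 const-case filter; F32_MAX is the
    // largest finite f32 value.
    const F32_MAX = 3.4028234663852886e38;

    function keepF32Case(e1: number, e2: number): boolean {
      const result = e1 * 2 ** Math.trunc(e2); // quantizeToI32 truncates toward zero for in-range values
      return Number.isFinite(result) && Math.abs(result) <= F32_MAX;
    }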
13 changes: 10 additions & 3 deletions src/webgpu/util/math.ts
@@ -7,7 +7,6 @@ import {
 } from '../../external/petamoriken/float16/float16.js';
 
 import { kBit, kValue } from './constants.js';
-import { i32, u32 } from './conversion.js';
 import {
   reinterpretF64AsU64,
   reinterpretU64AsF64,
@@ -2031,14 +2030,22 @@ export function quantizeToF16(num: number): number {
   return quantizeToF16Data[0];
 }
 
+/** Statically allocate working data, so it doesn't need per-call creation */
+const quantizeToI32Data = new Int32Array(new ArrayBuffer(4));
+
 /** @returns the closest 32-bit signed integer value to the input */
 export function quantizeToI32(num: number): number {
-  return i32(num).value as number;
+  quantizeToI32Data[0] = num;
+  return quantizeToI32Data[0];
 }
 
+/** Statically allocate working data, so it doesn't need per-call creation */
+const quantizeToU32Data = new Uint32Array(new ArrayBuffer(4));
+
 /** @returns the closest 32-bit signed integer value to the input */
 export function quantizeToU32(num: number): number {
-  return u32(num).value as number;
+  quantizeToU32Data[0] = num;
+  return quantizeToU32Data[0];
 }
 
 /** @returns whether the number is an integer and a power of two */
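
As a behavioral note, writing a JavaScript number into an Int32Array or Uint32Array element coerces it with ToInt32/ToUint32 semantics, i.e. truncation toward zero for in-range fractional values, which is what the reworked functions now rely on. A self-contained check with illustrative values:

    // Demonstrates the coercion the new quantizeToI32/quantizeToU32 depend on.
    const i32Buf = new Int32Array(1);
    i32Buf[0] = 2.9;
    console.log(i32Buf[0]); // 2  (truncated toward zero)
    i32Buf[0] = -2.9;
    console.log(i32Buf[0]); // -2

    const u32Buf = new Uint32Array(1);
    u32Buf[0] = 7.5;
    console.log(u32Buf[0]); // 7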
