// Derived from
// https://github.com/webmachinelearning/webnn-baseline/blob/main/src/lib/compute-padding.js
/**
 * Compute the beginning and ending padding along one dimension for the
 * 'same-upper' / 'same-lower' auto pad modes.
 *
 * When `outputPadding` is undefined the conv2d formula is used: the total
 * padding is whatever is missing for ceil(inputSize / stride) windows of
 * `effectiveFilterSize` to fit. Otherwise the convTranspose2d formula is
 * used, which targets an output size of inputSize * stride.
 *
 * An odd total is split unevenly: 'same-upper' puts the extra element at the
 * end, 'same-lower' puts it at the beginning.
 *
 * @param {String} autoPad 'same-upper' or 'same-lower'
 * @param {Number} inputSize
 * @param {Number} effectiveFilterSize filter size with dilation applied
 * @param {Number} stride
 * @param {Number} outputPadding only passed for convTranspose2d
 * @return {Array} [paddingBegin, paddingEnd]
 * @throws {Error} if autoPad is neither 'same-upper' nor 'same-lower'
 */
function computePadding1DForAutoPad(
    autoPad, inputSize, effectiveFilterSize, stride, outputPadding) {
  // Reject unknown modes before doing any arithmetic.
  if (autoPad !== 'same-upper' && autoPad !== 'same-lower') {
    throw new Error('The autoPad is invalid.');
  }

  let totalPadding;
  if (outputPadding === undefined) {
    // for conv2d
    const outSize = Math.ceil(inputSize / stride);
    const neededInput = (outSize - 1) * stride + effectiveFilterSize;
    totalPadding = neededInput > inputSize ? neededInput - inputSize : 0;
  } else {
    // for convTranspose2d
    // totalPadding = beginning padding + ending padding
    // SAME_UPPER or SAME_LOWER mean pad the input so that
    //   output size = input size * strides
    //   output size = (input size - 1) * stride + effectiveFilterSize
    //                 - beginning padding - ending padding + output padding
    // The difference is negative when effectiveFilterSize + outputPadding is
    // smaller than stride; clamp to zero, mirroring the conv2d branch, so the
    // result is never a negative padding.
    totalPadding = Math.max(
        0,
        (inputSize - 1) * stride + effectiveFilterSize +
            outputPadding - inputSize * stride);
  }

  const smallerHalf = Math.floor(totalPadding / 2);
  const largerHalf = Math.floor((totalPadding + 1) / 2);
  return autoPad === 'same-upper' ?
      [smallerHalf, largerHalf] :
      [largerHalf, smallerHalf];
}

// Compute explicit padding given input sizes, filter sizes, strides, dilations
// and auto pad mode 'same-upper' or 'same-lower'.
/**
 * Compute explicit 2-D padding for the 'same-upper' / 'same-lower' auto pad
 * modes by applying computePadding1DForAutoPad to the height and width
 * dimensions independently (no output padding is forwarded).
 *
 * @param {Array} inputSizes [inputHeight, inputWidth]
 * @param {Array} filterSizes [filterHeight, filterWidth]
 * @param {Array} strides [strideHeight, strideWidth], [1, 1] when falsy
 * @param {Array} dilations [dilationHeight, dilationWidth], [1, 1] when falsy
 * @param {String} autoPad forwarded unchanged to the 1-D helper
 * @return {Array} [beginningPaddingHeight, endingPaddingHeight,
 *     beginningPaddingWidth, endingPaddingWidth]
 */
export function computePadding2DForAutoPad(
    inputSizes, filterSizes, strides, dilations, autoPad) {
  const [inH, inW] = inputSizes;
  const [kernelH, kernelW] = filterSizes;
  const [stepH, stepW] = strides || [1, 1];
  const [dilateH, dilateW] = dilations || [1, 1];

  // A dilated filter covers (size - 1) * dilation + 1 input elements.
  const dilatedH = (kernelH - 1) * dilateH + 1;
  const dilatedW = (kernelW - 1) * dilateW + 1;

  const padsH = computePadding1DForAutoPad(autoPad, inH, dilatedH, stepH);
  const padsW = computePadding1DForAutoPad(autoPad, inW, dilatedW, stepW);

  return [padsH[0], padsH[1], padsW[0], padsW[1]];
}
+ if (options.autoPad == 'same-upper') { + options.padding = + computePadding2DForAutoPad( + /* nchw */[input.shape()[2], input.shape()[3]], + /* oihw */[weights.shape()[2], weights.shape()[3]], + options.strides, options.dilations, options.autoPad); + } return this.builder_.conv2d(input, weights, options); } diff --git a/face_recognition/facenet_nhwc.js b/face_recognition/facenet_nhwc.js index b365439d..d03a0dd5 100644 --- a/face_recognition/facenet_nhwc.js +++ b/face_recognition/facenet_nhwc.js @@ -1,6 +1,6 @@ 'use strict'; -import {buildConstantByNpy} from '../common/utils.js'; +import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js'; const strides = [2, 2]; const autoPad = 'same-upper'; @@ -45,6 +45,14 @@ export class FaceNetNhwc { if (relu) { options.activation = this.builder_.relu(); } + // WebNN spec drops autoPad support, compute the explicit padding instead. + if (options.autoPad == 'same-upper') { + options.padding = + computePadding2DForAutoPad( + /* nwhc */[input.shape()[1], input.shape()[2]], + /* ohwi */[weights.shape()[1], weights.shape()[2]], + options.strides, options.dilations, options.autoPad); + } return this.builder_.conv2d(input, weights, options); } diff --git a/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js b/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js index a6ecfd7c..2391867c 100644 --- a/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js +++ b/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js @@ -1,6 +1,6 @@ 'use strict'; -import {buildConstantByNpy} from '../common/utils.js'; +import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js'; // SSD MobileNet V2 Face model with 'nchw' layout. 
export class SsdMobilenetV2FaceNchw { @@ -36,7 +36,7 @@ export class SsdMobilenetV2FaceNchw { }; } - async buildConv_(input, nameArray, clip = true, options = undefined) { + async buildConv_(input, nameArray, clip = true, options = {}) { // nameArray: 0: keyword, 1: indice or suffix let prefix = this.weightsUrl_; const weightSuffix = '_weights.npy'; @@ -66,13 +66,12 @@ ${nameArray[1]}`; const weights = buildConstantByNpy(this.builder_, weightsName); const biasName = prefix + biasSuffix; const bias = buildConstantByNpy(this.builder_, biasName); - if (options !== undefined) { - options.autoPad = 'same-upper'; - } else { - options = { - autoPad: 'same-upper', - }; - } + const inputShape = (await input).shape(); + const weightsShape = (await weights).shape(); + options.padding = computePadding2DForAutoPad( + /* nchw */[inputShape[2], inputShape[3]], + /* oihw */[weightsShape[2], weightsShape[3]], + options.strides, options.dilations, 'same-upper'); options.bias = await bias; if (clip) { // TODO: Set clamp activation to options once it's supported in diff --git a/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js b/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js index 8a05b69f..0962a6f3 100644 --- a/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js +++ b/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js @@ -1,6 +1,6 @@ 'use strict'; -import {buildConstantByNpy} from '../common/utils.js'; +import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js'; // SSD MobileNet V2 Face model with 'nhwc' layout. 
export class SsdMobilenetV2FaceNhwc { @@ -69,18 +69,22 @@ ${nameArray[1]}`; if (options !== undefined) { options.inputLayout = 'nhwc'; options.filterLayout = 'ohwi'; - options.autoPad = 'same-upper'; } else { options = { inputLayout: 'nhwc', filterLayout: 'ohwi', - autoPad: 'same-upper', }; } if (nameArray[0].includes('depthwise')) { options.filterLayout = 'ihwo'; } options.bias = await bias; + const inputShape = (await input).shape(); + const weightsShape = (await weights).shape(); + options.padding = computePadding2DForAutoPad( + /* nhwc */[inputShape[1], inputShape[2]], + /* ohwi or ihwo */[weightsShape[1], weightsShape[2]], + options.strides, options.dilations, 'same-upper'); if (relu6) { // TODO: Set clamp activation to options once it's supported in // WebNN DML backend. diff --git a/image_classification/mobilenet_nhwc.js b/image_classification/mobilenet_nhwc.js index cce96cfb..4db2c3f6 100644 --- a/image_classification/mobilenet_nhwc.js +++ b/image_classification/mobilenet_nhwc.js @@ -1,6 +1,6 @@ 'use strict'; -import {buildConstantByNpy} from '../common/utils.js'; +import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js'; /* eslint max-len: ["error", {"code": 120}] */ @@ -29,6 +29,14 @@ export class MobileNetV2Nhwc { const bias = await buildConstantByNpy(this.builder_, biasName); options.inputLayout = 'nhwc'; options.bias = bias; + // WebNN spec drops autoPad support, compute the explicit padding instead. + if (options.autoPad == 'same-upper') { + options.padding = + computePadding2DForAutoPad( + /* nwhc */[input.shape()[1], input.shape()[2]], + /* ohwi or ihwo */[weights.shape()[1], weights.shape()[2]], + options.strides, options.dilations, options.autoPad); + } if (relu6) { // TODO: Set clamp activation to options once it's supported in // WebNN DML backend. 
diff --git a/image_classification/resnet50v2_nhwc.js b/image_classification/resnet50v2_nhwc.js index 7118baf0..f54a1376 100644 --- a/image_classification/resnet50v2_nhwc.js +++ b/image_classification/resnet50v2_nhwc.js @@ -1,6 +1,6 @@ 'use strict'; -import {buildConstantByNpy} from '../common/utils.js'; +import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js'; const autoPad = 'same-upper'; const strides = [2, 2]; @@ -50,6 +50,14 @@ export class ResNet50V2Nhwc { if (relu) { options.activation = this.builder_.relu(); } + // WebNN spec drops autoPad support, compute the explicit padding instead. + if (options.autoPad == 'same-upper') { + options.padding = + computePadding2DForAutoPad( + /* nwhc */[input.shape()[1], input.shape()[2]], + /* ohwi */[weights.shape()[1], weights.shape()[2]], + options.strides, options.dilations, options.autoPad); + } return this.builder_.conv2d(input, weights, options); } @@ -105,8 +113,13 @@ export class ResNet50V2Nhwc { }); const conv1 = await this.buildConv_( input, ['', '', '1'], {strides, padding: [3, 3, 3, 3]}, false); + const windowDimensions = [3, 3]; const pool = this.builder_.maxPool2d( - conv1, {windowDimensions: [3, 3], strides, layout, autoPad}); + conv1, {windowDimensions, strides, layout, + padding: computePadding2DForAutoPad( + /* nhwc */ [conv1.shape()[1], conv1.shape()[2]], + windowDimensions, strides, /* dilations */ undefined, + 'same-upper')}); // Block 1 const bottleneck1 = await this.buildBottleneckV2_(pool, ['1', '1'], true); const bottleneck2 = await this.buildBottleneckV2_( diff --git a/image_classification/squeezenet_nhwc.js b/image_classification/squeezenet_nhwc.js index 5f186457..eb56104c 100644 --- a/image_classification/squeezenet_nhwc.js +++ b/image_classification/squeezenet_nhwc.js @@ -1,6 +1,6 @@ 'use strict'; -import {buildConstantByNpy} from '../common/utils.js'; +import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js'; // SqueezeNet 1.0 model with 
'nhwc' layout export class SqueezeNetNhwc { @@ -29,6 +29,14 @@ export class SqueezeNetNhwc { options.filterLayout = 'ohwi'; options.bias = bias; options.activation = this.builder_.relu(); + // WebNN spec drops autoPad support, compute the explicit padding instead. + if (options.autoPad == 'same-upper') { + options.padding = + computePadding2DForAutoPad( + /* nwhc */[input.shape()[1], input.shape()[2]], + /* ohwi */[weights.shape()[1], weights.shape()[2]], + options.strides, options.dilations, options.autoPad); + } return this.builder_.conv2d(input, weights, options); } diff --git a/object_detection/index.html b/object_detection/index.html index 96234020..5cf2bc30 100644 --- a/object_detection/index.html +++ b/object_detection/index.html @@ -46,21 +46,6 @@ -