From 678d452d2b1e69e84dd726fa5dcb00c68b91871c Mon Sep 17 00:00:00 2001
From: Ningxin Hu <ningxin.hu@intel.com>
Date: Fri, 8 Mar 2024 17:51:12 +0800
Subject: [PATCH] Remove the MLAutoPad usage and calculate the padding (#195)

WebNN spec drops the MLAutoPad support:

https://github.com/webmachinelearning/webnn/pull/587
---
 common/utils.js                               | 66 +++++++++++++++++++
 face_recognition/facenet_nchw.js              | 10 ++-
 face_recognition/facenet_nhwc.js              | 10 ++-
 .../ssd_mobilenetv2_face_nchw.js              | 17 +++--
 .../ssd_mobilenetv2_face_nhwc.js              | 10 ++-
 image_classification/mobilenet_nhwc.js        | 10 ++-
 image_classification/resnet50v2_nhwc.js       | 17 ++++-
 image_classification/squeezenet_nhwc.js       | 10 ++-
 object_detection/index.html                   | 15 -----
 object_detection/main.js                      | 13 ++--
 object_detection/ssd_mobilenetv1_nchw.js      |  7 +-
 object_detection/ssd_mobilenetv1_nhwc.js      |  8 ++-
 object_detection/tiny_yolov2_nchw.js          | 29 +++++---
 object_detection/tiny_yolov2_nhwc.js          | 30 ++++++---
 semantic_segmentation/deeplabv3_mnv2_nhwc.js  |  7 +-
 15 files changed, 194 insertions(+), 65 deletions(-)

diff --git a/common/utils.js b/common/utils.js
index c28185fa..b26061ed 100644
--- a/common/utils.js
+++ b/common/utils.js
@@ -366,3 +366,69 @@ export async function isWebNN() {
     }
   }
 }
+
+// Adapted from the WebNN baseline implementation:
+// https://github.com/webmachinelearning/webnn-baseline/blob/main/src/lib/compute-padding.js
+/**
+ * Explicit 1-D padding for 'same-upper' / 'same-lower'; other modes throw.
+ * @param {String} autoPad
+ * @param {Number} inputSize
+ * @param {Number} effectiveFilterSize
+ * @param {Number} stride
+ * @param {Number} outputPadding pass it for convTranspose2d, omit for conv2d
+ * @return {Array} [paddingBegin, paddingEnd]
+ */
+function computePadding1DForAutoPad(
+    autoPad, inputSize, effectiveFilterSize, stride, outputPadding) {
+  let padTotal = 0;
+  if (outputPadding !== undefined) {
+    // convTranspose2d: 'same-upper' / 'same-lower' pad so that
+    //   output size = input size * stride
+    // while in general
+    //   output size = (input size - 1) * stride + effectiveFilterSize
+    //       - (beginning padding + ending padding) + output padding
+    // so the sum of beginning and ending padding is the difference below.
+    const unpaddedOutput =
+        (inputSize - 1) * stride + effectiveFilterSize + outputPadding;
+    padTotal = unpaddedOutput - inputSize * stride;
+  } else {
+    // conv2d: pad only when the last filter window would otherwise run
+    // past the end of the input.
+    const outputSize = Math.ceil(inputSize / stride);
+    const requiredInput = (outputSize - 1) * stride + effectiveFilterSize;
+    if (requiredInput > inputSize) {
+      padTotal = requiredInput - inputSize;
+    }
+  }
+  const smallerHalf = Math.floor(padTotal / 2);
+  const largerHalf = Math.floor((padTotal + 1) / 2);
+  if (autoPad === 'same-upper') {
+    // An odd total leaves the extra element at the end.
+    return [smallerHalf, largerHalf];
+  }
+  if (autoPad === 'same-lower') {
+    // An odd total leaves the extra element at the beginning.
+    return [largerHalf, smallerHalf];
+  }
+  throw new Error('The autoPad is invalid.');
+}
+
+// Turn the auto pad mode 'same-upper' or 'same-lower' into explicit padding
+// [beginHeight, endHeight, beginWidth, endWidth]. strides and dilations
+// default to [1, 1] when they are not given.
+export function computePadding2DForAutoPad(
+    inputSizes, filterSizes, strides, dilations, autoPad) {
+  const [inputHeight, inputWidth] = inputSizes;
+  const [filterHeight, filterWidth] = filterSizes;
+  const [strideHeight, strideWidth] = strides || [1, 1];
+  const [dilationHeight, dilationWidth] = dilations || [1, 1];
+  // Extent covered by the filter once dilation spreads its taps apart.
+  const effectiveHeight = (filterHeight - 1) * dilationHeight + 1;
+  const effectiveWidth = (filterWidth - 1) * dilationWidth + 1;
+  const heightPadding = computePadding1DForAutoPad(
+      autoPad, inputHeight, effectiveHeight, strideHeight);
+  const widthPadding = computePadding1DForAutoPad(
+      autoPad, inputWidth, effectiveWidth, strideWidth);
+  // Each 1-D result is [begin, end]: emit the height pair, then the width.
+  return [...heightPadding, ...widthPadding];
+}
diff --git a/face_recognition/facenet_nchw.js b/face_recognition/facenet_nchw.js
index d33ef09d..7025c01c 100644
--- a/face_recognition/facenet_nchw.js
+++ b/face_recognition/facenet_nchw.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 const strides = [2, 2];
 const autoPad = 'same-upper';
 
@@ -43,6 +43,14 @@ export class FaceNetNchw {
     if (relu) {
       options.activation = this.builder_.relu();
     }
+    // WebNN spec drops autoPad support, compute the explicit padding instead.
+    if (options.autoPad == 'same-upper') {
+      options.padding =
+        computePadding2DForAutoPad(
+            /* nchw */[input.shape()[2], input.shape()[3]],
+            /* oihw */[weights.shape()[2], weights.shape()[3]],
+            options.strides, options.dilations, options.autoPad);
+    }
     return this.builder_.conv2d(input, weights, options);
   }
 
diff --git a/face_recognition/facenet_nhwc.js b/face_recognition/facenet_nhwc.js
index b365439d..d03a0dd5 100644
--- a/face_recognition/facenet_nhwc.js
+++ b/face_recognition/facenet_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 const strides = [2, 2];
 const autoPad = 'same-upper';
 
@@ -45,6 +45,14 @@ export class FaceNetNhwc {
     if (relu) {
       options.activation = this.builder_.relu();
     }
+    // WebNN spec drops autoPad support, compute the explicit padding instead.
+    if (options.autoPad == 'same-upper') {
+      options.padding =
+        computePadding2DForAutoPad(
+            /* nhwc */[input.shape()[1], input.shape()[2]],
+            /* ohwi */[weights.shape()[1], weights.shape()[2]],
+            options.strides, options.dilations, options.autoPad);
+    }
     return this.builder_.conv2d(input, weights, options);
   }
 
diff --git a/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js b/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js
index a6ecfd7c..2391867c 100644
--- a/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js
+++ b/facial_landmark_detection/ssd_mobilenetv2_face_nchw.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // SSD MobileNet V2 Face model with 'nchw' layout.
 export class SsdMobilenetV2FaceNchw {
@@ -36,7 +36,7 @@ export class SsdMobilenetV2FaceNchw {
     };
   }
 
-  async buildConv_(input, nameArray, clip = true, options = undefined) {
+  async buildConv_(input, nameArray, clip = true, options = {}) {
     // nameArray: 0: keyword, 1: indice or suffix
     let prefix = this.weightsUrl_;
     const weightSuffix = '_weights.npy';
@@ -66,13 +66,12 @@ ${nameArray[1]}`;
     const weights = buildConstantByNpy(this.builder_, weightsName);
     const biasName = prefix + biasSuffix;
     const bias = buildConstantByNpy(this.builder_, biasName);
-    if (options !== undefined) {
-      options.autoPad = 'same-upper';
-    } else {
-      options = {
-        autoPad: 'same-upper',
-      };
-    }
+    const inputShape = (await input).shape();
+    const weightsShape = (await weights).shape();
+    options.padding = computePadding2DForAutoPad(
+        /* nchw */[inputShape[2], inputShape[3]],
+        /* oihw */[weightsShape[2], weightsShape[3]],
+        options.strides, options.dilations, 'same-upper');
     options.bias = await bias;
     if (clip) {
       // TODO: Set clamp activation to options once it's supported in
diff --git a/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js b/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js
index 8a05b69f..0962a6f3 100644
--- a/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js
+++ b/facial_landmark_detection/ssd_mobilenetv2_face_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // SSD MobileNet V2 Face model with 'nhwc' layout.
 export class SsdMobilenetV2FaceNhwc {
@@ -69,18 +69,22 @@ ${nameArray[1]}`;
     if (options !== undefined) {
       options.inputLayout = 'nhwc';
       options.filterLayout = 'ohwi';
-      options.autoPad = 'same-upper';
     } else {
       options = {
         inputLayout: 'nhwc',
         filterLayout: 'ohwi',
-        autoPad: 'same-upper',
       };
     }
     if (nameArray[0].includes('depthwise')) {
       options.filterLayout = 'ihwo';
     }
     options.bias = await bias;
+    const inputShape = (await input).shape();
+    const weightsShape = (await weights).shape();
+    options.padding = computePadding2DForAutoPad(
+        /* nhwc */[inputShape[1], inputShape[2]],
+        /* ohwi or ihwo */[weightsShape[1], weightsShape[2]],
+        options.strides, options.dilations, 'same-upper');
     if (relu6) {
       // TODO: Set clamp activation to options once it's supported in
       // WebNN DML backend.
diff --git a/image_classification/mobilenet_nhwc.js b/image_classification/mobilenet_nhwc.js
index cce96cfb..4db2c3f6 100644
--- a/image_classification/mobilenet_nhwc.js
+++ b/image_classification/mobilenet_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 /* eslint max-len: ["error", {"code": 120}] */
 
@@ -29,6 +29,14 @@ export class MobileNetV2Nhwc {
     const bias = await buildConstantByNpy(this.builder_, biasName);
     options.inputLayout = 'nhwc';
     options.bias = bias;
+    // WebNN spec drops autoPad support, compute the explicit padding instead.
+    if (options.autoPad == 'same-upper') {
+      options.padding =
+        computePadding2DForAutoPad(
+            /* nhwc */[input.shape()[1], input.shape()[2]],
+            /* ohwi or ihwo */[weights.shape()[1], weights.shape()[2]],
+            options.strides, options.dilations, options.autoPad);
+    }
     if (relu6) {
       // TODO: Set clamp activation to options once it's supported in
       // WebNN DML backend.
diff --git a/image_classification/resnet50v2_nhwc.js b/image_classification/resnet50v2_nhwc.js
index 7118baf0..f54a1376 100644
--- a/image_classification/resnet50v2_nhwc.js
+++ b/image_classification/resnet50v2_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 const autoPad = 'same-upper';
 const strides = [2, 2];
@@ -50,6 +50,14 @@ export class ResNet50V2Nhwc {
     if (relu) {
       options.activation = this.builder_.relu();
     }
+    // WebNN spec drops autoPad support, compute the explicit padding instead.
+    if (options.autoPad == 'same-upper') {
+      options.padding =
+        computePadding2DForAutoPad(
+            /* nhwc */[input.shape()[1], input.shape()[2]],
+            /* ohwi */[weights.shape()[1], weights.shape()[2]],
+            options.strides, options.dilations, options.autoPad);
+    }
     return this.builder_.conv2d(input, weights, options);
   }
 
@@ -105,8 +113,13 @@ export class ResNet50V2Nhwc {
     });
     const conv1 = await this.buildConv_(
         input, ['', '', '1'], {strides, padding: [3, 3, 3, 3]}, false);
+    const windowDimensions = [3, 3];
     const pool = this.builder_.maxPool2d(
-        conv1, {windowDimensions: [3, 3], strides, layout, autoPad});
+        conv1, {windowDimensions, strides, layout,
+          padding: computePadding2DForAutoPad(
+              /* nhwc */ [conv1.shape()[1], conv1.shape()[2]],
+              windowDimensions, strides, /* dilations */ undefined,
+              'same-upper')});
     // Block 1
     const bottleneck1 = await this.buildBottleneckV2_(pool, ['1', '1'], true);
     const bottleneck2 = await this.buildBottleneckV2_(
diff --git a/image_classification/squeezenet_nhwc.js b/image_classification/squeezenet_nhwc.js
index 5f186457..eb56104c 100644
--- a/image_classification/squeezenet_nhwc.js
+++ b/image_classification/squeezenet_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // SqueezeNet 1.0 model with 'nhwc' layout
 export class SqueezeNetNhwc {
@@ -29,6 +29,14 @@ export class SqueezeNetNhwc {
     options.filterLayout = 'ohwi';
     options.bias = bias;
     options.activation = this.builder_.relu();
+    // WebNN spec drops autoPad support, compute the explicit padding instead.
+    if (options.autoPad == 'same-upper') {
+      options.padding =
+        computePadding2DForAutoPad(
+            /* nhwc */[input.shape()[1], input.shape()[2]],
+            /* ohwi */[weights.shape()[1], weights.shape()[2]],
+            options.strides, options.dilations, options.autoPad);
+    }
     return this.builder_.conv2d(input, weights, options);
   }
 
diff --git a/object_detection/index.html b/object_detection/index.html
index 96234020..5cf2bc30 100644
--- a/object_detection/index.html
+++ b/object_detection/index.html
@@ -46,21 +46,6 @@
             </div>
           </div>
         </div>
-        <div class="row mb-2 align-items-center">
-          <div class="col-1 col-md-1">
-            <span>Layout</span>
-          </div>
-          <div class="col-md-auto">
-            <div class="btn-group-toggle" data-toggle="buttons" id="layoutBtns">
-              <label class="btn btn-outline-info btn-sm">
-                <input type="radio" name="layout" id="nchw" autocomplete="off">NCHW
-              </label>
-              <label class="btn btn-outline-info btn-sm active">
-                <input type="radio" name="layout" id="nhwc" autocomplete="off" checked>NHWC
-              </label>
-            </div>
-          </div>
-        </div>
         <div class="row align-items-center">
           <div class="col-1 col-md-1">
             <span>Model</span>
diff --git a/object_detection/main.js b/object_detection/main.js
index 2b3a466a..4770ac4a 100644
--- a/object_detection/main.js
+++ b/object_detection/main.js
@@ -49,6 +49,13 @@ $(document).ready(async () => {
 
 $('#backendBtns .btn').on('change', async (e) => {
   if (inputType === 'camera') utils.stopCameraStream(rafReq, stream);
+  // Each backend has a fixed layout: nhwc for cpu, nchw for gpu.
+  const backendId = $(e.target).attr('id');
+  const isCpu = backendId.includes('cpu');
+  if (!isCpu && !backendId.includes('gpu')) {
+    throw new Error('Unknown backend');
+  }
+  layout = isCpu ? 'nhwc' : 'nchw';
   await main();
 });
 
@@ -58,12 +65,6 @@ $('#modelBtns .btn').on('change', async (e) => {
   await main();
 });
 
-$('#layoutBtns .btn').on('change', async (e) => {
-  layout = $(e.target).attr('id');
-  if (inputType === 'camera') utils.stopCameraStream(rafReq, stream);
-  await main();
-});
-
 // Click trigger to do inference with <img> element
 $('#img').click(async () => {
   if (inputType === 'camera') utils.stopCameraStream(rafReq, stream);
diff --git a/object_detection/ssd_mobilenetv1_nchw.js b/object_detection/ssd_mobilenetv1_nchw.js
index dcc94552..abe6d584 100644
--- a/object_detection/ssd_mobilenetv1_nchw.js
+++ b/object_detection/ssd_mobilenetv1_nchw.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // SSD MobileNet V1 model with 'nchw' layout, trained on the COCO dataset.
 export class SsdMobilenetV1Nchw {
@@ -58,7 +58,10 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
     const weights = await buildConstantByNpy(this.builder_, weightsName);
     const biasName = this.biasUrl_ + prefix + biasSuffix;
     const bias = await buildConstantByNpy(this.builder_, biasName);
-    options.autoPad = 'same-upper';
+    options.padding = computePadding2DForAutoPad(
+        /* nchw */[input.shape()[2], input.shape()[3]],
+        /* oihw */[weights.shape()[2], weights.shape()[3]],
+        options.strides, options.dilations, 'same-upper');
     options.bias = bias;
     if (relu6) {
       // TODO: Set clamp activation to options once it's supported in
diff --git a/object_detection/ssd_mobilenetv1_nhwc.js b/object_detection/ssd_mobilenetv1_nhwc.js
index 04fc8d6b..ed03da96 100644
--- a/object_detection/ssd_mobilenetv1_nhwc.js
+++ b/object_detection/ssd_mobilenetv1_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // SSD MobileNet V1 model with 'nhwc' layout, trained on the COCO dataset.
 export class SsdMobilenetV1Nhwc {
@@ -59,18 +59,20 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
     if (options !== undefined) {
       options.inputLayout = 'nhwc';
       options.filterLayout = 'ohwi';
-      options.autoPad = 'same-upper';
     } else {
       options = {
         inputLayout: 'nhwc',
         filterLayout: 'ohwi',
-        autoPad: 'same-upper',
       };
     }
     if (nameArray[0].includes('depthwise')) {
       options.filterLayout = 'ihwo';
     }
     options.bias = bias;
+    options.padding = computePadding2DForAutoPad(
+        /* nhwc */[input.shape()[1], input.shape()[2]],
+        /* ohwi or ihwo */[weights.shape()[1], weights.shape()[2]],
+        options.strides, options.dilations, 'same-upper');
     if (relu6) {
       // TODO: Set clamp activation to options once it's supported in
       // WebNN DML backend.
diff --git a/object_detection/tiny_yolov2_nchw.js b/object_detection/tiny_yolov2_nchw.js
index c255acb5..d86d200d 100644
--- a/object_detection/tiny_yolov2_nchw.js
+++ b/object_detection/tiny_yolov2_nchw.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // Tiny Yolo V2 model with 'nchw' layout, trained on the Pascal VOC dataset.
 export class TinyYoloV2Nchw {
@@ -24,6 +24,10 @@ export class TinyYoloV2Nchw {
     const weightName = prefix + '_W.npy';
     const weight = await buildConstantByNpy(this.builder_, weightName);
     const options = {autoPad: 'same-upper'};
+    options.padding = computePadding2DForAutoPad(
+        /* nchw */[input.shape()[2], input.shape()[3]],
+        /* oihw */[weight.shape()[2], weight.shape()[3]],
+        options.strides, options.dilations, 'same-upper');
     if (useBias) {
       const biasName = prefix + '_B.npy';
       options.bias = await buildConstantByNpy(this.builder_, biasName);
@@ -31,6 +35,14 @@ export class TinyYoloV2Nchw {
     return this.builder_.conv2d(input, weight, options);
   }
 
+  buildMaxPool2d_(input, options) {
+    // Explicit 'same-upper' padding; the nchw input keeps H and W at 2 and 3.
+    options.padding = computePadding2DForAutoPad(
+        [input.shape()[2], input.shape()[3]], options.windowDimensions,
+        options.strides, options.dilations, 'same-upper');
+    return this.builder_.maxPool2d(input, options);
+  }
+
   async buildBatchNorm_(input, name) {
     const prefix = this.weightsUrl_ + 'BatchNormalization';
     const scaleName = `${prefix}_scale${name}.npy`;
@@ -73,23 +85,22 @@ export class TinyYoloV2Nchw {
     const poolOptions = {
       windowDimensions: [2, 2],
       strides: [2, 2],
-      autoPad: 'same-upper',
     };
     const mul = this.builder_.mul(image, mulScale);
     const add = this.builder_.add(mul, addBias);
     const conv0 = await this.buildConvolutional_(add, '');
-    const pool0 = this.builder_.maxPool2d(conv0, poolOptions);
+    const pool0 = this.buildMaxPool2d_(conv0, poolOptions);
     const conv1 = await this.buildConvolutional_(pool0, '1');
-    const pool1 = this.builder_.maxPool2d(conv1, poolOptions);
+    const pool1 = this.buildMaxPool2d_(conv1, poolOptions);
     const conv2 = await this.buildConvolutional_(pool1, '2');
-    const pool2 = this.builder_.maxPool2d(conv2, poolOptions);
+    const pool2 = this.buildMaxPool2d_(conv2, poolOptions);
     const conv3 = await this.buildConvolutional_(pool2, '3');
-    const pool3 = this.builder_.maxPool2d(conv3, poolOptions);
+    const pool3 = this.buildMaxPool2d_(conv3, poolOptions);
     const conv4 = await this.buildConvolutional_(pool3, '4');
-    const pool4 = this.builder_.maxPool2d(conv4, poolOptions);
+    const pool4 = this.buildMaxPool2d_(conv4, poolOptions);
     const conv5 = await this.buildConvolutional_(pool4, '5');
-    const pool5 = this.builder_.maxPool2d(conv5,
-        {windowDimensions: [2, 2], autoPad: 'same-upper'});
+    const pool5 = this.buildMaxPool2d_(conv5,
+        {windowDimensions: [2, 2]});
     const conv6 = await this.buildConvolutional_(pool5, '6');
     const conv7 = await this.buildConvolutional_(conv6, '7');
     const conv = await this.buildConv_(conv7, '8', true);
diff --git a/object_detection/tiny_yolov2_nhwc.js b/object_detection/tiny_yolov2_nhwc.js
index ae9528c5..9144813c 100644
--- a/object_detection/tiny_yolov2_nhwc.js
+++ b/object_detection/tiny_yolov2_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // Tiny Yolo V2 model with 'nhwc' layout, trained on the Pascal VOC dataset.
 export class TinyYoloV2Nhwc {
@@ -29,9 +29,12 @@ export class TinyYoloV2Nhwc {
     const options = {
       inputLayout: 'nhwc',
       filterLayout: 'ohwi',
-      autoPad: 'same-upper',
     };
     options.bias = bias;
+    options.padding = computePadding2DForAutoPad(
+        /* nhwc */[input.shape()[1], input.shape()[2]],
+        /* ohwi */[weights.shape()[1], weights.shape()[2]],
+        options.strides, options.dilations, 'same-upper');
     let conv = this.builder_.conv2d(input, weights, options);
     if (leakyRelu) {
       // Fused leakyRelu is not supported by XNNPACK.
@@ -40,6 +43,14 @@ export class TinyYoloV2Nhwc {
     return conv;
   }
 
+  buildMaxPool2d_(input, options) {
+    // Explicit 'same-upper' padding from the nhwc input's height and width.
+    const {windowDimensions, strides, dilations} = options;
+    options.padding = computePadding2DForAutoPad(input.shape().slice(1, 3),
+        windowDimensions, strides, dilations, 'same-upper');
+    return this.builder_.maxPool2d(input, options);
+  }
+
   async load(contextOptions) {
     this.context_ = await navigator.ml.createContext(contextOptions);
     this.builder_ = new MLGraphBuilder(this.context_);
@@ -52,22 +63,21 @@ export class TinyYoloV2Nhwc {
     const poolOptions = {
       windowDimensions: [2, 2],
       strides: [2, 2],
-      autoPad: 'same-upper',
       layout: 'nhwc',
     };
     const conv1 = await this.buildConv_(input, '1');
-    const pool1 = this.builder_.maxPool2d(conv1, poolOptions);
+    const pool1 = this.buildMaxPool2d_(conv1, poolOptions);
     const conv2 = await this.buildConv_(pool1, '2');
-    const pool2 = this.builder_.maxPool2d(conv2, poolOptions);
+    const pool2 = this.buildMaxPool2d_(conv2, poolOptions);
     const conv3 = await this.buildConv_(pool2, '3');
-    const pool3 = this.builder_.maxPool2d(conv3, poolOptions);
+    const pool3 = this.buildMaxPool2d_(conv3, poolOptions);
     const conv4 = await this.buildConv_(pool3, '4');
-    const pool4 = this.builder_.maxPool2d(conv4, poolOptions);
+    const pool4 = this.buildMaxPool2d_(conv4, poolOptions);
     const conv5 = await this.buildConv_(pool4, '5');
-    const pool5 = this.builder_.maxPool2d(conv5, poolOptions);
+    const pool5 = this.buildMaxPool2d_(conv5, poolOptions);
     const conv6 = await this.buildConv_(pool5, '6');
-    const pool6 = this.builder_.maxPool2d(conv6,
-        {windowDimensions: [2, 2], autoPad: 'same-upper', layout: 'nhwc'});
+    const pool6 = this.buildMaxPool2d_(conv6,
+        {windowDimensions: [2, 2], layout: 'nhwc'});
     const conv7 = await this.buildConv_(pool6, '7');
     const conv8 = await this.buildConv_(conv7, '8');
     return await this.buildConv_(conv8, '9', false);
diff --git a/semantic_segmentation/deeplabv3_mnv2_nhwc.js b/semantic_segmentation/deeplabv3_mnv2_nhwc.js
index 7c8388bc..07190bd0 100644
--- a/semantic_segmentation/deeplabv3_mnv2_nhwc.js
+++ b/semantic_segmentation/deeplabv3_mnv2_nhwc.js
@@ -1,6 +1,6 @@
 'use strict';
 
-import {buildConstantByNpy} from '../common/utils.js';
+import {buildConstantByNpy, computePadding2DForAutoPad} from '../common/utils.js';
 
 // DeepLab V3 MobileNet V2 model with 'nhwc' input layout
 export class DeepLabV3MNV2Nhwc {
@@ -36,12 +36,15 @@ export class DeepLabV3MNV2Nhwc {
     const weights = await buildConstantByNpy(this.builder_, weightsName);
     const bias = await buildConstantByNpy(this.builder_, biasName);
     options.inputLayout = 'nhwc';
-    options.autoPad = 'same-upper';
     if (namePrefix.includes('depthwise')) {
       options.filterLayout = 'ihwo';
     } else {
       options.filterLayout = 'ohwi';
     }
+    options.padding = computePadding2DForAutoPad(
+        /* nhwc */[input.shape()[1], input.shape()[2]],
+        /* ohwi or ihwo */[weights.shape()[1], weights.shape()[2]],
+        options.strides, options.dilations, 'same-upper');
     options.bias = bias;
     if (relu6) {
       // TODO: Set clamp activation to options once it's supported in