Support NPU for SSD MobileNetV1 example (#228)
* Support NPU for SSD MobileNetV1 example

This change converts the float32 weights to float16 at load time, and uses the
WebNN cast operator to convert float32 inputs to float16 before compute and
float16 outputs back to float32 after compute (see the sketch after this
summary).

* Fix lint error
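
The cast pattern described above can be sketched in a few lines of WebNN JavaScript. This is an illustration, not code from the diff: the input shape and the 'npu' device type are assumptions, and a real graph would place its float16 operators between the two casts.

// Minimal sketch of the float16 compute pattern (illustrative only).
const context = await navigator.ml.createContext({deviceType: 'npu'});
const builder = new MLGraphBuilder(context);
// The graph keeps a float32 external interface...
const input = builder.input('input', {dataType: 'float32', dimensions: [1, 3, 300, 300]});
// ...but casts to float16 internally so the NPU can execute it.
const fp16Input = builder.cast(input, 'float16');
// (float16 constants and operators, e.g. conv2d, would go here.)
const output = builder.cast(fp16Input, 'float32'); // cast results back for the caller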
huningxin authored May 6, 2024
1 parent e5bdbab commit 555aeab
Showing 3 changed files with 90 additions and 10 deletions.
70 changes: 66 additions & 4 deletions common/utils.js
@@ -34,7 +34,57 @@ export async function getBufferFromUrl(url) {
return arrayBuffer;
}

export async function buildConstantByNpy(builder, url) {
// ref: http://stackoverflow.com/questions/32633585/how-do-you-convert-to-half-floats-in-javascript
export const toHalf = (function() {
const floatView = new Float32Array(1);
const int32View = new Int32Array(floatView.buffer);

/* This method is faster than the OpenEXR implementation (very often
* used, e.g. in Ogre), with the additional benefit of rounding, inspired
* by James Tursa's half-precision code. */
return function toHalf(val) {
floatView[0] = val;
const x = int32View[0];

let bits = (x >> 16) & 0x8000; /* Get the sign */
let m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
const e = (x >> 23) & 0xff; /* Using int is faster here */

/* If zero, or denormal, or exponent underflows too much for a denormal
* half, return signed zero. */
if (e < 103) {
return bits;
}

/* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
if (e > 142) {
bits |= 0x7c00;
/* If exponent was 0xff and one mantissa bit was set, it means NaN,
* not Inf, so make sure we set one mantissa bit too. */
bits |= ((e == 255) && (x & 0x007fffff)) ? 1 : 0;
return bits;
}

/* If exponent underflows but not too much, return a denormal */
if (e < 113) {
m |= 0x0800;
/* Extra rounding may overflow and set mantissa to 0 and exponent
* to 1, which is OK. */
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
return bits;
}

bits |= ((e - 112) << 10) | (m >> 1);
/* Extra rounding. An overflow will set mantissa to 0 and increment
* the exponent, which is OK. */
bits += m & 1;
return bits;
};
})();

// Convert npy data from its original data type to `targetType`; only
// 'float32' to 'float16' conversion is currently supported.
export async function buildConstantByNpy(builder, url, targetType) {
const dataTypeMap = new Map([
['f2', {type: 'float16', array: Uint16Array}],
['f4', {type: 'float32', array: Float32Array}],
@@ -55,11 +105,22 @@ export async function buildConstantByNpy(builder, url) {
throw new Error(`Data type ${npArray.dataType} is not supported.`);
}
const dimensions = npArray.shape;
const type = dataTypeMap.get(npArray.dataType).type;
let type = dataTypeMap.get(npArray.dataType).type;
const TypedArrayConstructor = dataTypeMap.get(npArray.dataType).array;
const dataView = new Uint8Array(npArray.data.buffer);
const dataView2 = dataView.slice();
const typedArray = new TypedArrayConstructor(dataView2.buffer);
let typedArray = new TypedArrayConstructor(dataView2.buffer);
if (type === 'float32' && targetType === 'float16') {
const uint16Array = new Uint16Array(typedArray.length);
for (let i = 0; i < typedArray.length; ++i) {
uint16Array[i] = toHalf(typedArray[i]);
}
typedArray = uint16Array;
type = targetType;
} else if (targetType && type !== targetType) {
throw new Error(`Conversion from ${npArray.dataType} ` +
`to ${targetType} is not supported.`);
}
return builder.constant({dataType: type, type, dimensions}, typedArray);
}
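
A call site for the extended helper would look like the following; the builder and the weights URL are illustrative, while the 'float16' argument mirrors what the SSD MobileNetV1 class below passes on GPU/NPU.

// Hypothetical call site, given an MLGraphBuilder `builder`; the .npy URL is
// illustrative. Float32 weights are converted element-wise via toHalf at load.
const weights = await buildConstantByNpy(
    builder, './weights/conv0_weights.npy', 'float16');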

@@ -494,7 +555,8 @@ export function getDefaultLayout(deviceType) {
// Windows or Mac platform.
if (deviceType.indexOf('cpu') != -1) {
return 'nhwc';
} else if (deviceType.indexOf('gpu') != -1) {
} else if (deviceType.indexOf('gpu') != -1 ||
deviceType.indexOf('npu') != -1) {
return 'nchw';
}
}
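
The toHalf helper added above can be spot-checked against standard IEEE 754 binary16 encodings:

// Sanity checks for toHalf; the expected hex values are the standard
// binary16 bit patterns for each input.
console.log(toHalf(1.0).toString(16));   // '3c00'
console.log(toHalf(0.5).toString(16));   // '3800'
console.log(toHalf(-2.0).toString(16));  // 'c000'
console.log(toHalf(65504).toString(16)); // '7bff' (largest finite half)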
3 changes: 3 additions & 0 deletions object_detection/index.html
@@ -43,6 +43,9 @@
<label class="btn btn-outline-info custom" name="webnn">
<input type="radio" name="backend" id="webnn_gpu" autocomplete="off">WebNN (GPU)
</label>
<label class="btn btn-outline-info custom" name="webnn">
<input type="radio" name="backend" id="webnn_npu" autocomplete="off">WebNN (NPU)
</label>
</div>
</div>
</div>
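
The handler that reads these radios is not part of this diff; a plausible wiring, with the id-to-deviceType mapping as an assumption, is:

// Hypothetical wiring (the sample's main.js is not shown in this diff):
// derive the WebNN deviceType from the checked radio's id suffix.
const checked = document.querySelector('input[name="backend"]:checked');
const deviceType = checked.id.replace('webnn_', ''); // 'cpu' | 'gpu' | 'npu'
const context = await navigator.ml.createContext({deviceType});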
27 changes: 21 additions & 6 deletions object_detection/ssd_mobilenetv1_nchw.js
@@ -7,6 +7,7 @@ export class SsdMobilenetV1Nchw {
constructor() {
this.context_ = null;
this.deviceType_ = null;
this.targetDataType_ = 'float32';
this.model_ = null;
this.builder_ = null;
this.graph_ = null;
@@ -57,9 +58,11 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
}

const weightsName = this.weightsUrl_ + prefix + weightSuffix;
const weights = await buildConstantByNpy(this.builder_, weightsName);
const weights = await buildConstantByNpy(
this.builder_, weightsName, this.targetDataType_);
const biasName = this.biasUrl_ + prefix + biasSuffix;
const bias = await buildConstantByNpy(this.builder_, biasName);
const bias = await buildConstantByNpy(
this.builder_, biasName, this.targetDataType_);
options.padding = computePadding2DForAutoPad(
/* nchw */[input.shape()[2], input.shape()[3]],
/* oihw */[weights.shape()[2], weights.shape()[3]],
@@ -69,7 +72,7 @@
// TODO: Set clamp activation to options once it's supported in
// WebNN DML backend.
// Implement `clip` by `clamp` of WebNN API
if (this.deviceType_ == 'gpu') {
if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') {
return this.builder_.clamp(
this.builder_.conv2d(input, weights, options),
{minValue: 0, maxValue: 6});
@@ -83,12 +86,17 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
async load(contextOptions) {
this.context_ = await navigator.ml.createContext(contextOptions);
this.deviceType_ = contextOptions.deviceType;
if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') {
this.targetDataType_ = 'float16';
}
this.builder_ = new MLGraphBuilder(this.context_);
const input = this.builder_.input('input', {
type: 'float32',
let input = this.builder_.input('input', {
dataType: 'float32',
dimensions: this.inputOptions.inputDimensions,
});
if (this.targetDataType_ === 'float16') {
input = this.builder_.cast(input, 'float16');
}
const strides = [2, 2];
const conv0 = await this.buildConv_(
input, ['', '0', '', '165__cf__168'],
@@ -249,7 +257,14 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
const concat1 = this.builder_.concat(
[reshape6, reshape7, reshape8, reshape9, reshape10, reshape11], 1);

return {'boxes': concat0, 'scores': concat1};
let boxes = concat0;
let scores = concat1;

if (this.targetDataType_ === 'float16') {
boxes = this.builder_.cast(boxes, 'float32');
scores = this.builder_.cast(scores, 'float32');
}
return {boxes, scores};
}

async build(outputOperand) {

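Taken together, the three files give the example a float16 path on NPU while keeping a float32 interface for callers. A minimal usage sketch, where everything beyond load() is hypothetical driver wiring:

// End-to-end sketch; load() options and the float32 I/O contract come from
// this diff, the rest of the driver wiring is hypothetical.
const ssd = new SsdMobilenetV1Nchw();
await ssd.load({deviceType: 'npu'}); // selects float16 weights and inserts casts
// Input and output buffers remain Float32Arrays; the graph casts to and from
// float16 internally, so callers never handle half-precision data.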