From 5d8fc204a59f5d7cfed3d948084593866b16ceef Mon Sep 17 00:00:00 2001
From: Leandro Nunes
Date: Mon, 28 Nov 2022 09:23:21 +0000
Subject: [PATCH] [ACL] Enable int8 data type in QNN DENSE (#13487)

This enables int8 data type to be used in Compute Library for the Arm(r)
Architecture (ACL) BYOC integration.
---
 .../tvm/relay/op/contrib/arm_compute_lib.py |  6 ++--
 .../test_arm_compute_lib/test_dense.py      | 29 ++++++++++++-------
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py
index d63cd8c83a93..95500c91e1f4 100644
--- a/python/tvm/relay/op/contrib/arm_compute_lib.py
+++ b/python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -258,7 +258,7 @@ def check_dense(extract):
 
     def check_qnn_dense(extract):
         """Check qnn conv pattern is supported by ACL."""
-        if extract.attrs.out_dtype != "uint8":
+        if extract.attrs.out_dtype not in ("uint8", "int8"):
             return False
         call = extract
         while call.op.name != "qnn.dense":
@@ -414,10 +414,10 @@ def qnn_dense(expr):
     """Check if the external ACL codegen for qnn.dense should be used."""
     attrs, args = expr.attrs, expr.args
     data_typ = args[0].checked_type
-    if data_typ.dtype != "uint8":
+    if data_typ.dtype not in ("uint8", "int8"):
         return False
     kernel_typ = args[1].checked_type
-    if len(kernel_typ.shape) != 2 or kernel_typ.dtype != "uint8":
+    if len(kernel_typ.shape) != 2 or kernel_typ.dtype not in ("uint8", "int8"):
         return False
     if attrs.out_dtype != "int32":
         return False
diff --git a/tests/python/contrib/test_arm_compute_lib/test_dense.py b/tests/python/contrib/test_arm_compute_lib/test_dense.py
index 6bdff0fdb857..fa6057dd9a63 100644
--- a/tests/python/contrib/test_arm_compute_lib/test_dense.py
+++ b/tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
"""Arm Compute Library integration dense tests.""" - import numpy as np +import pytest import tvm from tvm import relay @@ -104,14 +104,15 @@ def _get_qnn_model( relay.const(0, "int32"), # input zero point relay.const(output_sc, "float32"), # output scale relay.const(output_zp, "int32"), # output zero point - out_dtype="uint8", + out_dtype=dtype, ) return out, params def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): output_shape = (shape[0], units) - out_dtype = "int32" if dtype == "uint8" else "float32" + qnn_dtypes = ("uint8", "int8") + out_dtype = "int32" if dtype in qnn_dtypes else "float32" node = { "op": "kernel", @@ -136,7 +137,7 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): ] # qnn.dense params, input and kernel - if dtype == "uint8": + if dtype in qnn_dtypes: node["name"] = "qnn.dense" for param_dtype in ["int32", "float32"]: for _ in range(2): @@ -149,7 +150,7 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): ) if has_bias: - bias_dtype = "int32" if dtype == "uint8" else "float32" + bias_dtype = "int32" if dtype in qnn_dtypes else "float32" bias_shape = ( [1, weight_shape[0]] if dtype == "float32" and weight_shape[0] != 1 @@ -164,7 +165,7 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): ) # qnn.dense params, output - if dtype == "uint8": + if dtype in qnn_dtypes: for param_dtype in ["float32", "int32"]: inputs.append( {"op": "const", "name": "", "attrs": {"shape": [[[]]], "dtype": [[param_dtype]]}} @@ -251,7 +252,14 @@ def test_codegen_dense(): verify_codegen(func, exp_codegen) -def test_qnn_dense(): +@pytest.mark.parametrize( + "dtype,min_range,max_range", + [ + ("uint8", 0, 255), + ("int8", -127, 128), + ], +) +def test_qnn_dense(dtype, min_range, max_range): Device.load("test_config.json") if skip_runtime_test(): @@ -260,7 +268,6 @@ def test_qnn_dense(): device = Device() np.random.seed(0) - dtype = "uint8" trials = [ [(1, 2), (2, 2), 2, True], [(1, 2), (2, 2), 2, False], @@ -277,7 +284,7 @@ def test_qnn_dense(): ] for shape, weight_shape, units, composite in trials: outputs = [] - inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))} + inputs = {"a": tvm.nd.array(np.random.uniform(min_range, max_range, shape).astype(dtype))} input_zp = 100 input_sc = 0.5 kernel_zp = 50 @@ -329,13 +336,13 @@ def test_qnn_dense(): verify(outputs, atol=1, rtol=0, config=config, verify_saturation=True) -def test_codegen_qnn_dense(): +@pytest.mark.parametrize("dtype", ["uint8", "int8"]) +def test_codegen_qnn_dense(dtype): if skip_codegen_test(): return np.random.seed(0) - dtype = "uint8" trials = [ [(1, 2), (2, 2), 2, True], [(1, 2), (2, 2), 2, False],