From 5d8fc204a59f5d7cfed3d948084593866b16ceef Mon Sep 17 00:00:00 2001
From: Leandro Nunes
Date: Mon, 28 Nov 2022 09:23:21 +0000
Subject: [PATCH] [ACL] Enable int8 data type in QNN DENSE (#13487)

This enables int8 data type to be used in Compute Library for the Arm(r)
Architecture (ACL) BYOC integration.
---
 .../tvm/relay/op/contrib/arm_compute_lib.py |  6 ++--
 .../test_arm_compute_lib/test_dense.py      | 29 ++++++++++++-------
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/python/tvm/relay/op/contrib/arm_compute_lib.py b/python/tvm/relay/op/contrib/arm_compute_lib.py
index d63cd8c83a93..95500c91e1f4 100644
--- a/python/tvm/relay/op/contrib/arm_compute_lib.py
+++ b/python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -258,7 +258,7 @@ def check_dense(extract):
 
     def check_qnn_dense(extract):
         """Check qnn conv pattern is supported by ACL."""
-        if extract.attrs.out_dtype != "uint8":
+        if extract.attrs.out_dtype not in ("uint8", "int8"):
             return False
         call = extract
         while call.op.name != "qnn.dense":
@@ -414,10 +414,10 @@ def qnn_dense(expr):
     """Check if the external ACL codegen for qnn.dense should be used."""
     attrs, args = expr.attrs, expr.args
     data_typ = args[0].checked_type
-    if data_typ.dtype != "uint8":
+    if data_typ.dtype not in ("uint8", "int8"):
         return False
     kernel_typ = args[1].checked_type
-    if len(kernel_typ.shape) != 2 or kernel_typ.dtype != "uint8":
+    if len(kernel_typ.shape) != 2 or kernel_typ.dtype not in ("uint8", "int8"):
         return False
     if attrs.out_dtype != "int32":
         return False
diff --git a/tests/python/contrib/test_arm_compute_lib/test_dense.py b/tests/python/contrib/test_arm_compute_lib/test_dense.py
index 6bdff0fdb857..fa6057dd9a63 100644
--- a/tests/python/contrib/test_arm_compute_lib/test_dense.py
+++ b/tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
"""Arm Compute Library integration dense tests.""" - import numpy as np +import pytest import tvm from tvm import relay @@ -104,14 +104,15 @@ def _get_qnn_model( relay.const(0, "int32"), # input zero point relay.const(output_sc, "float32"), # output scale relay.const(output_zp, "int32"), # output zero point - out_dtype="uint8", + out_dtype=dtype, ) return out, params def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): output_shape = (shape[0], units) - out_dtype = "int32" if dtype == "uint8" else "float32" + qnn_dtypes = ("uint8", "int8") + out_dtype = "int32" if dtype in qnn_dtypes else "float32" node = { "op": "kernel", @@ -136,7 +137,7 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): ] # qnn.dense params, input and kernel - if dtype == "uint8": + if dtype in qnn_dtypes: node["name"] = "qnn.dense" for param_dtype in ["int32", "float32"]: for _ in range(2): @@ -149,7 +150,7 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): ) if has_bias: - bias_dtype = "int32" if dtype == "uint8" else "float32" + bias_dtype = "int32" if dtype in qnn_dtypes else "float32" bias_shape = ( [1, weight_shape[0]] if dtype == "float32" and weight_shape[0] != 1 @@ -164,7 +165,7 @@ def _get_expected_codegen(shape, weight_shape, units, dtype, has_bias=False): ) # qnn.dense params, output - if dtype == "uint8": + if dtype in qnn_dtypes: for param_dtype in ["float32", "int32"]: inputs.append( {"op": "const", "name": "", "attrs": {"shape": [[[]]], "dtype": [[param_dtype]]}} @@ -251,7 +252,14 @@ def test_codegen_dense(): verify_codegen(func, exp_codegen) -def test_qnn_dense(): +@pytest.mark.parametrize( + "dtype,min_range,max_range", + [ + ("uint8", 0, 255), + ("int8", -127, 128), + ], +) +def test_qnn_dense(dtype, min_range, max_range): Device.load("test_config.json") if skip_runtime_test(): @@ -260,7 +268,6 @@ def test_qnn_dense(): device = Device() np.random.seed(0) - dtype = "uint8" trials = [ [(1, 2), (2, 2), 2, True], [(1, 2), (2, 2), 2, False], @@ -277,7 +284,7 @@ def test_qnn_dense(): ] for shape, weight_shape, units, composite in trials: outputs = [] - inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))} + inputs = {"a": tvm.nd.array(np.random.uniform(min_range, max_range, shape).astype(dtype))} input_zp = 100 input_sc = 0.5 kernel_zp = 50 @@ -329,13 +336,13 @@ def test_qnn_dense(): verify(outputs, atol=1, rtol=0, config=config, verify_saturation=True) -def test_codegen_qnn_dense(): +@pytest.mark.parametrize("dtype", ["uint8", "int8"]) +def test_codegen_qnn_dense(dtype): if skip_codegen_test(): return np.random.seed(0) - dtype = "uint8" trials = [ [(1, 2), (2, 2), 2, True], [(1, 2), (2, 2), 2, False],