From 8579b14fc606a25ace4172b5ea4e0736fb59d75f Mon Sep 17 00:00:00 2001
From: CokeDong <408244909@qq.com>
Date: Wed, 12 Apr 2023 11:09:57 +0800
Subject: [PATCH 01/10] Add test scripts for mmcv ext ops (#62)

* Add diopi-test for mmcv ext ops

* add mmcv

* Fix

* Support device dispatch

* Fix
---
 DIOPI-TEST/.gitmodules              |  3 +++
 DIOPI-TEST/scripts/build_impl.sh    |  4 +++
 DIOPI-TEST/scripts/ci_script.sh     |  2 +-
 DIOPI-TEST/scripts/test_mmcv_ext.sh | 38 +++++++++++++++++++++++++++++
 DIOPI-TEST/third_party/mmcv_diopi   |  1 +
 5 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100755 DIOPI-TEST/scripts/test_mmcv_ext.sh
 create mode 160000 DIOPI-TEST/third_party/mmcv_diopi

diff --git a/DIOPI-TEST/.gitmodules b/DIOPI-TEST/.gitmodules
index 9a3b5b6f6..8b7c26766 100644
--- a/DIOPI-TEST/.gitmodules
+++ b/DIOPI-TEST/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "impl"]
 	path = impl
 	url = git@github.com:OpenComputeLab/DIOPI-IMPL.git
+[submodule "third_party/mmcv_diopi"]
+	path = third_party/mmcv_diopi
+	url = git@github.com:OpenComputeLab/mmcv_diopi.git
diff --git a/DIOPI-TEST/scripts/build_impl.sh b/DIOPI-TEST/scripts/build_impl.sh
index 33848465b..a2e34d40b 100644
--- a/DIOPI-TEST/scripts/build_impl.sh
+++ b/DIOPI-TEST/scripts/build_impl.sh
@@ -44,6 +44,10 @@ case $1 in
     (rm -rf build && mkdir build && cd build \
         &&cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIMPL_OPT=TORCH -DHIP=ON && make -j4) \
     || exit -1;;
+  mmcv_ext)
+    (cd third_party/mmcv_diopi && rm -rf build && mkdir build \
+        && MMCV_WITH_DIOPI=1 MMCV_WITH_OPS=1 python setup.py build_ext -i) \
+    || exit -1;;
   *)
     echo -e "[ERROR] Incorrect compilation option:" $1;
 
diff --git a/DIOPI-TEST/scripts/ci_script.sh b/DIOPI-TEST/scripts/ci_script.sh
index 8dc8dfef2..0c9b1b135 100644
--- a/DIOPI-TEST/scripts/ci_script.sh
+++ b/DIOPI-TEST/scripts/ci_script.sh
@@ -4,7 +4,7 @@ set -e
 case $1 in
   py-lint)
     (echo "py-lint" && flake8 --ignore=E501,F841 python/conformance/diopi_functions.py \
-       && flake8 --ignore=E501,F401 --exclude=python/conformance/diopi_functions.py,scripts/cpplint.py,impl/,python/conformance/model_config/ \
+       && flake8 --ignore=E501,F401 --exclude=python/conformance/diopi_functions.py,scripts/cpplint.py,impl/,third_party/,python/conformance/model_config/ \
        && flake8 --ignore=E501,F401 python/conformance/model_config/process_config.py python/conformance/model_config/__init__.py ) \
     || exit -1;;
   cpp-lint)
diff --git a/DIOPI-TEST/scripts/test_mmcv_ext.sh b/DIOPI-TEST/scripts/test_mmcv_ext.sh
new file mode 100755
index 000000000..99a71da8e
--- /dev/null
+++ b/DIOPI-TEST/scripts/test_mmcv_ext.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+set -e
+
+if [ $# -ne 1 ]
+then
+    echo Usage: test_mmcv_ext.sh DEVICE
+    exit 1
+fi
+
+DEVICE=${1}
+
+if [[ $DEVICE == "CUDA" ]]; then
+    MMCV_TEST_LIST=(test_active_rotated_filter.py \
+    test_assign_score_withk.py \
+    test_bbox.py \
+    test_deform_roi_pool.py \
+    test_knn.py \
+    test_convex_iou.py \
+    test_min_area_polygons.py \
+    test_prroi_pool.py \
+    test_chamfer_distance.py \
+    test_border_align.py
+    )
+elif [[ $DEVICE == "MLU" ]]; then
+    MMCV_TEST_LIST=()
+else
+    echo DEVICE $DEVICE not supported!
+    exit 1
+fi
+
+cd third_party/mmcv_diopi
+export PYTHONPATH=${PWD}:$PYTHONPATH
+cd tests/test_ops
+
+for elem in "${MMCV_TEST_LIST[@]}"
+do
+    python -m pytest "$elem"
+done
diff --git a/DIOPI-TEST/third_party/mmcv_diopi b/DIOPI-TEST/third_party/mmcv_diopi
new file mode 160000
index 000000000..6b1892966
--- /dev/null
+++ b/DIOPI-TEST/third_party/mmcv_diopi
@@ -0,0 +1 @@
+Subproject commit 6b18929667739f5e5d0bfa8542691b2e3bf8144f

From c3c082756a11aa810f5c6ec374579157e139e304 Mon Sep 17 00:00:00 2001
From: yangbofun <37532326+yangbofun@users.noreply.github.com>
Date: Thu, 13 Apr 2023 13:40:47 +0800
Subject: [PATCH 02/10] add log for max diff (#75)

---
 DIOPI-TEST/python/conformance/conformance_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DIOPI-TEST/python/conformance/conformance_test.py b/DIOPI-TEST/python/conformance/conformance_test.py
index 0082b8b36..0ce8b3468 100644
--- a/DIOPI-TEST/python/conformance/conformance_test.py
+++ b/DIOPI-TEST/python/conformance/conformance_test.py
@@ -66,11 +66,12 @@ def allclose(cfg: dict, tensor1: np.ndarray, tensor2: np.ndarray, sum_to_compare
     passed = np.allclose(tensor1, tensor2, rtol, atol, True)
     if record:
         save_precision(cfg, tensor1, tensor2, passed, var_name)
-    if not passed and logger.level == 10:
+    if not passed:
         sum1 = tensor1.sum()
         sum2 = tensor2.sum()
         mask = np.isclose(tensor1, tensor2, rtol, atol, True)
         max_diff = np.abs(tensor1 - tensor2).max()
+        logger.info(f"Max of diff is {max_diff}.")
         logger.debug(f"Sum of {var_name} is {sum1}, Sum of {var_name}_ref is {sum2}, Max of diff is {max_diff}. \
                      \n" + f"{var_name} is {tensor1},\n{var_name}_ref is {tensor2},\nMask is {mask}\n")
     return passed

From d15cb7b896d2cd6d732c0547d4d309c1a4e5f7a7 Mon Sep 17 00:00:00 2001
From: zephyr <48674444+z379035389@users.noreply.github.com>
Date: Fri, 14 Apr 2023 13:24:42 +0800
Subject: [PATCH 03/10] Zephyr/add normal add hardswish (#76)

* add normal_ / hardswish /hardswish_

* fix lint

* fix lint

* fix lint

* fix lint

* fix formal parameters

* add meshgrid

* add meshgrid

* update impl

* add 1d assert and update function paras

* fix lint

* fix lint

* fix lint

* update from main

* fix conflict

* add hardswish_backward

* fix lint

* fix lint

* fix lint

* add normal_ / hardswish /hardswish_

* fix lint

* fix third-party lint error

* update hardswish backward test

---------

Co-authored-by: xintian-514 <121075453+xintian-514@users.noreply.github.com>
---
 DIOPI-TEST/diopirt                               |  2 +-
 DIOPI-TEST/impl                                  |  2 +-
 DIOPI-TEST/python/conformance/diopi_configs.py   |  1 +
 DIOPI-TEST/python/conformance/diopi_functions.py | 10 ++++++++++
 DIOPI-TEST/scripts/ci_script.sh                  |  2 +-
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/DIOPI-TEST/diopirt b/DIOPI-TEST/diopirt
index d37d439cf..7cfe1ff1b 160000
--- a/DIOPI-TEST/diopirt
+++ b/DIOPI-TEST/diopirt
@@ -1 +1 @@
-Subproject commit d37d439cf9b6cbbb89ef006f336486be27c9c579
+Subproject commit 7cfe1ff1b73a1b3b81bbce22b8f78aa50eea34a6
diff --git a/DIOPI-TEST/impl b/DIOPI-TEST/impl
index 1d5f0ad9f..7fd6b5751 160000
--- a/DIOPI-TEST/impl
+++ b/DIOPI-TEST/impl
@@ -1 +1 @@
-Subproject commit 1d5f0ad9f95b542d558add058ba733d305bb84c2
+Subproject commit 7fd6b5751209fe118ef33e139b1d5e714b76de86
diff --git a/DIOPI-TEST/python/conformance/diopi_configs.py b/DIOPI-TEST/python/conformance/diopi_configs.py
index 499a9a617..28767816d 100644
--- a/DIOPI-TEST/python/conformance/diopi_configs.py
+++ b/DIOPI-TEST/python/conformance/diopi_configs.py
@@ -121,6 +121,7 @@
                     "ins": ['input'],
                     "shape": ((2, 4096), (64, 28, 28),
                               (32, 64, 112, 112), (64, 3, 7, 28, 28)),
+                    "requires_grad": [True],
                     "dtype": [Dtype.float32, Dtype.float64],
                     "gen_fn": Genfunc.randn,
                 },
diff --git a/DIOPI-TEST/python/conformance/diopi_functions.py b/DIOPI-TEST/python/conformance/diopi_functions.py
index e63126959..aedce190c 100644
--- a/DIOPI-TEST/python/conformance/diopi_functions.py
+++ b/DIOPI-TEST/python/conformance/diopi_functions.py
@@ -1772,6 +1772,16 @@ def hardtanh_backward(input, grad_outputs, min_val=-1.0, max_val=1.0, **kwargs)
     return {"input": grad_input}
 
 
+def hardswish_backward(input, grad_outputs, **kwargs) -> Tensor:
+    assert len(grad_outputs) == 1, "only accept 1 gradient to do backward"
+    grad_input = raw_like(input)
+    func = check_function("diopiHardswishBackward")
+    ret = func(input.context_handle, grad_input.tensor_handle, grad_outputs[0].tensor_handle,
+               input.tensor_handle)
+    check_returncode(ret)
+    return {"input": grad_input}
+
+
 def gelu_backward(input, grad_outputs, approximate='none', **kwargs) -> Tensor:
     assert len(grad_outputs) == 1, "only accept 1 gradient to do backward"
     grad_input = raw_like(input)
diff --git a/DIOPI-TEST/scripts/ci_script.sh b/DIOPI-TEST/scripts/ci_script.sh
index 0c9b1b135..4dd6ca404 100644
--- a/DIOPI-TEST/scripts/ci_script.sh
+++ b/DIOPI-TEST/scripts/ci_script.sh
@@ -9,7 +9,7 @@ case $1 in
     || exit -1;;
   cpp-lint)
     # for other cpplint version, maybe  -whitespace/indent is needed to check impl
-    (echo "cpp-lint" && python scripts/cpplint.py --linelength=160 \
+    (echo "cpp-lint" && python scripts/cpplint.py --exclude=impl/third_party/ --linelength=160 \
       --filter=-build/c++11,-legal/copyright,-build/include_subdir,-runtime/references,-runtime/printf,-runtime/int,-build/namespace \
       --recursive impl/ \
       && python scripts/cpplint.py --linelength=240 --filter=-build/header_guard --recursive diopirt/ ) \

From 8fb252d5ae1351fedc6770860c5584ba8d391cef Mon Sep 17 00:00:00 2001
From: ding0706 <chrysantd@gmail.com>
Date: Sat, 15 Apr 2023 11:04:51 +0800
Subject: [PATCH 04/10] td/device config for ci (#61)

* device config for ci

* fix bug when shape or value not exist

* fix lint

* add warning and error report

* optimize name

* fix error report

* copy->link
---
 .../conformance/device_config_helper.py       | 124 ++++++++++++++++++
 DIOPI-TEST/python/conformance/gen_data.py     |  89 ++++++++++++-
 DIOPI-TEST/python/main.py                     |   4 +-
 3 files changed, 209 insertions(+), 8 deletions(-)
 create mode 100644 DIOPI-TEST/python/conformance/device_config_helper.py

diff --git a/DIOPI-TEST/python/conformance/device_config_helper.py b/DIOPI-TEST/python/conformance/device_config_helper.py
new file mode 100644
index 000000000..fcb069d27
--- /dev/null
+++ b/DIOPI-TEST/python/conformance/device_config_helper.py
@@ -0,0 +1,124 @@
+import copy
+from .config import _must_be_the_type, _must_exist, expand_cfg_by_name
+
+
+class Skip:
+    def __init__(self, value):
+        self.value = value
+
+
+def _must_be_the_list_or_tuple_of_type(cfg_path: str, cfg_dict: dict, required_type, cfg_keys: list) -> None:
+    if isinstance(required_type, (list, tuple)):
+        types_str = ""
+        for i in required_type:
+            types_str += i.__name__
+            types_str += ' or '
+        types_str = types_str[:-4]
+    else:
+        types_str = required_type.__name__
+
+    err = f"key %s should be the list or tuple of {types_str} in {cfg_path}"
+    for key in cfg_keys:
+        if key in cfg_dict.keys():
+            assert isinstance(cfg_dict[key], (list, tuple)), err % key
+            for v in cfg_dict[key]:
+                assert isinstance(v, required_type), err % key
+
+
+def check_configs_format(cfgs_dict: dict):
+    for case_k, case_v in cfgs_dict.items():
+        domain = f"device_configs.{case_k}"
+        _must_be_the_type(domain, case_v, list, ["dtype"])
+        if "dtype" in case_v.keys():
+            _must_be_the_list_or_tuple_of_type(domain, case_v, Skip, ["dtype"])
+
+        _must_exist(domain, case_v, ['name'])
+        _must_be_the_type(domain, case_v, list, ['name'])
+
+        if "tensor_para" in case_v.keys():
+            _must_be_the_type(domain, case_v, dict, ['tensor_para'])
+            _must_exist(domain + ".tensor_para", case_v["tensor_para"], ["args"])
+            _must_be_the_type(domain + ".tensor_para", case_v["tensor_para"],
+                              (list, tuple), ['args'])
+            domain_tmp = domain + ".tensor_para.args"
+            for arg in case_v["tensor_para"]['args']:
+                _must_exist(domain_tmp, arg, ["ins"])
+                _must_be_the_list_or_tuple_of_type(domain_tmp, arg, Skip, ['shape', 'value', 'dtype'])
+
+        if "para" in case_v.keys():
+            _must_be_the_type(domain, case_v, dict, ['para'])
+            dict_obj = case_v["para"]
+            _must_be_the_list_or_tuple_of_type(domain + ".para", dict_obj, Skip,
+                                               [i for i in dict_obj.keys()])
+
+
+def expand_tensor_paras_args_by_ins(cfgs_dict):
+    '''
+    [
+        {
+            "ins": ['x1', 'x2'],
+            "shape": [(2, 3, 16), (4, 32, 7, 7)],
+        },
+    ]
+    ====>
+    {
+        'x1':{
+            "ins": ['x1'],
+            "shape": [(2, 3, 16), (4, 32, 7, 7)],
+        },
+        'x2':{
+            "ins": ['x2'],
+            "shape": [(2, 3, 16), (4, 32, 7, 7)],
+        },
+    }
+    '''
+    for cfg_name in cfgs_dict:
+        tensor_para_args = cfgs_dict[cfg_name]["tensor_para"]["args"]
+        tmp_tensor_para_args = {}
+        for arg in tensor_para_args:
+            assert isinstance(arg["ins"], (list, tuple))
+            for in_name in arg["ins"]:
+                tmp_tensor_para_args[in_name] = copy.deepcopy(arg)
+                tmp_tensor_para_args[in_name]["ins"] = [in_name]
+        cfgs_dict[cfg_name]["tensor_para"]["args"] = tmp_tensor_para_args
+
+
+def format_cfg(cases):
+    for case_k, case_v in cases.items():
+        # set empty defaults for tensor_para, tensor_para["args"] and para
+        if "tensor_para" not in case_v.keys():
+            case_v["tensor_para"] = {}
+        if "args" not in case_v["tensor_para"].keys():
+            case_v["tensor_para"]["args"] = []
+        if "para" not in case_v.keys():
+            case_v["para"] = {}
+
+
+def extract_value_from_skip(cfgs_dict):
+    for case_k, case_v in cfgs_dict.items():
+        if "dtype" in case_v.keys():
+            case_v["dtype"] = [x.value for x in case_v["dtype"]]
+        for para_k, para_v in case_v["para"].items():
+            case_v["para"][para_k] = [x.value for x in para_v]
+        for arg_k, arg_v in case_v["tensor_para"]["args"].items():
+            if "shape" in arg_v:
+                arg_v["shape"] = [x.value for x in arg_v["shape"]]
+            if "value" in arg_v:
+                arg_v["value"] = [x.value for x in arg_v["value"]]
+            if "dtype" in arg_v:
+                arg_v["dtype"] = [x.value for x in arg_v["dtype"]]
+
+
+class DeviceConfig(object):
+    r"""
+    Process device config file
+    """
+
+    @staticmethod
+    def process_configs(cfgs_dict: dict):
+        check_configs_format(cfgs_dict)
+        cfgs_dict = expand_cfg_by_name(cfgs_dict, 'name')
+        format_cfg(cfgs_dict)
+        expand_tensor_paras_args_by_ins(cfgs_dict)
+        extract_value_from_skip(cfgs_dict)
+        return cfgs_dict
diff --git a/DIOPI-TEST/python/conformance/gen_data.py b/DIOPI-TEST/python/conformance/gen_data.py
index 3070c46ce..d224cb265 100644
--- a/DIOPI-TEST/python/conformance/gen_data.py
+++ b/DIOPI-TEST/python/conformance/gen_data.py
@@ -19,6 +19,34 @@
 _cur_dir = os.path.dirname(os.path.abspath(__file__))
 
 
+def check_device_para_and_tensor_para(cfg_dict, device_cfg_dict):
+    para_dict = cfg_dict["para"]
+    device_para_dict = device_cfg_dict["para"]
+    for dk, dv in device_para_dict.items():
+        if dk in para_dict:
+            v = para_dict[dk]
+            for x in dv:
+                if x not in v:
+                    logger.warn(f"Para {x} of key {dk} in device_configs not found in diopi_configs. Ignored.")
+
+    args_list = cfg_dict["tensor_para"]["args"]
+    device_tensor_paras_dict = device_cfg_dict["tensor_para"]["args"]
+    for input in device_tensor_paras_dict.keys():
+        in_found = False
+        for args in args_list:
+            if "ins" in args:
+                ins = args["ins"]
+                if input in ins:
+                    in_found = True
+                    for key in ["dtype", "shape", "value"]:
+                        if key in device_tensor_paras_dict[input] and key in args:
+                            for dv in device_tensor_paras_dict[input][key]:
+                                if dv not in args[key]:
+                                    logger.warn(f"Tensor para {dv} of key {key} in device_configs not found in diopi_configs for ins {ins}. Ignored.")
+        if not in_found:
+            logger.warn(f"Input name {input} in device_configs not found in diopi_configs. Ignored.")
+
+
 def expand_para(para_dict: dict, paras_list: list):
     r'''
     dict(a = [1,2], b = [11,22])
@@ -109,8 +137,28 @@ def expand_cfg_by_para(cfg_dict: dict):
     return paras_list, tensor_paras_list
 
 
-def expand_cfg_all(paras_list, tensor_paras_list, cfg_dict, filter_dtype_list) -> list:
+def expand_cfg_all(paras_list, tensor_paras_list, cfg_dict, filter_dtype_list, device_config) -> list:
     cfg_expand_list = []
+
+    if device_config is not None:
+        skipped_index = []
+        assert len(paras_list) == len(tensor_paras_list)
+        device_paras = device_config["para"]
+        device_tensor_paras = device_config["tensor_para"]["args"]
+        for idx, paras in enumerate(paras_list):
+            for skipped_para_name in device_paras:
+                if skipped_para_name in paras and paras[skipped_para_name] in device_paras[skipped_para_name]:
+                    skipped_index.append(idx)
+        for idx, tensor_paras in enumerate(tensor_paras_list):
+            for tensor_para in tensor_paras:
+                if tensor_para["ins"] in device_tensor_paras:
+                    if ("value" in tensor_para and "value" in device_tensor_paras[tensor_para["ins"]] and tensor_para["value"] in device_tensor_paras[tensor_para["ins"]]["value"]) or \
+                       ("shape" in tensor_para and "shape" in device_tensor_paras[tensor_para["ins"]] and tensor_para["shape"] in device_tensor_paras[tensor_para["ins"]]["shape"]):
+                        if idx not in skipped_index:
+                            skipped_index.append(idx)
+        paras_list = [paras_list[i] for i in range(len(paras_list)) if i not in skipped_index]
+        tensor_paras_list = [tensor_paras_list[i] for i in range(len(tensor_paras_list)) if i not in skipped_index]
+
     if len(tensor_paras_list) != 0:
         arg_dtype_num = 0
         for arg in cfg_dict["tensor_para"]["args"]:
@@ -131,7 +179,11 @@ def expand_cfg_all(paras_list, tensor_paras_list, cfg_dict, filter_dtype_list) -
                     for arg in tmp_cfg_dict["tensor_para"]["args"]:
                         if arg.get("dtype") is not None:
                             entry_dtype = arg["dtype"][i]
-                            if entry_dtype in filter_dtype_list:
+                            arg_filter_dtype_list = []
+                            if device_config is not None:
+                                if arg["ins"] in device_tensor_paras and "dtype" in device_tensor_paras[arg["ins"]]:
+                                    arg_filter_dtype_list = device_tensor_paras[arg["ins"]]["dtype"]
+                            if entry_dtype in filter_dtype_list or entry_dtype in arg_filter_dtype_list:
                                 filter_dtype = True
                                 break
                             else:
@@ -155,9 +207,11 @@ def expand_cfg_all(paras_list, tensor_paras_list, cfg_dict, filter_dtype_list) -
     return cfg_expand_list
 
 
-def expand_cfg_by_all_options(cfg_dict: dict, filter_dtype_list: list) -> list:
+def expand_cfg_by_all_options(cfg_dict: dict, filter_dtype_list: list, device_config: dict = None) -> list:
+    if device_config:
+        check_device_para_and_tensor_para(cfg_dict, device_config)
     paras_list, tensor_paras_list = expand_cfg_by_para(cfg_dict)
-    cfg_expand_list = expand_cfg_all(paras_list, tensor_paras_list, cfg_dict, filter_dtype_list)
+    cfg_expand_list = expand_cfg_all(paras_list, tensor_paras_list, cfg_dict, filter_dtype_list, device_config)
     return cfg_expand_list
 
 
@@ -311,14 +365,23 @@ class GenInputData(object):
     '''
 
     @staticmethod
-    def run(func_name, model_name, filter_dtype_str_list):
-
+    def run(func_name, model_name, filter_dtype_str_list, impl_folder):
         if model_name != "":
             diopi_config = "model_config." + model_name + "_config"
             configs = Config.process_configs(eval(diopi_config))
         else:
             configs = Config.process_configs(diopi_configs)
 
+        src_path = os.path.join(impl_folder, "device_configs.py")
+        use_device_configs = os.path.isfile(src_path)
+        if use_device_configs:
+            dst_path = os.path.join(_cur_dir, "device_configs.py")
+            os.symlink(src_path, dst_path)
+            from .device_configs import device_configs
+            os.unlink(dst_path)
+            from .device_config_helper import DeviceConfig
+            device_configs = DeviceConfig.process_configs(device_configs)
+
         inputs_dir_path = os.path.join(_cur_dir, "../data/" + model_name + "/inputs")
         if not os.path.exists(inputs_dir_path):
             os.makedirs(inputs_dir_path)
@@ -331,7 +394,19 @@ def run(func_name, model_name, filter_dtype_str_list):
                 continue
             logger.info(f"Generate benchmark input data for diopi_functions.{cfg_func_name}")
             filter_dtype_list = get_filter_dtype_list(filter_dtype_str_list)
-            cfg_expand_list = expand_cfg_by_all_options(configs[cfg_name], filter_dtype_list)
+
+            if use_device_configs and cfg_name in device_configs:
+                device_config = device_configs[cfg_name]
+                if 'dtype' in device_config:
+                    filter_dtype_list.extend(x for x in device_config['dtype'] if x not in filter_dtype_list)
+                tol_keys_list = ['atol', 'rtol', 'atol_half', 'rtol_half']
+                for key in tol_keys_list:
+                    if key in device_config:
+                        configs[cfg_name][key] = device_config[key]
+                cfg_expand_list = expand_cfg_by_all_options(configs[cfg_name], filter_dtype_list, device_config)
+            else:
+                cfg_expand_list = expand_cfg_by_all_options(configs[cfg_name], filter_dtype_list)
+
             cfg_counter += len(cfg_expand_list)
             gen_and_dump_data(inputs_dir_path, cfg_name, cfg_expand_list, cfg_save_dict)
 
diff --git a/DIOPI-TEST/python/main.py b/DIOPI-TEST/python/main.py
index 0713db3b8..f75c158be 100644
--- a/DIOPI-TEST/python/main.py
+++ b/DIOPI-TEST/python/main.py
@@ -24,6 +24,8 @@ def parse_args():
                         help='Whether to use nhwc layout for 3-dim Tensor')
     parser.add_argument('--four_bytes', action='store_true',
                         help='Whether to use 4-bytes data type for partial tests')
+    parser.add_argument('--impl_folder', type=str, default='',
+                        help='folder to find device configs')
     args = parser.parse_args()
     return args
 
@@ -58,7 +60,7 @@ def parse_args():
 
     if args.mode == 'gen_data':
         import conformance.gen_data as gd
-        gd.GenInputData.run(args.fname, args.model_name.lower(), args.filter_dtype)
+        gd.GenInputData.run(args.fname, args.model_name.lower(), args.filter_dtype, args.impl_folder)
         gd.GenOutputData.run(args.fname, args.model_name.lower(), args.filter_dtype)
         if args.model_name != '':
             logger.info(f"the op list of {args.model_name}: {real_op_list}")

From 5bb8cf0ed287280937d5432254e67ff49335b17a Mon Sep 17 00:00:00 2001
From: zephyr <48674444+z379035389@users.noreply.github.com>
Date: Mon, 17 Apr 2023 10:56:22 +0800
Subject: [PATCH 05/10] add silu (#68)

* add randn

* fix lint

* fix lint

* add randn

* add silu

* add silu

* update

* fix lint

* delete randn

* add silu_backward

* add silu_backward

* set p_value > 0.0005

* add randn

* fix lint

* add silu

* add randn

* fix lint

* set p_value > 0.0005

* fix conflict

* fix lint

* fix third-party lint error
---
 DIOPI-TEST/diopirt                            |  2 +-
 DIOPI-TEST/impl                               |  2 +-
 .../python/conformance/conformance_test.py    |  2 +-
 .../python/conformance/diopi_configs.py       | 20 +++++++++++++++++--
 .../python/conformance/diopi_functions.py     | 14 +++++++++++++
 5 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/DIOPI-TEST/diopirt b/DIOPI-TEST/diopirt
index 7cfe1ff1b..28a262592 160000
--- a/DIOPI-TEST/diopirt
+++ b/DIOPI-TEST/diopirt
@@ -1 +1 @@
-Subproject commit 7cfe1ff1b73a1b3b81bbce22b8f78aa50eea34a6
+Subproject commit 28a262592a436b6d073ee8ac46973b37fe1d5087
diff --git a/DIOPI-TEST/impl b/DIOPI-TEST/impl
index 7fd6b5751..c5f2a500b 160000
--- a/DIOPI-TEST/impl
+++ b/DIOPI-TEST/impl
@@ -1 +1 @@
-Subproject commit 7fd6b5751209fe118ef33e139b1d5e714b76de86
+Subproject commit c5f2a500b97c42f302978844c6da429744036a75
diff --git a/DIOPI-TEST/python/conformance/conformance_test.py b/DIOPI-TEST/python/conformance/conformance_test.py
index 0ce8b3468..9f6f0664c 100644
--- a/DIOPI-TEST/python/conformance/conformance_test.py
+++ b/DIOPI-TEST/python/conformance/conformance_test.py
@@ -199,7 +199,7 @@ def test_normal(mean, std, size=None):
         out_numpy = out_numpy.flatten()
         p_value = stats.kstest(out_numpy, 'norm', args=(mean, std))[1]
         # pytorch use 0.0001, but stats.kstest use 0.05 as threshold
-        assert p_value > 0.001, "failed to execute normal"
+        assert p_value > 0.0005, "failed to execute normal"
 
     def test_normal_(input, mean, std, shape=None):
         from scipy import stats
diff --git a/DIOPI-TEST/python/conformance/diopi_configs.py b/DIOPI-TEST/python/conformance/diopi_configs.py
index 28767816d..09f71f294 100644
--- a/DIOPI-TEST/python/conformance/diopi_configs.py
+++ b/DIOPI-TEST/python/conformance/diopi_configs.py
@@ -555,6 +555,23 @@
         ),
     ),
 
+    'silu': dict(
+        name=["silu"],
+        is_inplace=True,
+        tensor_para=dict(
+            args=[
+                {
+                    "ins": ['input'],
+                    "requires_grad": [True],
+                    "shape": ((182400,), (20267, 80), (8, 200, 304),
+                              (32, 16, 1, 1), (16, 32, 130, 130)),
+                    "dtype": [Dtype.float32, Dtype.float64],
+                    "gen_fn": Genfunc.randn,
+                },
+            ],
+        ),
+    ),
+
     'pow_float_tensor': dict(
         name=['pow'],
         interface=['torch'],
@@ -2168,8 +2185,7 @@
             args=[
                 {
                     "ins": ['input'],
-                    "shape": ((2, 4096), (32, 49, 256), (2, 16, 64, 64),
-                              (1, 2304, 1, 1, 1)),
+                    "shape": ((2, 4096), (32, 49, 256), (2, 16, 64, 64), (1, 2304, 1, 1, 1)),
                     "dtype": [Dtype.float32, Dtype.float64],
                     "gen_fn": Genfunc.randn,
                 },
diff --git a/DIOPI-TEST/python/conformance/diopi_functions.py b/DIOPI-TEST/python/conformance/diopi_functions.py
index aedce190c..7f38287d2 100644
--- a/DIOPI-TEST/python/conformance/diopi_functions.py
+++ b/DIOPI-TEST/python/conformance/diopi_functions.py
@@ -218,6 +218,20 @@ def sigmoid(input, inplace=False) -> Tensor:
     return unary_op(input, inplace, 'diopiSigmoid')
 
 
+def silu(input, inplace=False) -> Tensor:
+    return unary_op(input, inplace, 'diopiSilu')
+
+
+def silu_backward(input, grad_outputs, **kwargs) -> Tensor:
+    assert len(grad_outputs) == 1, "only accept 1 gradient to do backward"
+    grad_input = raw_like(input)
+    func = check_function("diopiSiluBackward")
+    ret = func(input.context_handle, grad_input.tensor_handle, grad_outputs[0].tensor_handle,
+               input.tensor_handle)
+    check_returncode(ret)
+    return {"input": grad_input}
+
+
 def sqrt(input, inplace=False) -> Tensor:
     return unary_op(input, inplace, 'diopiSqrt', promote_type(input, Dtype.float32))
 

From 12f494bb42f5f528a43a6e9a1909c135e5b187ef Mon Sep 17 00:00:00 2001
From: zephyr <48674444+z379035389@users.noreply.github.com>
Date: Mon, 17 Apr 2023 12:49:30 +0800
Subject: [PATCH 06/10] Zephyr/add baddbmm (#69)

* update main

* add baddbmm & baddbmm_

* update

* use aten::baddbmm_ and larger size

* use aten::baddbmm_ and larger size

* change normal p_value threshold

* fix lint

* fix lint

* fix lint

* delete error_report.csv

* fix conflict

* fix third_party lint error

* update impl
---
 DIOPI-TEST/diopirt                            |  2 +-
 DIOPI-TEST/impl                               |  2 +-
 .../python/conformance/diopi_configs.py       | 30 +++++++++++++++++++
 .../python/conformance/diopi_functions.py     | 22 ++++++++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/DIOPI-TEST/diopirt b/DIOPI-TEST/diopirt
index 28a262592..ce8c68e98 160000
--- a/DIOPI-TEST/diopirt
+++ b/DIOPI-TEST/diopirt
@@ -1 +1 @@
-Subproject commit 28a262592a436b6d073ee8ac46973b37fe1d5087
+Subproject commit ce8c68e986a374aaf32157568b132776bf4b84a1
diff --git a/DIOPI-TEST/impl b/DIOPI-TEST/impl
index c5f2a500b..2a11dcf2a 160000
--- a/DIOPI-TEST/impl
+++ b/DIOPI-TEST/impl
@@ -1 +1 @@
-Subproject commit c5f2a500b97c42f302978844c6da429744036a75
+Subproject commit 2a11dcf2a4cf91b3e48c37e2c32c5875cbc23588
diff --git a/DIOPI-TEST/python/conformance/diopi_configs.py b/DIOPI-TEST/python/conformance/diopi_configs.py
index 09f71f294..cf45f880e 100644
--- a/DIOPI-TEST/python/conformance/diopi_configs.py
+++ b/DIOPI-TEST/python/conformance/diopi_configs.py
@@ -43,6 +43,36 @@
         ),
     ),
 
+    'baddbmm': dict(
+        name=["baddbmm"],
+        interface=["torch"],
+        is_inplace=True,
+        dtype=[Dtype.float32, Dtype.float16, Dtype.float64],
+        para=dict(
+            beta=[1, 0.5, 0.1],
+            alpha=[0.1, 0.2, 0.5],
+        ),
+        tensor_para=dict(
+            args=[
+                {
+                    "ins": ["input"],
+                    "shape": ((32, 64, 16), (32, 64, 32), (168, 52, 64)),
+                    "gen_fn": Genfunc.randn,
+                },
+                {
+                    "ins": ["batch1"],
+                    "shape": ((32, 64, 32), (32, 64, 8), (168, 52, 38)),
+                    "gen_fn": Genfunc.randn,
+                },
+                {
+                    "ins": ["batch2"],
+                    "shape": ((32, 32, 16), (32, 8, 32), (168, 38, 64)),
+                    "gen_fn": Genfunc.randn,
+                },
+            ]
+        ),
+    ),
+
     'conv_2d': dict(
         name=["conv2d"],
         atol=1e-3,
diff --git a/DIOPI-TEST/python/conformance/diopi_functions.py b/DIOPI-TEST/python/conformance/diopi_functions.py
index 7f38287d2..099184462 100644
--- a/DIOPI-TEST/python/conformance/diopi_functions.py
+++ b/DIOPI-TEST/python/conformance/diopi_functions.py
@@ -388,6 +388,28 @@ def bmm(input, mat2) -> Tensor:
     return out
 
 
+def baddbmm(input, batch1, batch2, beta, alpha, inplace=False) -> Tensor:
+    size1 = list(input.size())
+    assert (len(size1) == 3), 'input must be 3d tensor'
+    size2 = list(batch1.size())
+    assert (len(size2) == 3), 'batch1 must be 3d tensor'
+    size3 = list(batch2.size())
+    assert (len(size3) == 3), 'batch2 must be 3d tensor'
+    assert (size2[2] == size3[1] and size1[0] == size2[0] and size1[0] == size3[0]), 'invalid args'
+    assert (size1[2] == size3[2] or size1[2] == 1 or size3[2] == 1), 'invalid args'
+    if inplace:
+        func = check_function("diopiBaddbmmInp")
+        ret = func(input.context_handle, input.tensor_handle, batch1.tensor_handle, batch2.tensor_handle, c_double(beta), c_double(alpha))
+        check_returncode(ret)
+        return input
+    else:
+        out = raw_like(input)
+        func = check_function("diopiBaddbmm")
+        ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, batch1.tensor_handle, batch2.tensor_handle, c_double(beta), c_double(alpha))
+        check_returncode(ret)
+        return out
+
+
 def addcmul(input, tensor1, tensor2, value=1, inplace=False) -> Tensor:
     size1 = list(tensor1.size())
     size2 = list(tensor2.size())

From 380ad5a9a7aa110cbd490cb8560c95d8df8d0a73 Mon Sep 17 00:00:00 2001
From: ding0706 <chrysantd@gmail.com>
Date: Tue, 18 Apr 2023 11:39:14 +0800
Subject: [PATCH 07/10] add debug info for config, input, and output when failing
 (#77)

* add debug info for config, input, and output

* optimize output format

* fix lint

* remove useless comment
---
 .../python/conformance/conformance_test.py    | 52 ++++++++++++++++---
 DIOPI-TEST/python/main.py                     |  4 +-
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/DIOPI-TEST/python/conformance/conformance_test.py b/DIOPI-TEST/python/conformance/conformance_test.py
index 9f6f0664c..87040a98d 100644
--- a/DIOPI-TEST/python/conformance/conformance_test.py
+++ b/DIOPI-TEST/python/conformance/conformance_test.py
@@ -210,12 +210,36 @@ def test_normal_(input, mean, std, shape=None):
         assert p_value > 0.05, "failed to execute normal_"
 
 
+def config_to_format_string(data, indent=0):
+    yaml_str = ""
+    # Renders a config value as indented, YAML-like text for failure logs:
+    # dicts become "key: value" lines, lists/tuples become "- item" lines and
+    # anything else is formatted with an f-string.
+    line_start = "\n" + " " * indent
+    if isinstance(data, dict):
+        for key, value in data.items():
+            # Unset/empty entries are left out of the dump.
+            if value is None or value == [] or value == {} or value == "":
+                continue
+            yaml_str += line_start + f"{key}: "
+            # "shape" and "value" are printed on one line via str() instead
+            # of being expanded element by element.
+            child = str(value) if key in ["shape", "value"] else value
+            yaml_str += config_to_format_string(child, indent + 2)
+    elif isinstance(data, (list, tuple)):
+        for item in data:
+            yaml_str += line_start + "- " + config_to_format_string(item, indent + 2)
+    else:
+        yaml_str += f"{data}"
+    return yaml_str
+
+
 class ConformanceTest(object):
     r'''
     Run all functions by using input, then compare_with_gen_output with saved output
     '''
     @staticmethod
-    def run(func_name, model_name, filter_dtype_str_list):
+    def run(func_name, model_name, filter_dtype_str_list, debug_level):
 
         _cur_dir = os.path.dirname(os.path.abspath(__file__))
         inputs_dir_path = os.path.join(_cur_dir, "../data/" + model_name + "/inputs")
@@ -264,8 +283,18 @@ def run(func_name, model_name, filter_dtype_str_list):
                     sum_to_compare = True if 'sorted' in kwargs and ~kwargs['sorted'] else False
                     passed = compare_with_gen_output(output, data['cfg'], output_reference, sum_to_compare) \
                         if need_output else True
-                    logger.info(f"Run diopi_functions.{cfg_func_name} succeed") \
-                        if passed else logger.error(f"Run diopi_functions.{cfg_func_name} failed", tag=test_tag, info=tensor_info)
+                    if passed:
+                        logger.info(f"Run diopi_functions.{cfg_func_name} succeed")
+                    else:
+                        logger.error(f"Run diopi_functions.{cfg_func_name} failed", tag=test_tag, info=tensor_info)
+                        if debug_level > 0:
+                            logger.error("failed config:\n%s", config_to_format_string(data['cfg']))
+                            if debug_level > 1:
+                                logger.error("failed arguments:")
+                                for key, arg in kwargs.items():
+                                    logger.error(f"{key}: {arg}")
+                                logger.error(f"output_reference:\n{output_reference}")
+                                logger.error(f"output:\n{output}")
                 except FunctionNotImplementedError as e:
                     logger.error(f"NotImplemented: {e}")
                     continue
@@ -300,10 +329,21 @@ def run(func_name, model_name, filter_dtype_str_list):
 
                     try:
                         grad_input = eval(f"F.{cfg_func_name}_backward(**kwargs, **backward_para)")
-                        # import pdb;pdb.set_trace()
                         passed = compare_with_gen_output(grad_input, data['cfg'], backward_out_reference)
-                        logger.info(f"Run diopi_functions.{cfg_func_name}_backward succeed") \
-                            if passed else logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)
+                        if passed:
+                            logger.info(f"Run diopi_functions.{cfg_func_name}_backward succeed")
+                        else:
+                            logger.error(f"Run diopi_functions.{cfg_func_name}_backward failed", tag=test_tag, info=tensor_info)
+                            if debug_level > 0:
+                                logger.error("failed config:\n%s", config_to_format_string(data['cfg']))
+                                if debug_level > 1:
+                                    logger.error("failed arguments:")
+                                    for key, arg in kwargs.items():
+                                        logger.error(f"{key}: {arg}")
+                                    for key, arg in backward_para.items():
+                                        logger.error(f"{key}: {arg}")
+                                    logger.error(f"grad_reference:\n{backward_out_reference}")
+                                    logger.error(f"grad:\n{grad_input}")
                         write_precision(data["cfg"], cfg_func_name + '_bp', passed)
                     except FunctionNotImplementedError as e:
                         logger.error(f"NotImplemented: {e}")
diff --git a/DIOPI-TEST/python/main.py b/DIOPI-TEST/python/main.py
index f75c158be..b0eb2b8b6 100644
--- a/DIOPI-TEST/python/main.py
+++ b/DIOPI-TEST/python/main.py
@@ -26,6 +26,8 @@ def parse_args():
                         help='Whether to use 4-bytes data type for partial tests')
     parser.add_argument('--impl_folder', type=str, default='',
                         help='folder to find device configs')
+    parser.add_argument('--failure_debug_level', type=int, default=0,
+                        help='Whether to print debug information when failing the test. 0 for printing nothing, 1 for printing config, 2 for printing config, inputs and outputs')
     args = parser.parse_args()
     return args
 
@@ -66,7 +68,7 @@ def parse_args():
             logger.info(f"the op list of {args.model_name}: {real_op_list}")
     elif args.mode == 'run_test':
         import conformance as cf
-        cf.ConformanceTest.run(args.fname, args.model_name.lower(), args.filter_dtype)
+        cf.ConformanceTest.run(args.fname, args.model_name.lower(), args.filter_dtype, args.failure_debug_level)
         write_report()
     elif args.mode == 'utest':
         call = "python3 -m pytest -vx tests"

From cec76661bcf9af7bbc1cfa6046733e0151d91ea3 Mon Sep 17 00:00:00 2001
From: ZhangQiu <100055343+NeosZhang@users.noreply.github.com>
Date: Thu, 20 Apr 2023 15:31:05 +0800
Subject: [PATCH 08/10] zhangqiu/fix_sort (#78)

* fix sort for unstable cases

---------

Co-authored-by: zhangqiu <zhangqiu@sensetime.com>
---
 .../python/conformance/diopi_functions.py     | 31 ++++++++++++++++---
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/DIOPI-TEST/python/conformance/diopi_functions.py b/DIOPI-TEST/python/conformance/diopi_functions.py
index 099184462..183050f1a 100644
--- a/DIOPI-TEST/python/conformance/diopi_functions.py
+++ b/DIOPI-TEST/python/conformance/diopi_functions.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2023, DeepLink.
 # -*- coding: UTF-8 -*-
 import math
+import itertools
 
 from ctypes import c_float, c_double, c_int64, c_bool, c_void_p, byref, pointer
 from .diopi_runtime import Sizes, Scalar, Tensor, TensorHandle, compute_nhwc_stride, compute_nhwc_stride_2d, compute_nhwc_stride_3d
@@ -1106,13 +1107,26 @@ def sort(input, dim=- 1, descending=False, stable=False):
     vals = raw_like(input)
     sizeI = input.size()
     indices = Tensor(sizeI, glob_vars.int_type)
-
-    stable = c_void_p() if stable is None else pointer(c_bool(stable))
-
+    stable_c = c_void_p() if stable is None else pointer(c_bool(stable))
     func = check_function("diopiSort")
     ret = func(input.context_handle, vals.tensor_handle, indices.tensor_handle,
-               input.tensor_handle, c_int64(dim), c_bool(descending), stable)
-    check_returncode(ret)
+               input.tensor_handle, c_int64(dim), c_bool(descending), stable_c)
+    check_returncode(ret)
+    # When the sort is not stable, return input[indices] (gathered along
+    # `dim`) in place of the raw indices, so the values are what gets checked.
+    if not stable:
+        # Convert to numpy once, not once per element.
+        indices_flat = indices.numpy().flatten()
+        input_np = input.numpy()
+        temp_vals = []
+        # product() yields positions in the same row-major order as flatten().
+        positions = itertools.product(*(range(dim_size) for dim_size in input.shape))
+        for flat_pos, position in enumerate(positions):
+            idx = list(position)
+            # Swap the coordinate along `dim` for the index written by diopiSort.
+            idx[dim] = indices_flat[flat_pos]
+            temp_vals.append(input_np[tuple(idx)])
+        return vals, temp_vals
     return vals, indices
 
 

From fec16b45c9a12975226e7c293022fad9e8669461 Mon Sep 17 00:00:00 2001
From: zephyr <48674444+z379035389@users.noreply.github.com>
Date: Thu, 20 Apr 2023 16:36:04 +0800
Subject: [PATCH 09/10] add `multinomial`, adjust `dropout`'s gen_function
 (#80)

* add `multinomial`, adjust `dropout`'s gen_function

* fix lint

* fix submodule

* fix submodule

* fix submodule

* update code

* update impl
---
 DIOPI-TEST/diopirt                            |  2 +-
 DIOPI-TEST/impl                               |  2 +-
 .../python/conformance/conformance_test.py    | 13 ++++++++++
 .../python/conformance/diopi_configs.py       | 26 ++++++++++++++++---
 .../python/conformance/diopi_functions.py     | 12 +++++++++
 5 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/DIOPI-TEST/diopirt b/DIOPI-TEST/diopirt
index ce8c68e98..fa65866a2 160000
--- a/DIOPI-TEST/diopirt
+++ b/DIOPI-TEST/diopirt
@@ -1 +1 @@
-Subproject commit ce8c68e986a374aaf32157568b132776bf4b84a1
+Subproject commit fa65866a2dc13fb40d0656b838cf6ca85cce9784
diff --git a/DIOPI-TEST/impl b/DIOPI-TEST/impl
index 2a11dcf2a..e63739e54 160000
--- a/DIOPI-TEST/impl
+++ b/DIOPI-TEST/impl
@@ -1 +1 @@
-Subproject commit 2a11dcf2a4cf91b3e48c37e2c32c5875cbc23588
+Subproject commit e63739e54f0ee0fa2a1fb11a1ae76e6ad145013a
diff --git a/DIOPI-TEST/python/conformance/conformance_test.py b/DIOPI-TEST/python/conformance/conformance_test.py
index 87040a98d..477cd10dc 100644
--- a/DIOPI-TEST/python/conformance/conformance_test.py
+++ b/DIOPI-TEST/python/conformance/conformance_test.py
@@ -209,6 +209,23 @@ def test_normal_(input, mean, std, shape=None):
         p_value = stats.kstest(out_numpy, 'norm', args=(mean, std))[1]
         assert p_value > 0.05, "failed to execute normal_"
 
+    def test_multinomial(input, num_samples, replacement):
+        r'''
+        Run F.multinomial and sanity-check the sampled output.
+
+        Without replacement, no value may repeat inside a row; the check
+        covers every row of a 2-D result, not only the first.  The total
+        number of samples must also be a multiple of num_samples.
+        '''
+        out = F.multinomial(input, num_samples, replacement)
+        out_numpy = out.numpy()
+        if not replacement:
+            # A 1-D result is handled as a single row.
+            rows = out_numpy if len(out.size()) == 2 else [out_numpy]
+            for row in rows:
+                assert len(row) == len(set(row)), "failed to execute multinomial"
+        assert len(out_numpy.flatten()) % num_samples == 0, "failed to execute multinomial"
+
 
 def config_to_format_string(data, indent=0):
     yaml_str = ""
diff --git a/DIOPI-TEST/python/conformance/diopi_configs.py b/DIOPI-TEST/python/conformance/diopi_configs.py
index cf45f880e..69b046b7d 100644
--- a/DIOPI-TEST/python/conformance/diopi_configs.py
+++ b/DIOPI-TEST/python/conformance/diopi_configs.py
@@ -2217,7 +2217,7 @@
                     "ins": ['input'],
                     "shape": ((2, 4096), (32, 49, 256), (2, 16, 64, 64), (1, 2304, 1, 1, 1)),
                     "dtype": [Dtype.float32, Dtype.float64],
-                    "gen_fn": Genfunc.randn,
+                    "gen_fn": Genfunc.positive,
                 },
             ],
         ),
@@ -2237,7 +2237,7 @@
                     "shape": ((2, 4096), (32, 49, 256), (2, 16, 64, 64),
                               (1, 2304, 1, 1, 1)),
                     "dtype": [Dtype.float32, Dtype.float64],
-                    "gen_fn": Genfunc.randn,
+                    "gen_fn": Genfunc.positive,
                 },
             ],
         ),
@@ -2256,7 +2256,7 @@
                     "ins": ['input'],
                     "shape": ((32, 49, 256), (32, 16, 64, 64)),
                     "dtype": [Dtype.float32, Dtype.float64],
-                    "gen_fn": Genfunc.randn,
+                    "gen_fn": Genfunc.positive,
                 },
             ],
         ),
@@ -4341,4 +4341,24 @@
         ),
     ),
 
+    'multinomial': dict(
+        name=["multinomial"],
+        interface=['torch'],
+        no_output_ref=True,
+        para=dict(
+            num_samples=[6, 60, 200, 128],
+            replacement=[True, True, False, False],
+        ),
+        tensor_para=dict(
+            gen_fn=Genfunc.positive,
+            args=[
+                {
+                    "ins": ['input'],
+                    "shape": ((8, ), (16, 64,), (128, 256,), (256, 128,)),
+                    "dtype": [Dtype.float32, Dtype.float64, Dtype.float64, Dtype.float64],
+                },
+            ],
+        ),
+    ),
+
 }
diff --git a/DIOPI-TEST/python/conformance/diopi_functions.py b/DIOPI-TEST/python/conformance/diopi_functions.py
index 183050f1a..2c39cec25 100644
--- a/DIOPI-TEST/python/conformance/diopi_functions.py
+++ b/DIOPI-TEST/python/conformance/diopi_functions.py
@@ -3535,3 +3535,21 @@ def meshgrid(tensors, shape=None):
     ret = func(tensors[0].context_handle, pointer(co_tensors), pointer(c_tensors), c_int64(inputsNum))
     check_returncode(ret)
     return out
+
+
+def multinomial(input, num_samples, replacement) -> Tensor:
+    r'''
+    Call diopiMultinomial on a 1-D or 2-D input.
+
+    The int64 output has shape (num_samples,) for a 1-D input and
+    (input.size()[0], num_samples) for a 2-D input.
+    '''
+    func = check_function("diopiMultinomial")
+    sizeI = list(input.size())
+    # Only 1-D and 2-D inputs have a defined output shape; reject any other
+    # rank with a clear message.
+    assert len(sizeI) in (1, 2), 'input must be 1d or 2d tensor'
+    out = Tensor(size=tuple(sizeI[:-1]) + (num_samples,), dtype=Dtype.int64)
+    ret = func(input.context_handle, out.tensor_handle, input.tensor_handle, c_int64(num_samples), c_bool(replacement))
+    check_returncode(ret)
+    return out

From 44c5f705793e54fa7e58d0400f9126c7d51cd9d6 Mon Sep 17 00:00:00 2001
From: zephyr <48674444+z379035389@users.noreply.github.com>
Date: Thu, 20 Apr 2023 16:49:36 +0800
Subject: [PATCH 10/10] add rsqrt rsqrt_ (#79)

* add rsqrt rsqrt_

* fix lint

* fix submodule

* fix submodule

* update impl

* update impl
---
 DIOPI-TEST/python/conformance/diopi_configs.py   | 8 ++++----
 DIOPI-TEST/python/conformance/diopi_functions.py | 4 ++++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/DIOPI-TEST/python/conformance/diopi_configs.py b/DIOPI-TEST/python/conformance/diopi_configs.py
index 69b046b7d..472a724d4 100644
--- a/DIOPI-TEST/python/conformance/diopi_configs.py
+++ b/DIOPI-TEST/python/conformance/diopi_configs.py
@@ -372,7 +372,7 @@
 
     'pointwise_op': dict(
         name=['abs', 'cos', 'erf', 'exp', 'floor',
-              'neg', 'sin', 'sqrt', 'logical_not'],
+              'neg', 'sin', 'sqrt', 'logical_not', 'rsqrt'],
         interface=['torch'],
         is_inplace=True,
         dtype=[Dtype.float16, Dtype.float32, Dtype.float64],
@@ -391,7 +391,7 @@
 
     'pointwise_op_int_without_inplace': dict(
         name=['abs', 'cos', 'erf', 'exp',
-              'neg', 'sin', 'sqrt', 'logical_not'],
+              'neg', 'sin', 'sqrt', 'logical_not', 'rsqrt'],
         interface=['torch'],
         dtype=[Dtype.int16, Dtype.int32, Dtype.int64, Dtype.uint8, Dtype.int8],
         tensor_para=dict(
@@ -425,7 +425,7 @@
     ),
 
     'pointwise_op_bool': dict(
-        name=['cos', 'erf', 'exp', 'sin', 'sqrt'],
+        name=['cos', 'erf', 'exp', 'sin', 'sqrt', 'rsqrt'],
         interface=['torch'],
         dtype=[Dtype.bool],
         tensor_para=dict(
@@ -460,7 +460,7 @@
     ),
 
     'pointwise_op_abs_input': dict(
-        name=['log', 'log2', 'log10', 'sqrt'],
+        name=['log', 'log2', 'log10', 'sqrt', 'rsqrt'],
         interface=['torch'],
         is_inplace=True,
         dtype=[Dtype.float16, Dtype.float32, Dtype.float64],
diff --git a/DIOPI-TEST/python/conformance/diopi_functions.py b/DIOPI-TEST/python/conformance/diopi_functions.py
index 2c39cec25..e564833a6 100644
--- a/DIOPI-TEST/python/conformance/diopi_functions.py
+++ b/DIOPI-TEST/python/conformance/diopi_functions.py
@@ -237,6 +237,10 @@ def sqrt(input, inplace=False) -> Tensor:
     return unary_op(input, inplace, 'diopiSqrt', promote_type(input, Dtype.float32))
 
 
+def rsqrt(input, inplace=False) -> Tensor:
+    return unary_op(input, inplace, 'diopiRsqrt', promote_type(input, Dtype.float32))
+
+
 def neg(input, inplace=False) -> Tensor:
     return unary_op(input, inplace, 'diopiNeg')