From 07e5dce979124cdbe2a4d713455d325ada3b464d Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 11:37:05 +0800 Subject: [PATCH 01/30] add expm1 on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 12 ++++++------ .../python/conformance/diopi_functions.py | 3 +++ impl/torch/functions/functions.cpp | 17 +++++++++++++++++ proto/include/diopi/functions.h | 16 ++++++++++++++++ 4 files changed, 42 insertions(+), 6 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 530d5d995..d81d708f4 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1025,7 +1025,7 @@ ), 'pointwise_op': dict( - name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'floor', + name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'expm1', 'floor', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'ceil', 'atan'], interface=['torch'], is_inplace=True, @@ -1047,7 +1047,7 @@ 'pointwise_op_int_without_inplace': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'erf', 'exp', + name=['abs', 'cos', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.int16, np.int32, np.int64, np.int8], @@ -1068,7 +1068,7 @@ 'pointwise_op_uint8': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'erf', 'exp', + name=['abs', 'cos', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.uint8], @@ -1103,7 +1103,7 @@ # FIXME erfinv输入int或bool报错 'pointwise_op_bool': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], - name=['abs', 'cos', 'erf', 'exp', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], + name=['abs', 'cos', 'erf', 'exp', 'expm1', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], interface=['torch'], dtype=[np.bool_], tensor_para=dict( @@ -1250,7 +1250,7 @@ ), 'pointwise_op_zero': dict( - name=['abs', 'exp', 'floor', 'neg', 'sqrt', + name=['abs', 'exp', 'expm1', 'floor', 'neg', 'sqrt', 'logical_not', 'rsqrt', 'ceil'], interface=['torch'], is_inplace=True, @@ -1267,7 +1267,7 @@ ), 'pointwise_op_without_inplace_zero': dict( - name=['abs', 'sign', 'exp', 'sqrt', + name=['abs', 'sign', 'exp', 'expm1', 'sqrt', 'logical_not', 'rsqrt'], interface=['torch'], dtype=[np.float16, np.float32, np.float64, np.int16, diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 66bb2ac98..2917d8104 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -416,6 +416,9 @@ def atan(input, inplace=False) -> Tensor: def exp(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExp", promote_type(input, Dtype.float32)) +def expm1(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiExpm1", promote_type(input, Dtype.float32)) + def log(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiLog", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 0312be32b..bbbd6ece5 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -1018,6 +1018,23 @@ diopiError_t 
diopiExpInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiExpm1(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(expm1_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiExpm1Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(expm1_, atInput); + + return diopiSuccess; +} + diopiError_t diopiLog(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index a12bdbaca..2c1d619c7 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -978,6 +978,22 @@ DIOPI_API diopiError_t diopiExpInp(diopiContextHandle_t ctx, diopiTensorHandle_t */ DIOPI_API diopiError_t diopiExp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiExpm1(). + * @param[in] ctx Context environment. + * @param[inout] input the input tensor, which also stores the result. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiExpm1Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Returns a new tensor with the exponential of the elements of the input tensor, minus 1. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float16, float32, float64, int16, int32, + * int64, uint8, int8, bool]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiExpm1(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiLog(). * @param[in] ctx Context environment.
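Note: diopiExpm1 lowers to ATen's expm1, which evaluates exp(x) - 1 in a form that stays accurate for x near zero, where computing exp(x) first and subtracting 1 would cancel catastrophically. A minimal sketch of the semantics the conformance test compares against, using plain PyTorch outside the DIOPI harness:

    import torch

    x = torch.tensor([1e-10, 1e-5, 0.0, 1.0], dtype=torch.float32)

    naive = torch.exp(x) - 1.0   # exp(1e-10) rounds to 1.0 in float32
    stable = torch.expm1(x)      # keeps the ~1e-10 contribution

    print(naive[0].item())       # 0.0 -- the small-x term is lost
    print(stable[0].item())      # ~1e-10

    # The in-place variant mirrors diopiExpm1Inp.
    y = x.clone()
    y.expm1_()
    assert torch.allclose(y, stable)
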
From a7fc14d32c13417e489441a8df1f4d7f17efd0ec Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 13:58:38 +0800 Subject: [PATCH 02/30] add tan on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 8 ++++---- .../python/conformance/diopi_functions.py | 2 ++ impl/torch/functions/functions.cpp | 17 +++++++++++++++++ proto/include/diopi/functions.h | 16 ++++++++++++++++ 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index d81d708f4..ef03a683e 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1025,7 +1025,7 @@ ), 'pointwise_op': dict( - name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'expm1', 'floor', + name=['abs', 'cos', 'tan', 'erf', 'erfinv', 'exp', 'expm1', 'floor', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'ceil', 'atan'], interface=['torch'], is_inplace=True, @@ -1047,7 +1047,7 @@ 'pointwise_op_int_without_inplace': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'erf', 'exp', 'expm1', + name=['abs', 'cos', 'tan', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.int16, np.int32, np.int64, np.int8], @@ -1068,7 +1068,7 @@ 'pointwise_op_uint8': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'erf', 'exp', 'expm1', + name=['abs', 'cos', 'tan', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.uint8], @@ -1103,7 +1103,7 @@ # FIXME erfinv输入int或bool报错 'pointwise_op_bool': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], - name=['abs', 'cos', 'erf', 'exp', 'expm1', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], + name=['abs', 'cos', 'tan', 'erf', 'exp', 'expm1', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], interface=['torch'], dtype=[np.bool_], tensor_para=dict( diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 2917d8104..41f804040 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -404,6 +404,8 @@ def sin(input, inplace=False) -> Tensor: def cos(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiCos", promote_type(input, Dtype.float32)) +def tan(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiTan", promote_type(input, Dtype.float32)) def tanh(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiTanh", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index bbbd6ece5..0781b9263 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -830,6 +830,23 @@ diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiTan(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(tan_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t 
diopiTanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(tan_, atInput); + + return diopiSuccess; +} + diopiError_t diopiAbs(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 2c1d619c7..2a78640f5 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -878,6 +878,22 @@ DIOPI_API diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t */ DIOPI_API diopiError_t diopiCos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiTan(). + * @param[inout] ctx Context environment. + * @param[inout] input the input and output tensor, which also stores the result, + * type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + */ +DIOPI_API diopiError_t diopiTanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Compute the element-wise tangent of the input tensor. + * @param[in] ctx Context environment. + * @param[in] input Input tensor, type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiTan(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiTanh(). * @param[in] ctx Context environment. From 299455ed2e3dc144625e512244775c1ac652e82a Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:07:23 +0800 Subject: [PATCH 03/30] add acos on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 8 ++++---- .../python/conformance/diopi_functions.py | 3 +++ impl/torch/functions/functions.cpp | 17 +++++++++++++++++ proto/include/diopi/functions.h | 16 ++++++++++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index ef03a683e..9f5c29fab 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1025,7 +1025,7 @@ ), 'pointwise_op': dict( - name=['abs', 'cos', 'tan', 'erf', 'erfinv', 'exp', 'expm1', 'floor', + name=['abs', 'cos', 'acos', 'tan', 'erf', 'erfinv', 'exp', 'expm1', 'floor', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'ceil', 'atan'], interface=['torch'], is_inplace=True, @@ -1047,7 +1047,7 @@ 'pointwise_op_int_without_inplace': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'tan', 'erf', 'exp', 'expm1', + name=['abs', 'cos', 'acos', 'tan', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.int16, np.int32, np.int64, np.int8], @@ -1068,7 +1068,7 @@ 'pointwise_op_uint8': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'tan', 'erf', 'exp', 'expm1', + name=['abs', 'cos', 'acos', 'tan', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.uint8], @@ -1103,7 +1103,7 @@ # FIXME erfinv输入int或bool报错 'pointwise_op_bool': dict( # 
name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], - name=['abs', 'cos', 'tan', 'erf', 'exp', 'expm1', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], + name=['abs', 'cos', 'acos', 'tan', 'erf', 'exp', 'expm1', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], interface=['torch'], dtype=[np.bool_], tensor_para=dict( diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 41f804040..10424fc60 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -404,6 +404,9 @@ def sin(input, inplace=False) -> Tensor: def cos(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiCos", promote_type(input, Dtype.float32)) +def acos(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAcos", promote_type(input, Dtype.float32)) + def tan(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiTan", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 0781b9263..7033b4cf2 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -830,6 +830,23 @@ diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiAcos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(acos_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAcosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(acos_, atInput); + + return diopiSuccess; +} + diopiError_t diopiTan(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 2a78640f5..782c9bdc2 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -878,6 +878,22 @@ DIOPI_API diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t */ DIOPI_API diopiError_t diopiCos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiAcos(). + * @param[in] ctx Context environment. + * @param[inout] input the input and output tensor, which also stores the result, + * type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + */ +DIOPI_API diopiError_t diopiAcosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Compute the element-wise arccosine of the input tensor. + * @param[in] ctx Context environment. + * @param[in] input Input tensor, type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAcos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiTan(). * @param[in] ctx Context environment. 
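Note: all of these conformance wrappers promote non-float inputs through promote_type(input, Dtype.float32), mirroring torch's rule that unary float ops return a floating tensor for integer input. A small sketch of that rule with plain torch (the values are arbitrary); the inference that this is also why the int/uint8/bool configs are the "without_inplace" ones is an editorial reading of the config names:

    import torch

    ints = torch.arange(-1, 2, dtype=torch.int32)  # [-1, 0, 1]

    print(torch.acos(ints).dtype)  # torch.float32 -- int input promotes
    print(torch.tan(ints).dtype)   # torch.float32

    # In-place variants cannot promote: acos_ would have to write float
    # results into an int tensor, so torch rejects it.
    try:
        ints.acos_()
    except RuntimeError as err:
        print("in-place acos on int32 rejected:", err)
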
From 9a9a39e7a67dffc1f8d3fd9850ace50bb30c6f01 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:25:10 +0800 Subject: [PATCH 04/30] fix format & add sinh on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 38 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 8 ++++ impl/torch/functions/functions.cpp | 17 +++++++++ proto/include/diopi/functions.h | 15 ++++++++ 4 files changed, 78 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 9f5c29fab..6059959d6 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1192,6 +1192,44 @@ ), ), + 'sinh': dict( + name=['sinh'], + interface=['torch'], + is_inplace=True, + saved_args=dict(output=0), + dtype=[np.float16, np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + + 'sinh_not_float': dict( + name=['sinh'], + interface=['torch'], + dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + 'tanh': dict( name=['tanh'], interface=['torch'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 10424fc60..4b11d1391 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -404,12 +404,19 @@ def sin(input, inplace=False) -> Tensor: def cos(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiCos", promote_type(input, Dtype.float32)) + def acos(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiAcos", promote_type(input, Dtype.float32)) + def tan(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiTan", promote_type(input, Dtype.float32)) + +def sinh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiSinh", promote_type(input, Dtype.float32)) + + def tanh(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiTanh", promote_type(input, Dtype.float32)) @@ -421,6 +428,7 @@ def atan(input, inplace=False) -> Tensor: def exp(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExp", promote_type(input, Dtype.float32)) + def expm1(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExpm1", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 7033b4cf2..dc5513a8f 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -958,6 +958,23 @@ diopiError_t diopiSign(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC return diopiSuccess; } +diopiError_t diopiSinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(sinh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiSinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = 
impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(sinh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiTanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 782c9bdc2..bb2732436 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -910,6 +910,21 @@ DIOPI_API diopiError_t diopiTanInp(diopiContextHandle_t ctx, diopiTensorHandle_t */ DIOPI_API diopiError_t diopiTan(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiSinh(). + * @param[in] ctx Context environment. + * @param[inout] input the input tensor, which also stores the result. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiSinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Returns a new tensor with the hyperbolic sine of the elements of input. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float16, float32, float64]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiSinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiTanh(). * @param[in] ctx Context environment. From dca4fcc9ce0055811d71f03fa6083f9dc8380689 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:30:53 +0800 Subject: [PATCH 05/30] add cosh on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 38 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 4 ++ impl/torch/functions/functions.cpp | 17 +++++++++ proto/include/diopi/functions.h | 15 ++++++++ 4 files changed, 74 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 6059959d6..dd58aa388 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1230,6 +1230,44 @@ ), ), + 'cosh': dict( + name=['cosh'], + interface=['torch'], + is_inplace=True, + saved_args=dict(output=0), + dtype=[np.float16, np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + + 'cosh_not_float': dict( + name=['cosh'], + interface=['torch'], + dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + 'tanh': dict( name=['tanh'], interface=['torch'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 4b11d1391..61c36cec9 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -417,6 +417,10 @@ def sinh(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiSinh", promote_type(input, Dtype.float32)) +def cosh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiCosh", promote_type(input, Dtype.float32)) + + def tanh(input, inplace=False) -> Tensor: return 
unary_op(input, inplace, "diopiTanh", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index dc5513a8f..d38900db6 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -975,6 +975,23 @@ diopiError_t diopiSinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiCosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(cosh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiCoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(cosh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiTanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index bb2732436..d057001df 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -925,6 +925,21 @@ DIOPI_API diopiError_t diopiSinhInp(diopiContextHandle_ */ DIOPI_API diopiError_t diopiSinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiCosh(). + * @param[in] ctx Context environment. + * @param[inout] input the input tensor, which also stores the result. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiCoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Returns a new tensor with the hyperbolic cosine of the elements of input. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float16, float32, float64]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiCosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiTanh(). * @param[in] ctx Context environment. 
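Note: diopiSinh/diopiCosh map directly onto ATen's sinh_out/cosh_out. One cheap sanity check a backend can run against these ops is the identity cosh(x)^2 - sinh(x)^2 = 1; a sketch in plain torch:

    import torch

    x = torch.randn(1024, dtype=torch.float64)
    s, c = torch.sinh(x), torch.cosh(x)

    # cosh(x)^2 - sinh(x)^2 == 1 for all real x
    assert torch.allclose(c * c - s * s, torch.ones_like(x))

    # Both ops also accept the empty shapes exercised by the configs,
    # e.g. (0,), (16, 0) and (1, 0, 6).
    print(torch.cosh(torch.empty(16, 0)).shape)  # torch.Size([16, 0])
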
From 766a9769c11cc7da3da8b15389810c71b8523016 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:40:24 +0800 Subject: [PATCH 06/30] add asinh on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 38 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 4 ++ impl/torch/functions/functions.cpp | 17 +++++++++ proto/include/diopi/functions.h | 15 ++++++++ 4 files changed, 74 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index dd58aa388..ee01db6d7 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1307,6 +1307,44 @@ ), ), + 'asinh': dict( + name=['asinh'], + interface=['torch'], + is_inplace=True, + saved_args=dict(output=0), + dtype=[np.float16, np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + + 'asinh_not_float': dict( + name=['asinh'], + interface=['torch'], + dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + 'sign': dict( name=['sign'], interface=['torch'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 61c36cec9..cca369c8d 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -429,6 +429,10 @@ def atan(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiAtan", promote_type(input, Dtype.float32)) +def asinh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAsinh", promote_type(input, Dtype.float32)) + + def exp(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExp", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index d38900db6..d7d199108 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -1026,6 +1026,23 @@ diopiError_t diopiAtanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiAsinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(asinh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAsinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(asinh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiSigmoid(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index d057001df..d5d01029c 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -977,6 +977,21 @@ DIOPI_API diopiError_t diopiAtan(diopiContextHandle_t ctx, diopiTensorHandle_t o */ DIOPI_API diopiError_t 
diopiAtanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); +/** + * @brief The in-place version of diopiAsinh(). + * @param[in] ctx Context environment. + * @param[inout] input the input tensor, which also stores the result. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAsinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Returns a new tensor with the arc hyperbolic sine of the elements of input. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float16, float32, float64]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAsinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiSigmoid(). * @param[in] ctx Context environment. From 6f4e9d657d3031b8ea5f65f4b1134f9c30b4a795 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:45:35 +0800 Subject: [PATCH 07/30] add acosh on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 38 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 4 ++ impl/torch/functions/functions.cpp | 17 +++++++++ proto/include/diopi/functions.h | 15 ++++++++ 4 files changed, 74 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index ee01db6d7..9a50cd4b6 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1345,6 +1345,44 @@ ), ), + 'acosh': dict( + name=['acosh'], + interface=['torch'], + is_inplace=True, + saved_args=dict(output=0), + dtype=[np.float16, np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + + 'acosh_not_float': dict( + name=['acosh'], + interface=['torch'], + dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + 'sign': dict( name=['sign'], interface=['torch'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index cca369c8d..77a52de00 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -433,6 +433,10 @@ def asinh(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiAsinh", promote_type(input, Dtype.float32)) +def acosh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAcosh", promote_type(input, Dtype.float32)) + + def exp(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExp", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index d7d199108..dabdf6ed3 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -1043,6 +1043,23 @@ diopiError_t diopiAsinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) return diopiSuccess; } +diopiError_t diopiAcosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = 
impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(acosh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAcoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(acosh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiSigmoid(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index d5d01029c..b3c7d9fe7 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -992,6 +992,21 @@ DIOPI_API diopiError_t diopiAsinhInp(diopiContextHandle */ DIOPI_API diopiError_t diopiAsinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiAcosh(). + * @param[in] ctx Context environment. + * @param[inout] input the input tensor, which also stores the result. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAcoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Returns a new tensor with the arc hyperbolic cosine of the elements of input. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float16, float32, float64]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAcosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiSigmoid(). * @param[in] ctx Context environment. 
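Note: asinh is defined on all of R, while acosh is only real for x >= 1; ATen returns NaN below that, so the Genfunc.randn inputs in the configs will legitimately produce NaNs that the reference (torch) reproduces as well. A short sketch:

    import torch

    x = torch.tensor([-1.0, 0.5, 1.0, 2.0])

    print(torch.asinh(x))  # finite everywhere
    print(torch.acosh(x))  # nan, nan, 0.0, ~1.3170

    # acosh inverts cosh on the non-negative half-line.
    y = torch.linspace(0.0, 5.0, steps=6)
    assert torch.allclose(torch.acosh(torch.cosh(y)), y, atol=1e-5)
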
From 89168cde5668d60a44210c32f6e57816f5701c0c Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:49:41 +0800 Subject: [PATCH 08/30] add atanh on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 38 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 3 ++ impl/torch/functions/functions.cpp | 17 +++++++++ proto/include/diopi/functions.h | 15 ++++++++ 4 files changed, 73 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 9a50cd4b6..9f1aac0bc 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1383,6 +1383,44 @@ ), ), + 'atanh': dict( + name=['atanh'], + interface=['torch'], + is_inplace=True, + saved_args=dict(output=0), + dtype=[np.float16, np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + + 'atanh_not_float': dict( + name=['atanh'], + interface=['torch'], + dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + 'sign': dict( name=['sign'], interface=['torch'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 77a52de00..065f0718c 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -436,6 +436,9 @@ def asinh(input, inplace=False) -> Tensor: def acosh(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiAcosh", promote_type(input, Dtype.float32)) +def atanh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAtanh", promote_type(input, Dtype.float32)) + def exp(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExp", promote_type(input, Dtype.float32)) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index dabdf6ed3..6d15cd29b 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -1060,6 +1060,23 @@ diopiError_t diopiAcoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) return diopiSuccess; } +diopiError_t diopiAtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(atanh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(atanh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiSigmoid(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index b3c7d9fe7..a0dd3ebdf 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -1007,6 +1007,21 @@ DIOPI_API diopiError_t diopiAcoshInp(diopiContextHandle_t ctx, 
diopiTensorHandle */ DIOPI_API diopiError_t diopiAcosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiAtanh(). + * @param[in] ctx Context environment. + * @param[inout] input the input tensor, which also stores the result. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Returns a new tensor with the arc hyperbolic tangent of the elements of input. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float16, float32, float64]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + /** * @brief The in-place version of diopiSigmoid(). * @param[in] ctx Context environment. From 56b3150232e575db97b5fd8a138c7187d6af1ac5 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 14:59:55 +0800 Subject: [PATCH 09/30] combine some hyperbolic trigo function diopi tests into hyperbolic_trigo_function --- diopi_test/python/configs/diopi_configs.py | 160 +--------------------- 1 file changed, 4 insertions(+), 156 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 9f1aac0bc..644cd9bf9 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1192,8 +1192,8 @@ ), ), - 'sinh': dict( - name=['sinh'], + 'hyperbolic_trigo_function_without_backward': dict( + name=['sinh', 'cosh', 'asinh', 'acosh', 'atanh'], interface=['torch'], is_inplace=True, saved_args=dict(output=0), dtype=[np.float16, np.float32, np.float64], tensor_para=dict( gen_fn='Genfunc.randn', args=[ { "ins": ['input'], "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), (256, 128, 3, 3), (2, 31, 512, 6, 40), (0,), (16, 0), (1, 0, 6)), }, ], ), ), - 'sinh_not_float': dict( - name=['sinh'], + 'hyperbolic_trigo_function_without_backward_not_float': dict( + name=['sinh', 'cosh', 'asinh', 'acosh', 'atanh'], interface=['torch'], dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], tensor_para=dict( gen_fn='Genfunc.randn', args=[ { "ins": ['input'], "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), (256, 128, 3, 3), (2, 31, 512, 6, 40), (0,), (16, 0), (1, 0, 6)), }, ], ), ), - 'cosh': dict( - name=['cosh'], - interface=['torch'], - is_inplace=True, - saved_args=dict(output=0), - dtype=[np.float16, np.float32, np.float64], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - - 'cosh_not_float': dict( - name=['cosh'], interface=['torch'], dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], tensor_para=dict( gen_fn='Genfunc.randn', args=[ { "ins": ['input'], "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), (256, 128, 3, 3), (2, 31, 512, 6, 40), (0,), (16, 0), (1, 0, 6)), }, ], ), ), 'sign': dict( name=['sign'], interface=['torch'], diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 9f1aac0bc..644cd9bf9 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1307,6 +1269,6 @@ ), ), - 'asinh': dict( - name=['asinh'], - interface=['torch'], - is_inplace=True, - saved_args=dict(output=0), - dtype=[np.float16, np.float32, np.float64], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - - 'asinh_not_float': dict( - name=['asinh'], - interface=['torch'], - dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 
31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - - 'acosh': dict( - name=['acosh'], - interface=['torch'], - is_inplace=True, - saved_args=dict(output=0), - dtype=[np.float16, np.float32, np.float64], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - - 'acosh_not_float': dict( - name=['acosh'], - interface=['torch'], - dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - - 'atanh': dict( - name=['atanh'], - interface=['torch'], - is_inplace=True, - saved_args=dict(output=0), - dtype=[np.float16, np.float32, np.float64], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - - 'atanh_not_float': dict( - name=['atanh'], - interface=['torch'], - dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], - tensor_para=dict( - gen_fn='Genfunc.randn', - args=[ - { - "ins": ['input'], - "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), - (256, 128, 3, 3), - (2, 31, 512, 6, 40), - (0,), (16, 0), (1, 0, 6)), - }, - ], - ), - ), - 'sign': dict( name=['sign'], interface=['torch'], From 74b887232e10f2d5797f7dc8bd5beffae2a432fc Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 15:23:02 +0800 Subject: [PATCH 10/30] add argmin on diopi torch impl & del diopi test hyperbolic_trigo_function's save_args param --- diopi_test/python/configs/diopi_configs.py | 41 ++++++++++++++++++- .../python/conformance/diopi_functions.py | 23 +++++++++++ impl/torch/functions/functions.cpp | 10 +++++ proto/include/diopi/functions.h | 10 +++++ 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 644cd9bf9..dff448be0 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -1196,7 +1196,6 @@ name=['sinh', 'cosh', 'asinh', 'acosh', 'atanh'], interface=['torch'], is_inplace=True, - saved_args=dict(output=0), dtype=[np.float16, np.float32, np.float64], tensor_para=dict( gen_fn='Genfunc.randn', @@ -5556,6 +5555,46 @@ ), ), + 'argmin': dict( + name=['argmin'], + interface=["torch"], + para=dict( + dim=[0, -1, 0, 1, None, -2, 2, 1], + keepdim=[True, False, True, False, False, True, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((), (1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64), + (12, 0), (2, 0, 9), (0, 9, 8, 7)), + "dtype": [np.float64, np.float16, np.float32, np.int32, np.int16, + np.int64, np.uint8, np.int8], + "gen_fn": 'Genfunc.randn', + }, + ], + ), + ), + + 'argmin_same_value': dict( + name=['argmin'], + interface=["torch"], + para=dict( + dim=[-1, 0, None, 1], + keepdim=[True, False, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64)), + "dtype": [np.float32], + "gen_fn": 'Genfunc.zeros', + }, + ], + ), + ), + 'adadelta': dict( name=["adadelta"], interface=["CustomizedTest"], diff --git 
a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 065f0718c..aef6c4076 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -3563,6 +3563,29 @@ def argmax(input, dim=None, keepdim=False): return out +def argmin(input, dim=None, keepdim=False): + sizeO = list(input.size().data) + if len(sizeO) > 0 and dim is not None: + assert dim < len(sizeO), "dim out of index" + if keepdim: + sizeO[dim] = 1 + else: + sizeO = sizeO[:dim] + sizeO[dim + 1 :] + else: + sizeO = [1] + + out = Tensor(sizeO, from_numpy_dtype(glob_vars.int_type)) + func = check_function("diopiArgmin") + # todo: check the reason of using keepdim + ret = ( + func(input.context(), out, input, keepdim) + if dim is None + else func(input.context(), out, input, dim, keepdim) + ) + check_returncode(ret) + + return out + def smooth_l1_loss(input, target, reduction="mean", beta=1.0): assert ( diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 6d15cd29b..7710fd6d4 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -3200,6 +3200,16 @@ diopiError_t diopiArgmax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diop return diopiSuccess; } +diopiError_t diopiArgmin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim) { + impl::aten::setCurStream(ctx); + auto atOut = impl::aten::buildATen(out); + auto atInput = impl::aten::buildATen(input); + c10::optional<int64_t> atDim = dim ? c10::optional<int64_t>(*dim) : c10::nullopt; + CALL_ATEN_CUDA_FUNC(argmin_out, atOut, atInput, atDim, keepdim); + + return diopiSuccess; +} + diopiError_t diopiSmoothL1Loss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiReduction_t reduction, double beta) { impl::aten::setCurStream(ctx); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index a0dd3ebdf..9a774dfd8 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -2713,6 +2713,16 @@ DIOPI_API diopiError_t diopiCdistBackward(diopiContextHandle_t ctx, diopiTensorH */ DIOPI_API diopiError_t diopiArgmax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim); +/** + * @brief Returns the indices of the minimum values of a tensor across a dimension. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type=[float32, float64, float16, int16, int32, int64, uint8, int8, bool]. + * @param[in] dim the dimension to do the operation over. type=[int32, int64]. + * @param[in] keepdim whether the output tensor has dim retained or not. + * @param[out] out the output tensor. type=[int32, int64]. + */ +DIOPI_API diopiError_t diopiArgmin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim); + /** * @brief The function is used to implement the Adadelta optimizer. Its functionality is to perform a single parameter update. * @param[in] ctx Context environment. 
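Note: the Python wrapper precomputes the output shape: with keepdim the reduced dimension collapses to 1, otherwise it is dropped, and dim=None reduces over the flattened tensor. The same shape rules in plain torch, which diopiArgmin is expected to match:

    import torch

    x = torch.randn(2, 256, 256)

    print(torch.argmin(x, dim=1).shape)                # torch.Size([2, 256])
    print(torch.argmin(x, dim=1, keepdim=True).shape)  # torch.Size([2, 1, 256])
    print(torch.argmin(x).shape)                       # torch.Size([]) -- flattened

    # The argmin_same_value config (gen_fn zeros) pins down tie-breaking:
    # whatever index torch returns for all-equal input is the reference
    # a backend has to reproduce.
    z = torch.zeros(4, 5)
    print(torch.argmin(z, dim=1))
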
From a8c9223eef159551fa8a26a31e62c0cae315da00 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 14 Aug 2024 16:38:46 +0800 Subject: [PATCH 11/30] add argsort on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 42 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 8 ++++ impl/torch/functions/functions.cpp | 10 +++++ proto/include/diopi/functions.h | 12 ++++++ 4 files changed, 72 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index dff448be0..91dbaf2c7 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -5595,6 +5595,48 @@ ), ), + 'argsort': dict( + name=['argsort'], + interface=["torch"], + para=dict( + dim=[0, -1, 0, 1, -1, 0, 2, 1], + stable=[True, False, True, False, False, True, True, False], + descending=[True, False, True, False, False, True, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((), (1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64), + (12, 0), (2, 0, 9), (0, 9, 8, 7)), + "dtype": [np.float64, np.float16, np.float32, np.int32, np.int16, + np.int64, np.uint8, np.int8], + "gen_fn": 'Genfunc.randn', + }, + ], + ), + ), + + 'argsort_same_value': dict( + name=['argsort'], + interface=["torch"], + para=dict( + dim=[-1, 0, -1, 1], + stable=[True, False, True, False], + descending=[True, False, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64)), + "dtype": [np.float32], + "gen_fn": 'Genfunc.zeros', + }, + ], + ), + ), + 'adadelta': dict( name=["adadelta"], interface=["CustomizedTest"], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index aef6c4076..39988e340 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -3586,6 +3586,14 @@ def argmin(input, dim=None, keepdim=False): return out +def argsort(input, dim=-1, descending=False, stable=False): + out = Tensor(input.size().data, from_numpy_dtype(glob_vars.int_type)) + func = check_function("diopiArgsort") + ret = func(input.context(), out, input, stable, dim, descending) + check_returncode(ret) + + return out + def smooth_l1_loss(input, target, reduction="mean", beta=1.0): assert ( diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 7710fd6d4..b19686cfa 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -3210,6 +3210,16 @@ diopiError_t diopiArgmin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diop return diopiSuccess; } +diopiError_t diopiArgsort(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, bool stable, const int64_t* dim, bool descending) { + impl::aten::setCurStream(ctx); + auto atOut = impl::aten::buildATen(out); + auto atInput = impl::aten::buildATen(input); + atOut = CALL_ATEN_CUDA_FUNC(argsort, atInput, stable, (dim ? 
*dim : -1), descending); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t diopiSmoothL1Loss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiReduction_t reduction, double beta) { impl::aten::setCurStream(ctx); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 9a774dfd8..670752bdb 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -2723,6 +2723,18 @@ DIOPI_API diopiError_t diopiArgmax(diopiContextHandle_t ctx, diopiTensorHandle_t */ DIOPI_API diopiError_t diopiArgmin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim); +/** + * @brief Returns the indices that sort a tensor along a given dimension in ascending order by value. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type=[float32, float64, float16, int16, int32, int64, uint8, int8, bool]. + * @param[in] dim the dimension to do the operation over. type=[int32, int64]. + * @param[in] descending controls the sorting order (ascending or descending). + * @param[in] stable controls the relative order of equivalent elements. + * @param[out] out the output tensor. type=[int32, int64]. + */ +DIOPI_API diopiError_t diopiArgsort(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, bool stable, const int64_t* dim, + bool descending); + /** * @brief The function is used to implement the Adadelta optimizer. Its functionality is to perform a single parameter update. * @param[in] ctx Context environment. From 6fd5621975900eb1aa6046741589cd02b9c20c48 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Thu, 15 Aug 2024 15:45:04 +0800 Subject: [PATCH 12/30] add sort_backward on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 25 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 10 ++++++++ impl/torch/functions/functions.cpp | 13 ++++++++++ proto/include/diopi/functions.h | 14 +++++++++++ 4 files changed, 62 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 91dbaf2c7..fab463aaf 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -4384,6 +4384,31 @@ ), ), + 'sort_backward': dict( + name=["sort"], + interface=['CustomizedTest'], + saved_args=dict(indice=1), + requires_backward = [0], + para=dict( + dim=[-1, 0, 1], + descending=[True, False, False], + stable=[True, True, True], + ), + dtype=[np.float16, np.float32], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + ], + ), + ), + # FIXME topk输入0-d张量,且k为0时,结果精度不一致 'topk_nonzero': dict( name=['topk'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 39988e340..18ba6b62d 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -1509,6 +1509,16 @@ def sort(input, dim=-1, descending=False, stable=None): return vals, indices +def sort_backward(input, grad_outputs, dim, indice, **kwargs): + grad_outputs = grad_outputs[0] + grad_input = raw_like(grad_outputs) + + func = check_function("diopiSortBackward") + ret = func(input.context(), grad_input, grad_outputs, dim, indice, 
input.size(), True) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + def topk(input, k, dim=-1, largest=True, sorted=True): sizeI = input.size().data if len(sizeI) > 0: diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index b19686cfa..34831d542 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -755,6 +755,19 @@ diopiError_t diopiSort(diopiContextHandle_t ctx, diopiTensorHandle_t values, dio return diopiSuccess; } +diopiError_t diopiSortBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim, + diopiConstTensorHandle_t indices, diopiSize_t sizes, bool keepdim = true) { + impl::aten::setCurStream(ctx); + auto atGrad_input = impl::aten::buildATen(grad_input); + auto atGrad_output = impl::aten::buildATen(grad_output); + auto atIndices = impl::aten::buildATen(indices); + auto atSizes = impl::aten::buildAtIntArray(sizes); + atGrad_input = CALL_ATEN_FUNC(value_selecting_reduction_backward, atGrad_output, dim, atIndices, atSizes, keepdim); + impl::aten::updateATen2Tensor(ctx, atGrad_input, grad_input); + + return diopiSuccess; +} + diopiError_t diopiTopk(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t k, int64_t dim, bool largest, bool sorted) { impl::aten::setCurStream(ctx); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 670752bdb..4683e0aa0 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -2486,6 +2486,20 @@ DIOPI_API diopiError_t diopiStack(diopiContextHandle_t ctx, diopiTensorHandle_t DIOPI_API diopiError_t diopiSort(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t dim, bool descending, const bool* pStable); +/** + * @brief Computes the gradient with respect to the input of diopiSort() during backpropagation. + * @param[in] ctx Context environment. + * @param[in] grad_output Tensor containing the gradient of the loss with respect to the sorted output. + * @param[in] dim The dimension along which the sorting was performed. This is used to correctly align the gradients with the corresponding dimension. + * @param[in] indices Tensor containing the indices that were used to sort the input tensor. + * @param[in] sizes The size of the tensor, which is necessary to manage the shape and alignment during gradient computation. + * @param[in] keepdim Boolean flag indicating whether to retain the reduced dimensions or not. If `true`, the dimensions that were reduced are retained with + * size one, which affects how gradients are accumulated. + * @param[out] grad_input Tensor to store the gradient with respect to the input tensor. This tensor will be updated with the computed gradient. + */ +DIOPI_API diopiError_t diopiSortBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim, + diopiConstTensorHandle_t indices, diopiSize_t sizes, bool keepdim); + /** * @brief Returns the k largest elements of the given input tensor along a given dimension. * @param[in] ctx Context environment. 
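Note: the backward of sort routes each incoming gradient back to the input slot recorded in indices; with keepdim set, ATen's value_selecting_reduction_backward is essentially a scatter of grad_output along dim. A sketch comparing autograd against that manual scatter:

    import torch

    x = torch.randn(3, 5, requires_grad=True)
    values, indices = torch.sort(x, dim=1, descending=True, stable=True)

    grad_out = torch.randn_like(values)
    values.backward(grad_out)

    # Manual equivalent: scatter grad_out back through the sort indices
    # (they form a permutation along dim, so there are no collisions).
    manual = torch.zeros_like(x).scatter_(1, indices, grad_out)
    assert torch.allclose(x.grad, manual)
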
From 8e0948a8df82e57fa88883b8096cf3d8bd47d084 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Thu, 22 Aug 2024 15:44:31 +0800 Subject: [PATCH 13/30] refactor diopi_test for sort_backward & add cumsum_backward --- diopi_test/python/configs/diopi_configs.py | 27 +++++++++++++++++-- .../python/conformance/diopi_functions.py | 9 +++++++ impl/torch/functions/functions.cpp | 9 +++++++ proto/include/diopi/functions.h | 9 +++++++ 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index fab463aaf..bf22ad7df 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -4384,11 +4384,11 @@ ), ), - 'sort_backward': dict( + 'sort_stable_for_backward': dict( name=["sort"], interface=['CustomizedTest'], saved_args=dict(indice=1), - requires_backward = [0], + requires_backward=[0], para=dict( dim=[-1, 0, 1], descending=[True, False, False], @@ -5434,6 +5434,29 @@ ), ), + 'cumsum_float_for_backward': dict( + name=["cumsum"], + interface=['torch'], + atol=1e-6, + rtol=1e-5, + requires_backward=[0], + dtype=[np.float32], + para=dict( + dim=[0, -1, 1], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((), (12,), (2, 22, 33)), + "dtype": [np.float32, np.float64, np.float16], + "gen_fn": 'Genfunc.randn', + }, + ], + ), + ), + 'cdist': dict( name=['cdist'], interface=['torch'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 18ba6b62d..447895ecb 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -3382,6 +3382,15 @@ def cumsum(input, dim, dtype=None): return out +def cumsum_backward(input, grad_outputs, dim, **kwargs): + grad_output = grad_outputs[0] + grad_input = raw_like(input) + func = check_function("diopiCumsumBackward") + ret = func(input.context(), grad_input, grad_output, dim) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + def infer_size(a, b): dimsA = len(a) dimsB = len(b) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 34831d542..16de5bca5 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -3127,6 +3127,15 @@ diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diop return diopiSuccess; } +diopiError_t diopiCumsumBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atGradInput = atGradOutput; + impl::aten::updateATen2Tensor(ctx, atGradInput.flip(dim).cumsum(dim).flip(dim), grad_input); + + return diopiSuccess; +} + diopiError_t diopiCdist(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input1, diopiConstTensorHandle_t input2, double p, const int64_t* compute_mode) { impl::aten::setCurStream(ctx); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 4683e0aa0..95f3543a5 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -2694,6 +2694,15 @@ DIOPI_API diopiError_t diopiUnfoldBackward(diopiContextHandle_t ctx, diopiTensor */ DIOPI_API diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t 
dim); +/** + * @brief Computes the backward pass for diopiCumsum() + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output, with the same shape as the forward pass output. type=[float16, float32, float64]. + * @param[in] dim the dimension to do the operation over. type = [int64]. + * @param[out] grad_input the grad tensor of input, with the same shape as the forward pass input. type=[float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiCumsumBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim); + /** * @brief Computes batched the p-norm distance between each pair of the two collections of row vectors. * @param[in] ctx Context environment. From 0f2194c1751efccfe698569682cd05c87c039057 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Thu, 22 Aug 2024 16:28:19 +0800 Subject: [PATCH 14/30] add complex on diopi torch impl --- diopi_test/python/configs/diopi_configs.py | 23 +++++++++++++++++++ .../python/conformance/diopi_functions.py | 11 +++++++++ impl/torch/functions/functions.cpp | 10 ++++++++ proto/include/diopi/functions.h | 8 +++++++ 4 files changed, 52 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index bf22ad7df..255a96863 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -4409,6 +4409,29 @@ ), ), + 'complex': dict( + name=["complex"], + interface=['torch'], + dtype=[np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['real'], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + { + "ins": ['imag'], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + ], + ), + ), + # FIXME topk输入0-d张量,且k为0时,结果精度不一致 'topk_nonzero': dict( name=['topk'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 447895ecb..981e15961 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -1518,6 +1518,17 @@ def sort_backward(input, grad_outputs, dim, indice, **kwargs): check_returncode(ret) return {"input": grad_input} if grad_input.requires_grad else {} +def complex(real, imag): + out_shape = infer_size(real.size().data, imag.size().data) + if real.get_dtype() == Dtype.float64: + out = Tensor(out_shape, Dtype.complex128) + elif real.get_dtype() == Dtype.float32: + out = Tensor(out_shape, Dtype.complex64) + func = check_function("diopiComplex") + ret = func(real.context(), out, real, imag) + + check_returncode(ret) + return out def topk(input, k, dim=-1, largest=True, sorted=True): sizeI = input.size().data diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 16de5bca5..63553324e 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -768,6 +768,16 @@ diopiError_t diopiSortBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gra return diopiSuccess; } +diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t real, diopiConstTensorHandle_t imag) { + auto atReal = impl::aten::buildATen(real); + auto atImag = impl::aten::buildATen(imag); + auto atOut = impl::aten::buildATen(out); + atOut = torch::complex(atReal, atImag); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t 
diopiTopk(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t k,
                        int64_t dim, bool largest, bool sorted) {
     impl::aten::setCurStream(ctx);
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
index 95f3543a5..c20d0c940 100644
--- a/proto/include/diopi/functions.h
+++ b/proto/include/diopi/functions.h
@@ -3690,6 +3690,14 @@ DIOPI_API diopiError_t diopiTriu(diopiContextHandle_t ctx, diopiTensorHandle_t o
  */
 DIOPI_API diopiError_t diopiTriuInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, int64_t diagonal);
 
+/**
+ * @brief Creates a complex tensor out whose real and imaginary parts are given by the real and imag input tensors.
+ * @param[in] ctx Context environment.
+ * @param[in] real the real part of the tensor.
+ * @param[in] imag the imaginary part of the tensor.
+ */
+DIOPI_API diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t real, diopiConstTensorHandle_t imag);
+
 /**
  * @brief Create a tensor filled with one.
  * @param[in] ctx Context environment.
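A note on the dtype mapping hard-coded in the test wrapper above: for the float32/float64 pairs exercised here, torch.complex yields complex64/complex128 respectively, which is exactly the branching the wrapper assumes. A small PyTorch sketch of that behavior (shapes are arbitrary):

    import torch

    real = torch.randn(4, 4, dtype=torch.float32)
    imag = torch.randn(4, 4, dtype=torch.float32)
    z = torch.complex(real, imag)
    assert z.dtype == torch.complex64          # float32 pair -> complex64
    assert torch.complex(real.double(), imag.double()).dtype == torch.complex128
    assert torch.equal(z.real, real) and torch.equal(z.imag, imag)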
From 4f4870cd6521bde28a8cf69c46de6679d6ebceaf Mon Sep 17 00:00:00 2001
From: DoorKickers <1105976166@qq.com>
Date: Thu, 22 Aug 2024 16:44:26 +0800
Subject: [PATCH 15/30] add conj on diopi torch impl

---
 diopi_test/python/configs/diopi_configs.py       | 17 +++++++++++++++++
 .../python/conformance/diopi_functions.py        |  7 +++++++
 impl/torch/functions/functions.cpp               |  9 +++++++++
 proto/include/diopi/functions.h                  |  8 ++++++++
 4 files changed, 41 insertions(+)

diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
index 255a96863..561503803 100755
--- a/diopi_test/python/configs/diopi_configs.py
+++ b/diopi_test/python/configs/diopi_configs.py
@@ -4432,6 +4432,23 @@
         ),
     ),
 
+    'conj': dict(
+        name=["conj"],
+        interface=['torch'],
+        dtype=[np.float32, np.float64, np.complex64, np.complex128],
+        tensor_para=dict(
+            gen_fn='Genfunc.randn',
+            args=[
+                {
+                    "ins": ['input'],
+                    "shape": ((11400, ),
+                              (4, 4, 16, 20),
+                              (4, 4, 16, 2, 20)),
+                },
+            ],
+        ),
+    ),
+
     # FIXME topk输入0-d张量,且k为0时,结果精度不一致
     'topk_nonzero': dict(
         name=['topk'],
diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py
index 981e15961..706cb3e7f 100644
--- a/diopi_test/python/conformance/diopi_functions.py
+++ b/diopi_test/python/conformance/diopi_functions.py
@@ -1526,6 +1526,13 @@ def complex(real, imag):
         out = Tensor(out_shape, Dtype.complex64)
     func = check_function("diopiComplex")
     ret = func(real.context(), out, real, imag)
+    check_returncode(ret)
+    return out
+
+def conj(input):
+    out = raw_like(input)
+    func = check_function("diopiConj")
+    ret = func(input.context(), out, input)
 
     check_returncode(ret)
     return out
diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
index 63553324e..274d4d8b9 100644
--- a/impl/torch/functions/functions.cpp
+++ b/impl/torch/functions/functions.cpp
@@ -778,6 +778,15 @@ diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, dio
     return diopiSuccess;
 }
 
+DIOPI_API diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+    auto atInput = impl::aten::buildATen(input);
+    auto atOut = impl::aten::buildATen(out);
+    atOut = torch::conj(atInput);
+    impl::aten::updateATen2Tensor(ctx, atOut, out);
+
+    return diopiSuccess;
+}
+
 diopiError_t diopiTopk(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t k,
                        int64_t dim, bool largest, bool sorted) {
     impl::aten::setCurStream(ctx);
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
index c20d0c940..301809465 100644
--- a/proto/include/diopi/functions.h
+++ b/proto/include/diopi/functions.h
@@ -3698,6 +3698,14 @@ DIOPI_API diopiError_t diopiTriuInp(diopiContextHandle_t ctx, diopiTensorHandle_
  */
 DIOPI_API diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t real, diopiConstTensorHandle_t imag);
 
+/**
+ * @brief Returns the complex conjugate of the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[out] out the complex conjugate of the input tensor.
+ */
+DIOPI_API diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief Create a tensor filled with one.
  * @param[in] ctx Context environment.
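In recent PyTorch versions (1.10 and later), torch.conj returns a lazy view with the conjugate bit set rather than fresh storage; the implementation above relies on updateATen2Tensor to materialize that view into out. A short sketch of the behavior being wrapped (assuming such a PyTorch version):

    import torch

    z = torch.tensor([1 + 2j, 3 - 4j])
    c = torch.conj(z)
    assert c.is_conj()                     # lazy view, conjugate bit set
    materialized = c.resolve_conj()        # physical copy, like DIOPI's out
    assert torch.equal(materialized, torch.tensor([1 - 2j, 3 + 4j]))
    r = torch.tensor([1.0, 2.0])
    assert torch.equal(torch.conj(r), r)   # on real dtypes conj is the identity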
From 0e6bcc1a583b1f8c81202fe3929c33e1c4af7f17 Mon Sep 17 00:00:00 2001
From: DoorKickers <1105976166@qq.com>
Date: Thu, 22 Aug 2024 16:53:45 +0800
Subject: [PATCH 16/30] refactor some format & add imag on diopi torch impl

---
 diopi_test/python/configs/diopi_configs.py       | 17 +++++++++++++++++
 .../python/conformance/diopi_functions.py        |  8 ++++++++
 impl/torch/functions/functions.cpp               | 11 ++++++++++-
 proto/include/diopi/functions.h                  |  8 ++++++++
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
index 561503803..1c5e694ed 100755
--- a/diopi_test/python/configs/diopi_configs.py
+++ b/diopi_test/python/configs/diopi_configs.py
@@ -4449,6 +4449,23 @@
         ),
     ),
 
+    'imag': dict(
+        name=["imag"],
+        interface=['torch'],
+        dtype=[np.complex64, np.complex128],
+        tensor_para=dict(
+            gen_fn='Genfunc.randn',
+            args=[
+                {
+                    "ins": ['input'],
+                    "shape": ((11400, ),
+                              (4, 4, 16, 20),
+                              (4, 4, 16, 2, 20)),
+                },
+            ],
+        ),
+    ),
+
     # FIXME topk输入0-d张量,且k为0时,结果精度不一致
     'topk_nonzero': dict(
         name=['topk'],
diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py
index 706cb3e7f..dd68d803c 100644
--- a/diopi_test/python/conformance/diopi_functions.py
+++ b/diopi_test/python/conformance/diopi_functions.py
@@ -1537,6 +1537,14 @@ def conj(input):
     check_returncode(ret)
     return out
 
+def imag(input):
+    out = raw_like(input)
+    func = check_function("diopiImag")
+    ret = func(input.context(), out, input)
+
+    check_returncode(ret)
+    return out
+
 def topk(input, k, dim=-1, largest=True, sorted=True):
     sizeI = input.size().data
     if len(sizeI) > 0:
diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
index 274d4d8b9..8475d7b40 100644
--- a/impl/torch/functions/functions.cpp
+++ b/impl/torch/functions/functions.cpp
@@ -778,7 +778,7 @@ diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, dio
     return diopiSuccess;
 }
 
-DIOPI_API diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
     auto atInput = impl::aten::buildATen(input);
     auto atOut = impl::aten::buildATen(out);
     atOut = torch::conj(atInput);
@@ -787,6 +787,15 @@ DIOPI_API diopiError_t diopiConj(diopiContextHandle_t o
     return diopiSuccess;
 }
 
+diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+    auto atInput = impl::aten::buildATen(input);
+    auto atOut = impl::aten::buildATen(out);
+    atOut = torch::imag(atInput);
+    impl::aten::updateATen2Tensor(ctx, atOut, out);
+
+    return diopiSuccess;
+}
+
 diopiError_t diopiTopk(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t k,
                        int64_t dim, bool largest, bool sorted) {
     impl::aten::setCurStream(ctx);
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
index 301809465..e74498f16 100644
--- a/proto/include/diopi/functions.h
+++ b/proto/include/diopi/functions.h
@@ -3706,6 +3706,14 @@ DIOPI_API diopiError_t diopiComplex(diopiContextHandle_
  */
 DIOPI_API diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
 
+/**
+ * @brief Returns the imaginary part of the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[out] out the imaginary part of the input tensor.
+ */
+DIOPI_API diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief Create a tensor filled with one.
  * @param[in] ctx Context environment.
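The imag wrapper above and the real wrapper in the next patch are the inverses of diopiComplex. Note that torch.imag and torch.real return views sharing storage with the input, while the DIOPI wrappers copy them into freshly allocated outputs. A minimal round-trip sketch in PyTorch:

    import torch

    real = torch.randn(3, dtype=torch.float64)
    imag = torch.randn(3, dtype=torch.float64)
    z = torch.complex(real, imag)
    assert torch.equal(torch.real(z), real)
    assert torch.equal(torch.imag(z), imag)
    # complex(real(z), imag(z)) reconstructs z exactly
    assert torch.equal(torch.complex(torch.real(z), torch.imag(z)), z)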
From 5eb369ef2287007a0be6e9c6928037e78c2b58a4 Mon Sep 17 00:00:00 2001
From: DoorKickers <1105976166@qq.com>
Date: Thu, 22 Aug 2024 16:58:50 +0800
Subject: [PATCH 17/30] add real on diopi torch impl

---
 diopi_test/python/configs/diopi_configs.py       |  4 ++--
 diopi_test/python/conformance/diopi_functions.py |  8 ++++++++
 impl/torch/functions/functions.cpp               |  9 +++++++++
 proto/include/diopi/functions.h                  |  8 ++++++++
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
index 1c5e694ed..73525e1cc 100755
--- a/diopi_test/python/configs/diopi_configs.py
+++ b/diopi_test/python/configs/diopi_configs.py
@@ -4449,8 +4449,8 @@
         ),
     ),
 
-    'imag': dict(
-        name=["imag"],
+    'real_imag': dict(
+        name=["real", "imag"],
         interface=['torch'],
         dtype=[np.complex64, np.complex128],
         tensor_para=dict(
diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py
index dd68d803c..ae7283d90 100644
--- a/diopi_test/python/conformance/diopi_functions.py
+++ b/diopi_test/python/conformance/diopi_functions.py
@@ -1545,6 +1545,14 @@ def imag(input):
     check_returncode(ret)
     return out
 
+def real(input):
+    out = raw_like(input)
+    func = check_function("diopiReal")
+    ret = func(input.context(), out, input)
+
+    check_returncode(ret)
+    return out
+
 def topk(input, k, dim=-1, largest=True, sorted=True):
     sizeI = input.size().data
     if len(sizeI) > 0:
diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
index 8475d7b40..4e25c362f 100644
--- a/impl/torch/functions/functions.cpp
+++ b/impl/torch/functions/functions.cpp
@@ -796,6 +796,15 @@ diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC
     return diopiSuccess;
 }
 
+diopiError_t diopiReal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) {
+    auto atInput = impl::aten::buildATen(input);
+    auto atOut = impl::aten::buildATen(out);
+    atOut = torch::real(atInput);
+    impl::aten::updateATen2Tensor(ctx, atOut, out);
+
+    return diopiSuccess;
+}
+
 diopiError_t diopiTopk(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t k,
                        int64_t dim, bool largest, bool sorted) {
     impl::aten::setCurStream(ctx);
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
index e74498f16..2e2a43d42 100644
--- a/proto/include/diopi/functions.h
+++ b/proto/include/diopi/functions.h
@@ -3714,6 +3714,14 @@ DIOPI_API diopiError_t diopiConj(diopiContextHandle_t o
  */
 DIOPI_API diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
 
+/**
+ * @brief Returns the real part of the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[out] out the real part of the input tensor.
+ */
+DIOPI_API diopiError_t diopiReal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief Create a tensor filled with one.
  * @param[in] ctx Context environment.
From 30fadeeae0ba18cceb467c08abcd22c798e8c5e2 Mon Sep 17 00:00:00 2001
From: DoorKickers <1105976166@qq.com>
Date: Fri, 23 Aug 2024 12:40:05 +0800
Subject: [PATCH 18/30] fix setCurStream & add grid_sample & prepare for diopiPool2d

---
 diopi_test/python/configs/diopi_configs.py       | 25 +++++++++++
 .../python/conformance/diopi_functions.py        | 10 +++++
 impl/torch/functions/functions.cpp               | 43 +++++++++++++++++++
 proto/include/diopi/functions.h                  | 17 ++++++++
 4 files changed, 95 insertions(+)

diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
index 73525e1cc..b8c66bd58 100755
--- a/diopi_test/python/configs/diopi_configs.py
+++ b/diopi_test/python/configs/diopi_configs.py
@@ -8202,6 +8202,31 @@
         ),
     ),
 
+    'grid_sample': dict(
+        name=["grid_sample"],
+        interface=['torch.nn.functional'],
+        para=dict(
+            mode=["bilinear", "nearest", "bilinear", "nearest"],
+        ),
+        tensor_para=dict(
+            args=[
+                {
+                    "ins": ['input'],
+                    "shape": ((2, 3, 15, 15), (3, 3, 20, 20, 20), (2, 3, 25, 25), (3, 3, 30, 30, 30)),
+                    "dtype": [np.float16, np.float32, np.float64],
+                    "gen_fn": 'Genfunc.randn',
+                },
+                {
+                    "ins": ['grid'],
+                    "shape": ((2, 5, 5, 2), (3, 10, 10, 10, 3), (2, 20, 20, 2), (3, 60, 60, 60, 3)),
+                    "dtype": [np.float16, np.float32, np.float64],
+                    "gen_fn": 'Genfunc.randn',
+                    "gen_num_range": [1, 19],
+                },
+            ],
+        ),
+    ),
+
     'multinomial': dict(
         name=["multinomial"],
         interface=['torch'],
diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py
index ae7283d90..9249b0fc4 100644
--- a/diopi_test/python/conformance/diopi_functions.py
+++ b/diopi_test/python/conformance/diopi_functions.py
@@ -5097,6 +5097,16 @@ def meshgrid(tensors, shape=None):
     check_returncode(ret)
     return out
 
+def grid_sample(input, grid, mode="bilinear"):
+    if len(input.size().data) == 4:
+        out = Tensor(size=(input.size().data[0], input.size().data[1], grid.size().data[1], grid.size().data[2],), dtype=input.dtype())
+    else:
+        out = Tensor(size=(input.size().data[0], input.size().data[1], grid.size().data[1], grid.size().data[2], grid.size().data[3],), dtype=input.dtype())
+    func = check_function("diopiGridSample")
+    ret = func(input.context(), out, input, grid, mode)
+    check_returncode(ret)
+    return out
+
 def cast_dtype(input, out) -> Tensor:
     call = "diopiCastDtype"
diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
index 4e25c362f..59eb11a61 100644
--- a/impl/torch/functions/functions.cpp
+++ b/impl/torch/functions/functions.cpp
@@ -133,6 +133,29 @@ diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHand
     return diopiSuccess;
 }
 
+// TODO
+diopiError_t diopiPool2d(diopiContextHandle_t ctx, 
diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, bool exclusive, bool adaptive) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + bool atCeilMode = ceil_mode; + at::Tensor atOut = {}; + if (strcmp(mode, "max") == 0 && adaptive) { + } + if (strcmp(mode, "max") == 0 && !adaptive) { + } + if (strcmp(mode, "avg") == 0 && adaptive) { + } + if (strcmp(mode, "avg") == 0 && !adaptive) { + } + + return diopiSuccess; +} + /** * @brief * @param rounding_mode supported in pytorch>=1.8 @@ -769,6 +792,7 @@ diopiError_t diopiSortBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gra } diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t real, diopiConstTensorHandle_t imag) { + impl::aten::setCurStream(ctx); auto atReal = impl::aten::buildATen(real); auto atImag = impl::aten::buildATen(imag); auto atOut = impl::aten::buildATen(out); @@ -779,6 +803,7 @@ diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, dio } diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); auto atOut = impl::aten::buildATen(out); atOut = torch::conj(atInput); @@ -788,6 +813,7 @@ diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC } diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); auto atOut = impl::aten::buildATen(out); atOut = torch::imag(atInput); @@ -797,6 +823,7 @@ diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC } diopiError_t diopiReal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); auto atOut = impl::aten::buildATen(out); atOut = torch::real(atInput); @@ -3018,6 +3045,22 @@ diopiError_t diopiMeshGrid(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, return diopiSuccess; } +diopiError_t diopiGridSample(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grid, + const char* mode) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atGrid = impl::aten::buildATen(grid); + auto atOut = impl::aten::buildATen(out); + int interpolation_mode = 0; + if (strcmp(mode, "bilinear") != 0) { + interpolation_mode = 1; + } + atOut = CALL_ATEN_FUNC(grid_sampler, atInput, atGrid, interpolation_mode, 0, 0); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t diopiAdamW(diopiContextHandle_t ctx, diopiTensorHandle_t param, diopiConstTensorHandle_t grad, diopiTensorHandle_t exp_avg, diopiTensorHandle_t exp_avg_sq, diopiTensorHandle_t max_exp_avg_sq, float lr, float beta1, float beta2, float eps, float weight_decay, int64_t step, bool amsgrad) { diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 2e2a43d42..309f9f51f 100644 --- 
a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -388,6 +388,12 @@ DIOPI_API diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); +/** +TODO + */ +DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, bool exclusive, bool adaptive); + /** * @brief Applies a 2D adaptive average pooling over an input signal composed of several input planes. * @param[in] ctx Context environment. @@ -3503,6 +3509,17 @@ DIOPI_API diopiError_t diopiNormalInp(diopiContextHandle_t ctx, diopiTensorHandl */ DIOPI_API diopiError_t diopiMeshGrid(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, diopiConstTensorHandle_t* inputs, int64_t inputsNum); +/** + * @brief Compute grid sample. + * @param[in] ctx Context environment. + * @param[in] input the original tensor to be sampled. + * @param[in] grid the pixel locations of sampling. + * @param[in] mode the sampling mode. [bilinear, nearest]. + * @param[out] out the result sampling tensor. + */ +DIOPI_API diopiError_t diopiGridSample(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grid, + const char* mode); + /** * @brief Returns a tensor where each row contains num_samples indices sampled from the * multinomial probability distribution located in the corresponding row of tensor input. From dbebe510b43f3073fa03978dfc6161ebb8619af5 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Mon, 14 Oct 2024 13:55:08 +0800 Subject: [PATCH 19/30] add norm_backward, normalize, normalize_backward, layer_normGB, layer_normGB_backward --- diopi_test/python/configs/diopi_configs.py | 61 +++++ .../python/conformance/diopi_functions.py | 158 +++++++++++ impl/torch/functions/functions.cpp | 258 ++++++++++++++++++ proto/include/diopi/functions.h | 20 ++ 4 files changed, 497 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index b8c66bd58..fe5c61816 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -6561,6 +6561,7 @@ (0,), (0, 12), (13, 0, 4)), "dtype": [np.float32, np.float64, np.float16], "gen_fn": 'Genfunc.randn', + "requires_grad": [True], }, ], ), @@ -7649,6 +7650,44 @@ ) ), + 'layer_normGB': dict( + name=["layer_normGB"], + dtype=[np.float32, np.float64, np.float16], + atol=1e-5, + atol_half=1e-1, + rtol_half=1e-2, + para=dict( + eps=[1e-5, 1e-5, 1e-12, 0, -1e-5, 2], + normalized_shape=[(5, 3, 5), (128, ), (64, ), (32,), + (3, 5), (2, 16, 128)], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ["input"], + "requires_grad": [True], + "shape": ((2, 5, 3, 5), (2, 3136, 128), (2, 64), (32,), + (2, 5, 3, 5), (2, 16, 128)), + }, + { + "ins": ["weight"], + "requires_grad": [True], + "shape": (None, (128,), (64,), (32,), + (3, 5), (2, 16, 128)), + }, + { + "ins": ["bias"], + "requires_grad": [True], + "shape": (None, (128,), (64,), (32,), + (3, 5), (2, 16, 128)), + }, + ] + ) + ), + + + 'layer_norm_empty_tensor': dict( name=["layer_norm"], dtype=[np.float32, np.float64, np.float16], @@ -7676,6 +7715,28 @@ ) ), + 'normalize': dict( + name=["normalize"], + 
interface=['torch.nn.functional'], + dtype=[np.float32, np.float64, np.float16], + atol=1e-5, + para=dict( + eps=[1e-2, 1e-8, -3], + p=[1, 2, 3], + dim=[1, 1, 1], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ["input"], + "shape": ((3, 3), (3, 12), (6, 3, 9)), + "requires_grad": [True], + }, + ] + ) + ), + 'copy': dict( name=["copy_"], interface=['torch.Tensor'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 9249b0fc4..4037d2b7b 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -3982,8 +3982,34 @@ def norm(input, p, dim=None, keepdim=False, dtype=None): func = check_function("diopiNorm") ret = func(input.context(), out, input, p, dim) check_returncode(ret) + + GLOBAL_STATE["norm"] = out return out +def norm_backward(grad_outputs, input, p, dim, keepdim=False, dtype=None): + if p == 0: + return {'input': None} + else: + grad_input = raw_like(input) + + p = Scalar(p) + + dim, _ = reduce_op_process(input, dim, keepdim, dtype) + + dim = Sizes(list(dim)) + + grad_output = grad_outputs[0] + + out = {"input": grad_input} + + func = check_function("diopiNormBackward") + + norm = GLOBAL_STATE.pop("norm") + ret = func(input.context(), grad_input, grad_output, input, norm, dim, p) + check_returncode(ret) + + return {k: v for k, v in out.items() if v.requires_grad} + def group_norm(input, num_groups, weight=None, bias=None, eps=1e-05): dim = list(input.size().data) @@ -4076,6 +4102,35 @@ def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05): GLOBAL_STATE["layer_norm_save_invstd"] = save_invstd return out +def layer_normGB(input, normalized_shape, weight=None, bias=None, eps=1e-05): + sizeI = input.size().data + dims = len(sizeI) - len(normalized_shape) + size = [i for i in sizeI[0:dims]] + save_mean = Tensor(size, input.get_dtype()) + save_invstd = raw_like(save_mean) + + weight = None if weight is None else weight + bias = None if bias is None else bias + + out = raw_like(input) + func = check_function("diopiLayerNormGB") + ret = func( + input.context(), + out, + save_mean, + save_invstd, + input, + weight, + bias, + eps, + dims + ) + check_returncode(ret) + GLOBAL_STATE["layer_norm_save_mean"] = save_mean + GLOBAL_STATE["layer_norm_save_invstd"] = save_invstd + return out + + def layer_norm_backward( input, @@ -4127,6 +4182,98 @@ def layer_norm_backward( check_returncode(ret) return {k: v for k, v in out.items() if v.requires_grad} +def layer_normGB_backward( + input, + grad_outputs, + normalized_shape, + weight=None, + bias=None, + eps=1e-05, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + save_mean = GLOBAL_STATE.pop("layer_norm_save_mean") + save_invstd = GLOBAL_STATE.pop("layer_norm_save_invstd") + grad_input = raw_like(input) + out = {"input": grad_input} + + sizeI = input.size().data + dim = len(sizeI) - len(normalized_shape) + + if weight is None: + weight = None + grad_weight_capsule = None + else: + grad_weight = raw_like(weight) + weight = weight + grad_weight_capsule = grad_weight + out["weight"] = grad_weight + + if bias is None: + bias = None + grad_bias_capsule = None + else: + grad_bias = raw_like(bias) + bias = bias + grad_bias_capsule = grad_bias + out["bias"] = grad_bias + + func = check_function("diopiLayerNormGBBackward") + ret = func( + input.context(), + grad_input, + grad_weight_capsule, + grad_bias_capsule, + grad_outputs[0], + input, + 
weight,
+        bias,
+        save_mean,
+        save_invstd,
+        dim
+    )
+    check_returncode(ret)
+    return {k: v for k, v in out.items() if v.requires_grad}
+
+def normalize(input, p, dim, eps):
+    output = raw_like(input)
+
+    func = check_function("diopiNormalize")
+
+    ret = func(
+        input.context(),
+        output,
+        input,
+        p,
+        dim,
+        eps
+    )
+
+    check_returncode(ret)
+
+    return output
+
+def normalize_backward(grad_outputs, input, p, dim, eps):
+
+    grad_output = grad_outputs[0]
+
+    func = check_function("diopiNormalizeBackward")
+
+    grad_input = raw_like(input)
+
+    out = {'input': grad_input}
+
+    ret = func(
+        input.context(),
+        grad_input,
+        grad_output,
+        input,
+        p,
+        dim,
+        eps
+    )
+    check_returncode(ret)
+    return {k: v for k, v in out.items() if v.requires_grad}
 
 def adaptive_avg_pool3d(input, output_size):
     sizeI = input.size().data
@@ -6331,3 +6478,14 @@ def spmm(input, mat2) -> Tensor:
     ret = func(input.context(), out, input, mat2)
     check_returncode(ret)
     return out
+
+
+def layer_norm(input, axis, weight, bias, eps):
+    out = raw_like(input)
+    running_mean = raw_like(input)
+    running_var = raw_like(input)
+    func = check_function("diopiLayerNorm")
+    ret = func(input.context(), out, running_mean, running_var, input, axis, weight, bias, eps)
+    check_returncode(ret)
+    return out, running_mean, running_var
+
diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
index 59eb11a61..a15e8c895 100644
--- a/impl/torch/functions/functions.cpp
+++ b/impl/torch/functions/functions.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+
 // clang-format off
 // NOTE: this header does not include all its dependencies, so we need to keep the order of the includes
 #include
@@ -3602,6 +3603,125 @@ diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC
     return diopiSuccess;
 }
 
+at::Tensor unsqueeze_multiple(
+    const at::Tensor& t,
+    at::OptionalIntArrayRef opt_dim,
+    size_t n_dims) {
+    if (opt_dim.has_value()) {
+        at::IntArrayRef dim = opt_dim.value();
+        auto dim_size = dim.size();
+        // Optimisation for two common cases
+        if (dim_size == 0) {
+            return t;
+        } else if (dim_size == 1) {
+            return t.unsqueeze(dim[0]);
+        }
+    }
+    auto dims_to_unsqueeze = at::dim_list_to_bitset(opt_dim, n_dims);
+    at::Tensor res = t;
+    for (const auto i : c10::irange(n_dims)) {
+        if (dims_to_unsqueeze[i]) {
+            res = res.unsqueeze(i);
+        }
+    }
+    return res;
+}
+
+at::Tensor norm_backward(
+    at::Tensor grad,
+    const at::Tensor& self,
+    const std::optional<at::Scalar>& p_,
+    at::Tensor norm,
+    at::IntArrayRef dim,
+    bool keepdim) {
+    // NB: We mask fill the NaNs in the output to be zero but still do float
+    // division
+    // by zero, which ASAN complains about. One way to appease ASAN is to fill
+    // the problematic values with something arbitrary before the division,
+    // but we decide not to due to the perf hit. 
Instead we just silence ASAN + // where necessary + size_t ndim = self.dim(); + double p = p_.value_or(2.0).toDouble(); + at::Tensor self_scaled; + at::Tensor scale_v; + + if (!keepdim && self.dim() != 0) { + grad = unsqueeze_multiple(grad, dim, ndim); + norm = unsqueeze_multiple(norm, dim, ndim); + } + + if (p == 0.0) { + return {}; + } else if (p == 1.0) { + return self.sgn() * grad; + } else if (p == 2.0) { + return grad * (self / norm).masked_fill_(norm == 0, 0); + } else if (std::isinf(p)) { + // Derivative of amax(abs(self), dim, keepdim) but respecting nans + // We create a mask of `argmax`: it's argmax if self.abs() == norm or it's + // NaN + auto self_abs = self.abs(); + auto mask = self_abs.eq(norm).logical_or(self_abs.isnan()); + return self.sgn() * ((grad / mask.sum(dim, true)) * mask); + } else if (p < 1.0) { + self_scaled = + self.sgn() * self.abs().pow_(p - 1).masked_fill_(self == 0, 0); + return self_scaled * grad * norm.pow(1 - p); + } else if (p < 2.0) { + self_scaled = self.sgn() * self.abs().pow_(p - 1); + scale_v = grad / norm.pow(p - 1); + scale_v.masked_fill_(norm == 0, 0); + return self_scaled * scale_v; + } else { + self_scaled = self * self.abs().pow_(p - 2); + scale_v = grad / norm.pow(p - 1); + scale_v.masked_fill_(norm == 0, 0); + return self_scaled * scale_v; + } +} + +diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atSelf = impl::aten::buildATen(self); + auto atP = impl::aten::buildAtScalar(p); + auto atNorm = impl::aten::buildATen(norm); + + at::IntArrayRef atDim = impl::aten::buildAtIntArray(dim); + bool keepdim = false; + + if (atSelf.dim() == atNorm.dim()) { + keepdim = true; + } + + auto atGradInput = norm_backward(atGradOutput, atSelf, atP, atNorm, atDim, keepdim); + + if (!atGradInput.defined()) { + return diopiSuccess; + } + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + return diopiSuccess; +} + + + +/* +diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grad_input, diopiConstTensorHandle_t result, const diopiScalar_t* p, diopiSize_t dim) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atGradInput = impl::aten::buildATen(grad_input); + auto atP = impl::aten::buildAtScalar(p); + auto atResult = impl::aten::buildATen(result); + at::IntArrayRef atDim = impl::aten::buildAtIntArray(dim); + + bool keepdim = true; + auto atGradOutput = torch::autograd::generated::details::norm_backward(atGradInput, atInput, atP, atResult, atDim, keepdim); + + return diopiSuccess; +} +*/ + diopiError_t diopiForeachnormScalar(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, diopiConstTensorHandle_t* inputs, int64_t inputSize, const diopiScalar_t* ord) { DIOPI_CHECK_PTR(outs); @@ -3719,6 +3839,34 @@ diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } +diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps, const int64_t begin_norm_axis) { + + impl::aten::setCurStream(ctx); + + auto atOut = 
impl::aten::buildATen(out); + auto atMean = impl::aten::buildATen(running_mean); + auto atVar = impl::aten::buildATen(running_var); + + auto atInput = impl::aten::buildATen(input); + + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atBias, bias); + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atScale, scale); + + + at::IntArrayRef atNormalizedShape(atInput.sizes().begin() + begin_norm_axis, atInput.sizes().end()); + + + diopi_tensor_list vecOut = {out, running_mean, running_var}; + + + auto Out = CALL_ATEN_CUDA_FUNC(native_layer_norm, atInput, atNormalizedShape, atScale, atBias, eps); + + impl::aten::updateATen2Tensor(ctx, Out, vecOut); + + return diopiSuccess; +} + + diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape) { @@ -3767,6 +3915,116 @@ diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } +diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis) { + impl::aten::setCurStream(ctx); + diopiDtype_t mDtype, rDtype; + if (running_std) { + diopiGetTensorDtype(running_std, &rDtype); + } + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + + at::IntArrayRef atNormalizedShape(atInput.sizes().begin() + begin_norm_axis, atInput.sizes().end()); + + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atWeight, weight); + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atBias, bias); + auto grad_input_mask = std::array{true, atWeight.has_value(), atBias.has_value()}; + + auto atSaveMean = impl::aten::buildATen(running_mean); + diopiGetTensorDtype(running_mean, &mDtype); + if (diopiDtype_t::diopi_dtype_float16 == mDtype) { + atSaveMean = at::native::to(atSaveMean, impl::aten::getATenType(diopiDtype_t::diopi_dtype_float32).toScalarType(), false, true, c10::nullopt); + } + auto atSaveVar = impl::aten::buildATen(running_std); + diopiGetTensorDtype(running_std, &rDtype); + if (diopiDtype_t::diopi_dtype_float16 == rDtype) { + atSaveVar = at::native::to(atSaveVar, impl::aten::getATenType(diopiDtype_t::diopi_dtype_float32).toScalarType(), false, true, c10::nullopt); + } + + if (grad_input && grad_weight && grad_bias) { + auto atGradInput = impl::aten::buildATen(grad_input); + auto atGradWeight = impl::aten::buildATen(grad_weight); + auto atGradBias = impl::aten::buildATen(grad_bias); + at::native_layer_norm_backward_out( + atGradInput, atGradWeight, atGradBias, atGradOutput, atInput, atNormalizedShape, atSaveMean, atSaveVar, atWeight, atBias, grad_input_mask); + } else { + auto atOut = at::native_layer_norm_backward(atGradOutput, atInput, atNormalizedShape, atSaveMean, atSaveVar, atWeight, atBias, grad_input_mask); + if (grad_input) { + impl::aten::updateATen2Tensor(ctx, std::get<0>(atOut), grad_input); + } + if (grad_weight) { + impl::aten::updateATen2Tensor(ctx, std::get<1>(atOut), grad_weight); + } + if (grad_bias) { + impl::aten::updateATen2Tensor(ctx, std::get<2>(atOut), grad_bias); + } + } + + 
return diopiSuccess;
+}
+
+diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps) {
+    impl::aten::setCurStream(ctx);
+    diopiSize_t input_size;
+    diopiGetTensorShape(input, &input_size);
+
+    std::vector<int64_t> array(input_size.data, input_size.data + input_size.len);
+
+    int64_t batch_channel = 1;
+    for (int i = 0; i < axis; i++) {
+        batch_channel *= array[i];
+    }
+
+    std::vector<int64_t> array2 = {1, batch_channel};
+
+    for (int i = axis; i < array.size(); i++) {
+        array2.push_back(array[i]);
+    }
+
+    diopiSize_t reshaped_size;
+    reshaped_size.data = array2.data();
+    reshaped_size.len = static_cast<int64_t>(array2.size());
+
+    // input->reset_shape(reshaped_size);
+
+    // diopiBatchNorm(ctx, output, nullptr, nullptr, input, scale, bias, nullptr, nullptr, true, 0.0, eps);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps) {
+    impl::aten::setCurStream(ctx);
+
+    auto atInput = impl::aten::buildATen(input);
+
+    auto atOut = impl::aten::buildATen(output);
+
+    auto atDenom = atInput.norm(p, axis, true).clamp_min(eps).expand_as(atInput);
+
+    CALL_ATEN_FUNC(div_out, atOut, atInput, atDenom);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps) {
+    impl::aten::setCurStream(ctx);
+
+    auto atGradOutput = impl::aten::buildATen(grad_output);
+    auto atInput = impl::aten::buildATen(input);
+    auto atNorm = atInput.norm(p, axis, true);
+    auto atClamp = atNorm.clamp_min(eps);
+    auto atDenom = atClamp.expand_as(atInput);
+    auto atGradDenom = atInput * (-1 / atDenom / atDenom) * atGradOutput;
+    auto atGradClamp = atGradDenom.sum(axis, true);
+    auto atGradNorm = atGradClamp.masked_fill_(atNorm < eps, 0);
+    auto atGradOriginInput = norm_backward(atGradNorm, atInput, p, atNorm, axis, true);
+    auto atGradInput = (1 / atDenom) * atGradOutput + atGradOriginInput;
+
+    impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input);
+    return diopiSuccess;
+}
+
 diopiError_t diopiAdaptiveAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) {
     impl::aten::setCurStream(ctx);
     auto atInput = impl::aten::buildATen(input);
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
index 309f9f51f..b6020d4a1 100644
--- a/proto/include/diopi/functions.h
+++ b/proto/include/diopi/functions.h
@@ -3139,6 +3139,16 @@ DIOPI_API diopiError_t diopiFlip(diopiContextHandle_t ctx, diopiTensorHandle_t o
  */
 DIOPI_API diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* p, diopiSize_t dim);
 
+DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p);
+/**
+ * @brief Applies layer normalization over the trailing dimensions of input, starting at begin_norm_axis.
+ * @param[in] ctx Context environment; eps is added to the variance for numerical stability.
+ * @param[in] input, weight, bias the input tensor and the optional affine parameters.
+ * @param[out] out, running_mean, running_var the normalized result and the saved mean and inverse standard deviation.
+ */
+DIOPI_API diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, const 
double eps, const int64_t begin_norm_axis); + /** * @brief Returns the matrix norm or vector norm of a given tensor list. * @param[in] ctx Context environment. @@ -3567,6 +3577,16 @@ DIOPI_API diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape); +DIOPI_API diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis); + + +DIOPI_API diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps); + +DIOPI_API diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); + +DIOPI_API diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); + + /** * @brief Copies the elements from src into dest tensor. * @param[in] ctx Context environment. From e66858d912579cde240eb5c03aa36254494b3f4d Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Mon, 14 Oct 2024 20:32:46 +0800 Subject: [PATCH 20/30] prepare for pool1d, pool2d, pool3d --- impl/torch/functions/functions.cpp | 270 +++++++++++++++++++++++++++++ 1 file changed, 270 insertions(+) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index a15e8c895..2b68dd804 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -103,6 +103,189 @@ diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t inp return diopiSuccess; } +diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + bool atCeilMode = ceil_mode; + auto atOut = CALL_ATEN_FUNC(max_pool1d, atInput, atKernelSize, atStride, atPadding, atDilation, atCeilMode); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, + bool ceil_mode, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + 
at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + auto atIndices = impl::aten::buildATen(indices); + + auto atGrad2d = CALL_ATEN_FUNC(max_pool2d_with_indices_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), {1, atKernelSize[0]}, {1, atStride[0]}, {0, atPadding[0]}, {1, atDilation[0]}, ceil_mode, atIndices.unsqueeze(-2)); + + auto atGradInput = atGrad2d.squeeze(-2); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + // CALL_ATEN_FUNC( + // max_pool1d_with_indices_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, atDilation, ceil_mode, atIndices); + + return diopiSuccess; +} + +diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, + diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + auto atOut = impl::aten::buildATen(out); + auto atIndices = impl::aten::buildATen(indices); + bool atCeilMode = ceil_mode; + std::tuple atRes = CALL_ATEN_FUNC(max_pool1d_with_indices, atInput, atKernelSize, atStride, atPadding, atDilation, atCeilMode); + + impl::aten::updateATen2Tensor(ctx, std::get<0>(atRes), out); + impl::aten::updateATen2Tensor(ctx, std::get<1>(atRes), indices); + + return diopiSuccess; +} + +diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, bool ceil_mode, bool count_include_pad) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + auto atOut = CALL_ATEN_FUNC(avg_pool1d, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad); + + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + + auto atGrad2d = CALL_ATEN_FUNC(avg_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), {1, atKernelSize[0]}, {1, atStride[0]}, {0, atPadding[0]}, ceil_mode, count_include_pad, c10::nullopt); + + auto atGradInput = atGrad2d.squeeze(-2); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + return diopiSuccess; +} + +diopiError_t 
diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) {
+    impl::aten::setCurStream(ctx);
+    auto atInput = impl::aten::buildATen(input);
+    auto atOutSize = impl::aten::buildAtIntArray(output_size);
+    auto atOuts = at::adaptive_max_pool1d(atInput, atOutSize);
+    impl::aten::updateATen2Tensor(ctx, std::get<0>(atOuts), out);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input,
+                                               diopiSize_t output_size) {
+    impl::aten::setCurStream(ctx);
+    auto atInput = impl::aten::buildATen(input);
+    auto atOutSize = impl::aten::buildAtIntArray(output_size);
+    auto [atOut, atIndices] = CALL_ATEN_FUNC(adaptive_max_pool1d, atInput, atOutSize);
+
+    impl::aten::updateATen2Tensor(ctx, atOut, out);
+    impl::aten::updateATen2Tensor(ctx, atIndices, indices);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
+                                            diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices) {
+    impl::aten::setCurStream(ctx);
+    auto atInput = impl::aten::buildATen(input);
+    auto atGradOutput = impl::aten::buildATen(grad_output);
+    auto atIndices = impl::aten::buildATen(indices);
+    // indices must be unsqueezed like the other operands before calling the 2d backward
+    auto atGrad2d = CALL_ATEN_FUNC(adaptive_max_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), atIndices.unsqueeze(-2));
+    auto atGradInput = atGrad2d.squeeze(-2);
+
+    impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
+                                            diopiConstTensorHandle_t input) {
+    impl::aten::setCurStream(ctx);
+    auto atGradOutput = impl::aten::buildATen(grad_output);
+    auto atInput = impl::aten::buildATen(input);
+    auto atGrad2d = CALL_ATEN_FUNC(_adaptive_avg_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2));
+    auto atGradInput = atGrad2d.squeeze(-2);
+
+    impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) {
+    impl::aten::setCurStream(ctx);
+    auto atInput = impl::aten::buildATen(input);
+    auto atOutSize = impl::aten::buildAtIntArray(output_size);
+    auto atOut = CALL_ATEN_FUNC(adaptive_avg_pool1d, atInput, atOutSize);
+
+    impl::aten::updateATen2Tensor(ctx, atOut, out);
+
+    return diopiSuccess;
+}
+
+diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) {
+    impl::aten::setCurStream(ctx);
+
+    if (adaptive == false && strcmp(mode, "max") == 0) {
+        return impl::cuda::diopiMaxPool1d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode);
+    } else if (adaptive == false && strcmp(mode, "avg") == 0) {
+        return impl::cuda::diopiAvgPool1d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive);
+    } else if (adaptive == true && strcmp(mode, "max") == 0) {
+        return impl::cuda::diopiAdaptiveMaxPool1d(ctx, out, input, output_size);
+    } else {
+        return impl::cuda::diopiAdaptiveAvgPool1d(ctx, out, input, output_size);
+    }
+
+}
+ +diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool1dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool1dBackward(ctx, grad_input, grad_output, input); + } + +} + + diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { impl::aten::setCurStream(ctx); @@ -118,6 +301,7 @@ diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } + diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { impl::aten::setCurStream(ctx); @@ -2114,6 +2298,36 @@ diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } +diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool2d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool2d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool2d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool2d(ctx, out, input, output_size); + } + +} + +diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == 
true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool2dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool2dBackward(ctx, grad_input, grad_output, input); + } + +} + diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t mask, diopiConstTensorHandle_t input, double p, bool train, diopiGeneratorHandle_t generator) { impl::aten::setCurStream(ctx); @@ -4129,6 +4343,62 @@ diopiError_t diopiMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } +diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { + impl::aten::setCurStream(ctx); + + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + c10::optional atDivisorOverride = divisor_override ? c10::optional(*divisor_override) : c10::nullopt; + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(avg_pool3d_out, atOut, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); + + return diopiSuccess; +} + +diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + c10::optional atDivisorOverride = divisor_override ? 
c10::optional(*divisor_override) : c10::nullopt; + auto atGradInput = impl::aten::buildATen(grad_input); + CALL_ATEN_CUDA_FUNC(avg_pool3d_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); +} + +diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool3d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool3d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool3d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool3d(ctx, out, input, output_size); + } + +} + +diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool3dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool3dBackward(ctx, grad_input, grad_output, input); + } + +} + diopiError_t diopiPermute(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dims) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); From 41fad0bc57dbecafd9150189083694b6ef9c9884 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Tue, 15 Oct 2024 15:18:12 +0800 Subject: [PATCH 21/30] finish diopi_pool related --- diopi_test/python/configs/diopi_configs.py | 166 +++++++ .../python/conformance/diopi_functions.py | 435 ++++++++++++++++++ .../python/conformance/diopi_runtime.py | 13 + impl/torch/functions/functions.cpp | 1 + proto/include/diopi/functions.h | 49 +- 5 files changed, 659 insertions(+), 5 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index fe5c61816..f82517e4b 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -743,6 +743,149 @@ ), ), + 'avg_pool1d': dict( + name=["avg_pool1d"], + para=dict( + kernel_size=[2, 2, 6, 2, 3], + stride=[None, None, 3, 1, 2], + padding=[0, 0, 2, 1, 0], + ceil_mode=[False, True, False, True, False], + count_include_pad=[True, True, False, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 
16), (5, 2, 16), (3, 4, 16), + (2, 1024, 14), (256, 28, 28)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'max_pool1d': dict( + name=["max_pool1d"], + para=dict( + kernel_size=[6, 5, 6, 8, 3, 2, 3, 3], + stride=[None, 2, 2, 3, 2, 2, 1, 2], + padding=[0, 2, 2, 3, 1, 0, 1, 0], + dilation=[1, 3, 2, 2, 1, 1, 1, 2], + ceil_mode=[False, True, False, True, False, True, False, True], + return_indices=[False, False, False, False, False, False, False, False], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((3, 12), (5, 4, 17), + (6, 17), (1, 4, 17), + (2, 64, 352), + (2, 256, 12), + (2, 512, 4), + (3, 4)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'max_pool1d_return_indices': dict( + name=["max_pool1d"], + para=dict( + kernel_size=[6, 6, 8, 8], + stride=[None, 3, 3, 2], + padding=[0, 1, 2, 3], + dilation=[1, 4, 2, 3], + ceil_mode=[False, True, False, True], + return_indices=[True, True, True, True], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((3, 12), (5, 4, 17), + (6, 17), (1, 4, 17),), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + requires_backward=[0], + ), + + 'adaptive_avg_pool1d': dict( + name=["adaptive_avg_pool1d"], + atol=1e-5, + rtol=1e-4, + atol_half=1e-2, + rtol_half=1e-2, + para=dict( + output_size=[5, 26, 3, 1, 2, + 1, 3, 7, 10], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((3, 16), (4, 7, 27), (4, 16), + (2, 2048, 8), (2, 288, 33), + (2, 144, 65), (2, 1280, 7), + (2, 265, 7), (2, 265, 7)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'adaptive_max_pool1d': dict( + name=["adaptive_max_pool1d"], + atol=1e-5, + rtol=1e-4, + para=dict( + output_size=[5, 26, 3, 2, 1, 3, 33, 40], + return_indices=[False, False, False, False, False, False, False, False] + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((3, 16), (4, 7, 27), (4, 16), + (288, 33), (2, 144, 33), (2, 16, 130), + (2, 144, 33), (2, 144, 33)), + "dtype": [np.float32, np.float16, np.float64], + }, + ] + ), + ), + + 'adaptive_max_pool2d_return_indices': dict( + name=["adaptive_max_pool2d"], + atol=1e-5, + rtol=1e-4, + para=dict( + output_size=[5, (26, 40), (None, None), (0, 0)], + return_indices=[True, True, True, True] + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((3, 16, 8), (4, 7, 27, 39), (4, 16, 12), (4, 16, 12)), + "dtype": [np.float32, np.float16, np.float64], + }, + ] + ), + ), + 'avg_pool2d': dict( name=["avg_pool2d"], para=dict( @@ -5931,6 +6074,29 @@ ), ), + 'avg_pool3d': dict( + name=["avg_pool3d"], + para=dict( + kernel_size=[2, (2, 2, 2), (20, 13, 13), (2, 2, 2), 3], + stride=[None, None, 3, 1, (1, 2, 2)], + padding=[0, (0, 0, 0), (2, 3, 2), (1, 1, 1), 0], + ceil_mode=[False, True, False, True, False], + count_include_pad=[True, True, False, True, False], + divisor_override=[None, None, -3, None, 2], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16, 7, 7), (5, 2, 16, 7, 7), (3, 4, 23, 23, 23), + (2, 1024, 14, 14, 14), (256, 28, 28, 28)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + 'max_pool3d': dict( name=['max_pool3d'], para=dict( diff --git a/diopi_test/python/conformance/diopi_functions.py 
b/diopi_test/python/conformance/diopi_functions.py index 4037d2b7b..7c7d7d3eb 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -1075,6 +1075,314 @@ def conv2d( check_returncode(ret) return out +def avg_pool1d( + input, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + + for i in range(-1, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiAvgPool1d") + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + check_returncode(ret) + return out + +def avg_pool1d_backward( + input, + grad_outputs, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride) + if isinstance(padding, int): + padding = (padding, padding) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + + func = check_function("diopiAvgPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def max_pool1d( + input, + kernel_size, + stride=None, + padding=0, + dilation=1, + ceil_mode=False, + return_indices=False, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + + for i in range(-1, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out 
= Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + if not return_indices: + func = check_function("diopiMaxPool1d") + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + ) + check_returncode(ret) + return out + else: + func = check_function("diopiMaxPool1dWithIndices") + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + indices = Tensor( + sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + ) + ret = func( + input.context(), + out, + indices, + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + ) + check_returncode(ret) + return out, indices + +def max_pool1d_backward( + input, + grad_outputs, + kernel_size, + stride=None, + padding=0, + dilation=1, + ceil_mode=False, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + + _, indices = max_pool1d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiMaxPool1dBackward") + ret = func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + indices, + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def adaptive_avg_pool1d(input, output_size): + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(output_size, int): + output_size = (output_size, output_size) + + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + func = check_function("diopiAdaptiveAvgPool1d") + ret = func(input.context(), out, input, output_size) + check_returncode(ret) + return out + + +def adaptive_max_pool1d(input, output_size, return_indices=False): + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(output_size, int): + output_size = (output_size, output_size) + + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + if return_indices: + func = check_function("diopiAdaptiveMaxPool1dWithIndices") + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + indices = Tensor( + sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + ) + ret = func(input.context(), out, indices, input, 
output_size) + check_returncode(ret) + return out, indices + else: + func = check_function("diopiAdaptiveMaxPool1d") + ret = func(input.context(), out, input, output_size) + check_returncode(ret) + return out + +def adaptive_avg_pool1d_backward(input, grad_outputs, **kwargs) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + func = check_function("diopiAdaptiveAvgPool1dBackward") + ret = func(input.context(), grad_input, grad_outputs[0], input) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + +def adaptive_max_pool1d_backward(input, grad_outputs, output_size, **kwargs) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + _, indices = adaptive_max_pool1d(input, output_size, return_indices=True) + + func = check_function("diopiAdaptiveMaxPool1dBackward") + ret = func(input.context(), grad_input, grad_outputs[0], input, indices) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} def avg_pool2d( input, @@ -4275,6 +4583,133 @@ def normalize_backward(grad_outputs, input, p, dim, eps): ) return {k: v for k, v in out.items() if v.requires_grad} +def avg_pool3d( + input, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 5 or len(sizeI) == 4, "input must be 4d or 5d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 5: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + + for i in range(-3, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + nhwc_stride = compute_nhwc_stride_3d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiAvgPool3d") + if divisor_override: + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override, + ) + else: + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + check_returncode(ret) + return out + +def avg_pool3d_backward( + input, + grad_outputs, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + + func = check_function("diopiAvgPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, 
+ kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override, + ) + if divisor_override + else func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + + def adaptive_avg_pool3d(input, output_size): sizeI = input.size().data assert len(sizeI) == 5 or len(sizeI) == 4, "input must be 4d or 5d tensors" diff --git a/diopi_test/python/conformance/diopi_runtime.py b/diopi_test/python/conformance/diopi_runtime.py index 5b319ec7a..c5f5826b6 100644 --- a/diopi_test/python/conformance/diopi_runtime.py +++ b/diopi_test/python/conformance/diopi_runtime.py @@ -138,6 +138,19 @@ def to_numpy_dtype(dtype: Dtype) -> np.dtype: def is_dtype(dtype) -> bool: return isinstance(dtype, Dtype) +def compute_nhwc_stride_1d(sizes, itemsize=1): + dim = len(sizes) + strides = [itemsize for i in range(dim)] + assert dim == 2 or dim == 3, "not supported dim" + if dim == 2: + strides[0] = itemsize + strides[1] = strides[0] * sizes[0] + elif dim == 3: + strides[1] = itemsize + strides[2] = strides[0] * sizes[1] + strides[0] = strides[2] * sizes[2] + return strides + def compute_nhwc_stride_2d(sizes, itemsize=1): dim = len(sizes) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 2b68dd804..73c52332b 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -4367,6 +4367,7 @@ diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ c10::optional atDivisorOverride = divisor_override ? c10::optional(*divisor_override) : c10::nullopt; auto atGradInput = impl::aten::buildATen(grad_input); CALL_ATEN_CUDA_FUNC(avg_pool3d_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); + return diopiSuccess; } diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) { diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index b6020d4a1..d6bb71a41 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -302,6 +302,39 @@ DIOPI_API diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHa DIOPI_API diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope, bool input_is_result); +DIOPI_API diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); + +DIOPI_API diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, + bool ceil_mode, diopiConstTensorHandle_t indices); + +DIOPI_API diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, + diopiSize_t kernel_size, diopiSize_t 
stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode);
+
+DIOPI_API diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride,
+                                      diopiSize_t padding, bool ceil_mode, bool count_include_pad);
+
+DIOPI_API diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
+                                              diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode,
+                                              bool count_include_pad);
+
+DIOPI_API diopiError_t diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size);
+
+DIOPI_API diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input,
+                                                         diopiSize_t output_size);
+
+DIOPI_API diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
+                                                      diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices);
+
+DIOPI_API diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
+                                                      diopiConstTensorHandle_t input);
+
+DIOPI_API diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size);
+
+/**
+ * @brief Applies a 1D pooling over the input, dispatching on mode and adaptive to the four pooling variants declared above.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[in] mode pooling mode, either "max" or "avg".
+ * @param[in] ksize size of the pooling region; ignored when adaptive is true.
+ * @param[in] stride stride of the pooling operation; ignored when adaptive is true.
+ * @param[in] padding implicit padding on both sides of the input; ignored when adaptive is true.
+ * @param[in] dilation spacing between pooling elements; used only when mode is "max".
+ * @param[in] ceil_mode when true, use ceil instead of floor to compute the output shape.
+ * @param[in] exclusive when true, exclude the padded elements from the average, i.e. the negation of count_include_pad; used only when mode is "avg".
+ * @param[in] adaptive when true, pool adaptively to output_size.
+ * @param[in] output_size the target output size; used only when adaptive is true.
+ * @param[out] out the output tensor.
+ */
+DIOPI_API diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size);
+
+/**
+ * @brief Computes the gradient of diopiPool1d() with respect to its input.
+ * @param[in] ctx Context environment.
+ * @param[in] grad_output the gradient w.r.t. the output of diopiPool1d().
+ * @param[in] input the input tensor of the forward pass.
+ * @param[in] indices the indices produced by the forward max pooling; used only when mode is "max".
+ * @param[out] grad_input the gradient w.r.t. input.
+ */
+DIOPI_API diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices);
+
 /**
  * @brief Applies 2D average-pooling operation in kH×kW regions by step size sH×sW steps.
  * @param[in] ctx Context environment.
@@ -388,11 +421,9 @@ DIOPI_API diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); -/** -TODO - */ -DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t kernel_size, - diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, bool exclusive, bool adaptive); +DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); + +DIOPI_API diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices); /** * @brief Applies a 2D adaptive average pooling over an input signal composed of several input planes. @@ -2879,6 +2910,9 @@ DIOPI_API diopiError_t diopiConvolution3dBackward(diopiContextHandle_t ctx, diop diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, int64_t groups); +DIOPI_API diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); + +DIOPI_API diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); /** * \brief Applies a 3D max pooling over an input signal composed of several input planes. * @param[in] ctx Context environment. @@ -2975,6 +3009,11 @@ DIOPI_API diopiError_t diopiAdaptiveMaxPool3dWithIndices(diopiContextHandle_t ct DIOPI_API diopiError_t diopiAdaptiveMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); +DIOPI_API diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); + +DIOPI_API diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices); + + /** * \brief Returns a new 1-D tensor which indexes the input tensor according to the boolean mask. * @param[in] ctx Context environment. 
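The diopiPool1d/2d/3d entry points introduced in this patch all follow the same dispatch rule: adaptive selects between fixed-kernel and adaptive pooling, mode selects max versus avg, exclusive is forwarded to the average paths as the negation of count_include_pad, and the max paths additionally consume dilation in the forward pass and indices in the backward pass. The sketch below restates that rule together with the output-length arithmetic the Python wrappers use; the ops mapping is a hypothetical stand-in for the impl::cuda entry points and is not part of the patch.

# A minimal sketch, assuming a hypothetical 'ops' mapping of pooling kernels.
import math

def dispatch_pool(ops, mode, adaptive, exclusive=False, **kw):
    # Mirrors the if/else ladder in diopiPool1d/2d/3d above.
    assert mode in ("avg", "max"), "mode must be 'avg' or 'max'"
    if not adaptive and mode == "max":
        return ops["max"](**kw)            # consumes dilation; backward also takes indices
    if not adaptive and mode == "avg":
        return ops["avg"](count_include_pad=not exclusive, **kw)
    if adaptive and mode == "max":
        return ops["adaptive_max"](**kw)   # consumes output_size only
    return ops["adaptive_avg"](**kw)       # consumes output_size only

def pool_out_len(l_in, k, s, p, d=1, ceil_mode=False):
    # Output length rule used by the Python wrappers: dilation enlarges the
    # effective kernel, then the usual (L - k_eff + 2p) / s + 1 formula applies.
    # The max-pool wrapper also clamps the result to at least 1, folded in here.
    k_eff = k + (k - 1) * (d - 1)
    n = (l_in - k_eff + 2 * p) / s + 1
    n = max(n, 1)
    return math.ceil(n) if ceil_mode else math.floor(n)

# For the (2, 16) avg_pool1d config entry with kernel_size=2, stride=2, padding=0:
assert pool_out_len(16, 2, 2, 0) == 8

The same two helpers describe the 2D and 3D variants added later in the series; only the number of spatial dimensions the shape loop runs over changes.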
From de320eb9ab2b6f8dd6193dcac308f51c8e7dde7d Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Tue, 15 Oct 2024 17:08:04 +0800 Subject: [PATCH 22/30] add part of pool1d in diopi_configs.py and diopi_functions.py --- diopi_test/python/configs/diopi_configs.py | 34 ++- .../python/conformance/customized_test.py | 6 + .../python/conformance/diopi_functions.py | 245 +++++++++++++++++- 3 files changed, 276 insertions(+), 9 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index f82517e4b..0b26c2c20 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -743,6 +743,32 @@ ), ), + 'pool1d': dict( + name=['pool1d'], + interface=['CustomizedTest'], + para=dict( + kernel_size=[2, 2, 6, 2, 3, 6, 5], + stride=[None, None, 3, 1, 2, None, 2], + padding=[0, 0, 2, 1, 0, 0, 2], + dilation=[0, 0, 0, 0, 0, 1, 3], + ceil_mode=[False, True, False, True, False, False, True], + count_include_pad=[True, True, False, True, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16), (5, 2, 16), (3, 4, 16), + (2, 1024, 14), (256, 28, 28), (3, 12), (5, 4, 17)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + 'avg_pool1d': dict( name=["avg_pool1d"], para=dict( @@ -866,12 +892,12 @@ ), ), - 'adaptive_max_pool2d_return_indices': dict( - name=["adaptive_max_pool2d"], + 'adaptive_max_pool1d_return_indices': dict( + name=["adaptive_max_pool1d"], atol=1e-5, rtol=1e-4, para=dict( - output_size=[5, (26, 40), (None, None), (0, 0)], + output_size=[5, 26, 3, 0], return_indices=[True, True, True, True] ), tensor_para=dict( @@ -879,7 +905,7 @@ args=[ { "ins": ['input'], - "shape": ((3, 16, 8), (4, 7, 27, 39), (4, 16, 12), (4, 16, 12)), + "shape": ((3, 16), (4, 7, 27), (4, 16), (4, 16)), "dtype": [np.float32, np.float16, np.float64], }, ] diff --git a/diopi_test/python/conformance/customized_test.py b/diopi_test/python/conformance/customized_test.py index 9326c231a..92341dd1c 100644 --- a/diopi_test/python/conformance/customized_test.py +++ b/diopi_test/python/conformance/customized_test.py @@ -824,3 +824,9 @@ def nll_loss_v2(input, target, weight=None, ignore_index=-100, reduction="mean") input, target, weight, None, ignore_index, None, reduction ) return out + + def pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive): + if mode == "avg" and adaptive == False: + return torch.avg_pool1d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + elif mode == "max" and adaptive == False: + return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode) diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 7c7d7d3eb..6e2cd83cf 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -1075,6 +1075,241 @@ def conv2d( check_returncode(ret) return out +def pool1d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True, output_size=0, mode="avg", adaptive=False) -> Tensor: + + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + 
sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + if isinstance(output_size, int): + output_size = (output_size,) + + if mode == "avg" and adaptive == False: + for i in range(-1, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "max" and adaptive == False: + for i in range(-1, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "avg" and adaptive == True: + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + else: + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + + +def pool1d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, **kwargs,) -> Tensor: + + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input 
= raw_like(input) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + + + if mode == "avg" and adaptive == False: + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == False: + + _, indices = max_pool1d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + + + + + def avg_pool1d( input, kernel_size, @@ -1143,13 +1378,13 @@ def avg_pool1d_backward( assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" grad_input = raw_like(input) if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) + kernel_size = (kernel_size,) if stride is None: stride = kernel_size if isinstance(stride, int): - stride = (stride, stride) + stride = (stride,) if isinstance(padding, int): - padding = (padding, padding) + padding = (padding,) stride = Sizes(list(stride)) padding = Sizes(list(padding)) @@ -1310,7 +1545,7 @@ def adaptive_avg_pool1d(input, output_size): sizeO.append(sizeI[1]) if isinstance(output_size, int): - output_size = (output_size, output_size) + output_size = (output_size,) for i in range(-1, 0): if output_size[i] is None: @@ -1338,7 +1573,7 @@ def adaptive_max_pool1d(input, output_size, return_indices=False): sizeO.append(sizeI[1]) if isinstance(output_size, int): - output_size = (output_size, output_size) + output_size = (output_size,) for i in range(-1, 0): if output_size[i] is None: From d0b25fa69e787b4cb46998994678604533e7c23d Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Tue, 15 Oct 2024 18:56:17 +0800 Subject: [PATCH 23/30] finish all of pool1d, pool2d and pool3d --- diopi_test/python/configs/diopi_configs.py | 72 +- .../python/conformance/customized_test.py | 30 +- .../python/conformance/diopi_functions.py | 1003 ++++++++++++++--- 3 files changed, 924 insertions(+), 181 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 0b26c2c20..f737e2798 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -747,14 +747,15 @@ name=['pool1d'], interface=['CustomizedTest'], para=dict( - kernel_size=[2, 2, 6, 2, 3, 6, 5], - stride=[None, None, 3, 1, 2, None, 2], - padding=[0, 0, 2, 1, 0, 0, 2], - dilation=[0, 0, 0, 0, 0, 1, 3], - ceil_mode=[False, True, False, True, False, False, True], - count_include_pad=[True, True, False, True, 
False, False, False], - mode=["avg", "avg", "avg", "avg", "avg", "max", "max"], - adaptive=[False, False, False, False, False, False, False], + kernel_size=[2, 2, 6, 2, 3, 6, 5, 0, 0, 0, 0], + stride=[None, None, 3, 1, 2, None, 2, 0, 0, 0, 0], + padding=[0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0], + dilation=[0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0], + ceil_mode=[False, True, False, True, False, False, True, False, False, False, False], + count_include_pad=[True, True, False, True, False, False, False, False, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False, True, True, True, True], + output_size=[0, 0, 0, 0, 0, 0, 0, 5, 26, 3, 2] ), tensor_para=dict( args=[ @@ -762,7 +763,7 @@ "ins": ['input'], "requires_grad": [True], "shape": ((2, 16), (5, 2, 16), (3, 4, 16), - (2, 1024, 14), (256, 28, 28), (3, 12), (5, 4, 17)), + (2, 1024, 14), (256, 28, 28), (3, 12), (5, 4, 17), (3, 16), (4, 7, 27), (4, 16), (288, 33)), "dtype": [np.float16, np.float32, np.float64], }, ] @@ -912,6 +913,32 @@ ), ), + 'pool2d': dict( + name=['pool2d'], + interface=['CustomizedTest'], + para=dict( + kernel_size=[2, 2, 6, 2, 3, 6, 5, 0, 0, 0, 0], + stride=[None, None, 3, 1, 2, None, 2, 0, 0, 0, 0], + padding=[0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0], + dilation=[0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0], + ceil_mode=[False, True, False, True, False, False, True, False, False, False, False], + count_include_pad=[True, True, False, True, False, False, False, False, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False, True, True, True, True], + output_size=[0, 0, 0, 0, 0, 0, 0, 5, 26, 3, 2] + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16, 16), (5, 2, 16, 16), (3, 4, 16, 16), + (2, 1024, 14, 16), (256, 28, 28, 16), (3, 12, 12), (5, 4, 17, 17), (3, 16, 16), (4, 7, 27, 27), (4, 16, 17), (288, 33, 33)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), 'avg_pool2d': dict( name=["avg_pool2d"], para=dict( @@ -6100,6 +6127,33 @@ ), ), + 'pool3d': dict( + name=['pool3d'], + interface=['CustomizedTest'], + para=dict( + kernel_size=[2, 2, 6, 2, 3, 6, 5, 0, 0, 0, 0], + stride=[None, None, 3, 1, 2, None, 2, 0, 0, 0, 0], + padding=[0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0], + dilation=[0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0], + ceil_mode=[False, True, False, True, False, False, True, False, False, False, False], + count_include_pad=[True, True, False, True, False, False, False, False, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False, True, True, True, True], + output_size=[0, 0, 0, 0, 0, 0, 0, 5, 26, 3, 2] + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16, 16, 16), (5, 2, 16, 16, 17), (3, 4, 16, 16, 18), + (2, 1024, 14, 16, 20), (256, 28, 28, 16, 17), (3, 12, 12, 18), (5, 4, 17, 17, 21), (3, 16, 16, 20), (4, 7, 27, 27, 26), (4, 16, 17, 23), (288, 33, 33, 35)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + 'avg_pool3d': dict( name=["avg_pool3d"], para=dict( diff --git a/diopi_test/python/conformance/customized_test.py b/diopi_test/python/conformance/customized_test.py index 92341dd1c..7fa7f899b 100644 --- a/diopi_test/python/conformance/customized_test.py +++ 
b/diopi_test/python/conformance/customized_test.py @@ -825,8 +825,32 @@ def nll_loss_v2(input, target, weight=None, ignore_index=-100, reduction="mean") ) return out - def pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive): + def pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive, output_size): if mode == "avg" and adaptive == False: - return torch.avg_pool1d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + return torch.nn.functional.avg_pool1d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) elif mode == "max" and adaptive == False: - return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode) + return torch.nn.functional.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices=False) + elif mode == "avg" and adaptive == True: + return torch.nn.functional.adaptive_avg_pool1d(input, output_size) + elif mode == "max" and adaptive == True: + return torch.nn.functional.adaptive_max_pool1d(input, output_size, return_indices=False) + + def pool2d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive, output_size): + if mode == "avg" and adaptive == False: + return torch.nn.functional.avg_pool2d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + elif mode == "max" and adaptive == False: + return torch.nn.functional.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices=False) + elif mode == "avg" and adaptive == True: + return torch.nn.functional.adaptive_avg_pool2d(input, output_size) + elif mode == "max" and adaptive == True: + return torch.nn.functional.adaptive_max_pool2d(input, output_size, return_indices=False) + + def pool3d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive, output_size): + if mode == "avg" and adaptive == False: + return torch.nn.functional.avg_pool3d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + elif mode == "max" and adaptive == False: + return torch.nn.functional.max_pool3d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices=False) + elif mode == "avg" and adaptive == True: + return torch.nn.functional.adaptive_avg_pool3d(input, output_size) + elif mode == "max" and adaptive == True: + return torch.nn.functional.adaptive_max_pool3d(input, output_size, return_indices=False) diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 6e2cd83cf..2b2201c81 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -1079,6 +1079,7 @@ def pool1d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=F sizeI = input.size().data assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" sizeO = [] sizeO.append(sizeI[0]) if len(sizeI) == 3: @@ -1175,6 +1176,11 @@ def pool1d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=F else: sizeO.append(output_size[i]) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) output_size = Sizes(list([sizeO[-1],])) @@ 
-1204,6 +1210,11 @@ def pool1d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=F else: sizeO.append(output_size[i]) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) output_size = Sizes(list([sizeO[-1],])) @@ -1228,21 +1239,24 @@ def pool1d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=F return out -def pool1d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, **kwargs,) -> Tensor: +def pool1d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, output_size=0, **kwargs,) -> Tensor: assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" grad_input = raw_like(input) if isinstance(kernel_size, int): kernel_size = (kernel_size,) - if stride is None: - stride = kernel_size - if isinstance(stride, int): - stride = (stride,) - if isinstance(padding, int): - padding = (padding,) - if isinstance(dilation, int): - dilation = (dilation,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + if isinstance(output_size, int): + output_size = (output_size, ) if mode == "avg" and adaptive == False: @@ -1304,12 +1318,63 @@ def pool1d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dil ) check_returncode(ret) return {"input": grad_input} if grad_input.requires_grad else {} - - - - + elif mode == "avg" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + indices = raw_like(input) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + _, indices = adaptive_max_pool1d(input, output_size, return_indices=True) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + def avg_pool1d( input, kernel_size, @@ -1619,88 +1684,11 @@ def adaptive_max_pool1d_backward(input, grad_outputs, output_size, **kwargs) -> check_returncode(ret) return {"input": grad_input} if grad_input.requires_grad else {} -def avg_pool2d( - input, - kernel_size, - stride=None, - padding=0, - ceil_mode=False, - count_include_pad=True, - divisor_override=None, -) -> Tensor: - sizeI = input.size().data - assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" 
- - sizeO = [] - sizeO.append(sizeI[0]) - if len(sizeI) == 4: - sizeO.append(sizeI[1]) - - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if stride is None: - stride = kernel_size - if isinstance(stride, int): - stride = (stride, stride) - if isinstance(padding, int): - padding = (padding, padding) - - for i in range(-2, 0): - if ceil_mode: - sizeO.append( - math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 - ) - else: - sizeO.append( - math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 - ) - - stride = Sizes(list(stride)) - padding = Sizes(list(padding)) - kernel_size = Sizes(list(kernel_size)) - nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None - out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) - - func = check_function("diopiAvgPool2d") - if divisor_override: - ret = func( - input.context(), - out, - input, - kernel_size, - stride, - padding, - ceil_mode, - count_include_pad, - divisor_override, - ) - else: - ret = func( - input.context(), - out, - input, - kernel_size, - stride, - padding, - ceil_mode, - count_include_pad, - ) - check_returncode(ret) - return out - +def pool2d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True, output_size=0, mode="avg", adaptive=False) -> Tensor: -def max_pool2d( - input, - kernel_size, - stride=None, - padding=0, - dilation=1, - ceil_mode=False, - return_indices=False, -) -> Tensor: sizeI = input.size().data assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" - + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" sizeO = [] sizeO.append(sizeI[0]) if len(sizeI) == 4: @@ -1716,87 +1704,295 @@ def max_pool2d( padding = (padding, padding) if isinstance(dilation, int): dilation = (dilation, dilation) + if isinstance(output_size, int): + output_size = (output_size, output_size) - for i in range(-2, 0): - tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) - tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 - tmp_size = tmp_size if tmp_size > 1 else 1 - if ceil_mode: - sizeO.append(math.ceil(tmp_size)) - else: - sizeO.append(math.floor(tmp_size)) - - stride = Sizes(list(stride)) - padding = Sizes(list(padding)) - kernel_size = Sizes(list(kernel_size)) - dilation = Sizes(list(dilation)) - nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None - out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) - - if not return_indices: - func = check_function("diopiMaxPool2d") + if mode == "avg" and adaptive == False: + for i in range(-2, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool2d") ret = func( - input.context(), - out, - input, - kernel_size, - stride, - padding, - dilation, - ceil_mode, + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, ) check_returncode(ret) return 
out - else: - func = check_function("diopiMaxPool2dWithIndices") - nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None - indices = Tensor( - sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride - ) + elif mode == "max" and adaptive == False: + for i in range(-2, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool2d") ret = func( - input.context(), - out, - indices, - input, - kernel_size, - stride, - padding, - dilation, - ceil_mode, + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, ) check_returncode(ret) - return out, indices - + return out + elif mode == "avg" and adaptive == True: + for i in range(-2, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) -def adaptive_avg_pool2d(input, output_size): - sizeI = input.size().data - assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool2d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + else: + for i in range(-2, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) - sizeO = [] - sizeO.append(sizeI[0]) - if len(sizeI) == 4: - sizeO.append(sizeI[1]) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool2d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + + +def pool2d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, output_size=0, **kwargs,) -> Tensor: + + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + grad_input = raw_like(input) + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride) + if 
isinstance(padding, int): + padding = (padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation) if isinstance(output_size, int): output_size = (output_size, output_size) - for i in range(-2, 0): - if output_size[i] is None: - sizeO.append(sizeI[i]) - else: - sizeO.append(output_size[i]) - - nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None - out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) - output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + if mode == "avg" and adaptive == False: + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == False: - func = check_function("diopiAdaptiveAvgPool2d") - ret = func(input.context(), out, input, output_size) - check_returncode(ret) - return out + _, indices = max_pool2d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "avg" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + indices = raw_like(input) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) -def adaptive_max_pool2d(input, output_size, return_indices=False): + _, indices = adaptive_max_pool2d(input, output_size, return_indices=True) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def avg_pool2d( + input, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, +) -> Tensor: sizeI = input.size().data assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" @@ -1805,27 +2001,196 @@ def adaptive_max_pool2d(input, output_size, return_indices=False): if len(sizeI) == 4: sizeO.append(sizeI[1]) - if isinstance(output_size, int): - output_size = (output_size, output_size) + if isinstance(kernel_size, int): + 
kernel_size = (kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride) + if isinstance(padding, int): + padding = (padding, padding) for i in range(-2, 0): - if output_size[i] is None: - sizeO.append(sizeI[i]) + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) else: - sizeO.append(output_size[i]) + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) - output_size = Sizes(list([sizeO[-2], sizeO[-1]])) - if return_indices: - func = check_function("diopiAdaptiveMaxPool2dWithIndices") - nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None - indices = Tensor( - sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + func = check_function("diopiAvgPool2d") + if divisor_override: + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override, ) - ret = func(input.context(), out, indices, input, output_size) - check_returncode(ret) + else: + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + check_returncode(ret) + return out + + +def max_pool2d( + input, + kernel_size, + stride=None, + padding=0, + dilation=1, + ceil_mode=False, + return_indices=False, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 4: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride) + if isinstance(padding, int): + padding = (padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation) + + for i in range(-2, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + if not return_indices: + func = check_function("diopiMaxPool2d") + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + ) + check_returncode(ret) + return out + else: + func = check_function("diopiMaxPool2dWithIndices") + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + indices = Tensor( + sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + ) + ret = func( + input.context(), + out, + indices, + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + ) + check_returncode(ret) + return out, indices + + +def adaptive_avg_pool2d(input, output_size): + sizeI = input.size().data + assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" + + sizeO = [] + 
sizeO.append(sizeI[0]) + if len(sizeI) == 4: + sizeO.append(sizeI[1]) + + if isinstance(output_size, int): + output_size = (output_size, output_size) + + for i in range(-2, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + func = check_function("diopiAdaptiveAvgPool2d") + ret = func(input.context(), out, input, output_size) + check_returncode(ret) + return out + + +def adaptive_max_pool2d(input, output_size, return_indices=False): + sizeI = input.size().data + assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 4: + sizeO.append(sizeI[1]) + + if isinstance(output_size, int): + output_size = (output_size, output_size) + + for i in range(-2, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + if return_indices: + func = check_function("diopiAdaptiveMaxPool2dWithIndices") + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + indices = Tensor( + sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + ) + ret = func(input.context(), out, indices, input, output_size) + check_returncode(ret) return out, indices else: func = check_function("diopiAdaptiveMaxPool2d") @@ -4818,6 +5183,306 @@ def normalize_backward(grad_outputs, input, p, dim, eps): ) return {k: v for k, v in out.items() if v.requires_grad} +def pool3d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True, output_size=0, mode="avg", adaptive=False) -> Tensor: + + sizeI = input.size().data + assert len(sizeI) == 5 or len(sizeI) == 4, "input must be 4d or 5d tensors" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 5: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + if mode == "avg" and adaptive == False: + for i in range(-3, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out 
+ elif mode == "max" and adaptive == False: + for i in range(-3, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "avg" and adaptive == True: + for i in range(-3, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-3], sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + else: + for i in range(-3, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-3], sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + + +def pool3d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, output_size=0, **kwargs,) -> Tensor: + + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + grad_input = raw_like(input) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + + if mode == "avg" and adaptive == False: + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + 
grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == False: + + _, indices = max_pool3d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "avg" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + _, indices = adaptive_max_pool3d(input, output_size, return_indices=True) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + def avg_pool3d( input, kernel_size, From d106821175dd8473e2a2e7b4e68f6334ffbc660c Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 16 Oct 2024 15:56:43 +0800 Subject: [PATCH 24/30] update layer_normGB, add instance_norm_backward --- diopi_test/python/configs/diopi_configs.py | 37 ++++++++ .../python/conformance/customized_test.py | 4 + .../python/conformance/diopi_functions.py | 56 +++++++++++ impl/torch/functions/functions.cpp | 92 +++++++++++++++---- proto/include/diopi/functions.h | 2 + 5 files changed, 175 insertions(+), 16 deletions(-) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index f737e2798..8eac634d5 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -7896,8 +7896,45 @@ ) ), + 'instance_norm': dict( + name=["instance_norm"], + dtype=[np.float32, np.float64], + atol=1e-5, + atol_half=1e-1, + rtol_half=1e-2, + para=dict( + eps=[1e-5, 1e-5, 1e-12, 0, -1e-5, 2], + ), + tensor_para=dict( + args=[ + { + "ins": ["input"], + "shape": ((3, 5, 3, 5), (2, 16, 128), (2, 64, 16), (2, 32, 16), + (4, 5, 3, 5), (5, 16, 128)), + 'gen_fn':'Genfunc.randn', + 'requires_grad': [True], + }, + { + "ins": ["weight"], + "shape": ((5,), (16,), (64,), (32,), + (5,), (16,)), + 'gen_fn':'Genfunc.randn', + 'requires_grad': [True], + }, + { + "ins": ["bias"], + "shape": ((5,), (16,), (64,), (32,), + (5,), (16,)), + 'gen_fn':'Genfunc.randn', + 'requires_grad': [True], + }, + ] + 
) + ), + 'layer_normGB': dict( name=["layer_normGB"], + interface=['CustomizedTest'], dtype=[np.float32, np.float64, np.float16], atol=1e-5, atol_half=1e-1, diff --git a/diopi_test/python/conformance/customized_test.py b/diopi_test/python/conformance/customized_test.py index 7fa7f899b..a31990933 100644 --- a/diopi_test/python/conformance/customized_test.py +++ b/diopi_test/python/conformance/customized_test.py @@ -854,3 +854,7 @@ def pool3d(input, kernel_size, stride, padding, dilation, ceil_mode, count_inclu return torch.nn.functional.adaptive_avg_pool3d(input, output_size) elif mode == "max" and adaptive == True: return torch.nn.functional.adaptive_max_pool3d(input, output_size, return_indices=False) + + def layer_normGB(input, weight, bias, eps, normalized_shape): + return torch.nn.functional.layer_norm(input=input, weight=weight, bias=bias, eps=eps, normalized_shape=normalized_shape) + diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 2b2201c81..a109481aa 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -4981,6 +4981,62 @@ def group_norm_backward( check_returncode(ret) return {k: v for k, v in out.items() if v.requires_grad} +def instance_norm(input, axis=2, weight=None, bias=None, eps=1e-05): + weight = None if weight is None else weight + bias = None if bias is None else bias + + out = raw_like(input) + func = check_function("diopiInstanceNorm") + ret = func( + input.context(), + out, + input, + axis, + weight, + bias, + eps, + ) + check_returncode(ret) + return out + +def instance_norm_backward(input, grad_outputs, axis=2, weight=None, bias=None, eps=1e-05, **kwargs): + grad_input = raw_like(input) + out = {"input": grad_input} + + if weight is None: + weight = None + grad_weight_capsule = None + else: + grad_weight = raw_like(weight) + weight = weight + grad_weight_capsule = grad_weight + out["weight"] = grad_weight + + if bias is None: + bias = None + grad_bias_capsule = None + else: + grad_bias = raw_like(bias) + bias = bias + grad_bias_capsule = grad_bias + out["bias"] = grad_bias + + func = check_function("diopiInstanceNormBackward") + ret = func( + input.context(), + grad_input, + grad_weight_capsule, + grad_bias_capsule, + grad_outputs[0], + input, + weight, + bias, + axis, + eps, + ) + check_returncode(ret) + return {k: v for k, v in out.items() if v.requires_grad} + def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05): sizeI = input.size().data diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 73c52332b..8783908b2 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -4179,30 +4179,90 @@ diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandl diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps) { impl::aten::setCurStream(ctx); - diopiSize_t input_size; - diopiGetTensorShape(input, &input_size); - std::vector array(input_size.data, input_size.data + input_size.len); - - int64_t batch_channel = 1; - for (int i = 0; i < axis; i++) { - batch_channel *= array[i]; + auto atInput = impl::aten::buildATen(input); + auto atScale = impl::aten::buildATen(scale); + auto atBias = impl::aten::buildATen(bias); + auto input_size = atInput.sizes().vec(); + + 
std::vector reshaped_size = {1}; + int64_t shape = 1; + for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { + shape = shape * input_size[i]; } + reshaped_size.push_back(shape); + for (int i = axis; i < input_size.size(); i++) { + reshaped_size.push_back(input_size[i]); + } + + auto atInputReshaped = atInput.contiguous().view(reshaped_size); + auto atScale_ = atScale.repeat(input_size[0]); + auto atBias_ = atBias.repeat(input_size[0]); + + // auto atRunningMean = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); + // auto atRunningStd = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); + + + auto atOutput = CALL_ATEN_FUNC(batch_norm, atInputReshaped, atScale_, atBias_, c10::nullopt, c10::nullopt, true, 0.0, eps, false); + impl::aten::updateATen2Tensor(ctx, atOutput, output); + return diopiSuccess; +} + +diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps) { + impl::aten::setCurStream(ctx); + + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + auto atScale = impl::aten::buildATen(scale); + auto atBias = impl::aten::buildATen(bias); + auto input_size = atInput.sizes().vec(); - std::vector array2 = {1, batch_channel}; + auto atScale_ = atScale.repeat(input_size[0]); - for (int i = axis; i < array.size(); i++) { - array2.push_back(array[i]); + std::vector reshaped_size = {1}; + int64_t shape = 1; + for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { + shape = shape * input_size[i]; + } + reshaped_size.push_back(shape); + for (int i = axis; i < input_size.size(); i++) { + reshaped_size.push_back(input_size[i]); } - diopiSize_t reshaped_size; - reshaped_size.data = array2.data(); - reshaped_size.len = static_cast(array2.size()); + std::vector mean_dim = {0}; + for (int i = 2; i < reshaped_size.size(); i++) { + mean_dim.push_back(i); + } - // input->reset_shape(reshaped_size); + auto atInputReshaped = atInput.contiguous().view(reshaped_size); + auto atGradOutputReshaped = atGradOutput.contiguous().view(reshaped_size); + + auto atMean = torch::mean(atInputReshaped, mean_dim); + auto atStd = torch::std(atInputReshaped, mean_dim); + + auto grad_input_mask = std::array{grad_input != nullptr, grad_scale != nullptr, grad_bias != nullptr}; + + auto atOut = at::native_batch_norm_backward(atGradOutputReshaped, atInputReshaped, atScale_, c10::nullopt, c10::nullopt, atMean, atStd, true, eps, grad_input_mask); + + if (grad_input) { + impl::aten::updateATen2Tensor(ctx, std::get<0>(atOut), grad_input); + } + if (grad_scale) { + auto atGradScaleOrigin = torch::zeros_like(atScale); + for (int i = 0; i < std::get<1>(atOut).size(0); i++) { + atGradScaleOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<1>(atOut)[i]; + } + impl::aten::updateATen2Tensor(ctx, atGradScaleOrigin, grad_scale); + } + if (grad_bias) { + auto atGradBiasOrigin = torch::zeros_like(atBias); + for (int i = 0; i < std::get<2>(atOut).size(0); i++) { + atGradBiasOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<2>(atOut)[i]; + } + impl::aten::updateATen2Tensor(ctx, atGradBiasOrigin, grad_bias); + } + - // diopiBatchNorm(ctx, output, nullptr, nullptr, input, scale, bias, nullptr, nullptr, true, 0.0, eps); - 
return diopiSuccess; } diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index d6bb71a41..ccfe1d77e 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -3621,6 +3621,8 @@ DIOPI_API diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiT DIOPI_API diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps); +DIOPI_API diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps); + DIOPI_API diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); DIOPI_API diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); From 87df8ec28a83786c2450c295aab1ffeaea1501e9 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 16 Oct 2024 18:35:50 +0800 Subject: [PATCH 25/30] add conv_transpose3d --- diopi_test/python/configs/diopi_configs.py | 43 ++++++ .../python/conformance/diopi_functions.py | 128 ++++++++++++++++++ impl/torch/functions/functions.cpp | 86 ++++++++++++ proto/include/diopi/functions.h | 8 ++ 4 files changed, 265 insertions(+) diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 8eac634d5..009322f27 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -5618,6 +5618,49 @@ ), ), + 'conv_transpose3d': dict( + name=["conv_transpose3d"], + atol=1e-3, + rtol=1e-3, + atol_half=1e2, + rtol_half=1e2, + para=dict( + stride=[1, 1, 2, 1, 2, (2, 2, 2), 1], + padding=[0, 0, 1, 0, 1, (1, 0, 1), 0], + output_padding=[0, 0, 1, 0, 1, (0, 1, 1), 0], + groups=[1, 1, 8, 1, 1, 1, 1], + dilation=[1, 1, 2, 1, 2, (1, 2, 2), 1], + ), + tensor_para=dict( + args=[ + { + "ins": ["input"], + "requires_grad": [True], + "shape": ((6, 16, 20, 8, 5), + (2, 256, 14, 14, 5), (2, 128, 32, 32, 4), + (2, 64, 160, 160, 5), (2, 64, 320, 320, 5), (2, 64, 320, 320, 5), + (0, 16, 20, 8, 5)), + "dtype": [np.float32, np.float64, np.float16], + }, + { + "ins": ["weight"], + "requires_grad": [True], + "shape": ((16, 2, 12, 2, 2), + (256, 256, 2, 2, 2), (128, 128, 4, 4, 4), + (64, 64, 2, 2, 2), (64, 1, 2, 2, 2), (64, 1, 2, 2, 2), + (16, 2, 12, 2, 2)), + "dtype": [np.float32, np.float64, np.float16], + }, + { + "ins": ["bias"], + "requires_grad": [True], + "shape": (None, (256,), None, (64,), (1,), (1,), None), + "dtype": [np.float32, np.float64, np.float16], + }, + ] + ), + ), + 'unfold': dict( name=["unfold"], interface=['torch.Tensor'], diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index a109481aa..60db07928 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -3549,6 +3549,70 @@ def conv_transpose2d_backward( check_returncode(ret) return {k: v for k, v in out.items() if v.requires_grad} +def 
conv_transpose3d_backward( + input, + grad_outputs, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + output_padding=0, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + sizeI = input.size().data + sizeW = weight.size().data + assert len(sizeI) == 5 and len(sizeW) == 5, "input and weight must be 5d tensors" + + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + if isinstance(output_padding, int): + output_padding = (output_padding, output_padding, output_padding) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + dilation = Sizes(list(dilation)) + output_padding = Sizes(list(output_padding)) + + grad_input = raw_like(input) + grad_weight = raw_like(weight) + out = {"input": grad_input, "weight": grad_weight} + + if bias is None: + grad_bias = None + sizeBias = None + else: + gradBias = raw_like(bias) + grad_bias = gradBias + sizeBias = bias.size() + out.update({"bias": grad_bias}) + + func = check_function("diopiConvTranspose3dBackward") + ret = func( + input.context(), + grad_input, + grad_weight, + grad_bias, + grad_outputs[0], + input, + weight, + sizeBias, + stride, + padding, + dilation, + output_padding, + groups, + ) + check_returncode(ret) + return {k: v for k, v in out.items() if v.requires_grad} + + def hardtanh_backward( input, grad_outputs, min_val=-1.0, max_val=1.0, **kwargs @@ -4303,6 +4367,70 @@ def conv_transpose2d( check_returncode(ret) return out +def conv_transpose3d( + input, + weight, + bias=None, + stride=1, + padding=0, + output_padding=0, + groups=1, + dilation=1, +) -> Tensor: + if bias is not None: + assert isinstance(bias, Tensor), "bias must be a Tensor" + + sizeI = input.size().data + sizeW = list(weight.size().data) + assert len(sizeI) == 5 and len(sizeW) == 5, "input and weight must be 5d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + sizeO.append(sizeW[1] * groups) + + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(output_padding, int): + output_padding = (output_padding, output_padding, output_padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + for i in range(-3, 0): + # equivalent kernel size + sizeW[i] = (sizeW[i] - 1) * dilation[i] + sizeO.append( + int( + (sizeI[i] - 1) * stride[i] + - 2 * padding[i] + + sizeW[i] + + output_padding[i] + ) + + 1 + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + output_padding = Sizes(list(output_padding)) + dilation = Sizes(list(dilation)) + + out = Tensor(sizeO, input.get_dtype()) + func = check_function("diopiConvTranspose3d") + ret = func( + input.context(), + out, + input, + weight, + bias, + stride, + padding, + output_padding, + groups, + dilation, + ) + check_returncode(ret) + return out + + def cumsum(input, dim, dtype=None): assert isinstance(dim, int), "dim should be int" diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index 8783908b2..213b7dbd3 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -2788,6 +2788,75 @@ diopiError_t diopiConvTranspose2dBackward(diopiContextHandle_t ctx, diopiTensorH return diopiSuccess; } +diopiError_t diopiConvTranspose3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, 
diopiTensorHandle_t grad_weight,
+                                           diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input,
+                                           diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding,
+                                           diopiSize_t dilation, diopiSize_t output_padding, int64_t groups) {
+    impl::aten::setCurStream(ctx);
+    auto atInput = impl::aten::buildATen(input);
+    auto atGrad = impl::aten::buildATen(grad_output);
+    auto atWeight = impl::aten::buildATen(weight);
+    auto atStride = impl::aten::buildAtIntArray(stride);
+    auto atPadding = impl::aten::buildAtIntArray(padding);
+    auto atOutputPadding = impl::aten::buildAtIntArray(output_padding);
+    auto atDilation = impl::aten::buildAtIntArray(dilation);
+#ifdef USE_HIP
+    auto grad_input_mask = std::array{true, true, false};
+    auto atOut = CALL_ATEN_FUNC(miopen_convolution_transpose_backward,
+                                atInput,
+                                atGrad,
+                                atWeight,
+                                atPadding,
+                                atOutputPadding,
+                                atStride,
+                                atDilation,
+                                groups,
+                                false,
+                                false,
+                                grad_input_mask);
+    // write the returned (gradInput, gradWeight) pair back to the DIOPI handles
+    diopi_tensor_list vecOut = {grad_input, grad_weight};
+    impl::aten::updateATen2Tensor(ctx, atOut, vecOut);
+    if (bias_sizes != nullptr && grad_bias != nullptr) {
+        auto atGradBias = impl::aten::buildATen(grad_bias);
+        at::Tensor atTmp = atGrad;
+        int64_t size = atGrad.dim() - 1;
+        while (atGradBias.dim() != size) {
+            atTmp = at::sum(atTmp, -1, false);
+            size -= 1;
+        }
+        CALL_ATEN_CUDA_FUNC(sum_out, atGradBias, atTmp, 0, false);
+    }
+#else
+    if (grad_input && grad_weight && grad_bias && bias_sizes) {
+        auto atBiasSizes = impl::aten::buildAtIntArray(bias_sizes);
+        auto atGradInput = impl::aten::buildATen(grad_input);
+        auto atGradWeight = impl::aten::buildATen(grad_weight);
+        auto atGradBias = impl::aten::buildATen(grad_bias);
+        auto tempOut = CALL_ATEN_CUDA_FUNC(
+            convolution_backward, atGrad, atInput, atWeight, atBiasSizes, atStride, atPadding, atDilation, true, atOutputPadding, groups, {true, true, true});
+        at::native::copy_(atGradInput, std::get<0>(tempOut), true);
+        at::native::copy_(atGradWeight, std::get<1>(tempOut), true);
+        at::native::copy_(atGradBias, std::get<2>(tempOut), true);
+    } else {
+        auto grad_inputs = at::convolution_backward(
+            atGrad, atInput, atWeight, c10::nullopt, atStride, atPadding, atDilation, true, atOutputPadding, groups, {true, true, false});
+        impl::aten::updateATen2Tensor(ctx, std::get<0>(grad_inputs), grad_input);
+        impl::aten::updateATen2Tensor(ctx, std::get<1>(grad_inputs), grad_weight);
+        if (bias_sizes != nullptr && grad_bias != nullptr) {
+            auto atGradBias = impl::aten::buildATen(grad_bias);
+            at::Tensor atTmp = atGrad;
+            int64_t size = atGrad.dim() - 1;
+            while (atGradBias.dim() != size) {
+                atTmp = at::sum(atTmp, -1, false);
+                size -= 1;
+            }
+            CALL_ATEN_CUDA_FUNC(sum_out, atGradBias, atTmp, 0, false);
+        }
+    }
+#endif
+
+    return diopiSuccess;
+}
+
 diopiError_t diopiEmbeddingBackward(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t grad, diopiConstTensorHandle_t indices,
                                     int64_t numWeights, int64_t paddingIdx, bool scaleGradByFreq, bool sparse) {
     impl::aten::setCurStream(ctx);
@@ -3413,6 +3482,23 @@ diopiError_t diopiConvTranspose2d(diopiContextHandle_t ctx, diopiTensorHandle_t
     return diopiSuccess;
 }
 
+diopiError_t diopiConvTranspose3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight,
+                                  diopiConstTensorHandle_t bias, diopiSize_t stride, diopiSize_t padding, diopiSize_t output_padding, int64_t groups,
+                                  diopiSize_t dilation) {
+    impl::aten::setCurStream(ctx);
+    auto atInput = impl::aten::buildATen(input);
+    auto atWeight = 
impl::aten::buildATen(weight); + auto atBias = impl::aten::buildATen(bias); + auto atStride = impl::aten::buildAtIntArray(stride); + auto atPadding = impl::aten::buildAtIntArray(padding); + auto atOutputPadding = impl::aten::buildAtIntArray(output_padding); + auto atDilation = impl::aten::buildAtIntArray(dilation); + auto atOut = CALL_ATEN_FUNC(conv_transpose3d, atInput, atWeight, atBias, atStride, atPadding, atOutputPadding, groups, atDilation); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index ccfe1d77e..7a1916f19 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -2699,6 +2699,14 @@ DIOPI_API diopiError_t diopiConvTranspose2dBackward(diopiContextHandle_t ctx, di diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, diopiSize_t output_padding, int64_t groups); +DIOPI_API diopiError_t diopiConvTranspose3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t bias, diopiSize_t stride, diopiSize_t padding, diopiSize_t output_padding, int64_t groups, + diopiSize_t dilation); + +DIOPI_API diopiError_t diopiConvTranspose3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, diopiSize_t output_padding, int64_t groups); /** * @brief Extracts sliding local blocks from a batched input tensor. * @param[in] ctx Context environment. 
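
A quick sanity check for the output-shape arithmetic in the conv_transpose3d
patch above: the Python wrapper builds sizeO, dimension by dimension, from
(in - 1) * stride - 2 * padding + dilation * (kernel - 1) + output_padding + 1.
The sketch below is illustrative only and not part of the patch series; it
compares that formula against plain PyTorch, and the helper name expected_out
is made up for this note.

    import torch
    import torch.nn.functional as F

    # Mirrors the sizeO computation in the conv_transpose3d wrapper.
    def expected_out(n, k, stride, padding, output_padding, dilation):
        return (n - 1) * stride - 2 * padding + dilation * (k - 1) + output_padding + 1

    x = torch.randn(2, 16, 8, 8, 8)
    w = torch.randn(16, 4, 3, 3, 3)  # (C_in, C_out // groups, kD, kH, kW)
    y = F.conv_transpose3d(x, w, stride=2, padding=1, output_padding=1, dilation=2)
    assert list(y.shape[2:]) == [expected_out(8, 3, 2, 1, 1, 2)] * 3

Note that the equivalent-kernel-size step in the wrapper, (k - 1) * dilation
followed by the trailing + 1, is the same dilation * (kernel - 1) term written
in two pieces.
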
From d6eac979b2c8fff627dcf768f2d1ff294d296381 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 16 Oct 2024 18:45:48 +0800 Subject: [PATCH 26/30] fix clang format --- impl/torch/functions/functions.cpp | 583 +++++++++++++++-------------- proto/include/diopi/functions.h | 88 +++-- 2 files changed, 353 insertions(+), 318 deletions(-) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index db813d31a..e4fd89d24 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -10,7 +10,6 @@ #include #include - // clang-format off // NOTE: this header does not include all its dependencies, so we need to keep the order of the includes #include @@ -130,15 +129,20 @@ diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); auto atIndices = impl::aten::buildATen(indices); - auto atGrad2d = CALL_ATEN_FUNC(max_pool2d_with_indices_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), {1, atKernelSize[0]}, {1, atStride[0]}, {0, atPadding[0]}, {1, atDilation[0]}, ceil_mode, atIndices.unsqueeze(-2)); + auto atGrad2d = CALL_ATEN_FUNC(max_pool2d_with_indices_backward, + atGradOutput.unsqueeze(-2), + atInput.unsqueeze(-2), + {1, atKernelSize[0]}, + {1, atStride[0]}, + {0, atPadding[0]}, + {1, atDilation[0]}, + ceil_mode, + atIndices.unsqueeze(-2)); auto atGradInput = atGrad2d.squeeze(-2); impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); - // CALL_ATEN_FUNC( - // max_pool1d_with_indices_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, atDilation, ceil_mode, atIndices); - return diopiSuccess; } @@ -185,7 +189,15 @@ diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); - auto atGrad2d = CALL_ATEN_FUNC(avg_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), {1, atKernelSize[0]}, {1, atStride[0]}, {0, atPadding[0]}, ceil_mode, count_include_pad, c10::nullopt); + auto atGrad2d = CALL_ATEN_FUNC(avg_pool2d_backward, + atGradOutput.unsqueeze(-2), + atInput.unsqueeze(-2), + {1, atKernelSize[0]}, + {1, atStride[0]}, + {0, atPadding[0]}, + ceil_mode, + count_include_pad, + c10::nullopt); auto atGradInput = atGrad2d.squeeze(-2); @@ -224,7 +236,7 @@ diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTenso auto atGradOutput = impl::aten::buildATen(grad_output); auto atIndices = impl::aten::buildATen(indices); auto atGrad2d = CALL_ATEN_FUNC(adaptive_max_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), atIndices.squeeze(-2)); - auto atGradInput = atGrad2d.squeeze(-2); + auto atGradInput = atGrad2d.squeeze(-2); impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); @@ -255,37 +267,38 @@ diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } -diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) { - impl::aten::setCurStream(ctx); +diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t 
stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); - if (adaptive == false && strcmp(mode, "max") == 0) { - return impl::cuda::diopiMaxPool1d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); - } else if (adaptive == false && strcmp(mode, "avg") == 0) { - return impl::cuda::diopiAvgPool1d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive); - } else if (adaptive == true && strcmp(mode, "max") == 0) { - return impl::cuda::diopiAdaptiveMaxPool1d(ctx, out, input, output_size); - } else { - return impl::cuda::diopiAdaptiveAvgPool1d(ctx, out, input, output_size); - } - + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool1d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool1d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool1d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool1d(ctx, out, input, output_size); + } } -diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { - impl::aten::setCurStream(ctx); +diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, + const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); - if (adaptive == false && strcmp(mode, "max") == 0) { - return impl::cuda::diopiMaxPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); - } else if (adaptive == false && strcmp(mode, "avg") == 0) { - return impl::cuda::diopiAvgPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive); - } else if (adaptive == true && strcmp(mode, "max") == 0) { - return impl::cuda::diopiAdaptiveMaxPool1dBackward(ctx, grad_input, grad_output, input, indices); - } else { - return impl::cuda::diopiAdaptiveAvgPool1dBackward(ctx, grad_input, grad_output, input); - } - + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool1dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool1dBackward(ctx, grad_input, grad_output, input); + } } - diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t 
dilation, bool ceil_mode) { impl::aten::setCurStream(ctx); @@ -301,7 +314,6 @@ diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } - diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { impl::aten::setCurStream(ctx); @@ -2298,34 +2310,36 @@ diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } -diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) { - impl::aten::setCurStream(ctx); +diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); - if (adaptive == false && strcmp(mode, "max") == 0) { - return impl::cuda::diopiMaxPool2d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); - } else if (adaptive == false && strcmp(mode, "avg") == 0) { - return impl::cuda::diopiAvgPool2d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); - } else if (adaptive == true && strcmp(mode, "max") == 0) { - return impl::cuda::diopiAdaptiveMaxPool2d(ctx, out, input, output_size); - } else { - return impl::cuda::diopiAdaptiveAvgPool2d(ctx, out, input, output_size); - } - + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool2d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool2d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool2d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool2d(ctx, out, input, output_size); + } } -diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { - impl::aten::setCurStream(ctx); +diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, + const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); - if (adaptive == false && strcmp(mode, "max") == 0) { - return impl::cuda::diopiMaxPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); - } else if (adaptive == false && strcmp(mode, "avg") == 0) { - return impl::cuda::diopiAvgPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, 
!exclusive, nullptr); - } else if (adaptive == true && strcmp(mode, "max") == 0) { - return impl::cuda::diopiAdaptiveMaxPool2dBackward(ctx, grad_input, grad_output, input, indices); - } else { - return impl::cuda::diopiAdaptiveAvgPool2dBackward(ctx, grad_input, grad_output, input); - } - + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool2dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool2dBackward(ctx, grad_input, grad_output, input); + } } diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t mask, diopiConstTensorHandle_t input, double p, bool train, @@ -3928,84 +3942,75 @@ diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC return diopiSuccess; } -at::Tensor unsqueeze_multiple( - const at::Tensor& t, - at::OptionalIntArrayRef opt_dim, - size_t n_dims) { - if (opt_dim.has_value()) { - at::IntArrayRef dim = opt_dim.value(); - auto dim_size = dim.size(); - // Optimisation for two common cases - if (dim_size == 0) { - return t; - } else if (dim_size == 1) { - return t.unsqueeze(dim[0]); +at::Tensor unsqueeze_multiple(const at::Tensor& t, at::OptionalIntArrayRef opt_dim, size_t n_dims) { + if (opt_dim.has_value()) { + at::IntArrayRef dim = opt_dim.value(); + auto dim_size = dim.size(); + // Optimisation for two common cases + if (dim_size == 0) { + return t; + } else if (dim_size == 1) { + return t.unsqueeze(dim[0]); + } } - } - auto dims_to_unsqueeze = at::dim_list_to_bitset(opt_dim, n_dims); - at::Tensor res = t; - for (const auto i : c10::irange(n_dims)) { - if (dims_to_unsqueeze[i]) { - res = res.unsqueeze(i); + auto dims_to_unsqueeze = at::dim_list_to_bitset(opt_dim, n_dims); + at::Tensor res = t; + for (const auto i : c10::irange(n_dims)) { + if (dims_to_unsqueeze[i]) { + res = res.unsqueeze(i); + } + } + return res; +} + +at::Tensor norm_backward(at::Tensor grad, const at::Tensor& self, const std::optional& p_, at::Tensor norm, at::IntArrayRef dim, bool keepdim) { + // NB: We mask fill the NaNs in the output to be zero but still do float + // division + // by zero, which ASAN complains about. One way to appease ASAN is to fill + // the problematic values with something arbitrary before the division, + // but we decide not to due to the perf hit. 
Instead we just silence ASAN + // where necessary + size_t ndim = self.dim(); + double p = p_.value_or(2.0).toDouble(); + at::Tensor self_scaled; + at::Tensor scale_v; + + if (!keepdim && self.dim() != 0) { + grad = unsqueeze_multiple(grad, dim, ndim); + norm = unsqueeze_multiple(norm, dim, ndim); + } + + if (p == 0.0) { + return {}; + } else if (p == 1.0) { + return self.sgn() * grad; + } else if (p == 2.0) { + return grad * (self / norm).masked_fill_(norm == 0, 0); + } else if (std::isinf(p)) { + // Derivative of amax(abs(self), dim, keepdim) but respecting nans + // We create a mask of `argmax`: it's argmax if self.abs() == norm or it's + // NaN + auto self_abs = self.abs(); + auto mask = self_abs.eq(norm).logical_or(self_abs.isnan()); + return self.sgn() * ((grad / mask.sum(dim, true)) * mask); + } else if (p < 1.0) { + self_scaled = self.sgn() * self.abs().pow_(p - 1).masked_fill_(self == 0, 0); + return self_scaled * grad * norm.pow(1 - p); + } else if (p < 2.0) { + self_scaled = self.sgn() * self.abs().pow_(p - 1); + scale_v = grad / norm.pow(p - 1); + scale_v.masked_fill_(norm == 0, 0); + return self_scaled * scale_v; + } else { + self_scaled = self * self.abs().pow_(p - 2); + scale_v = grad / norm.pow(p - 1); + scale_v.masked_fill_(norm == 0, 0); + return self_scaled * scale_v; } - } - return res; -} - -at::Tensor norm_backward( - at::Tensor grad, - const at::Tensor& self, - const std::optional& p_, - at::Tensor norm, - at::IntArrayRef dim, - bool keepdim) { - // NB: We mask fill the NaNs in the output to be zero but still do float - // division - // by zero, which ASAN complains about. One way to appease ASAN is to fill - // the problematic values with something arbitrary before the division, - // but we decide not to due to the perf hit. 
Instead we just silence ASAN - // where necessary - size_t ndim = self.dim(); - double p = p_.value_or(2.0).toDouble(); - at::Tensor self_scaled; - at::Tensor scale_v; - - if (!keepdim && self.dim() != 0) { - grad = unsqueeze_multiple(grad, dim, ndim); - norm = unsqueeze_multiple(norm, dim, ndim); - } - - if (p == 0.0) { - return {}; - } else if (p == 1.0) { - return self.sgn() * grad; - } else if (p == 2.0) { - return grad * (self / norm).masked_fill_(norm == 0, 0); - } else if (std::isinf(p)) { - // Derivative of amax(abs(self), dim, keepdim) but respecting nans - // We create a mask of `argmax`: it's argmax if self.abs() == norm or it's - // NaN - auto self_abs = self.abs(); - auto mask = self_abs.eq(norm).logical_or(self_abs.isnan()); - return self.sgn() * ((grad / mask.sum(dim, true)) * mask); - } else if (p < 1.0) { - self_scaled = - self.sgn() * self.abs().pow_(p - 1).masked_fill_(self == 0, 0); - return self_scaled * grad * norm.pow(1 - p); - } else if (p < 2.0) { - self_scaled = self.sgn() * self.abs().pow_(p - 1); - scale_v = grad / norm.pow(p - 1); - scale_v.masked_fill_(norm == 0, 0); - return self_scaled * scale_v; - } else { - self_scaled = self * self.abs().pow_(p - 2); - scale_v = grad / norm.pow(p - 1); - scale_v.masked_fill_(norm == 0, 0); - return self_scaled * scale_v; - } -} - -diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p) { +} + +diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, + diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p) { impl::aten::setCurStream(ctx); auto atGradOutput = impl::aten::buildATen(grad_output); auto atSelf = impl::aten::buildATen(self); @@ -4024,21 +4029,16 @@ diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gra if (!atGradInput.defined()) { return diopiSuccess; } - - impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); return diopiSuccess; } - - /* -diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grad_input, diopiConstTensorHandle_t result, const diopiScalar_t* p, diopiSize_t dim) { - impl::aten::setCurStream(ctx); - auto atInput = impl::aten::buildATen(input); - auto atGradInput = impl::aten::buildATen(grad_input); - auto atP = impl::aten::buildAtScalar(p); - auto atResult = impl::aten::buildATen(result); - at::IntArrayRef atDim = impl::aten::buildAtIntArray(dim); +diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grad_input, +diopiConstTensorHandle_t result, const diopiScalar_t* p, diopiSize_t dim) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); auto +atGradInput = impl::aten::buildATen(grad_input); auto atP = impl::aten::buildAtScalar(p); auto atResult = impl::aten::buildATen(result); at::IntArrayRef atDim = +impl::aten::buildAtIntArray(dim); bool keepdim = true; auto atGradOutput = torch::autograd::generated::details::norm_backward(atGradInput, atInput, atP, atResult, atDim, keepdim); @@ -4164,8 +4164,9 @@ diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } 
-diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps, const int64_t begin_norm_axis) { - +diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps, + const int64_t begin_norm_axis) { impl::aten::setCurStream(ctx); auto atOut = impl::aten::buildATen(out); @@ -4173,17 +4174,14 @@ diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, auto atVar = impl::aten::buildATen(running_var); auto atInput = impl::aten::buildATen(input); - + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atBias, bias); DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atScale, scale); - at::IntArrayRef atNormalizedShape(atInput.sizes().begin() + begin_norm_axis, atInput.sizes().end()); - diopi_tensor_list vecOut = {out, running_mean, running_var}; - auto Out = CALL_ATEN_CUDA_FUNC(native_layer_norm, atInput, atNormalizedShape, atScale, atBias, eps); impl::aten::updateATen2Tensor(ctx, Out, vecOut); @@ -4191,7 +4189,6 @@ diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, return diopiSuccess; } - diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape) { @@ -4240,7 +4237,10 @@ diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } -diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis) { +diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, + diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_std, + const int64_t begin_norm_axis) { impl::aten::setCurStream(ctx); diopiDtype_t mDtype, rDtype; if (running_std) { @@ -4288,128 +4288,131 @@ diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandl return diopiSuccess; } -diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps) { - impl::aten::setCurStream(ctx); +diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, + diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps) { + impl::aten::setCurStream(ctx); - auto atInput = impl::aten::buildATen(input); - auto atScale = impl::aten::buildATen(scale); - 
auto atBias = impl::aten::buildATen(bias); - auto input_size = atInput.sizes().vec(); + auto atInput = impl::aten::buildATen(input); + auto atScale = impl::aten::buildATen(scale); + auto atBias = impl::aten::buildATen(bias); + auto input_size = atInput.sizes().vec(); - std::vector<int64_t> reshaped_size = {1}; - int64_t shape = 1; - for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { - shape = shape * input_size[i]; - } - reshaped_size.push_back(shape); - for (int i = axis; i < input_size.size(); i++) { - reshaped_size.push_back(input_size[i]); - } + std::vector<int64_t> reshaped_size = {1}; + int64_t shape = 1; + for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { + shape = shape * input_size[i]; + } + reshaped_size.push_back(shape); + for (int i = axis; i < input_size.size(); i++) { + reshaped_size.push_back(input_size[i]); + } - auto atInputReshaped = atInput.contiguous().view(reshaped_size); - auto atScale_ = atScale.repeat(input_size[0]); - auto atBias_ = atBias.repeat(input_size[0]); + auto atInputReshaped = atInput.contiguous().view(reshaped_size); + auto atScale_ = atScale.repeat(input_size[0]); + auto atBias_ = atBias.repeat(input_size[0]); - // auto atRunningMean = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); - // auto atRunningStd = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); - + // auto atRunningMean = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); + // auto atRunningStd = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); - auto atOutput = CALL_ATEN_FUNC(batch_norm, atInputReshaped, atScale_, atBias_, c10::nullopt, c10::nullopt, true, 0.0, eps, false); - impl::aten::updateATen2Tensor(ctx, atOutput, output); - return diopiSuccess; + auto atOutput = CALL_ATEN_FUNC(batch_norm, atInputReshaped, atScale_, atBias_, c10::nullopt, c10::nullopt, true, 0.0, eps, false); + impl::aten::updateATen2Tensor(ctx, atOutput, output); + return diopiSuccess; } -diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps) { - impl::aten::setCurStream(ctx); +diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, diopiTensorHandle_t grad_bias, + diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, + diopiConstTensorHandle_t bias, const int64_t axis, const double eps) { + impl::aten::setCurStream(ctx); - auto atGradOutput = impl::aten::buildATen(grad_output); - auto atInput = impl::aten::buildATen(input); - auto atScale = impl::aten::buildATen(scale); - auto atBias = impl::aten::buildATen(bias); - auto input_size = atInput.sizes().vec(); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + auto atScale = impl::aten::buildATen(scale); + auto atBias = impl::aten::buildATen(bias); + auto input_size = atInput.sizes().vec(); - auto atScale_ = atScale.repeat(input_size[0]); + auto atScale_ = atScale.repeat(input_size[0]); - std::vector<int64_t> reshaped_size = {1}; - int64_t shape = 1; - for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { - shape = shape * input_size[i]; - } - reshaped_size.push_back(shape); - for (int i = axis; i < input_size.size(); i++) { - reshaped_size.push_back(input_size[i]); - } + std::vector<int64_t> reshaped_size = {1}; + int64_t shape = 1; + for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { + shape = shape * input_size[i]; + } + reshaped_size.push_back(shape); + for (int i = axis; i < input_size.size(); i++) { + reshaped_size.push_back(input_size[i]); + } - std::vector<int64_t> mean_dim = {0}; - for (int i = 2; i < reshaped_size.size(); i++) { - mean_dim.push_back(i); - } + std::vector<int64_t> mean_dim = {0}; + for (int i = 2; i < reshaped_size.size(); i++) { + mean_dim.push_back(i); + } - auto atInputReshaped = atInput.contiguous().view(reshaped_size); - auto atGradOutputReshaped = atGradOutput.contiguous().view(reshaped_size); + auto atInputReshaped = atInput.contiguous().view(reshaped_size); + auto atGradOutputReshaped = atGradOutput.contiguous().view(reshaped_size); - auto atMean = torch::mean(atInputReshaped, mean_dim); - auto atStd = torch::std(atInputReshaped, mean_dim); + auto atMean = torch::mean(atInputReshaped, mean_dim); + auto atStd = torch::std(atInputReshaped, mean_dim); - auto grad_input_mask = std::array<bool, 3>{grad_input != nullptr, grad_scale != nullptr, grad_bias != nullptr}; + auto grad_input_mask = std::array<bool, 3>{grad_input != nullptr, grad_scale != nullptr, grad_bias != nullptr}; - auto atOut = at::native_batch_norm_backward(atGradOutputReshaped, atInputReshaped, atScale_, c10::nullopt, c10::nullopt, atMean, atStd, true, eps, grad_input_mask); + auto atOut = + at::native_batch_norm_backward(atGradOutputReshaped, atInputReshaped, atScale_, c10::nullopt, c10::nullopt, atMean, atStd, true, eps, grad_input_mask); - if (grad_input) { - impl::aten::updateATen2Tensor(ctx, std::get<0>(atOut), grad_input); - } - if (grad_scale) { - auto atGradScaleOrigin = torch::zeros_like(atScale); - for (int i = 0; i < std::get<1>(atOut).size(0); i++) { - atGradScaleOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<1>(atOut)[i]; + if (grad_input) { + impl::aten::updateATen2Tensor(ctx, std::get<0>(atOut), grad_input); } - impl::aten::updateATen2Tensor(ctx, atGradScaleOrigin, grad_scale); - } - if (grad_bias) { - auto atGradBiasOrigin = torch::zeros_like(atBias); - for (int i = 0; i < std::get<2>(atOut).size(0); i++) { - atGradBiasOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<2>(atOut)[i]; + if (grad_scale) { + auto atGradScaleOrigin = torch::zeros_like(atScale); + for (int i = 0; i < std::get<1>(atOut).size(0); i++) { + atGradScaleOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<1>(atOut)[i]; + } + impl::aten::updateATen2Tensor(ctx, atGradScaleOrigin, grad_scale); + } + if (grad_bias) { + auto atGradBiasOrigin = torch::zeros_like(atBias); + for (int i = 0; i < std::get<2>(atOut).size(0); i++) { + atGradBiasOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<2>(atOut)[i]; + } + impl::aten::updateATen2Tensor(ctx, atGradBiasOrigin, grad_bias); } - impl::aten::updateATen2Tensor(ctx, atGradBiasOrigin, grad_bias); - } - - return diopiSuccess; + return diopiSuccess; } -diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps) { - impl::aten::setCurStream(ctx); +diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, + const double eps) { + impl::aten::setCurStream(ctx); - auto atInput = impl::aten::buildATen(input); + auto atInput = 
impl::aten::buildATen(input); - auto atOut = impl::aten::buildATen(output); + auto atOut = impl::aten::buildATen(output); - auto atDenom = atInput.norm(p, axis, true).clamp_min(eps).expand_as(atInput); + auto atDenom = atInput.norm(p, axis, true).clamp_min(eps).expand_as(atInput); - CALL_ATEN_FUNC(div_out, atOut, atInput, atDenom); + CALL_ATEN_FUNC(div_out, atOut, atInput, atDenom); - return diopiSuccess; + return diopiSuccess; } -diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps) { - impl::aten::setCurStream(ctx); +diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps) { + impl::aten::setCurStream(ctx); - auto atGradOutput = impl::aten::buildATen(grad_output); - auto atInput = impl::aten::buildATen(input); - auto atNorm = atInput.norm(p, axis, true); - auto atClamp = atNorm.clamp_min(eps); - auto atDenom = atClamp.expand_as(atInput); - auto atGradDenom = atInput * (-1 / atDenom / atDenom) * atGradOutput; - auto atGradClamp = atGradDenom.sum(axis, true); - auto atGradNorm = atGradClamp.masked_fill_(atNorm < eps, 0); - auto atGradOriginInput = norm_backward(atGradNorm, atInput, p, atNorm, axis, true); - auto atGradInput = (1 / atDenom) * atGradOutput + atGradOriginInput; + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + auto atNorm = atInput.norm(p, axis, true); + auto atClamp = atNorm.clamp_min(eps); + auto atDenom = atClamp.expand_as(atInput); + auto atGradDenom = atInput * (-1 / atDenom / atDenom) * atGradOutput; + auto atGradClamp = atGradDenom.sum(axis, true); + auto atGradNorm = atGradClamp.masked_fill_(atNorm < eps, 0); + auto atGradOriginInput = norm_backward(atGradNorm, atInput, p, atNorm, axis, true); + auto atGradInput = (1 / atDenom) * atGradOutput + atGradOriginInput; - impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); - return diopiSuccess; + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + return diopiSuccess; } - diopiError_t diopiAdaptiveAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -4514,61 +4517,67 @@ diopiError_t diopiMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } -diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { - impl::aten::setCurStream(ctx); +diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { + impl::aten::setCurStream(ctx); - auto atInput = impl::aten::buildATen(input); - at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); - at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); - at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); - c10::optional<int64_t> atDivisorOverride = divisor_override ? c10::optional<int64_t>(*divisor_override) : c10::nullopt; - auto atOut = impl::aten::buildATen(out); - CALL_ATEN_CUDA_FUNC(avg_pool3d_out, atOut, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + c10::optional<int64_t> atDivisorOverride = divisor_override ? c10::optional<int64_t>(*divisor_override) : c10::nullopt; + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(avg_pool3d_out, atOut, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); - return diopiSuccess; + return diopiSuccess; } -diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { - impl::aten::setCurStream(ctx); - auto atGradOutput = impl::aten::buildATen(grad_output); - auto atInput = impl::aten::buildATen(input); - at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); - at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); - at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); - c10::optional<int64_t> atDivisorOverride = divisor_override ? c10::optional<int64_t>(*divisor_override) : c10::nullopt; - auto atGradInput = impl::aten::buildATen(grad_input); - CALL_ATEN_CUDA_FUNC(avg_pool3d_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); - return diopiSuccess; +diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad, const int64_t* divisor_override) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + c10::optional<int64_t> atDivisorOverride = divisor_override ? c10::optional<int64_t>(*divisor_override) : c10::nullopt; + auto atGradInput = impl::aten::buildATen(grad_input); + CALL_ATEN_CUDA_FUNC( + avg_pool3d_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); + return diopiSuccess; } -diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size) { - impl::aten::setCurStream(ctx); +diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); - if (adaptive == false && strcmp(mode, "max") == 0) { - return impl::cuda::diopiMaxPool3d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); - } else if (adaptive == false && strcmp(mode, "avg") == 0) { - return impl::cuda::diopiAvgPool3d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); - } else if (adaptive == true && strcmp(mode, "max") == 0) { - return impl::cuda::diopiAdaptiveMaxPool3d(ctx, out, input, output_size); - } else { - return impl::cuda::diopiAdaptiveAvgPool3d(ctx, out, input, output_size); - } - + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool3d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool3d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool3d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool3d(ctx, out, input, output_size); + } } -diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { - impl::aten::setCurStream(ctx); +diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, + const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); - if (adaptive == false && strcmp(mode, "max") == 0) { - return impl::cuda::diopiMaxPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); - } else if (adaptive == false && strcmp(mode, "avg") == 0) { - return impl::cuda::diopiAvgPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); - } else if (adaptive == true && strcmp(mode, "max") == 0) { - return impl::cuda::diopiAdaptiveMaxPool3dBackward(ctx, grad_input, grad_output, input, indices); - } else { - return impl::cuda::diopiAdaptiveAvgPool3dBackward(ctx, grad_input, grad_output, input); - } - + if (adaptive == 
false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool3dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool3dBackward(ctx, grad_input, grad_output, input); + } } diopiError_t diopiPermute(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dims) { diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 569771aec..7fc12f773 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -302,39 +302,44 @@ DIOPI_API diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHa DIOPI_API diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope, bool input_is_result); -DIOPI_API diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, - diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); +DIOPI_API diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); DIOPI_API diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, - bool ceil_mode, diopiConstTensorHandle_t indices); + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); DIOPI_API diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, - diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); + diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); -DIOPI_API diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, - diopiSize_t padding, bool ceil_mode, bool count_include_pad); +DIOPI_API diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad); DIOPI_API diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, - bool count_include_pad); + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad); DIOPI_API diopiError_t 
diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size); -DIOPI_API diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, - diopiSize_t output_size); +DIOPI_API diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, + diopiConstTensorHandle_t input, diopiSize_t output_size); DIOPI_API diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); + diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); DIOPI_API diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input); + diopiConstTensorHandle_t input); DIOPI_API diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size); -DIOPI_API diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); +DIOPI_API diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); -DIOPI_API diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices); +DIOPI_API diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiConstTensorHandle_t indices); /** * @brief Applies 2D average-pooling operation in kH×kW regions by step size sH×sW steps. * @param[in] ctx Context environment. 
@@ -421,9 +426,14 @@ DIOPI_API diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); -DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); +DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); -DIOPI_API diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices); +DIOPI_API diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiConstTensorHandle_t indices); /** * @brief Applies a 2D adaptive average pooling over an input signal composed of several input planes. @@ -2944,9 +2954,12 @@ DIOPI_API diopiError_t diopiConvolution3dBackward(diopiContextHandle_t ctx, diop diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, int64_t groups); -DIOPI_API diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); +DIOPI_API diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); -DIOPI_API diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); +DIOPI_API diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad, const int64_t* divisor_override); /** * \brief Applies a 3D max pooling over an input signal composed of several input planes. * @param[in] ctx Context environment. 
@@ -3043,10 +3056,14 @@ DIOPI_API diopiError_t diopiAdaptiveMaxPool3dWithIndices(diopiContextHandle_t ct DIOPI_API diopiError_t diopiAdaptiveMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); -DIOPI_API diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); - -DIOPI_API diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices); +DIOPI_API diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); +DIOPI_API diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiConstTensorHandle_t indices); /** * \brief Returns a new 1-D tensor which indexes the input tensor according to the boolean mask. @@ -3212,7 +3229,8 @@ DIOPI_API diopiError_t diopiFlip(diopiContextHandle_t ctx, diopiTensorHandle_t o */ DIOPI_API diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* p, diopiSize_t dim); -DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p); +DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p); /** * * @@ -3220,7 +3238,9 @@ DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHa * * */ -DIOPI_API diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, const double eps, const int64_t begin_norm_axis); +DIOPI_API diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, const double eps, + const int64_t begin_norm_axis); /** * @brief Returns the matrix norm or vector norm of a given tensor list. 
@@ -3650,17 +3670,23 @@ DIOPI_API diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape); -DIOPI_API diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis); - - -DIOPI_API diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps); +DIOPI_API diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, + diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis); -DIOPI_API diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps); +DIOPI_API diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, + diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps); -DIOPI_API diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); +DIOPI_API diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps); -DIOPI_API diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); +DIOPI_API diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, + const double eps); +DIOPI_API diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); /** * @brief Copies the elements from src into dest tensor. 
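A quick note on the math behind the diopiNormalize/diopiNormalizeBackward code reformatted above: with n = ||x||_p along axis and d = clamp_min(n, eps), the forward computes y = x / d, so the backward accumulates g / d plus the contribution through the denominator, sum_axis(-x * g / d^2), which is masked where n < eps (the clamp is active there) and routed back to x through norm_backward. The standalone libtorch sketch below is illustrative only and not part of the patch (the shape, dtype, p, axis, and eps values are assumptions); it checks the same forward rule against torch::nn::functional::normalize:

// normalize_sketch.cpp: illustrative only; mirrors the ATen call chain used by
// diopiNormalize above: out = input / norm(input, p, axis, keepdim).clamp_min(eps).
#include <torch/torch.h>

#include <iostream>
#include <vector>

int main() {
    namespace F = torch::nn::functional;
    const double p = 2.0;    // assumption: L2 normalization
    const int64_t axis = 1;  // assumption: normalize along dim 1
    const double eps = 1e-12;

    auto input = torch::randn({4, 8}, torch::kFloat64);
    std::vector<int64_t> dims{axis};

    // Manual computation, matching the implementation in the patch.
    auto denom = input.norm(p, dims, /*keepdim=*/true).clamp_min(eps).expand_as(input);
    auto manual = input / denom;

    // Reference computation from the libtorch functional API.
    auto reference = F::normalize(input, F::NormalizeFuncOptions().p(p).dim(axis).eps(eps));

    std::cout << "max abs diff: " << (manual - reference).abs().max().item<double>() << std::endl;
    return 0;
}
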
From c75f57fb8e4edda13017633d44f0ed0e86dc0634 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Wed, 16 Oct 2024 19:21:15 +0800 Subject: [PATCH 27/30] remove conflicted layer_norm --- diopi_test/python/conformance/diopi_functions.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index fe47dae59..452e7c452 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -8062,14 +8062,3 @@ def spmm(input, mat2) -> Tensor: ret = func(input.context(), out, input, mat2) check_returncode(ret) return out - - -def layer_norm(input, axis, weight, bias, eps): - out = raw_like(input) - running_mean = raw_like(input) - running_var = raw_like(input) - func = check_function("diopiLayerNorm") - ret = func(input.context(), out, running_mean, running_var, input, axis, weight, bias, eps) - check_returncode(ret) - return out, running_mean, running_var - From 68aa0be0d8ee7c2ca42ca1d66972db8e488bf577 Mon Sep 17 00:00:00 2001 From: DoorKickers <1105976166@qq.com> Date: Thu, 17 Oct 2024 14:43:59 +0800 Subject: [PATCH 28/30] finish comments and fix fused_adam regression caused by previous merge --- impl/torch/functions/functions.cpp | 2 + proto/include/diopi/functions.h | 334 ++++++++++++++++++++++++++++- 2 files changed, 326 insertions(+), 10 deletions(-) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index e4fd89d24..1d7ac869e 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -3382,6 +3382,8 @@ diopiError_t diopiFusedAdamW(diopiContextHandle_t ctx, diopiTensorHandle_t* para std::vector<at::Tensor> tensorList; CALL_ATEN_CUDA_FUNC(_fused_adamw_, atParam, atGrad, atExpAvg, atExpAvgSq, tensorList, atstep, lr, beta1, beta2, weight_decay, eps, amsgrad, maximize); } + + return diopiSuccess; } diopiError_t diopiAdamW(diopiContextHandle_t ctx, diopiTensorHandle_t param, diopiConstTensorHandle_t grad, diopiTensorHandle_t exp_avg, diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index 7fc12f773..bc813ad0c 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -302,44 +302,172 @@ DIOPI_API diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHa DIOPI_API diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope, bool input_is_result); +/** + * @brief Performs 1D max pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after max pooling. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + */ DIOPI_API diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); +/** + * @brief Performs the backward pass for diopiMaxPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. 
+ * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] indices the tensor storing the indices of max elements from the forward pass. + */ DIOPI_API diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); +/** + * @brief Performs 1D max pooling on the input tensor and returns the indices of max elements. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after max pooling. + * @param[out] indices the tensor storing the indices of max elements. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + */ DIOPI_API diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); +/** + * @brief Performs 1D average pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after average pooling. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include padding in the count for averaging. + */ DIOPI_API diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad); +/** + * @brief Performs the backward pass for diopiAvgPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include padding in the count for averaging. 
+ */ DIOPI_API diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad); +/** + * @brief Performs 1D adaptive max pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after adaptive max pooling. + * @param[in] input the input tensor. + * @param[in] output_size the size of the output after pooling. + */ DIOPI_API diopiError_t diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size); +/** + * @brief Performs 1D adaptive max pooling on the input tensor and returns the indices of max elements. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after adaptive max pooling. + * @param[out] indices the tensor storing the indices of max elements. + * @param[in] input the input tensor. + * @param[in] output_size the size of the output after pooling. + */ DIOPI_API diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, diopiSize_t output_size); +/** + * @brief Performs the backward pass for diopiAdaptiveMaxPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] indices the tensor storing the indices of max elements from the forward pass. + */ DIOPI_API diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); +/** + * @brief Performs the backward pass for diopiAdaptiveAvgPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + */ DIOPI_API diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input); +/** + * @brief Performs 1D adaptive average pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after adaptive average pooling. + * @param[in] input the input tensor. + * @param[in] output_size the size of the output after pooling. + */ DIOPI_API diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size); +/** + * @brief General 1D pooling operation with support for multiple pooling modes. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after pooling. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. 
+ * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] output_size the size of the output after pooling (if adaptive pooling is used). + */ DIOPI_API diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); +/** + * @brief Performs the backward pass for diopiPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] indices the tensor storing the indices of max elements from the forward pass (if max pooling is used). + */ DIOPI_API diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices); + /** * @brief Applies 2D average-pooling operation in kH×kW regions by step size sH×sW steps. * @param[in] ctx Context environment. @@ -425,11 +553,41 @@ DIOPI_API diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopi DIOPI_API diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); - +/** + * @brief General 2D pooling operation with support for multiple pooling modes. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after pooling. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] output_size the size of the output after pooling (if adaptive pooling is used). 
+ */ DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); +/** + * @brief Performs the backward pass for diopiPool2d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] indices the tensor storing the indices of max elements from the forward pass (if max pooling is used). + */ DIOPI_API diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, @@ -2735,10 +2893,40 @@ DIOPI_API diopiError_t diopiConvTranspose2dBackward(diopiContextHandle_t ctx, di diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, diopiSize_t output_padding, int64_t groups); +/** + * @brief Applies a 3D transposed convolution operator over an input image composed of several input planes, sometimes also called “deconvolution”. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float32, float16, float64]. + * @param[in] weight the weight tensor; dimension of kernel_size must match the number of input spatial dimensions. + * type = [float32, float16, float64]. + * @param[in] bias bias tensor. type = [float32, float16, float64]. + * @param[in] stride an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] padding an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] output_padding an array, dimension == number of input spatial dimensions; only supported when transposed is true. type = [int32, int64]. + * @param[in] dilation an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] groups number of groups for grouped convolution. type = [int64]. + * @param[out] out the result tensor. type = [float32, float16, float64]. + */ DIOPI_API diopiError_t diopiConvTranspose3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiSize_t stride, diopiSize_t padding, diopiSize_t output_padding, int64_t groups, diopiSize_t dilation); +/** + * @brief Performs the backward pass for diopiConvTranspose3d(). Computes gradients for input, weight, and bias. + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output. 
type = [float32, float16, float64]. + * @param[in] bias_sizes an array, indicates that a bias was used in the forward pass and contains the shape of the bias. type = [int32, int64]. + * @param[in] input the input tensor. type = [float32, float16, float64]. + * @param[in] weight the weight tensor; dimension of kernel_size must match the number of input spatial dimensions. + * @param[in] stride an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] padding an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] output_padding an array, dimension == number of input spatial dimensions; only supported when transposed is true. type = [int32, int64]. + * @param[in] dilation an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] groups number of groups for grouped convolution. type = [int64]. + * @param[out] grad_input the grad of input. type = [float32, float16, float64]. + * @param[out] grad_weight the grad of weight. type = [float32, float16, float64]. + * @param[out] grad_bias the grad of bias. type = [float32, float16, float64]. + */ DIOPI_API diopiError_t diopiConvTranspose3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, @@ -2953,13 +3141,38 @@ DIOPI_API diopiError_t diopiConvolution3dBackward(diopiContextHandle_t ctx, diop diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, int64_t groups); - +/** + * @brief Performs 3D average pooling operation. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after average pooling. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include the zero-padding in the averaging calculation. + * @param[in] divisor_override if provided, it will be used as the divisor for averaging. + */ DIOPI_API diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); +/** + * @brief Performs the backward pass for diopiAvgPool3d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include the zero-padding in the averaging calculation. 
+ * @param[in] divisor_override if provided, it will be used as the divisor for averaging. + */ DIOPI_API diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); + /** * \brief Applies a 3D max pooling over an input signal composed of several input planes. * @param[in] ctx Context environment. @@ -3056,10 +3269,41 @@ DIOPI_API diopiError_t diopiAdaptiveMaxPool3dWithIndices(diopiContextHandle_t ct DIOPI_API diopiError_t diopiAdaptiveMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); +/** + * @brief General 3D pooling operation with support for multiple pooling modes. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after pooling. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] output_size the size of the output after pooling (if adaptive pooling is used). + */ DIOPI_API diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiSize_t output_size); +/** + * @brief Performs the backward pass for diopiPool3d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] indices the tensor storing the indices of max elements from the forward pass (if max pooling is used). 
+ */ DIOPI_API diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, @@ -3229,14 +3473,31 @@ DIOPI_API diopiError_t diopiFlip(diopiContextHandle_t ctx, diopiTensorHandle_t o */ DIOPI_API diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* p, diopiSize_t dim); +/** + * @brief Compute the backward pass for diopiNorm(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output. type=[float32, float64, float16]. + * @param[in] self input tensor. type=[float32, float64, float16]. + * @param[in] norm norm tensor. type=[float32, float64, float16]. + * @param[in] dim Specifies which dimension or dimensions of input to calculate the norm across. + * @param[in] p an array, the order of norm. + * @param[out] grad_input the grad of input. type=[float32, float64, float16]. + */ DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p); + /** - * - * - * - * - * + * @brief Applies Layer Normalization over a mini-batch of inputs. + * type=[float32, float64, float16]. + * @param[in] ctx Context environment. + * @param[in] save_mean Mean tensor,the mean value for each feature channel of the input tensor. type=[float32, float64, float16]. + * @param[in] save_invstd Backup of inverse standard deviation computed during training. type=[float32, float64, float16]. + * @param[in] input input tensor. type=[float32, float64, float16]. + * @param[in] weight weight tensor. type=[float32, float64, float16]. + * @param[in] bias bias tensor. type=[float32, float64, float16]. + * @param[in] begin_norm_axis int64, Indicates which dimension to start normalization. + * @param[in] eps float64 a value added to the denominator for numerical stability. + * @param[out] out normalized result. type=[float32, float64, float16]. */ DIOPI_API diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, const double eps, @@ -3670,21 +3931,74 @@ DIOPI_API diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape); +/** + * @brief Compute the backward pass for diopiLayerNormGB(). Computes gradients for input, weight, and bias. + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output. type=[float32, float64, float16]. + * @param[in] grad_bias the grad of bias. type=[float32, float64, float16]. + * @param[in] grad_weight the grad of weight. type=[float32, float64, float16]. + * @param[in] mean Mean tensor,the mean value for each feature channel of the input tensor. type=[float32, float64, float16]. + * @param[in] rstd Backup of inverse standard deviation computed during training. type=[float32, float64, float16]. + * @param[in] input input tensor. 
+/**
+ * @brief Computes the backward pass for diopiLayerNormGB(), producing gradients for input, weight, and bias.
+ * @param[in] ctx Context environment.
+ * @param[in] grad_output the grad tensor of output. type=[float32, float64, float16].
+ * @param[out] grad_bias the grad of bias. type=[float32, float64, float16].
+ * @param[out] grad_weight the grad of weight. type=[float32, float64, float16].
+ * @param[in] mean the mean saved from the forward pass, computed over the normalized dimensions of the input. type=[float32, float64, float16].
+ * @param[in] rstd the inverse standard deviation saved from the forward pass. type=[float32, float64, float16].
+ * @param[in] input input tensor. type=[float32, float64, float16].
+ * @param[in] weight weight tensor. type=[float32, float64, float16].
+ * @param[in] bias bias tensor. type=[float32, float64, float16].
+ * @param[in] begin_norm_axis int64, the first dimension that was normalized over in the forward pass.
+ * @param[out] grad_input the grad of input. type=[float32, float64, float16].
+ */
 DIOPI_API diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight,
                                                 diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input,
                                                 diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean,
                                                 diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis);
+/**
+ * @brief Performs instance normalization on the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[out] output the output tensor after instance normalization.
+ * @param[in] input the input tensor to be normalized.
+ * @param[in] axis the axis along which normalization is applied.
+ * @param[in] scale the scale tensor.
+ * @param[in] bias the bias tensor.
+ * @param[in] eps a small value added to the variance to avoid division by zero during normalization.
+ */
 DIOPI_API diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis,
                                          diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps);
-
+/**
+ * @brief Performs the backward pass for diopiInstanceNorm(). Computes gradients for input, scale, and bias.
+ * @param[in] ctx Context environment.
+ * @param[out] grad_input the gradient tensor of input.
+ * @param[out] grad_scale the gradient tensor of scale.
+ * @param[out] grad_bias the gradient tensor of bias.
+ * @param[in] grad_output the gradient tensor of the output.
+ * @param[in] input the input tensor.
+ * @param[in] scale the scale tensor.
+ * @param[in] bias the bias tensor.
+ * @param[in] axis the axis along which normalization was applied.
+ * @param[in] eps the small value added to the variance to avoid division by zero during normalization.
+ */
 DIOPI_API diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale,
                                                  diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input,
                                                  diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps);
-
+/**
+ * @brief Normalizes the input tensor based on the p-norm along the given axis.
+ * @param[in] ctx Context environment.
+ * @param[out] output the normalized output tensor.
+ * @param[in] input the input tensor to be normalized.
+ * @param[in] p the order of the norm used for normalization.
+ * @param[in] axis the axis along which to normalize.
+ * @param[in] eps a small lower bound on the norm to avoid division by zero.
+ */
 DIOPI_API diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis,
                                       const double eps);
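As a reading of diopiNormalize() above, a small numpy sketch under the common x / max(||x||_p, eps) convention; this is an assumption about the intended semantics (it matches torch.nn.functional.normalize), not the DIOPI source:

import numpy as np

def normalize(x, p=2.0, axis=1, eps=1e-12):
    # Divide by the p-norm along `axis`, clamped from below by eps
    # so that all-zero slices do not cause a division by zero.
    norm = np.linalg.norm(x, ord=p, axis=axis, keepdims=True)
    return x / np.maximum(norm, eps)

x = np.array([[3.0, 4.0], [0.0, 0.0]])
print(normalize(x))  # [[0.6, 0.8], [0.0, 0.0]]; the zero row survives thanks to eps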
-
+/**
+ * @brief Performs the backward pass for diopiNormalize(). Computes the gradient of the input.
+ * @param[in] ctx Context environment.
+ * @param[out] grad_input the gradient tensor of input.
+ * @param[in] grad_output the gradient tensor of the output.
+ * @param[in] input the input tensor.
+ * @param[in] p the order of the norm used during normalization.
+ * @param[in] axis the axis along which normalization was applied.
+ * @param[in] eps the small lower bound on the norm used to avoid division by zero.
+ */
 DIOPI_API diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                               diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps);

From 25901cd53a1e2576ecf105310c86e6a03c9a1bff Mon Sep 17 00:00:00 2001
From: DoorKickers <1105976166@qq.com>
Date: Thu, 17 Oct 2024 16:17:39 +0800
Subject: [PATCH 29/30] remove unused code & try to fix adam

---
 impl/torch/functions/functions.cpp | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp
index 1d7ac869e..6ee8e104e 100644
--- a/impl/torch/functions/functions.cpp
+++ b/impl/torch/functions/functions.cpp
@@ -3429,7 +3429,8 @@ diopiError_t diopiAdam(diopiContextHandle_t ctx, diopiTensorHandle_t param, diop
     if (weight_decay != 0) {
         grad_d = grad_d.add(atParam, weight_decay);
     }
-    atExpAvg.mul_(beta1).add_(grad_d, 1 - beta1);
+    // Same first-moment update as mul_(beta1).add_(grad_d, 1 - beta1); lerp_ matches torch.optim.Adam.
+    atExpAvg.lerp_(grad_d, 1 - beta1);
     atExpAvgSq.mul_(beta2).addcmul_(grad_d, grad_d.conj(), 1 - beta2);
 
     at::Tensor denom;
@@ -4036,19 +4037,6 @@ diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gra
     return diopiSuccess;
 }
 
-/*
-diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grad_input,
-diopiConstTensorHandle_t result, const diopiScalar_t* p, diopiSize_t dim) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); auto
-atGradInput = impl::aten::buildATen(grad_input); auto atP = impl::aten::buildAtScalar(p); auto atResult = impl::aten::buildATen(result); at::IntArrayRef atDim =
-impl::aten::buildAtIntArray(dim);
-
-    bool keepdim = true;
-    auto atGradOutput = torch::autograd::generated::details::norm_backward(atGradInput, atInput, atP, atResult, atDim, keepdim);
-
-    return diopiSuccess;
-}
-*/
-
 diopiError_t diopiForeachnormScalar(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, diopiConstTensorHandle_t* inputs, int64_t inputSize,
                                     const diopiScalar_t* ord) {
     DIOPI_CHECK_PTR(outs);
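For the lerp_ change in the Adam hunk above: lerp(a, b, w) = a + w * (b - a) = (1 - w) * a + w * b, so with w = 1 - beta1 it reproduces the classic first-moment update exactly. A quick numpy check of the equivalence:

import numpy as np

beta1 = 0.9
exp_avg = np.random.randn(5)
grad = np.random.randn(5)

classic = beta1 * exp_avg + (1 - beta1) * grad     # mul_(beta1).add_(grad_d, 1 - beta1)
lerped = exp_avg + (1 - beta1) * (grad - exp_avg)  # lerp_(grad_d, 1 - beta1)

assert np.allclose(classic, lerped)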
From d8a1c54bdac93c51b2e4944051e6f51343453e8c Mon Sep 17 00:00:00 2001
From: DoorKickers <1105976166@qq.com>
Date: Thu, 17 Oct 2024 16:47:45 +0800
Subject: [PATCH 30/30] add layerNorm GB national standard operator's explanation

---
 diopi_test/python/configs/diopi_configs.py       | 1 +
 diopi_test/python/conformance/diopi_functions.py | 2 ++
 proto/include/diopi/functions.h                  | 5 ++++-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py
index 84283dab0..0ad81385e 100755
--- a/diopi_test/python/configs/diopi_configs.py
+++ b/diopi_test/python/configs/diopi_configs.py
@@ -7986,6 +7986,7 @@
         ),
     ),
 
+    # This test config is for the national standard (GB) operator version of diopiLayerNorm, which differs from the original interface definition.
    'layer_norm': dict(
        name=["layer_norm"],
        dtype=[np.float32, np.float64, np.float16],
diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py
index 452e7c452..3b35e6fc9 100644
--- a/diopi_test/python/conformance/diopi_functions.py
+++ b/diopi_test/python/conformance/diopi_functions.py
@@ -5270,6 +5270,8 @@ def layer_normGB(input, normalized_shape, weight=None, bias=None, eps=1e-05):
     bias = None if bias is None else bias
     out = raw_like(input)
+
+    # Note: this is the national standard (GB) operator version; unlike the diopiLayerNorm interface, normalized_shape is replaced by begin_norm_axis.
     func = check_function("diopiLayerNormGB")
     ret = func(
         input.context(),
diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h
index bc813ad0c..4f7dfcecb 100644
--- a/proto/include/diopi/functions.h
+++ b/proto/include/diopi/functions.h
@@ -3488,7 +3488,8 @@ DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHa
 /**
  * @brief Applies Layer Normalization over a mini-batch of inputs.
- * type=[float32, float64, float16].
+ * Note: this is the national standard (GB) operator version; it differs from the diopiLayerNorm interface definition in that normalized_shape is replaced
+ * by begin_norm_axis. type=[float32, float64, float16].
  * @param[in] ctx Context environment.
  * @param[out] save_mean mean tensor, the mean computed over the normalized dimensions of the input. type=[float32, float64, float16].
  * @param[out] save_invstd the inverse standard deviation computed over the normalized dimensions. type=[float32, float64, float16].
@@ -3933,6 +3934,8 @@ DIOPI_API diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTen
 /**
  * @brief Computes the backward pass for diopiLayerNormGB(), producing gradients for input, weight, and bias.
+ * Note: this is the national standard (GB) operator version; it differs from the diopiLayerNormBackward interface definition in that normalized_shape is
+ * replaced by begin_norm_axis.
  * @param[in] ctx Context environment.
  * @param[in] grad_output the grad tensor of output. type=[float32, float64, float16].
 * @param[out] grad_bias the grad of bias. type=[float32, float64, float16].