diff --git a/diopi_test/python/configs/diopi_configs.py b/diopi_test/python/configs/diopi_configs.py index 34f43f67d..0ad81385e 100755 --- a/diopi_test/python/configs/diopi_configs.py +++ b/diopi_test/python/configs/diopi_configs.py @@ -743,6 +743,202 @@ ), ), + 'pool1d': dict( + name=['pool1d'], + interface=['CustomizedTest'], + para=dict( + kernel_size=[2, 2, 6, 2, 3, 6, 5, 0, 0, 0, 0], + stride=[None, None, 3, 1, 2, None, 2, 0, 0, 0, 0], + padding=[0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0], + dilation=[0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0], + ceil_mode=[False, True, False, True, False, False, True, False, False, False, False], + count_include_pad=[True, True, False, True, False, False, False, False, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False, True, True, True, True], + output_size=[0, 0, 0, 0, 0, 0, 0, 5, 26, 3, 2] + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16), (5, 2, 16), (3, 4, 16), + (2, 1024, 14), (256, 28, 28), (3, 12), (5, 4, 17), (3, 16), (4, 7, 27), (4, 16), (288, 33)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'avg_pool1d': dict( + name=["avg_pool1d"], + para=dict( + kernel_size=[2, 2, 6, 2, 3], + stride=[None, None, 3, 1, 2], + padding=[0, 0, 2, 1, 0], + ceil_mode=[False, True, False, True, False], + count_include_pad=[True, True, False, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16), (5, 2, 16), (3, 4, 16), + (2, 1024, 14), (256, 28, 28)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'max_pool1d': dict( + name=["max_pool1d"], + para=dict( + kernel_size=[6, 5, 6, 8, 3, 2, 3, 3], + stride=[None, 2, 2, 3, 2, 2, 1, 2], + padding=[0, 2, 2, 3, 1, 0, 1, 0], + dilation=[1, 3, 2, 2, 1, 1, 1, 2], + ceil_mode=[False, True, False, True, False, True, False, True], + return_indices=[False, False, False, False, False, False, False, False], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((3, 12), (5, 4, 17), + (6, 17), (1, 4, 17), + (2, 64, 352), + (2, 256, 12), + (2, 512, 4), + (3, 4)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'max_pool1d_return_indices': dict( + name=["max_pool1d"], + para=dict( + kernel_size=[6, 6, 8, 8], + stride=[None, 3, 3, 2], + padding=[0, 1, 2, 3], + dilation=[1, 4, 2, 3], + ceil_mode=[False, True, False, True], + return_indices=[True, True, True, True], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((3, 12), (5, 4, 17), + (6, 17), (1, 4, 17),), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + requires_backward=[0], + ), + + 'adaptive_avg_pool1d': dict( + name=["adaptive_avg_pool1d"], + atol=1e-5, + rtol=1e-4, + atol_half=1e-2, + rtol_half=1e-2, + para=dict( + output_size=[5, 26, 3, 1, 2, + 1, 3, 7, 10], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((3, 16), (4, 7, 27), (4, 16), + (2, 2048, 8), (2, 288, 33), + (2, 144, 65), (2, 1280, 7), + (2, 265, 7), (2, 265, 7)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'adaptive_max_pool1d': dict( + name=["adaptive_max_pool1d"], + atol=1e-5, + rtol=1e-4, + para=dict( + output_size=[5, 26, 3, 2, 1, 3, 33, 40], + return_indices=[False, False, False, False, False, 
False, False, False] + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((3, 16), (4, 7, 27), (4, 16), + (288, 33), (2, 144, 33), (2, 16, 130), + (2, 144, 33), (2, 144, 33)), + "dtype": [np.float32, np.float16, np.float64], + }, + ] + ), + ), + + 'adaptive_max_pool1d_return_indices': dict( + name=["adaptive_max_pool1d"], + atol=1e-5, + rtol=1e-4, + para=dict( + output_size=[5, 26, 3, 0], + return_indices=[True, True, True, True] + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((3, 16), (4, 7, 27), (4, 16), (4, 16)), + "dtype": [np.float32, np.float16, np.float64], + }, + ] + ), + ), + + 'pool2d': dict( + name=['pool2d'], + interface=['CustomizedTest'], + para=dict( + kernel_size=[2, 2, 6, 2, 3, 6, 5, 0, 0, 0, 0], + stride=[None, None, 3, 1, 2, None, 2, 0, 0, 0, 0], + padding=[0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0], + dilation=[0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0], + ceil_mode=[False, True, False, True, False, False, True, False, False, False, False], + count_include_pad=[True, True, False, True, False, False, False, False, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False, True, True, True, True], + output_size=[0, 0, 0, 0, 0, 0, 0, 5, 26, 3, 2] + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16, 16), (5, 2, 16, 16), (3, 4, 16, 16), + (2, 1024, 14, 16), (256, 28, 28, 16), (3, 12, 12), (5, 4, 17, 17), (3, 16, 16), (4, 7, 27, 27), (4, 16, 17), (288, 33, 33)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), 'avg_pool2d': dict( name=["avg_pool2d"], para=dict( @@ -1025,7 +1221,7 @@ ), 'pointwise_op': dict( - name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'floor', + name=['abs', 'cos', 'acos', 'tan', 'erf', 'erfinv', 'exp', 'expm1', 'floor', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'ceil', 'atan'], interface=['torch'], is_inplace=True, @@ -1047,7 +1243,7 @@ 'pointwise_op_int_without_inplace': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'erf', 'exp', + name=['abs', 'cos', 'acos', 'tan', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.int16, np.int32, np.int64, np.int8], @@ -1068,7 +1264,7 @@ 'pointwise_op_uint8': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', # 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], - name=['abs', 'cos', 'erf', 'exp', + name=['abs', 'cos', 'acos', 'tan', 'erf', 'exp', 'expm1', 'neg', 'sin', 'asin', 'sqrt', 'logical_not', 'rsqrt', 'atan'], interface=['torch'], dtype=[np.uint8], @@ -1103,7 +1299,7 @@ # FIXME erfinv输入int或bool报错 'pointwise_op_bool': dict( # name=['abs', 'cos', 'erf', 'erfinv', 'exp', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], - name=['abs', 'cos', 'erf', 'exp', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], + name=['abs', 'cos', 'acos', 'tan', 'erf', 'exp', 'expm1', 'sin', 'asin', 'sqrt', 'rsqrt', 'atan', 'logical_not'], interface=['torch'], dtype=[np.bool_], tensor_para=dict( @@ -1192,6 +1388,43 @@ ), ), + 'hyperbolic_trigo_function_without_backward': dict( + name=['sinh', 'cosh', 'asinh', 'acosh', 'atanh'], + interface=['torch'], + is_inplace=True, + dtype=[np.float16, np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { 
+ "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + + 'hyperbolic_trigo_function_without_backward_not_float': dict( + name=['sinh', 'cosh', 'asinh', 'acosh', 'atanh'], + interface=['torch'], + dtype=[np.int16, np.int32, np.int64, np.uint8, np.int8, np.bool_], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((), (1, ), (1024,), (364800, 4), (2, 128, 3072), + (256, 128, 3, 3), + (2, 31, 512, 6, 40), + (0,), (16, 0), (1, 0, 6)), + }, + ], + ), + ), + 'tanh': dict( name=['tanh'], interface=['torch'], @@ -1250,7 +1483,7 @@ ), 'pointwise_op_zero': dict( - name=['abs', 'exp', 'floor', 'neg', 'sqrt', + name=['abs', 'exp', 'expm1', 'floor', 'neg', 'sqrt', 'logical_not', 'rsqrt', 'ceil'], interface=['torch'], is_inplace=True, @@ -1267,7 +1500,7 @@ ), 'pointwise_op_without_inplace_zero': dict( - name=['abs', 'sign', 'exp', 'sqrt', + name=['abs', 'sign', 'exp', 'expm1', 'sqrt', 'logical_not', 'rsqrt'], interface=['torch'], dtype=[np.float16, np.float32, np.float64, np.int16, @@ -4347,6 +4580,88 @@ ), ), + 'sort_stable_for_backward': dict( + name=["sort"], + interface=['CustomizedTest'], + saved_args=dict(indice=1), + requires_backward=[0], + para=dict( + dim=[-1, 0, 1], + descending=[True, False, False], + stable=[True, True, True], + ), + dtype=[np.float16, np.float32], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + ], + ), + ), + + 'complex': dict( + name=["complex"], + interface=['torch'], + dtype=[np.float32, np.float64], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['real'], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + { + "ins": ['imag'], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + ], + ), + ), + + 'conj': dict( + name=["conj"], + interface=['torch'], + dtype=[np.float32, np.float64, np.complex64, np.complex128], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + ], + ), + ), + + 'real_imag': dict( + name=["real", "imag"], + interface=['torch'], + dtype=[np.complex64, np.complex128], + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ['input'], + "shape": ((11400, ), + (4, 4, 16, 20), + (4, 4, 16, 2, 20)), + }, + ], + ), + ), + # FIXME topk输入0-d张量,且k为0时,结果精度不一致 'topk_nonzero': dict( name=['topk'], @@ -5386,6 +5701,49 @@ ), ), + 'conv_transpose3d': dict( + name=["conv_transpose3d"], + atol=1e-3, + rtol=1e-3, + atol_half=1e2, + rtol_half=1e2, + para=dict( + stride=[1, 1, 2, 1, 2, (2, 2, 2), 1], + padding=[0, 0, 1, 0, 1, (1, 0, 1), 0], + output_padding=[0, 0, 1, 0, 1, (0, 1, 1), 0], + groups=[1, 1, 8, 1, 1, 1, 1], + dilation=[1, 1, 2, 1, 2, (1, 2, 2), 1], + ), + tensor_para=dict( + args=[ + { + "ins": ["input"], + "requires_grad": [True], + "shape": ((6, 16, 20, 8, 5), + (2, 256, 14, 14, 5), (2, 128, 32, 32, 4), + (2, 64, 160, 160, 5), (2, 64, 320, 320, 5), (2, 64, 320, 320, 5), + (0, 16, 20, 8, 5)), + "dtype": [np.float32, np.float64, np.float16], + }, + { + "ins": ["weight"], + "requires_grad": [True], + "shape": ((16, 2, 12, 2, 2), + (256, 256, 2, 2, 2), (128, 128, 4, 4, 4), + (64, 64, 2, 2, 2), (64, 1, 2, 2, 2), (64, 1, 2, 2, 2), + (16, 2, 12, 2, 2)), + "dtype": [np.float32, np.float64, np.float16], + }, + { + "ins": 
["bias"], + "requires_grad": [True], + "shape": (None, (256,), None, (64,), (1,), (1,), None), + "dtype": [np.float32, np.float64, np.float16], + }, + ] + ), + ), + 'unfold': dict( name=["unfold"], interface=['torch.Tensor'], @@ -5455,6 +5813,29 @@ ), ), + 'cumsum_float_for_backward': dict( + name=["cumsum"], + interface=['torch'], + atol=1e-6, + rtol=1e-5, + requires_backward=[0], + dtype=[np.float32], + para=dict( + dim=[0, -1, 1], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((), (12,), (2, 22, 33)), + "dtype": [np.float32, np.float64, np.float16], + "gen_fn": 'Genfunc.randn', + }, + ], + ), + ), + 'cdist': dict( name=['cdist'], interface=['torch'], @@ -5601,6 +5982,88 @@ ), ), + 'argmin': dict( + name=['argmin'], + interface=["torch"], + para=dict( + dim=[0, -1, 0, 1, None, -2, 2, 1], + keepdim=[True, False, True, False, False, True, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((), (1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64), + (12, 0), (2, 0, 9), (0, 9, 8, 7)), + "dtype": [np.float64, np.float16, np.float32, np.int32, np.int16, + np.int64, np.uint8, np.int8], + "gen_fn": 'Genfunc.randn', + }, + ], + ), + ), + + 'argmin_same_value': dict( + name=['argmin'], + interface=["torch"], + para=dict( + dim=[-1, 0, None, 1], + keepdim=[True, False, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64)), + "dtype": [np.float32], + "gen_fn": 'Genfunc.zeros', + }, + ], + ), + ), + + 'argsort': dict( + name=['argsort'], + interface=["torch"], + para=dict( + dim=[0, -1, 0, 1, -1, 0, 2, 1], + stable=[True, False, True, False, False, True, True, False], + descending=[True, False, True, False, False, True, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((), (1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64), + (12, 0), (2, 0, 9), (0, 9, 8, 7)), + "dtype": [np.float64, np.float16, np.float32, np.int32, np.int16, + np.int64, np.uint8, np.int8], + "gen_fn": 'Genfunc.randn', + }, + ], + ), + ), + + 'argsort_same_value': dict( + name=['argsort'], + interface=["torch"], + para=dict( + dim=[-1, 0, -1, 1], + stable=[True, False, True, False], + descending=[True, False, True, False], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((1,), (1024, 80), (2, 256, 256), (2, 1, 64, 64)), + "dtype": [np.float32], + "gen_fn": 'Genfunc.zeros', + }, + ], + ), + ), + 'adadelta': dict( name=["adadelta"], interface=["CustomizedTest"], @@ -5790,6 +6253,56 @@ ), ), + 'pool3d': dict( + name=['pool3d'], + interface=['CustomizedTest'], + para=dict( + kernel_size=[2, 2, 6, 2, 3, 6, 5, 0, 0, 0, 0], + stride=[None, None, 3, 1, 2, None, 2, 0, 0, 0, 0], + padding=[0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 0], + dilation=[0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0], + ceil_mode=[False, True, False, True, False, False, True, False, False, False, False], + count_include_pad=[True, True, False, True, False, False, False, False, False, False, False], + mode=["avg", "avg", "avg", "avg", "avg", "max", "max", "avg", "avg", "max", "max"], + adaptive=[False, False, False, False, False, False, False, True, True, True, True], + output_size=[0, 0, 0, 0, 0, 0, 0, 5, 26, 3, 2] + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16, 16, 16), (5, 2, 16, 16, 17), (3, 4, 16, 16, 18), + (2, 1024, 14, 16, 20), (256, 28, 28, 16, 17), (3, 12, 12, 18), (5, 4, 17, 17, 21), (3, 16, 16, 20), (4, 7, 27, 27, 26), 
(4, 16, 17, 23), (288, 33, 33, 35)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + + 'avg_pool3d': dict( + name=["avg_pool3d"], + para=dict( + kernel_size=[2, (2, 2, 2), (20, 13, 13), (2, 2, 2), 3], + stride=[None, None, 3, 1, (1, 2, 2)], + padding=[0, (0, 0, 0), (2, 3, 2), (1, 1, 1), 0], + ceil_mode=[False, True, False, True, False], + count_include_pad=[True, True, False, True, False], + divisor_override=[None, None, -3, None, 2], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "requires_grad": [True], + "shape": ((2, 16, 7, 7), (5, 2, 16, 7, 7), (3, 4, 23, 23, 23), + (2, 1024, 14, 14, 14), (256, 28, 28, 28)), + "dtype": [np.float16, np.float32, np.float64], + }, + ] + ), + ), + 'max_pool3d': dict( name=['max_pool3d'], para=dict( @@ -6420,6 +6933,7 @@ (0,), (0, 12), (13, 0, 4)), "dtype": [np.float32, np.float64, np.float16], "gen_fn": 'Genfunc.randn', + "requires_grad": [True], }, ], ), @@ -7472,6 +7986,7 @@ ), ), + #This test config is for the national standard GB operator version of diopiLayerNorm, which is different from the original interface definition. 'layer_norm': dict( name=["layer_norm"], dtype=[np.float32, np.float64, np.float16], @@ -7508,6 +8023,81 @@ ) ), + 'instance_norm': dict( + name=["instance_norm"], + dtype=[np.float32, np.float64], + atol=1e-5, + atol_half=1e-1, + rtol_half=1e-2, + para=dict( + eps=[1e-5, 1e-5, 1e-12, 0, -1e-5, 2], + ), + tensor_para=dict( + args=[ + { + "ins": ["input"], + "shape": ((3, 5, 3, 5), (2, 16, 128), (2, 64, 16), (2, 32, 16), + (4, 5, 3, 5), (5, 16, 128)), + 'gen_fn':'Genfunc.randn', + 'requires_grad': [True], + }, + { + "ins": ["weight"], + "shape": ((5,), (16,), (64,), (32,), + (5,), (16,)), + 'gen_fn':'Genfunc.randn', + 'requires_grad': [True], + }, + { + "ins": ["bias"], + "shape": ((5,), (16,), (64,), (32,), + (5,), (16,)), + 'gen_fn':'Genfunc.randn', + 'requires_grad': [True], + }, + ] + ) + ), + + 'layer_normGB': dict( + name=["layer_normGB"], + interface=['CustomizedTest'], + dtype=[np.float32, np.float64, np.float16], + atol=1e-5, + atol_half=1e-1, + rtol_half=1e-2, + para=dict( + eps=[1e-5, 1e-5, 1e-12, 0, -1e-5, 2], + normalized_shape=[(5, 3, 5), (128, ), (64, ), (32,), + (3, 5), (2, 16, 128)], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ["input"], + "requires_grad": [True], + "shape": ((2, 5, 3, 5), (2, 3136, 128), (2, 64), (32,), + (2, 5, 3, 5), (2, 16, 128)), + }, + { + "ins": ["weight"], + "requires_grad": [True], + "shape": (None, (128,), (64,), (32,), + (3, 5), (2, 16, 128)), + }, + { + "ins": ["bias"], + "requires_grad": [True], + "shape": (None, (128,), (64,), (32,), + (3, 5), (2, 16, 128)), + }, + ] + ) + ), + + + 'layer_norm_empty_tensor': dict( name=["layer_norm"], dtype=[np.float32, np.float64, np.float16], @@ -7535,6 +8125,28 @@ ) ), + 'normalize': dict( + name=["normalize"], + interface=['torch.nn.functional'], + dtype=[np.float32, np.float64, np.float16], + atol=1e-5, + para=dict( + eps=[1e-2, 1e-8, -3], + p=[1, 2, 3], + dim=[1, 1, 1], + ), + tensor_para=dict( + gen_fn='Genfunc.randn', + args=[ + { + "ins": ["input"], + "shape": ((3, 3), (3, 12), (6, 3, 9)), + "requires_grad": [True], + }, + ] + ) + ), + 'copy': dict( name=["copy_"], interface=['torch.Tensor'], @@ -8061,6 +8673,31 @@ ), ), + 'grid_sample': dict( + name=["grid_sample"], + interface=['torch.nn.functional'], + para=dict( + mode=["bilinear", "nearest", "bilinear", "nearest"], + ), + tensor_para=dict( + args=[ + { + "ins": ['input'], + "shape": ((2, 3, 15, 15), (3, 3, 20, 20, 20), (2, 3, 
25, 25), (3, 3, 30, 30, 30)), + "dtype": [np.float16, np.float32, np.float64], + "gen_fn": 'Genfunc.randn', + }, + { + "ins": ['grid'], + "shape": ((2, 5, 5, 2), (3, 10, 10, 10, 3), (2, 20, 20, 2), (3, 60, 60, 60, 3)), + "dtype": [np.float16, np.float32, np.float64], + "gen_fn": 'Genfunc.randn', + "gen_num_range": [1, 19], + }, + ], + ), + ), + 'multinomial': dict( name=["multinomial"], interface=['torch'], diff --git a/diopi_test/python/conformance/customized_test.py b/diopi_test/python/conformance/customized_test.py index 9cd10f905..3f351e27c 100644 --- a/diopi_test/python/conformance/customized_test.py +++ b/diopi_test/python/conformance/customized_test.py @@ -857,3 +857,37 @@ def nll_loss_v2(input, target, weight=None, ignore_index=-100, reduction="mean") input, target, weight, None, ignore_index, None, reduction ) return out + + def pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive, output_size): + if mode == "avg" and adaptive == False: + return torch.nn.functional.avg_pool1d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + elif mode == "max" and adaptive == False: + return torch.nn.functional.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices=False) + elif mode == "avg" and adaptive == True: + return torch.nn.functional.adaptive_avg_pool1d(input, output_size) + elif mode == "max" and adaptive == True: + return torch.nn.functional.adaptive_max_pool1d(input, output_size, return_indices=False) + + def pool2d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive, output_size): + if mode == "avg" and adaptive == False: + return torch.nn.functional.avg_pool2d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + elif mode == "max" and adaptive == False: + return torch.nn.functional.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices=False) + elif mode == "avg" and adaptive == True: + return torch.nn.functional.adaptive_avg_pool2d(input, output_size) + elif mode == "max" and adaptive == True: + return torch.nn.functional.adaptive_max_pool2d(input, output_size, return_indices=False) + + def pool3d(input, kernel_size, stride, padding, dilation, ceil_mode, count_include_pad, mode, adaptive, output_size): + if mode == "avg" and adaptive == False: + return torch.nn.functional.avg_pool3d(input, kernel_size, stride, padding, ceil_mode, count_include_pad) + elif mode == "max" and adaptive == False: + return torch.nn.functional.max_pool3d(input, kernel_size, stride, padding, dilation, ceil_mode, return_indices=False) + elif mode == "avg" and adaptive == True: + return torch.nn.functional.adaptive_avg_pool3d(input, output_size) + elif mode == "max" and adaptive == True: + return torch.nn.functional.adaptive_max_pool3d(input, output_size, return_indices=False) + + def layer_normGB(input, weight, bias, eps, normalized_shape): + return torch.nn.functional.layer_norm(input=input, weight=weight, bias=bias, eps=eps, normalized_shape=normalized_shape) + diff --git a/diopi_test/python/conformance/diopi_functions.py b/diopi_test/python/conformance/diopi_functions.py index 297240cb0..3b35e6fc9 100644 --- a/diopi_test/python/conformance/diopi_functions.py +++ b/diopi_test/python/conformance/diopi_functions.py @@ -405,6 +405,22 @@ def cos(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiCos", promote_type(input, Dtype.float32)) +def acos(input, inplace=False) -> Tensor: + return unary_op(input, inplace, 
"diopiAcos", promote_type(input, Dtype.float32)) + + +def tan(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiTan", promote_type(input, Dtype.float32)) + + +def sinh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiSinh", promote_type(input, Dtype.float32)) + + +def cosh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiCosh", promote_type(input, Dtype.float32)) + + def tanh(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiTanh", promote_type(input, Dtype.float32)) @@ -413,10 +429,25 @@ def atan(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiAtan", promote_type(input, Dtype.float32)) +def asinh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAsinh", promote_type(input, Dtype.float32)) + + +def acosh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAcosh", promote_type(input, Dtype.float32)) + +def atanh(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiAtanh", promote_type(input, Dtype.float32)) + + def exp(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiExp", promote_type(input, Dtype.float32)) +def expm1(input, inplace=False) -> Tensor: + return unary_op(input, inplace, "diopiExpm1", promote_type(input, Dtype.float32)) + + def log(input, inplace=False) -> Tensor: return unary_op(input, inplace, "diopiLog", promote_type(input, Dtype.float32)) @@ -1044,7 +1075,915 @@ def conv2d( check_returncode(ret) return out +def pool1d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True, output_size=0, mode="avg", adaptive=False) -> Tensor: + + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + if isinstance(output_size, int): + output_size = (output_size,) + + if mode == "avg" and adaptive == False: + for i in range(-1, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "max" and adaptive == False: + for i in range(-1, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + 
stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "avg" and adaptive == True: + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + else: + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + func = check_function("diopiPool1d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + + +def pool1d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, output_size=0, **kwargs,) -> Tensor: + + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + grad_input = raw_like(input) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + if isinstance(output_size, int): + output_size = (output_size, ) + + + if mode == "avg" and adaptive == False: + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == False: + + _, indices = max_pool1d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = 
Sizes(list(dilation)) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "avg" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + _, indices = adaptive_max_pool1d(input, output_size, return_indices=True) + + func = check_function("diopiPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def avg_pool1d( + input, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + + for i in range(-1, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiAvgPool1d") + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + check_returncode(ret) + return out + +def avg_pool1d_backward( + input, + grad_outputs, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + + func = check_function("diopiAvgPool1dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + ) + 
check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def max_pool1d( + input, + kernel_size, + stride=None, + padding=0, + dilation=1, + ceil_mode=False, + return_indices=False, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + + for i in range(-1, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + if not return_indices: + func = check_function("diopiMaxPool1d") + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + ) + check_returncode(ret) + return out + else: + func = check_function("diopiMaxPool1dWithIndices") + nhwc_stride = compute_nhwc_stride_2d(sizeO) if glob_vars.nhwc else None + indices = Tensor( + sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + ) + ret = func( + input.context(), + out, + indices, + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + ) + check_returncode(ret) + return out, indices + +def max_pool1d_backward( + input, + grad_outputs, + kernel_size, + stride=None, + padding=0, + dilation=1, + ceil_mode=False, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride,) + if isinstance(padding, int): + padding = (padding,) + if isinstance(dilation, int): + dilation = (dilation,) + + _, indices = max_pool1d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiMaxPool1dBackward") + ret = func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + dilation, + ceil_mode, + indices, + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def adaptive_avg_pool1d(input, output_size): + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(output_size, int): + output_size = (output_size,) + + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) 
if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + func = check_function("diopiAdaptiveAvgPool1d") + ret = func(input.context(), out, input, output_size) + check_returncode(ret) + return out + + +def adaptive_max_pool1d(input, output_size, return_indices=False): + sizeI = input.size().data + assert len(sizeI) == 3 or len(sizeI) == 2, "input must be 2d or 3d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 3: + sizeO.append(sizeI[1]) + + if isinstance(output_size, int): + output_size = (output_size,) + + for i in range(-1, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-1],])) + + if return_indices: + func = check_function("diopiAdaptiveMaxPool1dWithIndices") + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + indices = Tensor( + sizeO, from_numpy_dtype(glob_vars.int_type), stride=nhwc_stride + ) + ret = func(input.context(), out, indices, input, output_size) + check_returncode(ret) + return out, indices + else: + func = check_function("diopiAdaptiveMaxPool1d") + ret = func(input.context(), out, input, output_size) + check_returncode(ret) + return out + +def adaptive_avg_pool1d_backward(input, grad_outputs, **kwargs) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + func = check_function("diopiAdaptiveAvgPool1dBackward") + ret = func(input.context(), grad_input, grad_outputs[0], input) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + +def adaptive_max_pool1d_backward(input, grad_outputs, output_size, **kwargs) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + _, indices = adaptive_max_pool1d(input, output_size, return_indices=True) + + func = check_function("diopiAdaptiveMaxPool1dBackward") + ret = func(input.context(), grad_input, grad_outputs[0], input, indices) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def pool2d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True, output_size=0, mode="avg", adaptive=False) -> Tensor: + + sizeI = input.size().data + assert len(sizeI) == 4 or len(sizeI) == 3, "input must be 3d or 4d tensors" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 4: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride) + if isinstance(padding, int): + padding = (padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation) + if isinstance(output_size, int): + output_size = (output_size, output_size) + + if mode == "avg" and adaptive == False: + for i in range(-2, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + 
dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool2d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "max" and adaptive == False: + for i in range(-2, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool2d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "avg" and adaptive == True: + for i in range(-2, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool2d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + else: + for i in range(-2, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool2d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + + +def pool2d_backward(input, grad_outputs, kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, output_size=0, **kwargs,) -> Tensor: + + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + grad_input = raw_like(input) + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride) + if isinstance(padding, int): + padding = (padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation) + if 
isinstance(output_size, int): + output_size = (output_size, output_size) + + + if mode == "avg" and adaptive == False: + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == False: + + _, indices = max_pool2d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "avg" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + _, indices = adaptive_max_pool2d(input, output_size, return_indices=True) + + func = check_function("diopiPool2dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + def avg_pool2d( input, kernel_size, @@ -1478,6 +2417,50 @@ def sort(input, dim=-1, descending=False, stable=None): return vals, indices +def sort_backward(input, grad_outputs, dim, indice, **kwargs): + grad_outputs = grad_outputs[0] + grad_input = raw_like(grad_outputs) + + func = check_function("diopiSortBackward") + ret = func(input.context(), grad_input, grad_outputs, dim, indice, input.size(), True) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def complex(real, imag): + out_shape = infer_size(real.size().data, imag.size().data) + if real.get_dtype() == Dtype.float64: + out = Tensor(out_shape, Dtype.complex128) + elif real.get_dtype() == Dtype.float32: + out = Tensor(out_shape, Dtype.complex64) + func = check_function("diopiComplex") + ret = func(real.context(), out, real, imag) + check_returncode(ret) + return out + +def conj(input): + out = raw_like(input) + func = check_function("diopiConj") + ret = func(input.context(), out, input) + + check_returncode(ret) + return out + +def imag(input): + out = raw_like(input) + func = check_function("diopiImag") + ret = 
func(input.context(), out, input) + + check_returncode(ret) + return out + +def real(input): + out = raw_like(input) + func = check_function("diopiReal") + ret = func(input.context(), out, input) + + check_returncode(ret) + return out + def topk(input, k, dim=-1, largest=True, sorted=True): sizeI = input.size().data if len(sizeI) > 0: @@ -2472,10 +3455,69 @@ def conv2d_backward( grad_input = raw_like(input) grad_weight = raw_like(weight) - - keys = ["input", "weight"] - grads = [grad_input, grad_weight] - out = {k: v for k, v in zip(keys, grads) if v.requires_grad} + + keys = ["input", "weight"] + grads = [grad_input, grad_weight] + out = {k: v for k, v in zip(keys, grads) if v.requires_grad} + + if bias is None: + grad_bias = None + sizeBias = None + else: + gradBias = raw_like(bias) + grad_bias = gradBias + sizeBias = bias.size() + out.update({"bias": grad_bias}) + + func = check_function("diopiConvolution2dBackward") + ret = func( + input.context(), + grad_input, + grad_weight, + grad_bias, + grad_outputs[0], + input, + weight, + sizeBias, + stride, + padding, + dilation, + groups, + ) + check_returncode(ret) + return out + + +def conv_transpose2d_backward( + input, + grad_outputs, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + sizeI = input.size().data + sizeW = weight.size().data + assert len(sizeI) == 4 and len(sizeW) == 4, "input and weight must be 4d tensors" + + if isinstance(stride, int): + stride = (stride, stride) + if isinstance(padding, int): + padding = (padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + dilation = Sizes(list(dilation)) + + grad_input = raw_like(input) + grad_weight = raw_like(weight) + out = {"input": grad_input, "weight": grad_weight} if bias is None: grad_bias = None @@ -2486,7 +3528,9 @@ def conv2d_backward( sizeBias = bias.size() out.update({"bias": grad_bias}) - func = check_function("diopiConvolution2dBackward") + output_padding = Sizes(list([0, 0])) + + func = check_function("diopiConvTranspose2dBackward") ret = func( input.context(), grad_input, @@ -2499,13 +3543,13 @@ def conv2d_backward( stride, padding, dilation, + output_padding, groups, ) check_returncode(ret) - return out - + return {k: v for k, v in out.items() if v.requires_grad} -def conv_transpose2d_backward( +def conv_transpose3d_backward( input, grad_outputs, weight, @@ -2514,23 +3558,27 @@ def conv_transpose2d_backward( padding=0, dilation=1, groups=1, + output_padding=0, **kwargs, ) -> Tensor: assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" sizeI = input.size().data sizeW = weight.size().data - assert len(sizeI) == 4 and len(sizeW) == 4, "input and weight must be 4d tensors" + assert len(sizeI) == 5 and len(sizeW) == 5, "input and weight must be 5d tensors" if isinstance(stride, int): - stride = (stride, stride) + stride = (stride, stride, stride) if isinstance(padding, int): - padding = (padding, padding) + padding = (padding, padding, padding) if isinstance(dilation, int): - dilation = (dilation, dilation) + dilation = (dilation, dilation, dilation) + if isinstance(output_padding, int): + output_padding = (output_padding, output_padding, output_padding) stride = Sizes(list(stride)) padding = Sizes(list(padding)) dilation = Sizes(list(dilation)) + output_padding = Sizes(list(output_padding)) grad_input = raw_like(input) grad_weight = 
raw_like(weight) @@ -2545,9 +3593,7 @@ def conv_transpose2d_backward( sizeBias = bias.size() out.update({"bias": grad_bias}) - output_padding = Sizes(list([0, 0])) - - func = check_function("diopiConvTranspose2dBackward") + func = check_function("diopiConvTranspose3dBackward") ret = func( input.context(), grad_input, @@ -2567,6 +3613,7 @@ def conv_transpose2d_backward( return {k: v for k, v in out.items() if v.requires_grad} + def hardtanh_backward( input, grad_outputs, min_val=-1.0, max_val=1.0, **kwargs ) -> Tensor: @@ -3385,6 +4432,70 @@ def conv_transpose2d( check_returncode(ret) return out +def conv_transpose3d( + input, + weight, + bias=None, + stride=1, + padding=0, + output_padding=0, + groups=1, + dilation=1, +) -> Tensor: + if bias is not None: + assert isinstance(bias, Tensor), "bias must be a Tensor" + + sizeI = input.size().data + sizeW = list(weight.size().data) + assert len(sizeI) == 5 and len(sizeW) == 5, "input and weight must be 5d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + sizeO.append(sizeW[1] * groups) + + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(output_padding, int): + output_padding = (output_padding, output_padding, output_padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + for i in range(-3, 0): + # equivalent kernel size + sizeW[i] = (sizeW[i] - 1) * dilation[i] + sizeO.append( + int( + (sizeI[i] - 1) * stride[i] + - 2 * padding[i] + + sizeW[i] + + output_padding[i] + ) + + 1 + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + output_padding = Sizes(list(output_padding)) + dilation = Sizes(list(dilation)) + + out = Tensor(sizeO, input.get_dtype()) + func = check_function("diopiConvTranspose3d") + ret = func( + input.context(), + out, + input, + weight, + bias, + stride, + padding, + output_padding, + groups, + dilation, + ) + check_returncode(ret) + return out + + def cumsum(input, dim, dtype=None): assert isinstance(dim, int), "dim should be int" @@ -3406,6 +4517,15 @@ def cumsum(input, dim, dtype=None): return out +def cumsum_backward(input, grad_outputs, dim, **kwargs): + grad_output = grad_outputs[0] + grad_input = raw_like(input) + func = check_function("diopiCumsumBackward") + ret = func(input.context(), grad_input, grad_output, dim) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + def infer_size(a, b): dimsA = len(a) dimsB = len(b) @@ -3597,6 +4717,37 @@ def argmax(input, dim=None, keepdim=False): return out +def argmin(input, dim=None, keepdim=False): + sizeO = list(input.size().data) + if len(sizeO) > 0 and dim is not None: + assert dim < len(sizeO), "dim out of index" + if keepdim: + sizeO[dim] = 1 + else: + sizeO = sizeO[:dim] + sizeO[dim + 1 :] + else: + sizeO = [1] + + out = Tensor(sizeO, from_numpy_dtype(glob_vars.int_type)) + func = check_function("diopiArgmin") + # todo: check the reason of using keepdim + ret = ( + func(input.context(), out, input, keepdim) + if dim is None + else func(input.context(), out, input, dim, keepdim) + ) + check_returncode(ret) + + return out + +def argsort(input, dim=-1, descending=False, stable=False): + out = Tensor(input.size().data, from_numpy_dtype(glob_vars.int_type)) + func = check_function("diopiArgsort") + ret = func(input.context(), out, input, stable, dim, descending) + check_returncode(ret) + + return out + def smooth_l1_loss(input, target, reduction="mean", beta=1.0): assert ( @@ -3932,8 
+5083,34 @@ def norm(input, p, dim=None, keepdim=False, dtype=None): func = check_function("diopiNorm") ret = func(input.context(), out, input, p, dim) check_returncode(ret) + + GLOBAL_STATE["norm"] = out return out +def norm_backward(grad_outputs, input, p, dim, keepdim=False, dtype=None): + if p == 0: + return {'input': None} + else: + grad_input = raw_like(input) + + p = Scalar(p) + + dim, _ = reduce_op_process(input, dim, keepdim, dtype) + + dim = Sizes(list(dim)) + + grad_output = grad_outputs[0] + + out = {"input": grad_input} + + func = check_function("diopiNormBackward") + + norm = GLOBAL_STATE.pop("norm") + ret = func(input.context(), grad_input, grad_output, input, norm, dim, p) + check_returncode(ret) + + return {k: v for k, v in out.items() if v.requires_grad} + def group_norm(input, num_groups, weight=None, bias=None, eps=1e-05): dim = list(input.size().data) @@ -3997,6 +5174,62 @@ def group_norm_backward( check_returncode(ret) return {k: v for k, v in out.items() if v.requires_grad} +def instance_norm(input, axis=2, weight=None, bias=None, eps=1e-05): + weight = None if weight is None else weight + bias = None if bias is None else bias + + out = raw_like(input) + func = check_function("diopiInstanceNorm") + ret = func( + input.context(), + out, + input, + axis, + weight, + bias, + eps, + ) + check_returncode(ret) + return out + +def instance_norm_backward(input, grad_outputs, axis=2, weight=None, bias=None, eps=1e-05, **kwargs): + grad_input = raw_like(input) + out = {"input": grad_input} + + if weight is None: + weight = None + grad_weight_capsule = None + else: + grad_weight = raw_like(weight) + weight = weight + grad_weight_capsule = grad_weight + out["weight"] = grad_weight + + if bias is None: + bias = None + grad_bias_capsule = None + else: + grad_bias = raw_like(bias) + bias = bias + grad_bias_capsule = grad_bias + out["bias"] = grad_bias + + func = check_function("diopiInstanceNormBackward") + ret = func( + input.context(), + grad_input, + grad_weight_capsule, + grad_bias_capsule, + grad_outputs[0], + input, + weight, + bias, + axis, + eps, + ) + check_returncode(ret) + return {k: v for k, v in out.items() if v.requires_grad} + def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05): sizeI = input.size().data @@ -4008,26 +5241,107 @@ def layer_norm(input, normalized_shape, weight=None, bias=None, eps=1e-05): weight = None if weight is None else weight bias = None if bias is None else bias - out = raw_like(input) - func = check_function("diopiLayerNorm") + out = raw_like(input) + func = check_function("diopiLayerNorm") + ret = func( + input.context(), + out, + save_mean, + save_invstd, + input, + weight, + bias, + Sizes(normalized_shape), + eps, + ) + check_returncode(ret) + GLOBAL_STATE["layer_norm_save_mean"] = save_mean + GLOBAL_STATE["layer_norm_save_invstd"] = save_invstd + return out + +def layer_normGB(input, normalized_shape, weight=None, bias=None, eps=1e-05): + sizeI = input.size().data + dims = len(sizeI) - len(normalized_shape) + size = [i for i in sizeI[0:dims]] + save_mean = Tensor(size, input.get_dtype()) + save_invstd = raw_like(save_mean) + + weight = None if weight is None else weight + bias = None if bias is None else bias + + out = raw_like(input) + + # Note that, this is the national standard GB operator version, which is different from the diopiLayerNorm interface definition, normalized_shape has changed to begin_norm_axis. 
+ func = check_function("diopiLayerNormGB") + ret = func( + input.context(), + out, + save_mean, + save_invstd, + input, + weight, + bias, + eps, + dims + ) + check_returncode(ret) + GLOBAL_STATE["layer_norm_save_mean"] = save_mean + GLOBAL_STATE["layer_norm_save_invstd"] = save_invstd + return out + + + +def layer_norm_backward( + input, + grad_outputs, + normalized_shape, + weight=None, + bias=None, + eps=1e-05, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + save_mean = GLOBAL_STATE.pop("layer_norm_save_mean") + save_invstd = GLOBAL_STATE.pop("layer_norm_save_invstd") + grad_input = raw_like(input) + out = {"input": grad_input} + + if weight is None: + weight = None + grad_weight_capsule = None + else: + grad_weight = raw_like(weight) + weight = weight + grad_weight_capsule = grad_weight + out["weight"] = grad_weight + + if bias is None: + bias = None + grad_bias_capsule = None + else: + grad_bias = raw_like(bias) + bias = bias + grad_bias_capsule = grad_bias + out["bias"] = grad_bias + + func = check_function("diopiLayerNormBackward") ret = func( input.context(), - out, - save_mean, - save_invstd, + grad_input, + grad_weight_capsule, + grad_bias_capsule, + grad_outputs[0], input, weight, bias, + save_mean, + save_invstd, Sizes(normalized_shape), - eps, ) check_returncode(ret) - GLOBAL_STATE["layer_norm_save_mean"] = save_mean - GLOBAL_STATE["layer_norm_save_invstd"] = save_invstd - return out - + return {k: v for k, v in out.items() if v.requires_grad} -def layer_norm_backward( +def layer_normGB_backward( input, grad_outputs, normalized_shape, @@ -4042,6 +5356,9 @@ def layer_norm_backward( grad_input = raw_like(input) out = {"input": grad_input} + sizeI = input.size().data + dim = len(sizeI) - len(normalized_shape) + if weight is None: weight = None grad_weight_capsule = None @@ -4060,7 +5377,7 @@ def layer_norm_backward( grad_bias_capsule = grad_bias out["bias"] = grad_bias - func = check_function("diopiLayerNormBackward") + func = check_function("diopiLayerNormGBBackward") ret = func( input.context(), grad_input, @@ -4072,11 +5389,477 @@ def layer_norm_backward( bias, save_mean, save_invstd, - Sizes(normalized_shape), + dim + ) + check_returncode(ret) + return {k: v for k, v in out.items() if v.requires_grad} + +def normalize(input, p, dim, eps): + output = raw_like(input) + + func = check_function("diopiNormalize") + + ret = func( + input.context(), + output, + input, + p, + dim, + eps ) + check_returncode(ret) + + return output + +def normalize_backward(grad_outputs, input, p, dim, eps): + + grad_output = grad_outputs[0] + + func = check_function("diopiNormalizeBackward") + + grad_input = raw_like(input) + + out = {'input': grad_input} + + ret = func( + input.context(), + grad_input, + grad_output, + input, + p, + dim, + eps + + ) return {k: v for k, v in out.items() if v.requires_grad} +def pool3d(input, kernel_size=0, stride=None, padding=0, dilation=1, ceil_mode=False, count_include_pad=True, output_size=0, mode="avg", adaptive=False) -> Tensor: + + sizeI = input.size().data + assert len(sizeI) == 5 or len(sizeI) == 4, "input must be 4d or 5d tensors" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 5: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, 
stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + if mode == "avg" and adaptive == False: + for i in range(-3, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "max" and adaptive == False: + for i in range(-3, 0): + tmp_ker_size = kernel_size[i] + (kernel_size[i] - 1) * (dilation[i] - 1) + tmp_size = (sizeI[i] - tmp_ker_size + 2 * padding[i]) / stride[i] + 1 + tmp_size = tmp_size if tmp_size > 1 else 1 + if ceil_mode: + sizeO.append(math.ceil(tmp_size)) + else: + sizeO.append(math.floor(tmp_size)) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + output_size = Sizes(list(output_size)) + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + output_size, + ) + check_returncode(ret) + return out + elif mode == "avg" and adaptive == True: + for i in range(-3, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-3], sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + else: + for i in range(-3, 0): + if output_size[i] is None: + sizeO.append(sizeI[i]) + else: + sizeO.append(output_size[i]) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + nhwc_stride = compute_nhwc_stride_1d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + output_size = Sizes(list([sizeO[-3], sizeO[-2], sizeO[-1]])) + + func = check_function("diopiPool3d") + ret = func( + input.context(), + out, + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + output_size, + ) + + check_returncode(ret) + return out + + +def pool3d_backward(input, grad_outputs, 
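+                   # Note (sketch): for mode="max" this backward does not receive indices from the
+                   # caller; it re-runs the forward with return_indices=True, e.g.
+                   #     _, indices = max_pool3d(input, kernel_size, stride, padding, dilation, ceil_mode, True)
+                   # before calling diopiPool3dBackward.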
kernel_size=0, stride=0, padding=0, dilation=0, ceil_mode=0, count_include_pad=True, mode="avg", adaptive=False, output_size=0, **kwargs,) -> Tensor: + + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + assert mode in ["avg", "max"] and adaptive in [True, False], "mode or adaptive has wrong type" + grad_input = raw_like(input) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + if isinstance(dilation, int): + dilation = (dilation, dilation, dilation) + if isinstance(output_size, int): + output_size = (output_size, output_size, output_size) + + + if mode == "avg" and adaptive == False: + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == False: + + _, indices = max_pool3d( + input, kernel_size, stride, padding, dilation, ceil_mode, True + ) + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + False, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "avg" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + indices = raw_like(input) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "avg", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + elif mode == "max" and adaptive == True: + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + dilation = Sizes(list(dilation)) + + _, indices = adaptive_max_pool3d(input, output_size, return_indices=True) + + func = check_function("diopiPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + "max", + kernel_size, + stride, + padding, + dilation, + ceil_mode, + not count_include_pad, + True, + indices, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + +def avg_pool3d( + input, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, +) -> Tensor: + sizeI = input.size().data + assert len(sizeI) == 5 or len(sizeI) == 4, "input must be 4d or 5d tensors" + + sizeO = [] + sizeO.append(sizeI[0]) + if len(sizeI) == 5: + sizeO.append(sizeI[1]) + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, 
kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + + for i in range(-3, 0): + if ceil_mode: + sizeO.append( + math.ceil((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + else: + sizeO.append( + math.floor((sizeI[i] - kernel_size[i] + 2 * padding[i]) / stride[i]) + 1 + ) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + nhwc_stride = compute_nhwc_stride_3d(sizeO) if glob_vars.nhwc else None + out = Tensor(sizeO, input.get_dtype(), stride=nhwc_stride) + + func = check_function("diopiAvgPool3d") + if divisor_override: + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override, + ) + else: + ret = func( + input.context(), + out, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + check_returncode(ret) + return out + +def avg_pool3d_backward( + input, + grad_outputs, + kernel_size, + stride=None, + padding=0, + ceil_mode=False, + count_include_pad=True, + divisor_override=None, + **kwargs, +) -> Tensor: + assert len(grad_outputs) == 1, "only accept 1 gradient to do backward" + grad_input = raw_like(input) + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size, kernel_size) + if stride is None: + stride = kernel_size + if isinstance(stride, int): + stride = (stride, stride, stride) + if isinstance(padding, int): + padding = (padding, padding, padding) + + stride = Sizes(list(stride)) + padding = Sizes(list(padding)) + kernel_size = Sizes(list(kernel_size)) + + func = check_function("diopiAvgPool3dBackward") + ret = ( + func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override, + ) + if divisor_override + else func( + input.context(), + grad_input, + grad_outputs[0], + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + ) + ) + check_returncode(ret) + return {"input": grad_input} if grad_input.requires_grad else {} + + def adaptive_avg_pool3d(input, output_size): sizeI = input.size().data @@ -5047,6 +6830,16 @@ def meshgrid(tensors, shape=None): check_returncode(ret) return out +def grid_sample(input, grid, mode="bilinear"): + if len(input.size().data) == 4: + out = Tensor(size=(input.size().data[0], input.size().data[1], grid.size().data[1], grid.size().data[2],), dtype=input.dtype()) + else: + out = Tensor(size=(input.size().data[0], input.size().data[1], grid.size().data[1], grid.size().data[2], grid.size().data[3],), dtype=input.dtype()) + func = check_function("diopiGridSample") + ret = func(input.context(), out, input, grid, mode) + check_returncode(ret) + return out + def cast_dtype(input, out) -> Tensor: call = "diopiCastDtype" diff --git a/diopi_test/python/conformance/diopi_runtime.py b/diopi_test/python/conformance/diopi_runtime.py index 5b319ec7a..c5f5826b6 100644 --- a/diopi_test/python/conformance/diopi_runtime.py +++ b/diopi_test/python/conformance/diopi_runtime.py @@ -138,6 +138,19 @@ def to_numpy_dtype(dtype: Dtype) -> np.dtype: def is_dtype(dtype) -> bool: return isinstance(dtype, Dtype) +def compute_nhwc_stride_1d(sizes, itemsize=1): + dim = len(sizes) + strides = [itemsize for i in range(dim)] + assert dim == 2 or dim == 3, "not supported dim" + if dim == 2: + strides[0] = itemsize + 
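+        # Worked example (assuming itemsize=1): sizes=(4, 16) -> strides=[1, 4], i.e. the channel
+        # dimension stays contiguous for a (C, L) tensor; for dim == 3, sizes=(4, 7, 27) ->
+        # strides=[189, 1, 7], the channels-last (NLC) layout.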
strides[1] = strides[0] * sizes[0] + elif dim == 3: + strides[1] = itemsize + strides[2] = strides[0] * sizes[1] + strides[0] = strides[2] * sizes[2] + return strides + def compute_nhwc_stride_2d(sizes, itemsize=1): dim = len(sizes) diff --git a/impl/torch/functions/functions.cpp b/impl/torch/functions/functions.cpp index c348ff5d8..6ee8e104e 100644 --- a/impl/torch/functions/functions.cpp +++ b/impl/torch/functions/functions.cpp @@ -102,6 +102,203 @@ diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t inp return diopiSuccess; } +diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + bool atCeilMode = ceil_mode; + auto atOut = CALL_ATEN_FUNC(max_pool1d, atInput, atKernelSize, atStride, atPadding, atDilation, atCeilMode); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, + bool ceil_mode, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + auto atIndices = impl::aten::buildATen(indices); + + auto atGrad2d = CALL_ATEN_FUNC(max_pool2d_with_indices_backward, + atGradOutput.unsqueeze(-2), + atInput.unsqueeze(-2), + {1, atKernelSize[0]}, + {1, atStride[0]}, + {0, atPadding[0]}, + {1, atDilation[0]}, + ceil_mode, + atIndices.unsqueeze(-2)); + + auto atGradInput = atGrad2d.squeeze(-2); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + return diopiSuccess; +} + +diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, + diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + auto atOut = impl::aten::buildATen(out); + auto atIndices = impl::aten::buildATen(indices); + bool atCeilMode = ceil_mode; + std::tuple atRes = CALL_ATEN_FUNC(max_pool1d_with_indices, atInput, atKernelSize, atStride, atPadding, atDilation, atCeilMode); + + impl::aten::updateATen2Tensor(ctx, std::get<0>(atRes), out); + impl::aten::updateATen2Tensor(ctx, std::get<1>(atRes), indices); + + return 
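+    // Sketch of the 1d-as-2d trick used by diopiMaxPool1dBackward above (assumed NCL layout):
+    // input (N, C, L) is unsqueezed to (N, C, 1, L) and max_pool2d_with_indices_backward is called
+    // with kernel {1, k}, stride {1, s}, padding {0, p}, dilation {1, d}; the result is squeezed
+    // back to (N, C, L).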
diopiSuccess; +} + +diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, bool ceil_mode, bool count_include_pad) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + auto atOut = CALL_ATEN_FUNC(avg_pool1d, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad); + + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + + auto atGrad2d = CALL_ATEN_FUNC(avg_pool2d_backward, + atGradOutput.unsqueeze(-2), + atInput.unsqueeze(-2), + {1, atKernelSize[0]}, + {1, atStride[0]}, + {0, atPadding[0]}, + ceil_mode, + count_include_pad, + c10::nullopt); + + auto atGradInput = atGrad2d.squeeze(-2); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + return diopiSuccess; +} + +diopiError_t diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOutSize = impl::aten::buildAtIntArray(output_size); + auto atOuts = at::adaptive_max_pool1d(atInput, atOutSize); + impl::aten::updateATen2Tensor(ctx, std::get<0>(atOuts), out); + + return diopiSuccess; +} + +diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOutSize = impl::aten::buildAtIntArray(output_size); + auto [atOut, atIndices] = CALL_ATEN_FUNC(adaptive_max_pool1d, atInput, atOutSize); + + impl::aten::updateATen2Tensor(ctx, atOut, out); + impl::aten::updateATen2Tensor(ctx, atIndices, indices); + + return diopiSuccess; +} + +diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atIndices = impl::aten::buildATen(indices); + auto atGrad2d = CALL_ATEN_FUNC(adaptive_max_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), atIndices.squeeze(-2)); + auto atGradInput = atGrad2d.squeeze(-2); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + return diopiSuccess; +} + +diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + 
diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + auto atGrad2d = CALL_ATEN_FUNC(_adaptive_avg_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2)); + auto atGradInput = atGrad2d.squeeze(-2); + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + + return diopiSuccess; +} + +diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOutSize = impl::aten::buildAtIntArray(output_size); + auto atOut = CALL_ATEN_FUNC(adaptive_avg_pool1d, atInput, atOutSize); + + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool1d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool1d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool1d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool1d(ctx, out, input, output_size); + } +} + +diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, + const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool1dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool1dBackward(ctx, grad_input, grad_output, input); + } +} + diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { impl::aten::setCurStream(ctx); @@ -133,6 +330,29 @@ diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHand return diopiSuccess; } +// TODO +diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, bool exclusive, bool adaptive) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + 
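+    // One possible completion (sketch, mirroring diopiPool1d above): ("max", !adaptive) -> diopiMaxPool2d,
+    // ("avg", !adaptive) -> diopiAvgPool2d with count_include_pad = !exclusive,
+    // ("max", adaptive) -> diopiAdaptiveMaxPool2d, otherwise diopiAdaptiveAvgPool2d; the adaptive
+    // cases would also need an output size, as in the overload added later in this file.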
at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation); + bool atCeilMode = ceil_mode; + at::Tensor atOut = {}; + if (strcmp(mode, "max") == 0 && adaptive) { + } + if (strcmp(mode, "max") == 0 && !adaptive) { + } + if (strcmp(mode, "avg") == 0 && adaptive) { + } + if (strcmp(mode, "avg") == 0 && !adaptive) { + } + + return diopiSuccess; +} + /** * @brief * @param rounding_mode supported in pytorch>=1.8 @@ -755,6 +975,60 @@ diopiError_t diopiSort(diopiContextHandle_t ctx, diopiTensorHandle_t values, dio return diopiSuccess; } +diopiError_t diopiSortBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim, + diopiConstTensorHandle_t indices, diopiSize_t sizes, bool keepdim = 1) { + impl::aten::setCurStream(ctx); + auto atGrad_input = impl::aten::buildATen(grad_input); + auto atGrad_output = impl::aten::buildATen(grad_output); + auto atIndices = impl::aten::buildATen(indices); + auto atSizes = impl::aten::buildAtIntArray(sizes); + atGrad_input = CALL_ATEN_FUNC(value_selecting_reduction_backward, atGrad_output, dim, atIndices, atSizes, keepdim); + impl::aten::updateATen2Tensor(ctx, atGrad_input, grad_input); + + return diopiSuccess; +} + +diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t real, diopiConstTensorHandle_t imag) { + impl::aten::setCurStream(ctx); + auto atReal = impl::aten::buildATen(real); + auto atImag = impl::aten::buildATen(imag); + auto atOut = impl::aten::buildATen(out); + atOut = torch::complex(atReal, atImag); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + atOut = torch::conj(atInput); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + atOut = torch::imag(atInput); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + +diopiError_t diopiReal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + atOut = torch::real(atInput); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t diopiTopk(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t k, int64_t dim, bool largest, bool sorted) { impl::aten::setCurStream(ctx); @@ -830,6 +1104,40 @@ diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiAcos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(acos_out, atOut, atInput); + + return diopiSuccess; +} + 
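+// Worked example for diopiSortBackward above (illustrative): sorting x = [3, 1, 2] along dim 0
+// yields values [1, 2, 3] and indices [1, 2, 0]; value_selecting_reduction_backward then scatters
+// grad_output back through those indices, i.e. grad_input[indices[i]] = grad_output[i].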
+diopiError_t diopiAcosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(acos_, atInput); + + return diopiSuccess; +} + +diopiError_t diopiTan(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(tan_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiTanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(tan_, atInput); + + return diopiSuccess; +} + diopiError_t diopiAbs(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -924,6 +1232,40 @@ diopiError_t diopiSign(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC return diopiSuccess; } +diopiError_t diopiSinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(sinh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiSinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(sinh_, atInput); + + return diopiSuccess; +} + +diopiError_t diopiCosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(cosh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiCoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(cosh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiTanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -958,6 +1300,57 @@ diopiError_t diopiAtanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiAsinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(asinh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAsinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(asinh_, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAcosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(acosh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAcoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(acosh_, atInput); + + return 
diopiSuccess; +} + +diopiError_t diopiAtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(atanh_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiAtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(atanh_, atInput); + + return diopiSuccess; +} + diopiError_t diopiSigmoid(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -1018,6 +1411,23 @@ diopiError_t diopiExpInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { return diopiSuccess; } +diopiError_t diopiExpm1(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(expm1_out, atOut, atInput); + + return diopiSuccess; +} + +diopiError_t diopiExpm1Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + CALL_ATEN_CUDA_FUNC(expm1_, atInput); + + return diopiSuccess; +} + diopiError_t diopiLog(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -1900,6 +2310,38 @@ diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } +diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool2d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool2d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool2d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool2d(ctx, out, input, output_size); + } +} + +diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, + const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool2dBackward(ctx, grad_input, 
grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool2dBackward(ctx, grad_input, grad_output, input); + } +} + diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t mask, diopiConstTensorHandle_t input, double p, bool train, diopiGeneratorHandle_t generator) { impl::aten::setCurStream(ctx); @@ -2246,7 +2688,79 @@ diopiError_t diopiConvolution2dBackward(diopiContextHandle_t ctx, diopiTensorHan auto grad_input_mask = std::array{true, true, false}; auto atOut = CALL_ATEN_FUNC(miopen_convolution_backward, atInput, atGrad, atWeight, atPadding, atStride, atDilation, groups, false, false, grad_input_mask); updateATen2Tensor(ctx, atOut, vecOut); - if (bias_sizes && grad_bias) { + if (bias_sizes && grad_bias) { + auto atGradBias = impl::aten::buildATen(grad_bias); + at::Tensor atTmp = atGrad; + int64_t size = atGrad.dim() - 1; + while (atGradBias.dim() != size) { + atTmp = at::sum(atTmp, -1, false); + size -= 1; + } + CALL_ATEN_CUDA_FUNC(sum_out, atGradBias, atTmp, 0, false); + } +#else + std::vector outputPadding(padding.len, 0); + if (grad_input && grad_weight && grad_bias && bias_sizes) { + // TODO(ywt): when pytorch fix the bug of empty tensor, remove the + // check of grad_input && grad_weight + auto atBiasSizes = impl::aten::buildAtIntArray(bias_sizes); + auto atGradInput = impl::aten::buildATen(grad_input); + auto atGradWeight = impl::aten::buildATen(grad_weight); + auto atGradBias = impl::aten::buildATen(grad_bias); + auto tempOut = CALL_ATEN_CUDA_FUNC( + convolution_backward, atGrad, atInput, atWeight, atBiasSizes, atStride, atPadding, atDilation, false, outputPadding, groups, {true, true, true}); + at::native::copy_(atGradInput, std::get<0>(tempOut), true); + at::native::copy_(atGradWeight, std::get<1>(tempOut), true); + at::native::copy_(atGradBias, std::get<2>(tempOut), true); + } else { + auto results = at::convolution_backward( + atGrad, atInput, atWeight, c10::nullopt, atStride, atPadding, atDilation, false, outputPadding, groups, {true, true, false}); + impl::aten::updateATen2Tensor(ctx, std::get<0>(results), grad_input); + impl::aten::updateATen2Tensor(ctx, std::get<1>(results), grad_weight); + if (bias_sizes && grad_bias) { + auto atGradBias = impl::aten::buildATen(grad_bias); + at::Tensor atTmp = atGrad; + int64_t size = atGrad.dim() - 1; + while (atGradBias.dim() != size) { + atTmp = at::sum(atTmp, -1, false); + size -= 1; + } + CALL_ATEN_CUDA_FUNC(sum_out, atGradBias, atTmp, 0, false); + } + } +#endif + + return diopiSuccess; +} + +diopiError_t diopiConvTranspose2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, diopiSize_t output_padding, int64_t groups) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atGrad = impl::aten::buildATen(grad_output); + auto atWeight = impl::aten::buildATen(weight); + auto atStride = impl::aten::buildAtIntArray(stride); + auto atPadding = impl::aten::buildAtIntArray(padding); + auto atOutputPadding = impl::aten::buildAtIntArray(output_padding); + auto atDilation = impl::aten::buildAtIntArray(dilation); +#ifdef USE_HIP + auto grad_input_mask = std::array{true, true, false}; + auto atOut = CALL_ATEN_FUNC(miopen_convolution_transpose_backward, + atInput, + atGrad, + 
atWeight, + atPadding, + atOutputPadding, + atStride, + atDilation, + groups, + false, + false, + grad_input_mask); + updateATen2Tensor(ctx, atOut, vecOut); + if (bias_sizes != nullptr && grad_bias != nullptr) { auto atGradBias = impl::aten::buildATen(grad_bias); at::Tensor atTmp = atGrad; int64_t size = atGrad.dim() - 1; @@ -2257,25 +2771,22 @@ diopiError_t diopiConvolution2dBackward(diopiContextHandle_t ctx, diopiTensorHan CALL_ATEN_CUDA_FUNC(sum_out, atGradBias, atTmp, 0, false); } #else - std::vector outputPadding(padding.len, 0); if (grad_input && grad_weight && grad_bias && bias_sizes) { - // TODO(ywt): when pytorch fix the bug of empty tensor, remove the - // check of grad_input && grad_weight auto atBiasSizes = impl::aten::buildAtIntArray(bias_sizes); auto atGradInput = impl::aten::buildATen(grad_input); auto atGradWeight = impl::aten::buildATen(grad_weight); auto atGradBias = impl::aten::buildATen(grad_bias); auto tempOut = CALL_ATEN_CUDA_FUNC( - convolution_backward, atGrad, atInput, atWeight, atBiasSizes, atStride, atPadding, atDilation, false, outputPadding, groups, {true, true, true}); + convolution_backward, atGrad, atInput, atWeight, atBiasSizes, atStride, atPadding, atDilation, true, atOutputPadding, groups, {true, true, true}); at::native::copy_(atGradInput, std::get<0>(tempOut), true); at::native::copy_(atGradWeight, std::get<1>(tempOut), true); at::native::copy_(atGradBias, std::get<2>(tempOut), true); } else { - auto results = at::convolution_backward( - atGrad, atInput, atWeight, c10::nullopt, atStride, atPadding, atDilation, false, outputPadding, groups, {true, true, false}); - impl::aten::updateATen2Tensor(ctx, std::get<0>(results), grad_input); - impl::aten::updateATen2Tensor(ctx, std::get<1>(results), grad_weight); - if (bias_sizes && grad_bias) { + auto grad_inputs = at::convolution_backward( + atGrad, atInput, atWeight, c10::nullopt, atStride, atPadding, atDilation, true, atOutputPadding, groups, {true, true, false}); + impl::aten::updateATen2Tensor(ctx, std::get<0>(grad_inputs), grad_input); + impl::aten::updateATen2Tensor(ctx, std::get<1>(grad_inputs), grad_weight); + if (bias_sizes != nullptr && grad_bias != nullptr) { auto atGradBias = impl::aten::buildATen(grad_bias); at::Tensor atTmp = atGrad; int64_t size = atGrad.dim() - 1; @@ -2291,7 +2802,7 @@ diopiError_t diopiConvolution2dBackward(diopiContextHandle_t ctx, diopiTensorHan return diopiSuccess; } -diopiError_t diopiConvTranspose2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, +diopiError_t diopiConvTranspose3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, diopiSize_t output_padding, int64_t groups) { @@ -2832,6 +3343,21 @@ diopiError_t diopiMeshGrid(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, return diopiSuccess; } +diopiError_t diopiGridSample(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grid, + const char* mode) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atGrid = impl::aten::buildATen(grid); + auto atOut = impl::aten::buildATen(out); + int interpolation_mode = 0; + if (strcmp(mode, "bilinear") != 0) { + interpolation_mode = 1; + } + atOut = 
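+    // interpolation_mode follows at::grid_sampler's convention: 0 = bilinear, 1 = nearest (any mode
+    // string other than "bilinear" falls back to nearest here); the trailing 0, 0 arguments pin
+    // padding_mode to zeros and align_corners to false.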
CALL_ATEN_FUNC(grid_sampler, atInput, atGrid, interpolation_mode, 0, 0); + impl::aten::updateATen2Tensor(ctx, atOut, out); + return diopiSuccess; +} + diopiError_t diopiFusedAdamW(diopiContextHandle_t ctx, diopiTensorHandle_t* params, diopiConstTensorHandle_t* grads, diopiTensorHandle_t* exp_avgs, diopiTensorHandle_t* exp_avg_sqs, diopiTensorHandle_t* max_exp_avg_sqs, diopiConstTensorHandle_t* state_steps, int64_t nums, float lr, float beta1, float beta2, float eps, float weight_decay, bool amsgrad, bool maximize) { @@ -2903,7 +3429,8 @@ diopiError_t diopiAdam(diopiContextHandle_t ctx, diopiTensorHandle_t param, diop if (weight_decay != 0) { grad_d = grad_d.add(atParam, weight_decay); } - atExpAvg.mul_(beta1).add_(grad_d, 1 - beta1); + // atExpAvg.mul_(beta1).add_(grad_d, 1 - beta1); + atExpAvg.lerp_(grad_d, 1 - beta1); atExpAvgSq.mul_(beta2).addcmul_(grad_d, grad_d.conj(), 1 - beta2); at::Tensor denom; @@ -2997,6 +3524,23 @@ diopiError_t diopiConvTranspose2d(diopiContextHandle_t ctx, diopiTensorHandle_t return diopiSuccess; } +diopiError_t diopiConvTranspose3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t bias, diopiSize_t stride, diopiSize_t padding, diopiSize_t output_padding, int64_t groups, + diopiSize_t dilation) { + impl::aten::setCurStream(ctx); + auto atInput = impl::aten::buildATen(input); + auto atWeight = impl::aten::buildATen(weight); + auto atBias = impl::aten::buildATen(bias); + auto atStride = impl::aten::buildAtIntArray(stride); + auto atPadding = impl::aten::buildAtIntArray(padding); + auto atOutputPadding = impl::aten::buildAtIntArray(output_padding); + auto atDilation = impl::aten::buildAtIntArray(dilation); + auto atOut = CALL_ATEN_FUNC(conv_transpose3d, atInput, atWeight, atBias, atStride, atPadding, atOutputPadding, groups, atDilation); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -3006,6 +3550,15 @@ diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diop return diopiSuccess; } +diopiError_t diopiCumsumBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atGradInput = atGradOutput; + impl::aten::updateATen2Tensor(ctx, atGradInput.flip(dim).cumsum(dim).flip(dim), grad_input); + + return diopiSuccess; +} + diopiError_t diopiCdist(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input1, diopiConstTensorHandle_t input2, double p, const int64_t* compute_mode) { impl::aten::setCurStream(ctx); @@ -3092,6 +3645,26 @@ diopiError_t diopiArgmax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diop return diopiSuccess; } +diopiError_t diopiArgmin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim) { + impl::aten::setCurStream(ctx); + auto atOut = impl::aten::buildATen(out); + auto atInput = impl::aten::buildATen(input); + c10::optional atDim = dim ? 
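+    // Worked example for diopiCumsumBackward above: since y_i = sum_{j<=i} x_j, the gradient is
+    // dL/dx_j = sum_{i>=j} g_i, which flip(dim).cumsum(dim).flip(dim) computes,
+    // e.g. g = [g0, g1, g2] -> [g0+g1+g2, g1+g2, g2].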
c10::optional(*dim) : c10::nullopt; + CALL_ATEN_CUDA_FUNC(argmin_out, atOut, atInput, atDim, keepdim); + + return diopiSuccess; +} + +diopiError_t diopiArgsort(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, bool stable, const int64_t* dim, bool descending) { + impl::aten::setCurStream(ctx); + auto atOut = impl::aten::buildATen(out); + auto atInput = impl::aten::buildATen(input); + atOut = CALL_ATEN_CUDA_FUNC(argsort, atInput, stable, (dim ? *dim : -1), descending); + impl::aten::updateATen2Tensor(ctx, atOut, out); + + return diopiSuccess; +} + diopiError_t diopiSmoothL1Loss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiReduction_t reduction, double beta) { impl::aten::setCurStream(ctx); @@ -3372,6 +3945,98 @@ diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC return diopiSuccess; } +at::Tensor unsqueeze_multiple(const at::Tensor& t, at::OptionalIntArrayRef opt_dim, size_t n_dims) { + if (opt_dim.has_value()) { + at::IntArrayRef dim = opt_dim.value(); + auto dim_size = dim.size(); + // Optimisation for two common cases + if (dim_size == 0) { + return t; + } else if (dim_size == 1) { + return t.unsqueeze(dim[0]); + } + } + auto dims_to_unsqueeze = at::dim_list_to_bitset(opt_dim, n_dims); + at::Tensor res = t; + for (const auto i : c10::irange(n_dims)) { + if (dims_to_unsqueeze[i]) { + res = res.unsqueeze(i); + } + } + return res; +} + +at::Tensor norm_backward(at::Tensor grad, const at::Tensor& self, const std::optional& p_, at::Tensor norm, at::IntArrayRef dim, bool keepdim) { + // NB: We mask fill the NaNs in the output to be zero but still do float + // division + // by zero, which ASAN complains about. One way to appease ASAN is to fill + // the problematic values with something arbitrary before the division, + // but we decide not to due to the perf hit. 
Instead we just silence ASAN + // where necessary + size_t ndim = self.dim(); + double p = p_.value_or(2.0).toDouble(); + at::Tensor self_scaled; + at::Tensor scale_v; + + if (!keepdim && self.dim() != 0) { + grad = unsqueeze_multiple(grad, dim, ndim); + norm = unsqueeze_multiple(norm, dim, ndim); + } + + if (p == 0.0) { + return {}; + } else if (p == 1.0) { + return self.sgn() * grad; + } else if (p == 2.0) { + return grad * (self / norm).masked_fill_(norm == 0, 0); + } else if (std::isinf(p)) { + // Derivative of amax(abs(self), dim, keepdim) but respecting nans + // We create a mask of `argmax`: it's argmax if self.abs() == norm or it's + // NaN + auto self_abs = self.abs(); + auto mask = self_abs.eq(norm).logical_or(self_abs.isnan()); + return self.sgn() * ((grad / mask.sum(dim, true)) * mask); + } else if (p < 1.0) { + self_scaled = self.sgn() * self.abs().pow_(p - 1).masked_fill_(self == 0, 0); + return self_scaled * grad * norm.pow(1 - p); + } else if (p < 2.0) { + self_scaled = self.sgn() * self.abs().pow_(p - 1); + scale_v = grad / norm.pow(p - 1); + scale_v.masked_fill_(norm == 0, 0); + return self_scaled * scale_v; + } else { + self_scaled = self * self.abs().pow_(p - 2); + scale_v = grad / norm.pow(p - 1); + scale_v.masked_fill_(norm == 0, 0); + return self_scaled * scale_v; + } +} + +diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t self, + diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atSelf = impl::aten::buildATen(self); + auto atP = impl::aten::buildAtScalar(p); + auto atNorm = impl::aten::buildATen(norm); + + at::IntArrayRef atDim = impl::aten::buildAtIntArray(dim); + bool keepdim = false; + + if (atSelf.dim() == atNorm.dim()) { + keepdim = true; + } + + auto atGradInput = norm_backward(atGradOutput, atSelf, atP, atNorm, atDim, keepdim); + + if (!atGradInput.defined()) { + return diopiSuccess; + } + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + return diopiSuccess; +} + diopiError_t diopiForeachnormScalar(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, diopiConstTensorHandle_t* inputs, int64_t inputSize, const diopiScalar_t* ord) { DIOPI_CHECK_PTR(outs); @@ -3489,6 +4154,31 @@ diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, d return diopiSuccess; } +diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps, + const int64_t begin_norm_axis) { + impl::aten::setCurStream(ctx); + + auto atOut = impl::aten::buildATen(out); + auto atMean = impl::aten::buildATen(running_mean); + auto atVar = impl::aten::buildATen(running_var); + + auto atInput = impl::aten::buildATen(input); + + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atBias, bias); + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atScale, scale); + + at::IntArrayRef atNormalizedShape(atInput.sizes().begin() + begin_norm_axis, atInput.sizes().end()); + + diopi_tensor_list vecOut = {out, running_mean, running_var}; + + auto Out = CALL_ATEN_CUDA_FUNC(native_layer_norm, atInput, atNormalizedShape, atScale, atBias, eps); + + impl::aten::updateATen2Tensor(ctx, Out, vecOut); + + return diopiSuccess; +} + diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, 
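+// Gradient sketch for norm_backward above: for p = 2, d||x||/dx = x / ||x|| (zeroed where the norm
+// is 0); for general p it is sign(x) * |x|^(p-1) / ||x||^(p-1), which is what the p < 2 and p >= 2
+// branches compute; for p = inf the gradient is split evenly over the elements attaining the max.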
diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape) { @@ -3537,6 +4227,182 @@ diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } +diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, + diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_std, + const int64_t begin_norm_axis) { + impl::aten::setCurStream(ctx); + diopiDtype_t mDtype, rDtype; + if (running_std) { + diopiGetTensorDtype(running_std, &rDtype); + } + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + + at::IntArrayRef atNormalizedShape(atInput.sizes().begin() + begin_norm_axis, atInput.sizes().end()); + + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atWeight, weight); + DIOPI_IMPL_BUILD_ATEN_OPTIONAL(atBias, bias); + auto grad_input_mask = std::array{true, atWeight.has_value(), atBias.has_value()}; + + auto atSaveMean = impl::aten::buildATen(running_mean); + diopiGetTensorDtype(running_mean, &mDtype); + if (diopiDtype_t::diopi_dtype_float16 == mDtype) { + atSaveMean = at::native::to(atSaveMean, impl::aten::getATenType(diopiDtype_t::diopi_dtype_float32).toScalarType(), false, true, c10::nullopt); + } + auto atSaveVar = impl::aten::buildATen(running_std); + diopiGetTensorDtype(running_std, &rDtype); + if (diopiDtype_t::diopi_dtype_float16 == rDtype) { + atSaveVar = at::native::to(atSaveVar, impl::aten::getATenType(diopiDtype_t::diopi_dtype_float32).toScalarType(), false, true, c10::nullopt); + } + + if (grad_input && grad_weight && grad_bias) { + auto atGradInput = impl::aten::buildATen(grad_input); + auto atGradWeight = impl::aten::buildATen(grad_weight); + auto atGradBias = impl::aten::buildATen(grad_bias); + at::native_layer_norm_backward_out( + atGradInput, atGradWeight, atGradBias, atGradOutput, atInput, atNormalizedShape, atSaveMean, atSaveVar, atWeight, atBias, grad_input_mask); + } else { + auto atOut = at::native_layer_norm_backward(atGradOutput, atInput, atNormalizedShape, atSaveMean, atSaveVar, atWeight, atBias, grad_input_mask); + if (grad_input) { + impl::aten::updateATen2Tensor(ctx, std::get<0>(atOut), grad_input); + } + if (grad_weight) { + impl::aten::updateATen2Tensor(ctx, std::get<1>(atOut), grad_weight); + } + if (grad_bias) { + impl::aten::updateATen2Tensor(ctx, std::get<2>(atOut), grad_bias); + } + } + + return diopiSuccess; +} + +diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, + diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps) { + impl::aten::setCurStream(ctx); + + auto atInput = impl::aten::buildATen(input); + auto atScale = impl::aten::buildATen(scale); + auto atBias = impl::aten::buildATen(bias); + auto input_size = atInput.sizes().vec(); + + std::vector reshaped_size = {1}; + int64_t shape = 1; + for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { + shape = shape * input_size[i]; + } + reshaped_size.push_back(shape); + for (int i = axis; i < 
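+    // Reshape sketch (assuming axis = 2 and NCHW input): (N, C, H, W) is viewed as (1, N*C, H, W)
+    // and scale/bias are tiled N times, so batch_norm in training mode normalizes every (n, c)
+    // feature map independently, which is exactly instance norm.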
input_size.size(); i++) { + reshaped_size.push_back(input_size[i]); + } + + auto atInputReshaped = atInput.contiguous().view(reshaped_size); + auto atScale_ = atScale.repeat(input_size[0]); + auto atBias_ = atBias.repeat(input_size[0]); + + // auto atRunningMean = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); + // auto atRunningStd = torch::empty({reshaped_size[1]}, torch::TensorOptions().device(torch::kCUDA)); + + auto atOutput = CALL_ATEN_FUNC(batch_norm, atInputReshaped, atScale_, atBias_, c10::nullopt, c10::nullopt, true, 0.0, eps, false); + impl::aten::updateATen2Tensor(ctx, atOutput, output); + return diopiSuccess; +} + +diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, diopiTensorHandle_t grad_bias, + diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t scale, + diopiConstTensorHandle_t bias, const int64_t axis, const double eps) { + impl::aten::setCurStream(ctx); + + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + auto atScale = impl::aten::buildATen(scale); + auto atBias = impl::aten::buildATen(bias); + auto input_size = atInput.sizes().vec(); + + auto atScale_ = atScale.repeat(input_size[0]); + + std::vector reshaped_size = {1}; + int64_t shape = 1; + for (int i = 0; i < std::min(axis, (int64_t)input_size.size()); i++) { + shape = shape * input_size[i]; + } + reshaped_size.push_back(shape); + for (int i = axis; i < input_size.size(); i++) { + reshaped_size.push_back(input_size[i]); + } + + std::vector mean_dim = {0}; + for (int i = 2; i < reshaped_size.size(); i++) { + mean_dim.push_back(i); + } + + auto atInputReshaped = atInput.contiguous().view(reshaped_size); + auto atGradOutputReshaped = atGradOutput.contiguous().view(reshaped_size); + + auto atMean = torch::mean(atInputReshaped, mean_dim); + auto atStd = torch::std(atInputReshaped, mean_dim); + + auto grad_input_mask = std::array{grad_input != nullptr, grad_scale != nullptr, grad_bias != nullptr}; + + auto atOut = + at::native_batch_norm_backward(atGradOutputReshaped, atInputReshaped, atScale_, c10::nullopt, c10::nullopt, atMean, atStd, true, eps, grad_input_mask); + + if (grad_input) { + impl::aten::updateATen2Tensor(ctx, std::get<0>(atOut), grad_input); + } + if (grad_scale) { + auto atGradScaleOrigin = torch::zeros_like(atScale); + for (int i = 0; i < std::get<1>(atOut).size(0); i++) { + atGradScaleOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<1>(atOut)[i]; + } + impl::aten::updateATen2Tensor(ctx, atGradScaleOrigin, grad_scale); + } + if (grad_bias) { + auto atGradBiasOrigin = torch::zeros_like(atBias); + for (int i = 0; i < std::get<2>(atOut).size(0); i++) { + atGradBiasOrigin[i % (atScale_.size(0) / input_size[0])] += std::get<2>(atOut)[i]; + } + impl::aten::updateATen2Tensor(ctx, atGradBiasOrigin, grad_bias); + } + + return diopiSuccess; +} + +diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, + const double eps) { + impl::aten::setCurStream(ctx); + + auto atInput = impl::aten::buildATen(input); + + auto atOut = impl::aten::buildATen(output); + + auto atDenom = atInput.norm(p, axis, true).clamp_min(eps).expand_as(atInput); + + CALL_ATEN_FUNC(div_out, atOut, atInput, atDenom); + + return diopiSuccess; +} + +diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, 
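+    // Derivation sketch for the body below: with d = max(||x||_p over `axis`, eps) and y = x / d,
+    // dL/dx = g / d + (dL/dd) * dd/dx, where dL/dd = sum(-x * g / d^2) over `axis` and dd/dx is the
+    // p-norm gradient, gated to 0 wherever the clamp at eps is active.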
diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps) { + impl::aten::setCurStream(ctx); + + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + auto atNorm = atInput.norm(p, axis, true); + auto atClamp = atNorm.clamp_min(eps); + auto atDenom = atClamp.expand_as(atInput); + auto atGradDenom = atInput * (-1 / atDenom / atDenom) * atGradOutput; + auto atGradClamp = atGradDenom.sum(axis, true); + auto atGradNorm = atGradClamp.masked_fill_(atNorm < eps, 0); + auto atGradOriginInput = norm_backward(atGradNorm, atInput, p, atNorm, axis, true); + auto atGradInput = (1 / atDenom) * atGradOutput + atGradOriginInput; + + impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input); + return diopiSuccess; +} + diopiError_t diopiAdaptiveAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); @@ -3641,6 +4507,69 @@ diopiError_t diopiMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; } +diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, + diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { + impl::aten::setCurStream(ctx); + + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + c10::optional atDivisorOverride = divisor_override ? c10::optional(*divisor_override) : c10::nullopt; + auto atOut = impl::aten::buildATen(out); + CALL_ATEN_CUDA_FUNC(avg_pool3d_out, atOut, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); + + return diopiSuccess; +} + +diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad, const int64_t* divisor_override) { + impl::aten::setCurStream(ctx); + auto atGradOutput = impl::aten::buildATen(grad_output); + auto atInput = impl::aten::buildATen(input); + at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size); + at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride); + at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding); + c10::optional atDivisorOverride = divisor_override ? 
c10::optional(*divisor_override) : c10::nullopt; + auto atGradInput = impl::aten::buildATen(grad_input); + CALL_ATEN_CUDA_FUNC( + avg_pool3d_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride); + return diopiSuccess; +} + +diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiSize_t output_size) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool3d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool3d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool3d(ctx, out, input, output_size); + } else { + return impl::cuda::diopiAdaptiveAvgPool3d(ctx, out, input, output_size); + } +} + +diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, + const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) { + impl::aten::setCurStream(ctx); + + if (adaptive == false && strcmp(mode, "max") == 0) { + return impl::cuda::diopiMaxPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices); + } else if (adaptive == false && strcmp(mode, "avg") == 0) { + return impl::cuda::diopiAvgPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr); + } else if (adaptive == true && strcmp(mode, "max") == 0) { + return impl::cuda::diopiAdaptiveMaxPool3dBackward(ctx, grad_input, grad_output, input, indices); + } else { + return impl::cuda::diopiAdaptiveAvgPool3dBackward(ctx, grad_input, grad_output, input); + } +} + diopiError_t diopiPermute(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dims) { impl::aten::setCurStream(ctx); auto atInput = impl::aten::buildATen(input); diff --git a/proto/include/diopi/functions.h b/proto/include/diopi/functions.h index bbedfa525..4f7dfcecb 100644 --- a/proto/include/diopi/functions.h +++ b/proto/include/diopi/functions.h @@ -302,6 +302,172 @@ DIOPI_API diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHa DIOPI_API diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope, bool input_is_result); +/** + * @brief Performs 1D max pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after max pooling. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. 
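+ *
+ * Illustrative usage sketch (not normative): the tensor handles come from the caller, and
+ * diopiSize_t is assumed here to be a {data pointer, length} pair.
+ * @code{.cpp}
+ * int64_t k[1] = {3}, s[1] = {2}, p[1] = {1}, d[1] = {1};
+ * diopiSize_t kernel_size{k, 1}, stride{s, 1}, padding{p, 1}, dilation{d, 1};
+ * // `out` must be pre-allocated with length floor((L + 2*padding - dilation*(kernel-1) - 1) / stride) + 1.
+ * diopiMaxPool1d(ctx, out, input, kernel_size, stride, padding, dilation, /*ceil_mode=*/false);
+ * @endcode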
+ */ +DIOPI_API diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); + +/** + * @brief Performs the backward pass for diopiMaxPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] indices the tensor storing the indices of max elements from the forward pass. + */ +DIOPI_API diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); + +/** + * @brief Performs 1D max pooling on the input tensor and returns the indices of max elements. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after max pooling. + * @param[out] indices the tensor storing the indices of max elements. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + */ +DIOPI_API diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, + diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode); + +/** + * @brief Performs 1D average pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after average pooling. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include padding in the count for averaging. + */ +DIOPI_API diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad); + +/** + * @brief Performs the backward pass for diopiAvgPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. 
+ * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include padding in the count for averaging. + */ +DIOPI_API diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad); + +/** + * @brief Performs 1D adaptive max pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after adaptive max pooling. + * @param[in] input the input tensor. + * @param[in] output_size the size of the output after pooling. + */ +DIOPI_API diopiError_t diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size); + +/** + * @brief Performs 1D adaptive max pooling on the input tensor and returns the indices of max elements. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after adaptive max pooling. + * @param[out] indices the tensor storing the indices of max elements. + * @param[in] input the input tensor. + * @param[in] output_size the size of the output after pooling. + */ +DIOPI_API diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, + diopiConstTensorHandle_t input, diopiSize_t output_size); + +/** + * @brief Performs the backward pass for diopiAdaptiveMaxPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] indices the tensor storing the indices of max elements from the forward pass. + */ +DIOPI_API diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); + +/** + * @brief Performs the backward pass for diopiAdaptiveAvgPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + */ +DIOPI_API diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input); + +/** + * @brief Performs 1D adaptive average pooling on the input tensor. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after adaptive average pooling. + * @param[in] input the input tensor. + * @param[in] output_size the size of the output after pooling. + */ +DIOPI_API diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size); + +/** + * @brief General 1D pooling operation with support for multiple pooling modes. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after pooling. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. 
+ * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] output_size the size of the output after pooling (if adaptive pooling is used). + */ +DIOPI_API diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); + +/** + * @brief Performs the backward pass for diopiPool1d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] indices the tensor storing the indices of max elements from the forward pass (if max pooling is used). + */ +DIOPI_API diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiConstTensorHandle_t indices); + /** * @brief Applies 2D average-pooling operation in kH×kW regions by step size sH×sW steps. * @param[in] ctx Context environment. @@ -387,6 +553,45 @@ DIOPI_API diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopi DIOPI_API diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices); +/** + * @brief General 2D pooling operation with support for multiple pooling modes. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after pooling. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] output_size the size of the output after pooling (if adaptive pooling is used). 
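+ *
+ * Illustrative call sketch (not normative): handles are created by the caller, diopiSize_t is
+ * assumed here to be a {data pointer, length} pair, and output_size is only consulted when
+ * adaptive pooling is requested.
+ * @code{.cpp}
+ * int64_t k[2] = {3, 3}, s[2] = {2, 2}, p[2] = {1, 1}, d[2] = {1, 1};
+ * diopiSize_t ksize{k, 2}, stride{s, 2}, padding{p, 2}, dilation{d, 2}, output_size{nullptr, 0};
+ * // Plain (non-adaptive) 2D max pooling; `exclusive` only affects "avg" mode.
+ * diopiPool2d(ctx, out, input, "max", ksize, stride, padding, dilation,
+ *             /*ceil_mode=*/false, /*exclusive=*/true, /*adaptive=*/false, output_size);
+ * @endcode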
+ */ +DIOPI_API diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); + +/** + * @brief Performs the backward pass for diopiPool2d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] indices the tensor storing the indices of max elements from the forward pass (if max pooling is used). + */ +DIOPI_API diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiConstTensorHandle_t indices); /** * @brief Applies a 2D adaptive average pooling over an input signal composed of several input planes. @@ -878,6 +1083,68 @@ DIOPI_API diopiError_t diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t */ DIOPI_API diopiError_t diopiCos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); +/** + * @brief The in-place version of diopiAcos(). + * @param[in] ctx Context environment. + * @param[in] input the input and output tensor and will be stored result tensor, + * type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + */ +DIOPI_API diopiError_t diopiAcosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Compute the element-wise arccosine values of the input tensor input. + * @param[in] ctx Context environment. + * @param[in] input Input tensor, type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + * @param[out] out the output tensor. type = [float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiAcos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input); + +/** + * @brief The in-place version of diopiTan(). + * @param[in] ctx Context environment. + * @param[in] input the input and output tensor and will be stored result tensor, + * type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + */ +DIOPI_API diopiError_t diopiTanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input); + +/** + * @brief Compute the element-wise tangent values of the input tensor input. + * @param[in] ctx Context environment. + * @param[in] input Input tensor, type = [float16, float32, float64, int16, int32, int64, uint8, int8]. + * @param[out] out the output tensor. type = [float16, float32, float64]. 
+ */
+DIOPI_API diopiError_t diopiTan(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
+/**
+ * @brief The in-place version of diopiSinh().
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiSinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
+
+/**
+ * @brief Returns a new tensor with the hyperbolic sine of the elements of input.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ * @param[out] out the output tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiSinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
+/**
+ * @brief The in-place version of diopiCosh().
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiCoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
+
+/**
+ * @brief Returns a new tensor with the hyperbolic cosine of the elements of input.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ * @param[out] out the output tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiCosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief The in-place version of diopiTanh().
  * @param[in] ctx Context environment.
@@ -915,6 +1182,51 @@ DIOPI_API diopiError_t diopiAtan(diopiContextHandle_t ctx, diopiTensorHandle_t o
  */
 DIOPI_API diopiError_t diopiAtanInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
 
+/**
+ * @brief The in-place version of diopiAsinh().
+ * @param[in] ctx Context environment.
+ * @param[inout] input the input tensor; the result is stored in it. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiAsinhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
+
+/**
+ * @brief Returns a new tensor with the arc hyperbolic sine of the elements of input.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ * @param[out] out the output tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiAsinh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
+/**
+ * @brief The in-place version of diopiAcosh().
+ * @param[in] ctx Context environment.
+ * @param[inout] input the input tensor; the result is stored in it. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiAcoshInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
+
+/**
+ * @brief Returns a new tensor with the arc hyperbolic cosine of the elements of input.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ * @param[out] out the output tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiAcosh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
+/**
+ * @brief The in-place version of diopiAtanh().
+ * @param[in] ctx Context environment.
+ * @param[inout] input the input tensor; the result is stored in it. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiAtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
+
+/**
+ * @brief Returns a new tensor with the arc hyperbolic tangent of the elements of input.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64].
+ * @param[out] out the output tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiAtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief The in-place version of diopiSigmoid().
  * @param[in] ctx Context environment.
@@ -978,6 +1290,22 @@ DIOPI_API diopiError_t diopiExpInp(diopiContextHandle_t ctx, diopiTensorHandle_t
  */
 DIOPI_API diopiError_t diopiExp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
 
+/**
+ * @brief The in-place version of diopiExpm1().
+ * @param[in] input the input tensor; the result is stored in it. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiExpm1Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input);
+
+/**
+ * @brief Returns a new tensor with the exponential of the elements of input, minus 1.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor. type = [float16, float32, float64, int16, int32,
+ * int64, uint8, int8, bool].
+ * @param[out] out the output tensor. type = [float16, float32, float64].
+ */
+DIOPI_API diopiError_t diopiExpm1(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief The in-place version of diopiLog().
  * @param[in] ctx Context environment.
@@ -2363,6 +2691,20 @@ DIOPI_API diopiError_t diopiStack(diopiContextHandle_t ctx, diopiTensorHandle_t
 DIOPI_API diopiError_t diopiSort(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t dim,
                                  bool descending, const bool* pStable);
 
+/**
+ * @brief Computes the gradient of the loss with respect to the input of diopiSort(), given the gradient with respect to the sorted output.
+ * @param[in] ctx Context environment.
+ * @param[in] grad_output Tensor containing the gradient of the loss with respect to the sorted output.
+ * @param[in] dim The dimension along which the sorting was performed; used to align the gradients with the corresponding dimension.
+ * @param[in] indices Tensor containing the indices that were used to sort the input tensor.
+ * @param[in] sizes The size of the input tensor, needed to restore the shape during gradient computation.
+ * @param[in] keepdim Boolean flag indicating whether the reduced dimensions were retained. If `true`, the reduced dimensions are kept with size one, which
+ * affects how gradients are accumulated.
+ * @param[out] grad_input Tensor to store the gradient with respect to the input tensor. This tensor will be updated with the computed gradient.
+ */
+DIOPI_API diopiError_t diopiSortBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim,
+                                         diopiConstTensorHandle_t indices, diopiSize_t sizes, bool keepdim);
+
 /**
  * @brief Returns the k largest elements of the given input tensor along a given dimension.
  * @param[in] ctx Context environment.
@@ -2551,6 +2893,44 @@ DIOPI_API diopiError_t diopiConvTranspose2dBackward(diopiContextHandle_t ctx, di diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, diopiSize_t output_padding, int64_t groups); +/** + * @brief Applies a 3D transposed convolution operator over an input image composed of several input planes, sometimes also called “deconvolution”. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type = [float32, float16, float64]. + * @param[in] weight the weight tensor; dimension of kernel_size must match the number of input spatial dimensions. + * type = [float32, float16, float64]. + * @param[in] bias bias tensor. type = [float32, float16, float64]. + * @param[in] stride an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] padding an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] output_padding an array, dimension == number of input spatial dimensions; only supported when transposed is true. type = [int32, int64]. + * @param[in] dilation an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] groups number of groups for grouped convolution. type = [int64]. + * @param[out] out the result tensor. type = [float32, float16, float64]. + */ +DIOPI_API diopiError_t diopiConvTranspose3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t bias, diopiSize_t stride, diopiSize_t padding, diopiSize_t output_padding, int64_t groups, + diopiSize_t dilation); + +/** + * @brief Backward pass for ConvTranspose3dBackward. Computes gradients for input, weight, and bias. + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output. type = [float32, float16, float64]. + * @param[in] bias_sizes an array, indicates that a bias was used in the forward pass and contains the shape of the bias. type = [int32, int64]. + * @param[in] input the input tensor. type = [float32, float16, float64]. + * @param[in] weight the weight tensor; dimension of kernel_size must match the number of input spatial dimensions. + * @param[in] stride an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] padding an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] output_padding an array, dimension == number of input spatial dimensions; only supported when transposed is true. type = [int32, int64]. + * @param[in] dilation an array with dimension matching the number of input spatial dimensions. type = [int32, int64]. + * @param[in] groups number of groups for grouped convolution. type = [int64]. + * @param[out] grad_input the grad of input. type = [float32, float16, float64]. + * @param[out] grad_weight the grad of weight. type = [float32, float16, float64]. + * @param[out] grad_bias the grad of bias. type = [float32, float16, float64]. 
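+ *
+ * Illustrative call sketch (not normative): all handles are created by the caller; grad_bias and
+ * bias_sizes are assumed to be nullable when the forward pass used no bias.
+ * @code{.cpp}
+ * diopiConvTranspose3dBackward(ctx, grad_input, grad_weight, /*grad_bias=*/nullptr,
+ *                              grad_output, input, weight, /*bias_sizes=*/nullptr,
+ *                              stride, padding, dilation, output_padding, groups);
+ * @endcode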
+ */ +DIOPI_API diopiError_t diopiConvTranspose3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, diopiSize_t output_padding, int64_t groups); /** * @brief Extracts sliding local blocks from a batched input tensor. * @param[in] ctx Context environment. @@ -2583,6 +2963,15 @@ DIOPI_API diopiError_t diopiUnfoldBackward(diopiContextHandle_t ctx, diopiTensor */ DIOPI_API diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim); +/** + * @brief Computes the backward pass for diopiCumsum() + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output, with the same shape as the forward pass output. type=[float16, float32, float64]. + * @param[in] dim the dimension to do the operation over. type = [int64]. + * @param[out] grad_input the grad tensor of input, with the same shape as the forward pass input. type=[float16, float32, float64]. + */ +DIOPI_API diopiError_t diopiCumsumBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, int64_t dim); + /** * @brief Computes batched the p-norm distance between each pair of the two collections of row vectors. * @param[in] ctx Context environment. @@ -2616,6 +3005,28 @@ DIOPI_API diopiError_t diopiCdistBackward(diopiContextHandle_t ctx, diopiTensorH */ DIOPI_API diopiError_t diopiArgmax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim); +/** + * @brief Returns the indices of the minimum values of a tensor across a dimension. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type=[float32, float64, float16, int16, int32, int64, uint8, int8, bool]. + * @param[in] dim the dimension to do the operation over. type=[int32, int64]. + * @param[in] keepdim whether the output tensor has dim retained or not. + * @param[out] out the output tensor. type=[int32, int64]. + */ +DIOPI_API diopiError_t diopiArgmin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim, bool keepdim); + +/** + * @brief Returns the indices that sort a tensor along a given dimension in ascending order by value. + * @param[in] ctx Context environment. + * @param[in] input the input tensor. type=[float32, float64, float16, int16, int32, int64, uint8, int8, bool]. + * @param[in] dim the dimension to do the operation over. type=[int32, int64]. + * @param[in] descending controls the sorting order (ascending or descending). + * @param[in] stable controls the relative order of equivalent elements. + * @param[out] out the output tensor. type=[int32, int64]. + */ +DIOPI_API diopiError_t diopiArgsort(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, bool stable, const int64_t* dim, + bool descending); + /** * @brief The function is used to implement the Adadelta optimizer. Its functionality is to perform a single parameter update. * @param[in] ctx Context environment. 
@@ -2730,6 +3141,37 @@ DIOPI_API diopiError_t diopiConvolution3dBackward(diopiContextHandle_t ctx, diop diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiSize_t* bias_sizes, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, int64_t groups); +/** + * @brief Performs 3D average pooling operation. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after average pooling. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include the zero-padding in the averaging calculation. + * @param[in] divisor_override if provided, it will be used as the divisor for averaging. + */ +DIOPI_API diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, + diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override); + +/** + * @brief Performs the backward pass for diopiAvgPool3d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] kernel_size the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] count_include_pad whether to include the zero-padding in the averaging calculation. + * @param[in] divisor_override if provided, it will be used as the divisor for averaging. + */ +DIOPI_API diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode, + bool count_include_pad, const int64_t* divisor_override); /** * \brief Applies a 3D max pooling over an input signal composed of several input planes. @@ -2827,6 +3269,46 @@ DIOPI_API diopiError_t diopiAdaptiveMaxPool3dWithIndices(diopiContextHandle_t ct DIOPI_API diopiError_t diopiAdaptiveMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices); +/** + * @brief General 3D pooling operation with support for multiple pooling modes. + * @param[in] ctx Context environment. + * @param[out] out the output tensor after pooling. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. 
+ * @param[in] output_size the size of the output after pooling (if adaptive pooling is used). + */ +DIOPI_API diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, + const bool adaptive, diopiSize_t output_size); + +/** + * @brief Performs the backward pass for diopiPool3d(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] mode the pooling mode, such as "max" or "avg". + * @param[in] ksize the size of the pooling window. + * @param[in] stride the stride of the pooling window. + * @param[in] padding implicit padding added to the input. + * @param[in] dilation the spacing between elements in the pooling window. + * @param[in] ceil_mode whether to use ceil instead of floor for output shape calculation. + * @param[in] exclusive whether to exclude padding when averaging. + * @param[in] adaptive whether to use adaptive pooling. + * @param[in] indices the tensor storing the indices of max elements from the forward pass (if max pooling is used). + */ +DIOPI_API diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, + diopiConstTensorHandle_t indices); + /** * \brief Returns a new 1-D tensor which indexes the input tensor according to the boolean mask. * @param[in] ctx Context environment. @@ -2991,6 +3473,37 @@ DIOPI_API diopiError_t diopiFlip(diopiContextHandle_t ctx, diopiTensorHandle_t o */ DIOPI_API diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* p, diopiSize_t dim); +/** + * @brief Compute the backward pass for diopiNorm(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output. type=[float32, float64, float16]. + * @param[in] self input tensor. type=[float32, float64, float16]. + * @param[in] norm norm tensor. type=[float32, float64, float16]. + * @param[in] dim Specifies which dimension or dimensions of input to calculate the norm across. + * @param[in] p an array, the order of norm. + * @param[out] grad_input the grad of input. type=[float32, float64, float16]. + */ +DIOPI_API diopiError_t diopiNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t self, diopiConstTensorHandle_t norm, diopiSize_t dim, const diopiScalar_t* p); + +/** + * @brief Applies Layer Normalization over a mini-batch of inputs. + * Note that, this is the national standard GB operator version, which is different from the diopiLayerNorm interface definition, normalized_shape has changed + * to begin_norm_axis. type=[float32, float64, float16]. + * @param[in] ctx Context environment. + * @param[in] save_mean Mean tensor,the mean value for each feature channel of the input tensor. type=[float32, float64, float16]. + * @param[in] save_invstd Backup of inverse standard deviation computed during training. type=[float32, float64, float16]. 
+ * @param[in] input input tensor. type=[float32, float64, float16]. + * @param[in] weight weight tensor. type=[float32, float64, float16]. + * @param[in] bias bias tensor. type=[float32, float64, float16]. + * @param[in] begin_norm_axis int64, Indicates which dimension to start normalization. + * @param[in] eps float64 a value added to the denominator for numerical stability. + * @param[out] out normalized result. type=[float32, float64, float16]. + */ +DIOPI_API diopiError_t diopiLayerNormGB(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t running_mean, diopiTensorHandle_t running_var, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, const double eps, + const int64_t begin_norm_axis); + /** * @brief Returns the matrix norm or vector norm of a given tensor list. * @param[in] ctx Context environment. @@ -3361,6 +3874,17 @@ DIOPI_API diopiError_t diopiNormalInp(diopiContextHandle_t ctx, diopiTensorHandl */ DIOPI_API diopiError_t diopiMeshGrid(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, diopiConstTensorHandle_t* inputs, int64_t inputsNum); +/** + * @brief Compute grid sample. + * @param[in] ctx Context environment. + * @param[in] input the original tensor to be sampled. + * @param[in] grid the pixel locations of sampling. + * @param[in] mode the sampling mode. [bilinear, nearest]. + * @param[out] out the result sampling tensor. + */ +DIOPI_API diopiError_t diopiGridSample(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t grid, + const char* mode); + /** * @brief Returns a tensor where each row contains num_samples indices sampled from the * multinomial probability distribution located in the corresponding row of tensor input. @@ -3408,6 +3932,79 @@ DIOPI_API diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTen diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape); +/** + * @brief Compute the backward pass for diopiLayerNormGB(). Computes gradients for input, weight, and bias. + * Note that, this is the national standard GB operator version, which is different from the diopiLayerNormBackward interface definition, normalized_shape has + * changed to begin_norm_axis + * @param[in] ctx Context environment. + * @param[in] grad_output the grad tensor of output. type=[float32, float64, float16]. + * @param[in] grad_bias the grad of bias. type=[float32, float64, float16]. + * @param[in] grad_weight the grad of weight. type=[float32, float64, float16]. + * @param[in] mean Mean tensor,the mean value for each feature channel of the input tensor. type=[float32, float64, float16]. + * @param[in] rstd Backup of inverse standard deviation computed during training. type=[float32, float64, float16]. + * @param[in] input input tensor. type=[float32, float64, float16]. + * @param[in] weight weight tensor. type=[float32, float64, float16]. + * @param[in] bias bias tensor. type=[float32, float64, float16]. + * @param[in] begin_norm_axis int64, Indicates which dimension to start normalization. + * @param[out] grad_input the grad of input. type=[float32, float64, float16]. 
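+ *
+ * Relation to diopiLayerNormBackward() (illustrative sketch; input_shape stands for the input's
+ * dimension vector): begin_norm_axis selects the trailing dimensions that play the role of
+ * normalized_shape in the standard interface.
+ * @code{.cpp}
+ * std::vector<int64_t> normalized_shape(input_shape.begin() + begin_norm_axis, input_shape.end());
+ * @endcode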
+ */ +DIOPI_API diopiError_t diopiLayerNormGBBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiConstTensorHandle_t running_mean, + diopiConstTensorHandle_t running_std, const int64_t begin_norm_axis); + +/** + * @brief Performs instance normalization on the input tensor. + * @param[in] ctx Context environment. + * @param[out] output the output tensor after instance normalization. + * @param[in] input the input tensor to be normalized. + * @param[in] axis the axis along which normalization is applied. + * @param[in] scale the scale tensor. + * @param[in] bias the bias tensor. + * @param[in] eps small value to avoid division by zero during normalization. + */ +DIOPI_API diopiError_t diopiInstanceNorm(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const int64_t axis, + diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const double eps); +/** + * @brief Performs the backward pass for diopiInstanceNorm(). Computes gradients for input, scale, and bias. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[out] grad_scale the gradient tensor of scale. + * @param[out] grad_bias the gradient tensor of bias. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] scale the scale tensor. + * @param[in] bias the bias tensor. + * @param[in] axis the axis along which normalization is applied. + * @param[in] eps small value to avoid division by zero during normalization. + */ +DIOPI_API diopiError_t diopiInstanceNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_scale, + diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t scale, diopiConstTensorHandle_t bias, const int64_t axis, const double eps); +/** + * @brief Normalizes the input tensor based on the p-norm along the given axis. + * @param[in] ctx Context environment. + * @param[out] output the normalized output tensor. + * @param[in] input the input tensor to be normalized. + * @param[in] p the p-norm to use for normalization. + * @param[in] axis the axis along which to normalize. + * @param[in] eps small value to avoid division by zero during normalization. + */ +DIOPI_API diopiError_t diopiNormalize(diopiContextHandle_t ctx, diopiTensorHandle_t output, diopiConstTensorHandle_t input, const float p, const int64_t axis, + const double eps); +/** + * @brief Performs the backward pass for diopiNormalize(). Computes gradients for input. + * @param[in] ctx Context environment. + * @param[out] grad_input the gradient tensor of input. + * @param[in] grad_output the gradient tensor of the output. + * @param[in] input the input tensor. + * @param[in] p the p-norm used during normalization. + * @param[in] axis the axis along which normalization was applied. + * @param[in] eps small value to avoid division by zero during normalization. + */ +DIOPI_API diopiError_t diopiNormalizeBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, + diopiConstTensorHandle_t input, const float p, const int64_t axis, const double eps); + /** * @brief Copies the elements from src into dest tensor. 
 * @param[in] ctx Context environment.
@@ -3548,6 +4145,38 @@ DIOPI_API diopiError_t diopiTriu(diopiContextHandle_t ctx, diopiTensorHandle_t o
 */
 DIOPI_API diopiError_t diopiTriuInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, int64_t diagonal);
 
+/**
+ * @brief Creates a complex tensor from its real and imaginary parts.
+ * @param[in] ctx Context environment.
+ * @param[in] real the real part of the tensor.
+ * @param[in] imag the imaginary part of the tensor.
+ * @param[out] out the constructed complex tensor.
+ */
+DIOPI_API diopiError_t diopiComplex(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t real, diopiConstTensorHandle_t imag);
+
+/**
+ * @brief Returns the complex conjugate of the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[out] out the complex conjugate of the input tensor.
+ */
+DIOPI_API diopiError_t diopiConj(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
+/**
+ * @brief Returns the imaginary part of the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[out] out the imaginary part of the input tensor.
+ */
+DIOPI_API diopiError_t diopiImag(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
+/**
+ * @brief Returns the real part of the input tensor.
+ * @param[in] ctx Context environment.
+ * @param[in] input the input tensor.
+ * @param[out] out the real part of the input tensor.
+ */
+DIOPI_API diopiError_t diopiReal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input);
+
 /**
  * @brief Create a tensor filled with one.
  * @param[in] ctx Context environment.