Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement some GB ops #1342

Merged
Merged
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
07e5dce
add expm1 on diopi torch impl
DoorKickers Aug 14, 2024
a7fc14d
add tan on diopi torch impl
DoorKickers Aug 14, 2024
299455e
add acos on diopi torch impl
DoorKickers Aug 14, 2024
9a9a39e
fix format & add sinh on diopi torch impl
DoorKickers Aug 14, 2024
dca4fcc
add cosh on diopi torch impl
DoorKickers Aug 14, 2024
766a976
add asinh on diopi torch impl
DoorKickers Aug 14, 2024
6f4e9d6
add acosh on diopi torch impl
DoorKickers Aug 14, 2024
89168cd
add atanh on diopi torch impl
DoorKickers Aug 14, 2024
56b3150
combine some hyperbolic trigonometric function diopi tests into hyperbolic_tri…
DoorKickers Aug 14, 2024
74b8872
add argmin on diopi torch impl & del diopi test hyperbolic_trigo_func…
DoorKickers Aug 14, 2024
a8c9223
add argsort on diopi torch impl
DoorKickers Aug 14, 2024
6fd5621
add sort_backward on diopi torch impl
DoorKickers Aug 15, 2024
8e0948a
refactor diopi_test for sort_backward & add cumsum_backward
DoorKickers Aug 22, 2024
0f2194c
add complex on diopi torch impl
DoorKickers Aug 22, 2024
4f4870c
add conj on diopi torch impl
DoorKickers Aug 22, 2024
0e6bcc1
refactor some format & add imag on diopi torch impl
DoorKickers Aug 22, 2024
5eb369e
add real on diopi torch impl
DoorKickers Aug 22, 2024
30fadee
fix setCurStream & add grid_sample & prepare for diopiPool2d
DoorKickers Aug 23, 2024
dbebe51
add norm_backward, normalize, normalize_backward, layer_normGB, layer…
DoorKickers Oct 14, 2024
e66858d
prepare for pool1d, pool2d, pool3d
DoorKickers Oct 14, 2024
41fad0b
finish diopi_pool related
DoorKickers Oct 15, 2024
de320eb
add part of pool1d in diopi_configs.py and diopi_functions.py
DoorKickers Oct 15, 2024
d0b25fa
finish all of pool1d, pool2d and pool3d
DoorKickers Oct 15, 2024
d106821
update layer_normGB, add instance_norm_backward
DoorKickers Oct 16, 2024
87df8ec
add conv_transpose3d
DoorKickers Oct 16, 2024
d6e63fb
Merge remote-tracking branch 'upstream/main' into zhanglantian/implem…
DoorKickers Oct 16, 2024
d6eac97
fix clang format
DoorKickers Oct 16, 2024
c75f57f
remove conflicted layer_norm
DoorKickers Oct 16, 2024
68aa0be
finish comments and fix fused_adam breakage caused by the previous merge
DoorKickers Oct 17, 2024
25901cd
remove unused code & try to fix adam
DoorKickers Oct 17, 2024
d8a1c54
add layerNorm GB national standard operator's explanation
DoorKickers Oct 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
prepare for pool1d, pool2d, pool3d
  • Loading branch information
DoorKickers committed Oct 14, 2024
commit e66858d912579cde240eb5c03aa36254494b3f4d
270 changes: 270 additions & 0 deletions impl/torch/functions/functions.cpp
Original file line number Diff line number Diff line change
@@ -103,6 +103,189 @@ diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t inp
return diopiSuccess;
}

diopiError_t diopiMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride,
                            diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) {
    // Forward 1d max pooling: convert the DIOPI handles to ATen, run the ATen
    // kernel, then copy the pooled values back into the DIOPI output tensor.
    impl::aten::setCurStream(ctx);

    auto selfAt = impl::aten::buildATen(input);
    at::IntArrayRef kernelAt = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef strideAt = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef padAt = impl::aten::buildAtIntArray(padding);
    at::IntArrayRef dilationAt = impl::aten::buildAtIntArray(dilation);

    auto pooledAt = CALL_ATEN_FUNC(max_pool1d, selfAt, kernelAt, strideAt, padAt, dilationAt, ceil_mode);
    impl::aten::updateATen2Tensor(ctx, pooledAt, out);

    return diopiSuccess;
}

diopiError_t diopiMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                    diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation,
                                    bool ceil_mode, diopiConstTensorHandle_t indices) {
    // Backward of 1d max pooling. ATen does not expose a standalone 1d max-pool
    // backward, so the 1d tensors are lifted to the 2d NCHW layout by inserting
    // a height-1 dimension (kernel/stride/dilation of 1 and padding of 0 on
    // that dimension), the 2d backward kernel is run, and the extra dimension
    // is squeezed away again.
    impl::aten::setCurStream(ctx);
    auto atGradOutput = impl::aten::buildATen(grad_output);
    auto atInput = impl::aten::buildATen(input);
    // NOTE(review): assumes kernel_size/stride/padding/dilation each carry at
    // least one element — TODO confirm callers always fill them for 1d pooling.
    at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding);
    at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation);
    auto atIndices = impl::aten::buildATen(indices);

    auto atGrad2d = CALL_ATEN_FUNC(max_pool2d_with_indices_backward,
                                   atGradOutput.unsqueeze(-2),
                                   atInput.unsqueeze(-2),
                                   {1, atKernelSize[0]},
                                   {1, atStride[0]},
                                   {0, atPadding[0]},
                                   {1, atDilation[0]},
                                   ceil_mode,
                                   atIndices.unsqueeze(-2));

    auto atGradInput = atGrad2d.squeeze(-2);

    impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input);

    return diopiSuccess;
}

diopiError_t diopiMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input,
                                       diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) {
    // 1d max pooling that also reports the argmax indices of each window.
    impl::aten::setCurStream(ctx);
    auto atInput = impl::aten::buildATen(input);
    at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding);
    at::IntArrayRef atDilation = impl::aten::buildAtIntArray(dilation);
    // CLEANUP: the original also built ATen views of `out` and `indices` that
    // were never used — both results are written back via updateATen2Tensor.
    std::tuple<at::Tensor, at::Tensor> atRes = CALL_ATEN_FUNC(max_pool1d_with_indices, atInput, atKernelSize, atStride, atPadding, atDilation, ceil_mode);

    impl::aten::updateATen2Tensor(ctx, std::get<0>(atRes), out);
    impl::aten::updateATen2Tensor(ctx, std::get<1>(atRes), indices);

    return diopiSuccess;
}

diopiError_t diopiAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride,
                            diopiSize_t padding, bool ceil_mode, bool count_include_pad) {
    // 1d average pooling forward, delegated straight to the ATen kernel.
    impl::aten::setCurStream(ctx);

    auto selfAt = impl::aten::buildATen(input);
    at::IntArrayRef kernelAt = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef strideAt = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef padAt = impl::aten::buildAtIntArray(padding);

    auto pooledAt = CALL_ATEN_FUNC(avg_pool1d, selfAt, kernelAt, strideAt, padAt, ceil_mode, count_include_pad);
    impl::aten::updateATen2Tensor(ctx, pooledAt, out);

    return diopiSuccess;
}

diopiError_t diopiAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                    diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode,
                                    bool count_include_pad) {
    // Backward of 1d average pooling. ATen only ships a 2d backward kernel, so
    // grad_output/input are lifted to NCHW with a height-1 dimension, the 2d
    // kernel runs, and the extra dimension is squeezed away again.
    // NOTE(review): assumes kernel_size/stride/padding each hold at least one
    // element — TODO confirm callers always fill them for 1d pooling.
    impl::aten::setCurStream(ctx);

    auto gradOutAt = impl::aten::buildATen(grad_output);
    auto selfAt = impl::aten::buildATen(input);
    at::IntArrayRef kernelAt = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef strideAt = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef padAt = impl::aten::buildAtIntArray(padding);

    auto grad2dAt = CALL_ATEN_FUNC(avg_pool2d_backward,
                                   gradOutAt.unsqueeze(-2),
                                   selfAt.unsqueeze(-2),
                                   {1, kernelAt[0]},
                                   {1, strideAt[0]},
                                   {0, padAt[0]},
                                   ceil_mode,
                                   count_include_pad,
                                   c10::nullopt);

    impl::aten::updateATen2Tensor(ctx, grad2dAt.squeeze(-2), grad_input);

    return diopiSuccess;
}

diopiError_t diopiAdaptiveMaxPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) {
    // Adaptive 1d max pooling, values only — the indices half of the ATen
    // result is intentionally discarded here (see the WithIndices variant).
    impl::aten::setCurStream(ctx);
    auto atInput = impl::aten::buildATen(input);
    auto atOutSize = impl::aten::buildAtIntArray(output_size);
    // CONSISTENCY: route through CALL_ATEN_FUNC like every other pooling
    // implementation in this file instead of calling at:: directly.
    auto atOuts = CALL_ATEN_FUNC(adaptive_max_pool1d, atInput, atOutSize);
    impl::aten::updateATen2Tensor(ctx, std::get<0>(atOuts), out);

    return diopiSuccess;
}

diopiError_t diopiAdaptiveMaxPool1dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input,
                                               diopiSize_t output_size) {
    // Adaptive 1d max pooling returning both pooled values and argmax indices.
    impl::aten::setCurStream(ctx);

    auto selfAt = impl::aten::buildATen(input);
    auto targetSizeAt = impl::aten::buildAtIntArray(output_size);

    // ATen returns a (values, indices) pair; copy each half back separately.
    std::tuple<at::Tensor, at::Tensor> resultAt = CALL_ATEN_FUNC(adaptive_max_pool1d, selfAt, targetSizeAt);
    impl::aten::updateATen2Tensor(ctx, std::get<0>(resultAt), out);
    impl::aten::updateATen2Tensor(ctx, std::get<1>(resultAt), indices);

    return diopiSuccess;
}

diopiError_t diopiAdaptiveMaxPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                            diopiConstTensorHandle_t input, diopiConstTensorHandle_t indices) {
    // Backward of adaptive 1d max pooling. ATen only provides the 2d backward
    // kernel, so every 1d tensor is lifted to the 2d NCHW layout by inserting a
    // height-1 dimension, and the resulting 2d gradient is squeezed back down.
    impl::aten::setCurStream(ctx);
    auto atInput = impl::aten::buildATen(input);
    auto atGradOutput = impl::aten::buildATen(grad_output);
    auto atIndices = impl::aten::buildATen(indices);
    // BUG FIX: the indices must be unsqueezed like grad_output and input so
    // their shape matches the 2d kernel's expectation. The original called
    // squeeze(-2), which removes a dimension instead of adding one and breaks
    // the shape contract of adaptive_max_pool2d_backward.
    auto atGrad2d = CALL_ATEN_FUNC(adaptive_max_pool2d_backward, atGradOutput.unsqueeze(-2), atInput.unsqueeze(-2), atIndices.unsqueeze(-2));
    auto atGradInput = atGrad2d.squeeze(-2);

    impl::aten::updateATen2Tensor(ctx, atGradInput, grad_input);

    return diopiSuccess;
}

diopiError_t diopiAdaptiveAvgPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                            diopiConstTensorHandle_t input) {
    // Backward of adaptive 1d average pooling, emulated with the 2d kernel by
    // inserting (and afterwards removing) a height-1 dimension.
    impl::aten::setCurStream(ctx);

    auto gradOutAt = impl::aten::buildATen(grad_output);
    auto selfAt = impl::aten::buildATen(input);

    auto grad2dAt = CALL_ATEN_FUNC(_adaptive_avg_pool2d_backward, gradOutAt.unsqueeze(-2), selfAt.unsqueeze(-2));
    impl::aten::updateATen2Tensor(ctx, grad2dAt.squeeze(-2), grad_input);

    return diopiSuccess;
}

diopiError_t diopiAdaptiveAvgPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) {
    // Adaptive 1d average pooling forward.
    impl::aten::setCurStream(ctx);

    auto selfAt = impl::aten::buildATen(input);
    auto targetSizeAt = impl::aten::buildAtIntArray(output_size);

    auto pooledAt = CALL_ATEN_FUNC(adaptive_avg_pool1d, selfAt, targetSizeAt);
    impl::aten::updateATen2Tensor(ctx, pooledAt, out);

    return diopiSuccess;
}

diopiError_t diopiPool1d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize,
                         diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive,
                         diopiSize_t output_size) {
    // Generic 1d pooling entry point: dispatch to the concrete kernel selected
    // by `mode` ("max"/"avg") and the `adaptive` flag. Anything that is neither
    // max nor plain average falls through to adaptive average pooling, exactly
    // as the original chained-if dispatch did.
    impl::aten::setCurStream(ctx);

    if (strcmp(mode, "max") == 0) {
        return adaptive ? impl::cuda::diopiAdaptiveMaxPool1d(ctx, out, input, output_size)
                        : impl::cuda::diopiMaxPool1d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode);
    }
    if (!adaptive && strcmp(mode, "avg") == 0) {
        // DIOPI's `exclusive` flag is the inverse of ATen's count_include_pad.
        return impl::cuda::diopiAvgPool1d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive);
    }
    return impl::cuda::diopiAdaptiveAvgPool1d(ctx, out, input, output_size);
}

diopiError_t diopiPool1dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                 diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding,
                                 diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) {
    // Backward counterpart of diopiPool1d: same mode/adaptive dispatch, with
    // any mode that is neither max nor plain average falling through to the
    // adaptive-average backward, exactly as the original chained-if did.
    impl::aten::setCurStream(ctx);

    if (strcmp(mode, "max") == 0) {
        return adaptive ? impl::cuda::diopiAdaptiveMaxPool1dBackward(ctx, grad_input, grad_output, input, indices)
                        : impl::cuda::diopiMaxPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices);
    }
    if (!adaptive && strcmp(mode, "avg") == 0) {
        // DIOPI's `exclusive` flag is the inverse of ATen's count_include_pad.
        return impl::cuda::diopiAvgPool1dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive);
    }
    return impl::cuda::diopiAdaptiveAvgPool1dBackward(ctx, grad_input, grad_output, input);
}


diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride,
diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) {
impl::aten::setCurStream(ctx);
@@ -118,6 +301,7 @@ diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d
return diopiSuccess;
}


diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input,
diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) {
impl::aten::setCurStream(ctx);
@@ -2114,6 +2298,36 @@ diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, d
return diopiSuccess;
}

diopiError_t diopiPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize,
                         diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive,
                         diopiSize_t output_size) {
    // Generic 2d pooling entry point: dispatch to the concrete kernel selected
    // by `mode` ("max"/"avg") and the `adaptive` flag. Anything that is neither
    // max nor plain average falls through to adaptive average pooling, exactly
    // as the original chained-if dispatch did.
    impl::aten::setCurStream(ctx);

    if (strcmp(mode, "max") == 0) {
        return adaptive ? impl::cuda::diopiAdaptiveMaxPool2d(ctx, out, input, output_size)
                        : impl::cuda::diopiMaxPool2d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode);
    }
    if (!adaptive && strcmp(mode, "avg") == 0) {
        // DIOPI's `exclusive` flag is the inverse of ATen's count_include_pad;
        // nullptr means "no divisor override".
        return impl::cuda::diopiAvgPool2d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr);
    }
    return impl::cuda::diopiAdaptiveAvgPool2d(ctx, out, input, output_size);
}

diopiError_t diopiPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                 diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding,
                                 diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) {
    // Backward counterpart of diopiPool2d: same mode/adaptive dispatch, with
    // any mode that is neither max nor plain average falling through to the
    // adaptive-average backward, exactly as the original chained-if did.
    impl::aten::setCurStream(ctx);

    if (strcmp(mode, "max") == 0) {
        return adaptive ? impl::cuda::diopiAdaptiveMaxPool2dBackward(ctx, grad_input, grad_output, input, indices)
                        : impl::cuda::diopiMaxPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices);
    }
    if (!adaptive && strcmp(mode, "avg") == 0) {
        // DIOPI's `exclusive` flag is the inverse of ATen's count_include_pad;
        // nullptr means "no divisor override".
        return impl::cuda::diopiAvgPool2dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr);
    }
    return impl::cuda::diopiAdaptiveAvgPool2dBackward(ctx, grad_input, grad_output, input);
}

diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t mask, diopiConstTensorHandle_t input, double p, bool train,
diopiGeneratorHandle_t generator) {
impl::aten::setCurStream(ctx);
@@ -4129,6 +4343,62 @@ diopiError_t diopiMaxPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_
return diopiSuccess;
}

diopiError_t diopiAvgPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride,
                            diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) {
    // 3d average pooling forward, written directly into `out` through the
    // CUDA out-variant of the ATen kernel.
    impl::aten::setCurStream(ctx);

    auto selfAt = impl::aten::buildATen(input);
    auto destAt = impl::aten::buildATen(out);
    at::IntArrayRef kernelAt = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef strideAt = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef padAt = impl::aten::buildAtIntArray(padding);

    // A null pointer means "no divisor override"; otherwise wrap the value.
    c10::optional<int64_t> divisorAt;
    if (divisor_override != nullptr) {
        divisorAt = *divisor_override;
    }

    CALL_ATEN_CUDA_FUNC(avg_pool3d_out, destAt, selfAt, kernelAt, strideAt, padAt, ceil_mode, count_include_pad, divisorAt);

    return diopiSuccess;
}

diopiError_t diopiAvgPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                    diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, bool ceil_mode,
                                    bool count_include_pad, const int64_t* divisor_override) {
    // Backward of 3d average pooling, written directly into `grad_input`
    // through the CUDA out-variant of the ATen kernel.
    impl::aten::setCurStream(ctx);
    auto atGradOutput = impl::aten::buildATen(grad_output);
    auto atInput = impl::aten::buildATen(input);
    at::IntArrayRef atKernelSize = impl::aten::buildAtIntArray(kernel_size);
    at::IntArrayRef atStride = impl::aten::buildAtIntArray(stride);
    at::IntArrayRef atPadding = impl::aten::buildAtIntArray(padding);
    // A null pointer means "no divisor override".
    c10::optional<int64_t> atDivisorOverride = divisor_override ? c10::optional<int64_t>(*divisor_override) : c10::nullopt;
    auto atGradInput = impl::aten::buildATen(grad_input);
    CALL_ATEN_CUDA_FUNC(avg_pool3d_backward_out, atGradInput, atGradOutput, atInput, atKernelSize, atStride, atPadding, ceil_mode, count_include_pad, atDivisorOverride);

    // BUG FIX: the original fell off the end of this non-void function, which
    // is undefined behavior; report success explicitly like every sibling.
    return diopiSuccess;
}

diopiError_t diopiPool3d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize,
                         diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive,
                         diopiSize_t output_size) {
    // Generic 3d pooling entry point: dispatch to the concrete kernel selected
    // by `mode` ("max"/"avg") and the `adaptive` flag. Anything that is neither
    // max nor plain average falls through to adaptive average pooling, exactly
    // as the original chained-if dispatch did.
    impl::aten::setCurStream(ctx);

    if (strcmp(mode, "max") == 0) {
        return adaptive ? impl::cuda::diopiAdaptiveMaxPool3d(ctx, out, input, output_size)
                        : impl::cuda::diopiMaxPool3d(ctx, out, input, ksize, stride, padding, dilation, ceil_mode);
    }
    if (!adaptive && strcmp(mode, "avg") == 0) {
        // DIOPI's `exclusive` flag is the inverse of ATen's count_include_pad;
        // nullptr means "no divisor override".
        return impl::cuda::diopiAvgPool3d(ctx, out, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr);
    }
    return impl::cuda::diopiAdaptiveAvgPool3d(ctx, out, input, output_size);
}

diopiError_t diopiPool3dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output,
                                 diopiConstTensorHandle_t input, const char* mode, diopiSize_t ksize, diopiSize_t stride, diopiSize_t padding,
                                 diopiSize_t dilation, const bool ceil_mode, const bool exclusive, const bool adaptive, diopiConstTensorHandle_t indices) {
    // Backward counterpart of diopiPool3d: same mode/adaptive dispatch, with
    // any mode that is neither max nor plain average falling through to the
    // adaptive-average backward, exactly as the original chained-if did.
    impl::aten::setCurStream(ctx);

    if (strcmp(mode, "max") == 0) {
        return adaptive ? impl::cuda::diopiAdaptiveMaxPool3dBackward(ctx, grad_input, grad_output, input, indices)
                        : impl::cuda::diopiMaxPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, dilation, ceil_mode, indices);
    }
    if (!adaptive && strcmp(mode, "avg") == 0) {
        // DIOPI's `exclusive` flag is the inverse of ATen's count_include_pad;
        // nullptr means "no divisor override".
        return impl::cuda::diopiAvgPool3dBackward(ctx, grad_input, grad_output, input, ksize, stride, padding, ceil_mode, !exclusive, nullptr);
    }
    return impl::cuda::diopiAdaptiveAvgPool3dBackward(ctx, grad_input, grad_output, input);
}

diopiError_t diopiPermute(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dims) {
impl::aten::setCurStream(ctx);
auto atInput = impl::aten::buildATen(input);