From 52ae9cdd45b3a942a0bd02c44ee78548c873cb27 Mon Sep 17 00:00:00 2001
From: yangbofun <37532326+yangbofun@users.noreply.github.com>
Date: Tue, 23 May 2023 17:59:26 +0800
Subject: [PATCH] yb/add clang-tidy and fix (#76)

* add clang-tidy for code static check and code style check
---
 .clang-tidy | 68 +++
 .github/workflows/main.yml | 8 +
 DIOPI-IMPL/camb/CMakeLists.txt | 2 +-
 DIOPI-IMPL/camb/cnnl_helper.cpp | 12 +-
 DIOPI-IMPL/camb/cnnl_helper.hpp | 44 +-
 DIOPI-IMPL/camb/common/basic_op.cpp | 82 ++--
 DIOPI-IMPL/camb/common/broadcast.cpp | 16 +-
 DIOPI-IMPL/camb/common/clone.cpp | 1 +
 DIOPI-IMPL/camb/common/common.hpp | 8 +-
 DIOPI-IMPL/camb/common/contiguous.cpp | 66 +--
 DIOPI-IMPL/camb/common/dtype_cast.cpp | 38 +-
 DIOPI-IMPL/camb/common/scalar.cpp | 2 +-
 DIOPI-IMPL/camb/diopi_helper.hpp | 98 ++--
 DIOPI-IMPL/camb/error.hpp | 4 +-
 DIOPI-IMPL/camb/functions/abs.cpp | 24 +-
 DIOPI-IMPL/camb/functions/activation.cpp | 198 ++++----
 .../camb/functions/adaptive_pooling.cpp | 108 ++---
 DIOPI-IMPL/camb/functions/addcdiv.cpp | 60 +--
 DIOPI-IMPL/camb/functions/addcmul.cpp | 60 +--
 DIOPI-IMPL/camb/functions/addmm.cpp | 170 +++----
 DIOPI-IMPL/camb/functions/arange.cpp | 24 +-
 DIOPI-IMPL/camb/functions/avg_pool2d.cpp | 300 ++++++------
 DIOPI-IMPL/camb/functions/batch_norm.cpp | 346 +++++++-------
 DIOPI-IMPL/camb/functions/binary_op.cpp | 2 +-
 DIOPI-IMPL/camb/functions/bitwise.cpp | 48 +-
 DIOPI-IMPL/camb/functions/cast_dtype.cpp | 6 +-
 DIOPI-IMPL/camb/functions/cat.cpp | 32 +-
 DIOPI-IMPL/camb/functions/clamp.cpp | 118 ++---
 DIOPI-IMPL/camb/functions/conv_2d.cpp | 334 ++++++-------
 DIOPI-IMPL/camb/functions/copy.cpp | 14 +-
 DIOPI-IMPL/camb/functions/cos.cpp | 26 +-
 DIOPI-IMPL/camb/functions/cumsum.cpp | 34 +-
 DIOPI-IMPL/camb/functions/div.cpp | 72 +--
 DIOPI-IMPL/camb/functions/dropout.cpp | 80 ++--
 DIOPI-IMPL/camb/functions/erf.cpp | 22 +-
 DIOPI-IMPL/camb/functions/error.cpp | 4 +-
 DIOPI-IMPL/camb/functions/exp.cpp | 20 +-
 DIOPI-IMPL/camb/functions/expand.cpp | 2 +-
 DIOPI-IMPL/camb/functions/fill.cpp | 91 ++--
 DIOPI-IMPL/camb/functions/flip.cpp | 28 +-
 DIOPI-IMPL/camb/functions/floor.cpp | 2 +-
 DIOPI-IMPL/camb/functions/gather.cpp | 90 ++--
 DIOPI-IMPL/camb/functions/hardtanh.cpp | 100 ++--
 DIOPI-IMPL/camb/functions/layernorm.cpp | 250 +++++-----
 DIOPI-IMPL/camb/functions/linear.cpp | 230 ++++-----
 DIOPI-IMPL/camb/functions/linspace.cpp | 30 +-
 DIOPI-IMPL/camb/functions/log.cpp | 44 +-
 DIOPI-IMPL/camb/functions/logic.cpp | 184 +++----
 DIOPI-IMPL/camb/functions/loss.cpp | 374 +++++++--------
 DIOPI-IMPL/camb/functions/masked_fill.cpp | 101 ++--
 DIOPI-IMPL/camb/functions/matmul.cpp | 404 ++++++++--------
 DIOPI-IMPL/camb/functions/max_pool2d.cpp | 448 +++++++++---------
 DIOPI-IMPL/camb/functions/meshgrid.cpp | 50 +-
 DIOPI-IMPL/camb/functions/mul.cpp | 52 +-
 DIOPI-IMPL/camb/functions/multinomial.cpp | 32 +-
 DIOPI-IMPL/camb/functions/neg.cpp | 26 +-
 DIOPI-IMPL/camb/functions/nonzero.cpp | 48 +-
 DIOPI-IMPL/camb/functions/one_hot.cpp | 114 ++---
 DIOPI-IMPL/camb/functions/permute.cpp | 48 +-
 DIOPI-IMPL/camb/functions/pow.cpp | 70 +--
 DIOPI-IMPL/camb/functions/random.cpp | 4 +-
 DIOPI-IMPL/camb/functions/randperm.cpp | 19 +-
 DIOPI-IMPL/camb/functions/reciprocal.cpp | 18 +-
 DIOPI-IMPL/camb/functions/reduce.cpp | 246 +++++-----
 DIOPI-IMPL/camb/functions/repeat.cpp | 12 +-
 DIOPI-IMPL/camb/functions/roll.cpp | 38 +-
 DIOPI-IMPL/camb/functions/rsqrt.cpp | 20 +-
 DIOPI-IMPL/camb/functions/sgd.cpp | 74 +--
 DIOPI-IMPL/camb/functions/sign.cpp | 22 +-
 DIOPI-IMPL/camb/functions/sin.cpp | 20 +-
 DIOPI-IMPL/camb/functions/slice.cpp | 290 ++++++------
 DIOPI-IMPL/camb/functions/softmax.cpp | 192 ++++----
 DIOPI-IMPL/camb/functions/sort.cpp | 74 +--
 DIOPI-IMPL/camb/functions/sqrt.cpp | 20 +-
 DIOPI-IMPL/camb/functions/stack.cpp | 38 +-
 DIOPI-IMPL/camb/functions/sub.cpp | 44 +-
 DIOPI-IMPL/camb/functions/threshold.cpp | 157 +++---
 DIOPI-IMPL/camb/functions/topk.cpp | 70 +--
 DIOPI-IMPL/camb/functions/transpose.cpp | 73 ++-
 DIOPI-IMPL/camb/functions/tril.cpp | 20 +-
 DIOPI-IMPL/camb/functions/where.cpp | 64 +--
 DIOPI-IMPL/camb/test/conform_test.cpp | 24 +-
 DIOPI-IMPL/scripts/build_impl.sh | 20 +-
 DIOPI-IMPL/scripts/ci_script.sh | 21 +-
 DIOPI-TEST/csrc/litert.cpp | 87 ++--
 .../python/conformance/diopi_runtime.py | 22 +-
 run-clang-tidy.py | 397 ++++++++++++++++
 87 files changed, 3976 insertions(+), 3479 deletions(-)
 create mode 100644 .clang-tidy
 create mode 100755 run-clang-tidy.py

diff --git a/.clang-tidy b/.clang-tidy
new file mode 100644
index 000000000..881e53dcb
--- /dev/null
+++ b/.clang-tidy
@@ -0,0 +1,68 @@
+---
+Checks: '
+  -*,
+  clang-diagnostic-*,
+  bugprone-*,
+  -bugprone-easily-swappable-parameters,
+  -bugprone-forward-declaration-namespace,
+  -bugprone-macro-parentheses,
+  -bugprone-lambda-function-name,
+  -bugprone-reserved-identifier,
+  -bugprone-swapped-arguments,
+  -bugprone-narrowing-conversions,
+  misc-*,-misc-const-correctness,
+  -misc-unused-parameters,
+  -misc-non-private-member-variables-in-classes,
+  -misc-no-recursion,
+  -misc-use-anonymous-namespace,
+  hicpp-avoid-goto,
+  modernize-*,
+  -modernize-concat-nested-namespaces,
+  -modernize-macro-to-enum,
+  -modernize-return-braced-init-list,
+  -modernize-use-auto,
+  -modernize-use-default-member-init,
+  -modernize-use-using,
+  -modernize-use-trailing-return-type,
+  -modernize-use-nodiscard,
+  -modernize-avoid-c-arrays
+  performance-*,
+  -performance-noexcept-move-constructor,
+  -performance-unnecessary-value-param,
+  readability-identifier-naming,
+  readability-container-size-empty,
+  '
+
+
+# NOTE there must be no spaces before the '-', so put the comma last.
+CheckOptions:
+  - key: readability-identifier-naming.ClassCase
+    value: "CamelCase"
+  - key: readability-identifier-naming.ClassMethodCase
+    value: "camelBack"
+  - key: readability-identifier-naming.EnumCase
+    value: "CamelCase"
+  - key: readability-identifier-naming.FunctionCase
+    value: "camelBack"
+  - key: readability-identifier-naming.MemberCase
+    value: "camelBack"
+  - key: readability-identifier-naming.MemberSuffix
+    value: "_"
+  - key: readability-identifier-naming.ParameterCase
+    value: "camelBack"
+  - key: readability-identifier-naming.UnionCase
+    value: "camelBack"
+  - key: readability-identifier-naming.VariableCase
+    value: "camelBack"
+  - key: readability-identifier-naming.IgnoreMainLikeFunctions
+    value: 1
+  - key: readability-redundant-member-init.IgnoreBaseInCopyConstructors
+    value: 1
+  - key: modernize-use-default-member-init.UseAssignment
+    value: 1
+
+HeaderFilterRegex: '.*'
+AnalyzeTemporaryDtors: false
+WarningsAsErrors: '*'
+UseColor: true
+...
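For readers who just want the effect of the readability-identifier-naming options above, the snippet below shows the code shape they enforce. It is an illustrative sketch with made-up names (TensorCache, insertTensor, cachedCount_), not code taken from this patch.

// Illustrative only: the naming style accepted by the CheckOptions above.
class TensorCache {                       // ClassCase: CamelCase
public:
    void insertTensor(int deviceId);      // ClassMethodCase and ParameterCase: camelBack
private:
    int cachedCount_ = 0;                 // MemberCase: camelBack, MemberSuffix: "_"
};

enum class CacheState { Empty, Full };    // EnumCase: CamelCase

void flushTensorCache() {                 // FunctionCase: camelBack
    int flushedCount = 0;                 // VariableCase: camelBack
    (void)flushedCount;                   // silence the unused-variable warning
}

This convention drives the bulk of the renames in the hunks that follow (workspace_size -> workspaceSize, input_tensor -> inputTensor, set_last_error_string -> setLastErrorString, and so on).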
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a164e3fa8..75014d7cc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -113,6 +113,14 @@ jobs: export DIOPI_BUILD_TESTRT=ON srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_CAMB} --time=10 bash -c 'cd DIOPI-IMPL && bash scripts/build_impl.sh camb' || ( cd ${NFS_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${BUILD_TEST1} && exit 1 ) """ + - name: clang-tidy + run: | + ssh ${CLUSTER_CAMB} """ + set -e + source /mnt/cache/share/platform/env/camb_ci_diopi_impl + cd ${NFS_PATH}/${GITHUB_RUN_NUMBER} && cd ${BUILD_TEST1} + srun --job-name=${GITHUB_JOB} --partition=${SLURM_PAR_CAMB} --time=10 bash -c 'cd DIOPI-IMPL && bash scripts/ci_script.sh clang-tidy' || ( cd ${NFS_PATH}/${GITHUB_RUN_NUMBER}/ && rm -rf ${BUILD_TEST1} && exit 1 ) + """ Build-Mmcv-Ext-Nvidia: name: Build-Mmcv-Ext-Nvidia diff --git a/DIOPI-IMPL/camb/CMakeLists.txt b/DIOPI-IMPL/camb/CMakeLists.txt index 298b62f6b..f2d3b20ec 100644 --- a/DIOPI-IMPL/camb/CMakeLists.txt +++ b/DIOPI-IMPL/camb/CMakeLists.txt @@ -49,4 +49,4 @@ set(THIRD_PARTY_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/half/inc set_target_properties(${DEVICEIMPL} PROPERTIES SUFFIX ".so") target_link_libraries(${DEVICEIMPL} cndev cnrt cnnl cnmlrt) -target_include_directories(${DEVICEIMPL} PUBLIC ${THIRD_PARTY_INCLUDE_DIRS}) +target_include_directories(${DEVICEIMPL} SYSTEM PUBLIC ${THIRD_PARTY_INCLUDE_DIRS}) diff --git a/DIOPI-IMPL/camb/cnnl_helper.cpp b/DIOPI-IMPL/camb/cnnl_helper.cpp index bfa1995b0..841d69f05 100644 --- a/DIOPI-IMPL/camb/cnnl_helper.cpp +++ b/DIOPI-IMPL/camb/cnnl_helper.cpp @@ -49,7 +49,7 @@ diopiError_t CnnlDataType::convertToCnnlType(cnnlDataType_t* cnnlType, diopiDtyp *cnnlType = CNNL_DTYPE_INT64; break; default: - set_last_error_string("unkown diopitype error %d at %s:%d", type, __FILE__, __LINE__); + setLastErrorString("unkown diopitype error %d at %s:%d", type, __FILE__, __LINE__); return diopiDtypeNotSupported; } return diopiSuccess; @@ -125,7 +125,7 @@ const std::unordered_map, cnnlCastDataType_t, HashCnnl CnnlHandlePool cnnlHandlePool; -diopiError_t cnnl_transpose( +diopiError_t cnnlTranspose( diopiContextHandle_t& ctx, cnnlHandle_t& handle, DiopiTensor& in, DiopiTensor& out, cnnlTensorLayout_t layoutIn, cnnlTensorLayout_t layoutOut) { /* DEPRECATED AND WILL BE REMOVED */ DIOPI_CHECK(in.dtype() == out.dtype(), "the data type of input and output tensor should be the same."); @@ -151,11 +151,11 @@ diopiError_t cnnl_transpose( CnnlTensorDesc inDesc(in, layoutIn); CnnlTensorDesc outDesc(out, layoutOut); CnnlTransposeDescriptor transDesc(order.size(), order.data()); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize(handle, inDesc.get(), transDesc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize(handle, inDesc.get(), transDesc.get(), &workspaceSize)); - void* workspace_ptr = workspace_size == 0 ? requiresBuffer(ctx, workspace_size).data() : nullptr; - DIOPI_CALLCNNL(cnnlTranspose_v2(handle, transDesc.get(), inDesc.get(), in.data(), outDesc.get(), out.data(), workspace_ptr, workspace_size)); + void* workspacePtr = workspaceSize == 0 ? 
requiresBuffer(ctx, workspaceSize).data() : nullptr; + DIOPI_CALLCNNL(cnnlTranspose_v2(handle, transDesc.get(), inDesc.get(), in.data(), outDesc.get(), out.data(), workspacePtr, workspaceSize)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/cnnl_helper.hpp b/DIOPI-IMPL/camb/cnnl_helper.hpp index 5c881324e..596b71425 100644 --- a/DIOPI-IMPL/camb/cnnl_helper.hpp +++ b/DIOPI-IMPL/camb/cnnl_helper.hpp @@ -26,7 +26,7 @@ namespace camb { do { \ ::cnnlStatus_t ret = Expr; \ if (ret != ::CNNL_STATUS_SUCCESS) { \ - set_last_error_string("cnnl error %d : %s at %s:%d", ret, ::cnnlGetErrorString(ret), __FILE__, __LINE__); \ + setLastErrorString("cnnl error %d : %s at %s:%d", ret, ::cnnlGetErrorString(ret), __FILE__, __LINE__); \ return diopiErrorOccurred; \ } \ } while (false); @@ -99,8 +99,8 @@ class CnnlTensorDesc : public CnnlDescBase dim_array(1, 1); - DIOPI_CALLCNNL(cnnlSetTensorDescriptorEx(get(), CNNL_LAYOUT_ARRAY, dtype, 1, dim_array.data(), dim_array.data())); + std::vector dimArray(1, 1); + DIOPI_CALLCNNL(cnnlSetTensorDescriptorEx(get(), CNNL_LAYOUT_ARRAY, dtype, 1, dimArray.data(), dimArray.data())); return diopiSuccess; } @@ -115,14 +115,14 @@ class CnnlTensorDesc : public CnnlDescBase& vec, std::vector& target_vec) { - target_vec[0] = static_cast(vec[2]); - target_vec[1] = static_cast(vec[3]); - target_vec[2] = static_cast(vec[1]); - target_vec[3] = static_cast(vec[0]); + auto convertShapeStrideHwcn = [](const std::vector& vec, std::vector& targetVec) { + targetVec[0] = static_cast(vec[2]); + targetVec[1] = static_cast(vec[3]); + targetVec[2] = static_cast(vec[1]); + targetVec[3] = static_cast(vec[0]); }; - convert_shape_stride_hwcn(dimSize, shape); - convert_shape_stride_hwcn(dimStride, stride); + convertShapeStrideHwcn(dimSize, shape); + convertShapeStrideHwcn(dimStride, stride); } else { for (size_t i = 0; i < dim; ++i) { shape[i] = dimSize[i]; @@ -151,7 +151,7 @@ class CnnlHandlePool final { cnnlHandle_t cnnlHandle; cnnlCreate(&cnnlHandle); cnnlSetQueue(cnnlHandle, queue); - cnnlHandlePool_.emplace(std::make_pair(queue, cnnlHandle)); + cnnlHandlePool_.emplace(queue, cnnlHandle); return cnnlHandle; } @@ -177,7 +177,7 @@ class CnnlHandlePool final { class CnnlTransposeDescriptor final : public CnnlDescBase { public: - CnnlTransposeDescriptor() {} + CnnlTransposeDescriptor() = default; CnnlTransposeDescriptor(const int dim, const int* permute) { set(dim, permute); } @@ -189,21 +189,21 @@ class CnnlTransposeDescriptor final : public CnnlDescBase { public: - CnnlReduceDescriptor() {} - - diopiError_t set(DiopiTensor& t, std::vector axis, cnnlReduceOp_t reduce_op, cnnlReduceIndices_t is_indices, cnnlIndicesType_t indices_type, - cnnlDataType_t tensor_type) { - int axis_num = axis.size(); - std::vector axis_list(axis_num); - for (int i = 0; i < axis_num; i++) { - axis_list[i] = static_cast(axis[i]); + CnnlReduceDescriptor() = default; + + diopiError_t set(DiopiTensor& t, std::vector axis, cnnlReduceOp_t reduceOp, cnnlReduceIndices_t isIndices, cnnlIndicesType_t indicesType, + cnnlDataType_t tensorType) { + int axisNum = axis.size(); + std::vector axisList(axisNum); + for (int i = 0; i < axisNum; i++) { + axisList[i] = static_cast(axis[i]); } - DIOPI_CALLCNNL(cnnlSetReduceDescriptor(get(), axis_list.data(), axis_num, reduce_op, tensor_type, CNNL_NOT_PROPAGATE_NAN, is_indices, indices_type)); + DIOPI_CALLCNNL(cnnlSetReduceDescriptor(get(), axisList.data(), axisNum, reduceOp, tensorType, CNNL_NOT_PROPAGATE_NAN, isIndices, indicesType)); return diopiSuccess; } }; -diopiError_t 
cnnl_transpose(diopiContextHandle_t& ctx, cnnlHandle_t& handle, DiopiTensor& in, DiopiTensor& out, cnnlTensorLayout_t layoutIn, +diopiError_t cnnlTranspose(diopiContextHandle_t& ctx, cnnlHandle_t& handle, DiopiTensor& in, DiopiTensor& out, cnnlTensorLayout_t layoutIn, cnnlTensorLayout_t layoutOut); struct HashCnnlCastDType { diff --git a/DIOPI-IMPL/camb/common/basic_op.cpp b/DIOPI-IMPL/camb/common/basic_op.cpp index 680d63989..3e0610d81 100644 --- a/DIOPI-IMPL/camb/common/basic_op.cpp +++ b/DIOPI-IMPL/camb/common/basic_op.cpp @@ -4,72 +4,72 @@ namespace impl { namespace camb { template -diopiError_t cnnl_op_tensor(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor other, DiopiTensor out, cnnlOpTensorDesc_t op_type, T1 alpha1, T2 alpha2, +diopiError_t cnnlOpTensor(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor other, DiopiTensor out, cnnlOpTensorDesc_t opType, T1 alpha1, T2 alpha2, T3 beta) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_casted = input; - DiopiTensor other_casted = other; - DiopiTensor output_casted = out; + DiopiTensor inputCasted = input; + DiopiTensor otherCasted = other; + DiopiTensor outputCasted = out; - std::vector tensors{&input_casted, &other_casted, &output_casted}; + std::vector tensors{&inputCasted, &otherCasted, &outputCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32, diopi_dtype_int32})); - cnnlDataType_t comp_type; - DIOPI_CALL(CnnlDataType::convertToCnnlType(&comp_type, input_casted.dtype())); + cnnlDataType_t compType; + DIOPI_CALL(CnnlDataType::convertToCnnlType(&compType, inputCasted.dtype())); - CnnlResourceGuard op_desc; + CnnlResourceGuard opDesc; - DIOPI_CALLCNNL(cnnlSetOpTensorDescriptor(op_desc.get(), CNNL_OP_TENSOR_SUB, comp_type, CNNL_NOT_PROPAGATE_NAN)); + DIOPI_CALLCNNL(cnnlSetOpTensorDescriptor(opDesc.get(), CNNL_OP_TENSOR_SUB, compType, CNNL_NOT_PROPAGATE_NAN)); - std::shared_ptr alpha1_value = nullptr; - std::shared_ptr alpha2_value = nullptr; - std::shared_ptr beta_value = nullptr; + std::shared_ptr alpha1Value = nullptr; + std::shared_ptr alpha2Value = nullptr; + std::shared_ptr betaValue = nullptr; - if (DiopiDataType::isInteger(input_casted.dtype())) { - alpha1_value = std::make_shared(alpha1); - alpha2_value = std::make_shared(alpha2); - beta_value = std::make_shared(beta); - } else if (DiopiDataType::isFloatPoint(input_casted.dtype())) { - alpha1_value = std::make_shared(alpha1); - alpha2_value = std::make_shared(alpha2); - beta_value = std::make_shared(beta); + if (DiopiDataType::isInteger(inputCasted.dtype())) { + alpha1Value = std::make_shared(alpha1); + alpha2Value = std::make_shared(alpha2); + betaValue = std::make_shared(beta); + } else if (DiopiDataType::isFloatPoint(inputCasted.dtype())) { + alpha1Value = std::make_shared(alpha1); + alpha2Value = std::make_shared(alpha2); + betaValue = std::make_shared(beta); } else { - set_last_error_string("%s", "cnnl op tensor only support int or float type.\n"); + setLastErrorString("%s", "cnnl op tensor only support int or float type.\n"); return diopiDtypeNotSupported; } - CnnlTensorDesc input_desc(input_casted, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_desc(other_casted, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc(output_casted, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputCasted, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherDesc(otherCasted, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc(outputCasted, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - 
DIOPI_CALLCNNL(cnnlGetOpTensorWorkspaceSize(handle, input_desc.get(), other_desc.get(), output_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetOpTensorWorkspaceSize(handle, inputDesc.get(), otherDesc.get(), outputDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (workspace_size != 0) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (workspaceSize != 0) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlOpTensor(handle, - op_desc.get(), - alpha1_value.get(), - input_desc.get(), - input_casted.data(), - alpha2_value.get(), - other_desc.get(), - other_casted.data(), + opDesc.get(), + alpha1Value.get(), + inputDesc.get(), + inputCasted.data(), + alpha2Value.get(), + otherDesc.get(), + otherCasted.data(), workspace, - workspace_size, - beta_value.get(), - output_desc.get(), - output_casted.data())); + workspaceSize, + betaValue.get(), + outputDesc.get(), + outputCasted.data())); - DIOPI_CALL(dataTypeCast(ctx, out, output_casted)); + DIOPI_CALL(dataTypeCast(ctx, out, outputCasted)); return diopiSuccess; } // Explicitly instantiate the template function for use in other .cpp files. -template diopiError_t cnnl_op_tensor(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor other, DiopiTensor out, +template diopiError_t cnnlOpTensor(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor other, DiopiTensor out, cnnlOpTensorDesc_t op_type, double alpha1, double alpha2, double beta); } // namespace camb diff --git a/DIOPI-IMPL/camb/common/broadcast.cpp b/DIOPI-IMPL/camb/common/broadcast.cpp index fa28b79cf..22d0caca3 100644 --- a/DIOPI-IMPL/camb/common/broadcast.cpp +++ b/DIOPI-IMPL/camb/common/broadcast.cpp @@ -30,16 +30,16 @@ diopiError_t broadcast(diopiContextHandle_t ctx, DiopiTensor& out, const DiopiTe return diopiSuccess; } -diopiError_t broadcastHelper(diopiContextHandle_t ctx, DiopiTensor input_tensor, DiopiTensor target_tensor, DiopiTensor* out_tensor) { - diopiTensorHandle_t bcast_input = nullptr; - DiopiTensor bcast_input_tensor; - if (input_tensor.shape() != target_tensor.shape()) { - bcast_input_tensor = requiresTensor(ctx, vec2diopiSize_t(target_tensor.shape()), target_tensor.dtype()); - DIOPI_CALL(broadcast(ctx, bcast_input_tensor, input_tensor)); +diopiError_t broadcastHelper(diopiContextHandle_t ctx, DiopiTensor inputTensor, DiopiTensor targetTensor, DiopiTensor* outTensor) { + diopiTensorHandle_t bcastInput = nullptr; + DiopiTensor bcastInputTensor; + if (inputTensor.shape() != targetTensor.shape()) { + bcastInputTensor = requiresTensor(ctx, vec2diopiSizeT(targetTensor.shape()), targetTensor.dtype()); + DIOPI_CALL(broadcast(ctx, bcastInputTensor, inputTensor)); } else { - bcast_input_tensor = input_tensor; + bcastInputTensor = inputTensor; } - *out_tensor = bcast_input_tensor; + *outTensor = bcastInputTensor; return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/common/clone.cpp b/DIOPI-IMPL/camb/common/clone.cpp index 66b3699ab..18fa198ed 100644 --- a/DIOPI-IMPL/camb/common/clone.cpp +++ b/DIOPI-IMPL/camb/common/clone.cpp @@ -15,6 +15,7 @@ diopiError_t clone(diopiContextHandle_t ctx, const DiopiTensor& inTensor, DiopiT CnnlTensorDesc inTensorDesc(inTensor, CNNL_LAYOUT_ARRAY); CnnlTensorDesc outTensorDesc(outTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlCopy(handle, inTensorDesc.get(), inTensor.data(), outTensorDesc.get(), outTensor.data())); + return diopiSuccess; } } // namespace camb diff --git a/DIOPI-IMPL/camb/common/common.hpp b/DIOPI-IMPL/camb/common/common.hpp index 
a80de394b..894490fb5 100644 --- a/DIOPI-IMPL/camb/common/common.hpp +++ b/DIOPI-IMPL/camb/common/common.hpp @@ -26,14 +26,14 @@ diopiError_t autoCastTensorType(diopiContextHandle_t ctx, const std::vector -diopiError_t cnnl_op_tensor(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor other, DiopiTensor out, cnnlOpTensorDesc_t op_type, T1 alpha1 = 1.0, +diopiError_t cnnlOpTensor(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor other, DiopiTensor out, cnnlOpTensorDesc_t opType, T1 alpha1 = 1.0, T2 alpha2 = 1.0, T3 beta = 0.0); diopiError_t clone(diopiContextHandle_t ctx, const DiopiTensor& inTensor, DiopiTensor& outTensor); diff --git a/DIOPI-IMPL/camb/common/contiguous.cpp b/DIOPI-IMPL/camb/common/contiguous.cpp index 6e2674870..b8a5f08af 100644 --- a/DIOPI-IMPL/camb/common/contiguous.cpp +++ b/DIOPI-IMPL/camb/common/contiguous.cpp @@ -9,78 +9,78 @@ namespace impl { namespace camb { -diopiError_t transpose(diopiContextHandle_t& ctx, DiopiTensor& in, DiopiTensor& out, cnnlTensorLayout_t layout_in, cnnlTensorLayout_t layout_out, +diopiError_t transpose(diopiContextHandle_t& ctx, DiopiTensor& in, DiopiTensor& out, cnnlTensorLayout_t layoutIn, cnnlTensorLayout_t layoutOut, std::vector order) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - CnnlTensorDesc inDesc(in, layout_in); - CnnlTensorDesc outDesc(out, layout_out); + CnnlTensorDesc inDesc(in, layoutIn); + CnnlTensorDesc outDesc(out, layoutOut); CnnlTransposeDescriptor transDesc(order.size(), order.data()); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize(handle, inDesc.get(), transDesc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize(handle, inDesc.get(), transDesc.get(), &workspaceSize)); - void* workspace_ptr = workspace_size == 0 ? requiresBuffer(ctx, workspace_size).data() : nullptr; - DIOPI_CALLCNNL(cnnlTranspose_v2(handle, transDesc.get(), inDesc.get(), in.data(), outDesc.get(), out.data(), workspace_ptr, workspace_size)); + void* workspacePtr = workspaceSize == 0 ? 
requiresBuffer(ctx, workspaceSize).data() : nullptr; + DIOPI_CALLCNNL(cnnlTranspose_v2(handle, transDesc.get(), inDesc.get(), in.data(), outDesc.get(), out.data(), workspacePtr, workspaceSize)); return diopiSuccess; } -void generate_layout_order(int64_t dim, MemoryFormat memory_format, cnnlTensorLayout_t& layout_in, cnnlTensorLayout_t& layout_out, std::vector& order) { - if (memory_format == MemoryFormat::Contiguous) { +void generateLayoutOrder(int64_t dim, MemoryFormat memoryFormat, cnnlTensorLayout_t& layoutIn, cnnlTensorLayout_t& layoutOut, std::vector& order) { + if (memoryFormat == MemoryFormat::Contiguous) { if (dim == 4) { - layout_in = CNNL_LAYOUT_NHWC; - layout_out = CNNL_LAYOUT_NCHW; + layoutIn = CNNL_LAYOUT_NHWC; + layoutOut = CNNL_LAYOUT_NCHW; order = {0, 3, 1, 2}; } else if (dim == 5) { - layout_in = CNNL_LAYOUT_NDHWC; - layout_out = CNNL_LAYOUT_NCDHW; + layoutIn = CNNL_LAYOUT_NDHWC; + layoutOut = CNNL_LAYOUT_NCDHW; order = {0, 4, 1, 2, 3}; } - } else if (memory_format == MemoryFormat::ChannelsLast) { + } else if (memoryFormat == MemoryFormat::ChannelsLast) { if (dim == 4) { - layout_in = CNNL_LAYOUT_NCHW; - layout_out = CNNL_LAYOUT_NHWC; + layoutIn = CNNL_LAYOUT_NCHW; + layoutOut = CNNL_LAYOUT_NHWC; order = {0, 2, 3, 1}; } - } else if (memory_format == MemoryFormat::ChannelsLast3d) { + } else if (memoryFormat == MemoryFormat::ChannelsLast3d) { if (dim == 5) { - layout_in = CNNL_LAYOUT_NCDHW; - layout_out = CNNL_LAYOUT_NDHWC; + layoutIn = CNNL_LAYOUT_NCDHW; + layoutOut = CNNL_LAYOUT_NDHWC; order = {0, 2, 3, 4, 1}; } } } /* Inplace contiguous, support NCHW <-> NHWC, NCDHW <-> NDHWC */ -diopiError_t contiguous_(diopiContextHandle_t& ctx, DiopiTensor& src, MemoryFormat memory_format) { - if (src.is_contiguous(memory_format)) return diopiSuccess; +diopiError_t contiguous(diopiContextHandle_t& ctx, DiopiTensor& src, MemoryFormat memoryFormat) { + if (src.isContiguous(memoryFormat)) return diopiSuccess; int64_t dim = src.dim(); DIOPI_CHECK(dim == 4 || dim == 5, "only support 4d/5d tensor currently"); - cnnlTensorLayout_t layout_in, layout_out; + cnnlTensorLayout_t layoutIn, layoutOut; std::vector order; - generate_layout_order(dim, memory_format, layout_in, layout_out, order); + generateLayoutOrder(dim, memoryFormat, layoutIn, layoutOut, order); - DiopiTensor dest = requiresTensor(ctx, src.shape(), src.dtype(), memory_format); - DIOPI_CALL(transpose(ctx, src, dest, layout_in, layout_out, order)); + DiopiTensor dest = requiresTensor(ctx, src.shape(), src.dtype(), memoryFormat); + DIOPI_CALL(transpose(ctx, src, dest, layoutIn, layoutOut, order)); // DIOPI_CALL(diopiCopyInp(ctx, src.tensorHandle(), dest.tensorHandle())); src = dest; return diopiSuccess; } /* Inplace contiguous, support special layout like CNNL_LAYOUT_HWCN */ -diopiError_t contiguous_(diopiContextHandle_t& ctx, DiopiTensor& src, MemoryFormat memory_format, cnnlTensorLayout_t layout_in, cnnlTensorLayout_t layout_out) { - if (src.is_contiguous(memory_format)) return diopiSuccess; +diopiError_t contiguous(diopiContextHandle_t& ctx, DiopiTensor& src, MemoryFormat memoryFormat, cnnlTensorLayout_t layoutIn, cnnlTensorLayout_t layoutOut) { + if (src.isContiguous(memoryFormat)) return diopiSuccess; DIOPI_CHECK(src.dim() == 4, "only support 4d tensor currently"); std::vector order; - if (layout_in == CNNL_LAYOUT_NHWC && layout_out == CNNL_LAYOUT_HWCN) { + if (layoutIn == CNNL_LAYOUT_NHWC && layoutOut == CNNL_LAYOUT_HWCN) { order = {1, 2, 3, 0}; - } else if (layout_in == CNNL_LAYOUT_NCHW && layout_out == CNNL_LAYOUT_HWCN) { + } 
else if (layoutIn == CNNL_LAYOUT_NCHW && layoutOut == CNNL_LAYOUT_HWCN) { order = {2, 3, 1, 0}; - } else if (layout_in == CNNL_LAYOUT_HWCN && layout_out == CNNL_LAYOUT_NHWC) { + } else if (layoutIn == CNNL_LAYOUT_HWCN && layoutOut == CNNL_LAYOUT_NHWC) { order = {3, 0, 1, 2}; - } else if (layout_in == CNNL_LAYOUT_HWCN && layout_out == CNNL_LAYOUT_NCHW) { + } else if (layoutIn == CNNL_LAYOUT_HWCN && layoutOut == CNNL_LAYOUT_NCHW) { order = {3, 2, 0, 1}; } else { DIOPI_CHECK(false, @@ -88,8 +88,8 @@ diopiError_t contiguous_(diopiContextHandle_t& ctx, DiopiTensor& src, MemoryForm "in [CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_HWCN]"); } - DiopiTensor dest = requiresTensor(ctx, src.shape(), src.dtype(), memory_format); - DIOPI_CALL(transpose(ctx, src, dest, layout_in, layout_out, order)); + DiopiTensor dest = requiresTensor(ctx, src.shape(), src.dtype(), memoryFormat); + DIOPI_CALL(transpose(ctx, src, dest, layoutIn, layoutOut, order)); src = dest; return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/common/dtype_cast.cpp b/DIOPI-IMPL/camb/common/dtype_cast.cpp index f967b5cc4..b0a38d3e7 100644 --- a/DIOPI-IMPL/camb/common/dtype_cast.cpp +++ b/DIOPI-IMPL/camb/common/dtype_cast.cpp @@ -18,43 +18,43 @@ namespace camb { inline bool canCastByInt32(uint64_t castType) { // special convert (cnnl doesn't support) - constexpr uint64_t BoolInt64 = _MAKE_KEY(diopi_dtype_bool, diopi_dtype_int64); - constexpr uint64_t Int16Int64 = _MAKE_KEY(diopi_dtype_int16, diopi_dtype_int64); - constexpr uint64_t Uint8Bool = _MAKE_KEY(diopi_dtype_uint8, diopi_dtype_bool); - constexpr uint64_t Int16Bool = _MAKE_KEY(diopi_dtype_int16, diopi_dtype_bool); - constexpr uint64_t Int64Bool = _MAKE_KEY(diopi_dtype_int64, diopi_dtype_bool); - constexpr uint64_t Int8Bool = _MAKE_KEY(diopi_dtype_int8, diopi_dtype_bool); - constexpr uint64_t Int8Int64 = _MAKE_KEY(diopi_dtype_int8, diopi_dtype_int64); - constexpr uint64_t Int64Int8 = _MAKE_KEY(diopi_dtype_int64, diopi_dtype_int8); - - return BoolInt64 == castType || Int16Int64 == castType || Uint8Bool == castType || Int16Bool == castType || Int64Bool == castType || Int8Bool == castType || - Int8Int64 == castType || Int64Int8 == castType; + constexpr uint64_t boolInt64 = _MAKE_KEY(diopi_dtype_bool, diopi_dtype_int64); + constexpr uint64_t int16Int64 = _MAKE_KEY(diopi_dtype_int16, diopi_dtype_int64); + constexpr uint64_t uint8Bool = _MAKE_KEY(diopi_dtype_uint8, diopi_dtype_bool); + constexpr uint64_t int16Bool = _MAKE_KEY(diopi_dtype_int16, diopi_dtype_bool); + constexpr uint64_t int64Bool = _MAKE_KEY(diopi_dtype_int64, diopi_dtype_bool); + constexpr uint64_t int8Bool = _MAKE_KEY(diopi_dtype_int8, diopi_dtype_bool); + constexpr uint64_t int8Int64 = _MAKE_KEY(diopi_dtype_int8, diopi_dtype_int64); + constexpr uint64_t int64Int8 = _MAKE_KEY(diopi_dtype_int64, diopi_dtype_int8); + + return boolInt64 == castType || int16Int64 == castType || uint8Bool == castType || int16Bool == castType || int64Bool == castType || int8Bool == castType || + int8Int64 == castType || int64Int8 == castType; } inline bool canCastByFloat32(uint64_t castType) { - constexpr uint64_t Int64Float64 = _MAKE_KEY(diopi_dtype_int64, diopi_dtype_float64); - constexpr uint64_t Float64Int64 = _MAKE_KEY(diopi_dtype_float64, diopi_dtype_int64); - return Int64Float64 == castType || Float64Int64 == castType; + constexpr uint64_t int64Float64 = _MAKE_KEY(diopi_dtype_int64, diopi_dtype_float64); + constexpr uint64_t float64Int64 = _MAKE_KEY(diopi_dtype_float64, diopi_dtype_int64); + return int64Float64 == castType || 
float64Int64 == castType; } static diopiError_t dataTypeCastTwice(diopiContextHandle_t ctx, DiopiTensor& dest, const DiopiTensor& src) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); diopiDtype_t srcDtype = src.dtype(); diopiDtype_t destDtype = dest.dtype(); - cnnlCastDataType_t cast_type; + cnnlCastDataType_t castType; // cast through middle auto key = _MAKE_KEY(srcDtype, destDtype); if (canCastByInt32(key)) { DiopiTensor mid = requiresTensor(ctx, src.shape(), diopi_dtype_int32); DIOPI_CALL(dataTypeCast(ctx, mid, src)); DIOPI_CALL(dataTypeCast(ctx, dest, mid)); - } else if (canCastByFloat32) { + } else if (&canCastByFloat32) { DiopiTensor mid = requiresTensor(ctx, src.shape(), diopi_dtype_float32); DIOPI_CALL(dataTypeCast(ctx, mid, src)); DIOPI_CALL(dataTypeCast(ctx, dest, mid)); } else { // TODO(waiting for dispatch) : cast through cpu - set_last_error_string("Can not cast from %d to %d at %s:%d ", srcDtype, destDtype, __FILE__, __LINE__); + setLastErrorString("Can not cast from %d to %d at %s:%d ", srcDtype, destDtype, __FILE__, __LINE__); return diopiDtypeNotSupported; } return diopiSuccess; @@ -109,7 +109,7 @@ static diopiError_t choiceDtype(const std::set& opSupportedDtypes, } else if (opSupportedDtypes.find(diopi_dtype_bool) != opSupportedDtypes.end()) { *dtype = diopi_dtype_bool; } else { - set_last_error_string("%s", "this operator does not support bool, int8, int16, int32, float16, float32"); + setLastErrorString("%s", "this operator does not support bool, int8, int16, int32, float16, float32"); return diopiDtypeNotSupported; } return diopiSuccess; @@ -163,7 +163,7 @@ diopiError_t autoCastTensorType(diopiContextHandle_t ctx, const std::vectorshape().size(); } + int64_t dim() const { return static_cast(this->shape().size()); } DiopiTensor contiguous(diopiContextHandle_t ctx, MemoryFormat format = MemoryFormat::Contiguous) { /* DEPRECATED AND WILL BE REMOVED */ - if (this->is_contiguous(format)) return *this; + if (this->isContiguous(format)) return *this; int64_t dim = this->dim(); std::vector strides(dim); int64_t stride = 1; @@ -198,14 +198,14 @@ class DiopiTensor final { } } } - diopiSize_t stride_diopi(strides.data(), static_cast(strides.size())); - diopiSize_t shape_diopi(this->shape().data(), this->shape().size()); + diopiSize_t strideDiopi(strides.data(), static_cast(strides.size())); + diopiSize_t shapeDiopi(this->shape().data(), static_cast(this->shape().size())); diopiTensorHandle_t tensor = nullptr; - diopiRequireTensor(ctx, &tensor, &shape_diopi, &stride_diopi, this->dtype(), this->device()); + diopiRequireTensor(ctx, &tensor, &shapeDiopi, &strideDiopi, this->dtype(), this->device()); return DiopiTensor(tensor); } - bool is_contiguous(MemoryFormat format = MemoryFormat::Contiguous) { + bool isContiguous(MemoryFormat format = MemoryFormat::Contiguous) { int64_t stride = 1; int64_t dim = this->dim(); auto strides = this->stride(); @@ -213,31 +213,31 @@ class DiopiTensor final { if (format == MemoryFormat::Contiguous) { for (int64_t i = dim - 1; i >= 0; i--) { - const auto& shape_d = shape[i]; - if (shape_d != 1) { + const auto& shapeD = shape[i]; + if (shapeD != 1) { if (strides[i] != stride) { return false; } } - stride *= shape_d; + stride *= shapeD; } } else if (format == MemoryFormat::ChannelsLast) { if (strides.size() != 4) return false; for (auto& i : {1, 3, 2, 0}) { - const auto& shape_d = shape[i]; - if (shape_d != 1) { + const auto& shapeD = shape[i]; + if (shapeD != 1) { // shape_d != 1 help dealing with shape like [2, 2048, 1, 1] if (strides[i] != stride) { 
return false; } } - stride *= shape_d; + stride *= shapeD; } } else if (format == MemoryFormat::ChannelsLast3d) { if (strides.size() != 5) return false; for (auto& i : {1, 4, 3, 2, 0}) { - const auto& shape_d = shape[i]; - if (shape_d != 1) { + const auto& shapeD = shape[i]; + if (shapeD != 1) { if (strides[i] != stride) { return false; } @@ -248,7 +248,7 @@ class DiopiTensor final { return true; } - void as_strided(std::vector& shape, std::vector& stride) { + void asStrided(std::vector& shape, std::vector& stride) { this->shape_ = shape; this->stride_ = stride; } @@ -258,13 +258,13 @@ class DiopiTensor final { // which is same with pytorch auto shape = this->shape(); auto strides = this->stride(); - int64_t new_stride = dim >= this->dim() ? 1 : shape[dim] * strides[dim]; - std::vector new_shape(shape.begin(), shape.end()); - std::vector new_strides(strides.begin(), strides.end()); + int64_t newStride = dim >= this->dim() ? 1 : shape[dim] * strides[dim]; + std::vector newShape(shape.begin(), shape.end()); + std::vector newStrides(strides.begin(), strides.end()); - new_shape.insert(new_shape.begin() + dim, 1); - new_strides.insert(new_strides.begin() + dim, new_stride); - this->as_strided(new_shape, new_strides); + newShape.insert(newShape.begin() + dim, 1); + newStrides.insert(newStrides.begin() + dim, newStride); + this->asStrided(newShape, newStrides); } bool defined() const { @@ -294,11 +294,11 @@ class DiopiTensor final { return p; } - MemoryFormat suggest_memory_format() { + MemoryFormat suggestMemoryFormat() { // TODO(waiting for dispatch): Performance can be improved by dividing is_contiguous into several funcs - if (this->is_contiguous(MemoryFormat::Contiguous)) { + if (this->isContiguous(MemoryFormat::Contiguous)) { return MemoryFormat::Contiguous; - } else if (this->is_contiguous(MemoryFormat::ChannelsLast)) { + } else if (this->isContiguous(MemoryFormat::ChannelsLast)) { return MemoryFormat::ChannelsLast; } else { return MemoryFormat::ChannelsLast3d; @@ -309,10 +309,10 @@ class DiopiTensor final { diopiConstTensorHandle_t tensorHandle() const { return tensor_; } - bool is_same(DiopiTensor t) { return this->tensorHandle() == t.tensorHandle(); } + bool isSame(DiopiTensor t) { return this->tensorHandle() == t.tensorHandle(); } protected: - diopiTensorHandle_t tensor_ = 0; + diopiTensorHandle_t tensor_ = nullptr; std::vector shape_{0}; std::vector stride_{0}; }; @@ -327,8 +327,8 @@ inline auto makeTensor(diopiContextHandle_t ctx, const diopiScalar_t* pScalar) - inline DiopiTensor ones(diopiContextHandle_t ctx, std::vector size, diopiDtype_t dtype) { diopiTensorHandle_t tensor = nullptr; - diopiSize_t size_(size.data(), size.size()); - diopiRequireTensor(ctx, &tensor, &size_, nullptr, dtype, diopi_device); + diopiSize_t sizeTmp(size.data(), size.size()); + diopiRequireTensor(ctx, &tensor, &sizeTmp, nullptr, dtype, diopi_device); diopiScalar_t scalar = {dtype, 1.0}; if (DiopiDataType().isInteger(dtype)) scalar = {dtype, 1}; diopiFill(ctx, tensor, &scalar); @@ -342,25 +342,25 @@ inline DiopiTensor requiresTensor(diopiContextHandle_t ctx, const diopiSize_t& s } inline DiopiTensor requiresTensor(diopiContextHandle_t ctx, const std::vector& size, const std::vector& stride, diopiDtype_t dtype) { - diopiSize_t size_(size.data(), size.size()); - diopiSize_t stride_(stride.data(), stride.size()); + diopiSize_t sizeTmp(size.data(), size.size()); + diopiSize_t strideTmp(stride.data(), stride.size()); diopiTensorHandle_t tensor = nullptr; - diopiRequireTensor(ctx, &tensor, &size_, &stride_, 
dtype, diopi_device); + diopiRequireTensor(ctx, &tensor, &sizeTmp, &strideTmp, dtype, diopi_device); return DiopiTensor(tensor); } inline DiopiTensor requiresTensor(diopiContextHandle_t ctx, const std::vector& size, diopiDtype_t dtype) { - diopiSize_t size_(size.data(), size.size()); + diopiSize_t sizeTmp(size.data(), size.size()); diopiTensorHandle_t tensor = nullptr; - diopiRequireTensor(ctx, &tensor, &size_, nullptr, dtype, diopi_device); + diopiRequireTensor(ctx, &tensor, &sizeTmp, nullptr, dtype, diopi_device); return DiopiTensor(tensor); } -inline DiopiTensor requiresTensor(diopiContextHandle_t ctx, const std::vector& size, diopiDtype_t dtype, MemoryFormat memory_format) { +inline DiopiTensor requiresTensor(diopiContextHandle_t ctx, const std::vector& size, diopiDtype_t dtype, MemoryFormat memoryFormat) { int64_t dim = size.size(); std::vector strides(dim); int64_t stride = 1; - if (memory_format == MemoryFormat::Contiguous) { + if (memoryFormat == MemoryFormat::Contiguous) { for (size_t i = dim; i > 0; --i) { strides[i - 1] = stride; if (size[i - 1] == 0) { @@ -370,7 +370,7 @@ inline DiopiTensor requiresTensor(diopiContextHandle_t ctx, const std::vector(stream_handle); + diopiStreamHandle_t streamHandle; + diopiGetStream(ctx, &streamHandle); + return static_cast(streamHandle); } template -inline std::vector diopiSize_t2Vector(diopiSize_t size, T) { +inline std::vector diopiSizeT2Vector(diopiSize_t size, T) { return std::vector(size.data, size.data + size.len); } -inline diopiSize_t vec2diopiSize_t(const std::vector& sizeIn) { +inline diopiSize_t vec2diopiSizeT(const std::vector& sizeIn) { diopiSize_t diopiSize(sizeIn.data(), sizeIn.size()); return diopiSize; } -inline void syncStreamInCtx(const diopiContextHandle_t ctx) { +inline void syncStreamInCtx(diopiContextHandle_t ctx) { cnrtQueue_t queue = getStream(ctx); cnrtQueueSync(queue); return; diff --git a/DIOPI-IMPL/camb/error.hpp b/DIOPI-IMPL/camb/error.hpp index 0aba3dd70..1bdb47311 100644 --- a/DIOPI-IMPL/camb/error.hpp +++ b/DIOPI-IMPL/camb/error.hpp @@ -23,12 +23,12 @@ extern char strLastErrorOther[4096]; extern std::mutex mtxLastError; template -inline void set_last_error_string(const char* szFmt, Types&&... args) { +inline void setLastErrorString(const char* szFmt, Types&&... 
args) { std::lock_guard lock(mtxLastError); sprintf(strLastErrorOther, szFmt, std::forward(args)...); } -const char* camb_get_last_error_string(); +const char* cambGetLastErrorString(); const char* getDiopiErrorStr(diopiError_t err); diff --git a/DIOPI-IMPL/camb/functions/abs.cpp b/DIOPI-IMPL/camb/functions/abs.cpp index 9245d6840..b45fe51d3 100644 --- a/DIOPI-IMPL/camb/functions/abs.cpp +++ b/DIOPI-IMPL/camb/functions/abs.cpp @@ -17,30 +17,30 @@ static diopiError_t abs(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor std::vector pTensors{&input}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp = output; + DiopiTensor outputTmp = output; if (input.dtype() != output.dtype()) { - output_tmp = requiresTensor(ctx, output.shape(), input.dtype()); + outputTmp = requiresTensor(ctx, output.shape(), input.dtype()); } - CnnlTensorDesc input_desc(input, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_tmp_desc(output_tmp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlAbs(handle, input_desc.get(), input.data(), output_tmp_desc.get(), output_tmp.data())); - if (output_tmp.dtype() != output.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output, output_tmp)); + CnnlTensorDesc inputDesc(input, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputTmpDesc(outputTmp, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlAbs(handle, inputDesc.get(), input.data(), outputTmpDesc.get(), outputTmp.data())); + if (outputTmp.dtype() != output.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, output, outputTmp)); } return diopiSuccess; } extern "C" diopiError_t diopiAbsInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DiopiTensor input_tensor(input); - DIOPI_CALL(abs(ctx, input_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DIOPI_CALL(abs(ctx, inputTensor, inputTensor)); return diopiSuccess; } extern "C" diopiError_t diopiAbs(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(abs(ctx, input_tensor, output_tensor)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(abs(ctx, inputTensor, outputTensor)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/activation.cpp b/DIOPI-IMPL/camb/functions/activation.cpp index 50e5a17e8..07d5dc5ff 100644 --- a/DIOPI-IMPL/camb/functions/activation.cpp +++ b/DIOPI-IMPL/camb/functions/activation.cpp @@ -4,10 +4,10 @@ * @copyright (c) 2023, DeepLink. 
*/ +#include #include #include "../common/common.hpp" - namespace impl { namespace camb { @@ -17,13 +17,13 @@ class CnnlAttribute { template void set(std::string key, T value) { auto pHolder = std::make_shared>(value); - m_data[key] = pHolder; + mData_[key] = pHolder; } template T get(std::string key, T defaultValue) const { - auto iter = m_data.find(key); - if (iter != m_data.end()) { + auto iter = mData_.find(key); + if (iter != mData_.end()) { const ValueHolderBase* holder = iter->second.get(); if (holder->getTypeInfo() == typeid(T)) { const ValueHolder* typedHolder = static_cast*>(holder); @@ -36,75 +36,75 @@ class CnnlAttribute { private: class ValueHolderBase { public: - virtual ~ValueHolderBase() {} + virtual ~ValueHolderBase() = default; virtual const std::type_info& getTypeInfo() const = 0; }; template class ValueHolder : public ValueHolderBase { public: - explicit ValueHolder(T value) : m_value(value) {} + explicit ValueHolder(T value) : mValue_(value) {} const std::type_info& getTypeInfo() const override { return typeid(T); } - T get() const { return m_value; } + T get() const { return mValue_; } private: - T m_value; + T mValue_; }; - std::unordered_map> m_data; + std::unordered_map> mData_; }; -diopiError_t cnnl_activation_internal(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor out, CnnlAttribute attr) { +diopiError_t cnnlActivationInternal(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor out, CnnlAttribute attr) { auto handle = cnnlHandlePool.get(ctx); auto mode = attr.get("mode", CNNL_ACTIVATION_SIGMOID); auto perf = attr.get("perf", CNNL_ACTIVATION_HIGH_PRECISION); - auto nan_prop = attr.get("nan", CNNL_NOT_PROPAGATE_NAN); + auto nanProp = attr.get("nan", CNNL_NOT_PROPAGATE_NAN); float coef = attr.get("coef", 0.0f); - int sliced_dim = attr.get("sliced_dim", 0); + int slicedDim = attr.get("sliced_dim", 0); float gamma = attr.get("gamma", 0.0f); float scale = attr.get("scale", 0.0f); - bool is_result = attr.get("is_result", false); + bool isResult = attr.get("is_result", false); bool approximate = attr.get("approximate", false); void* alpha = attr.get("alpha", nullptr); void* beta = attr.get("beta", nullptr); - CnnlResourceGuard activation_desc; - DIOPI_CALLCNNL(cnnlSetActivationDescriptor_v6(activation_desc.get(), mode, perf, nan_prop, coef, sliced_dim, gamma, scale, is_result, approximate)); + CnnlResourceGuard activationDesc; + DIOPI_CALLCNNL(cnnlSetActivationDescriptor_v6(activationDesc.get(), mode, perf, nanProp, coef, slicedDim, gamma, scale, isResult, approximate)); std::vector inputs{&input}; DIOPI_CALL(autoCastTensorType(ctx, inputs, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor temp_output = out; - DIOPI_CALL(dataTypeCast(ctx, temp_output, input.dtype())); + DiopiTensor tempOutput = out; + DIOPI_CALL(dataTypeCast(ctx, tempOutput, input.dtype())); - CnnlTensorDesc input_desc(input, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc(temp_output, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(input, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc(tempOutput, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlActivationForward(handle, activation_desc.get(), alpha, input_desc.get(), input.data(), beta, output_desc.get(), temp_output.data())); - DIOPI_CALL(dataTypeCast(ctx, out, temp_output)); + DIOPI_CALLCNNL(cnnlActivationForward(handle, activationDesc.get(), alpha, inputDesc.get(), input.data(), beta, outputDesc.get(), tempOutput.data())); + DIOPI_CALL(dataTypeCast(ctx, out, tempOutput)); return diopiSuccess; } -diopiError_t 
cnnl_activation_backward_internal(diopiContextHandle_t ctx, DiopiTensor grad_input, DiopiTensor grad_output, DiopiTensor input, DiopiTensor output, +diopiError_t cnnlActivationBackwardInternal(diopiContextHandle_t ctx, DiopiTensor gradInput, DiopiTensor gradOutput, DiopiTensor input, DiopiTensor output, CnnlAttribute attr) { auto handle = cnnlHandlePool.get(ctx); auto mode = attr.get("mode", CNNL_ACTIVATION_SIGMOID); auto perf = attr.get("perf", CNNL_ACTIVATION_HIGH_PRECISION); - auto nan_prop = attr.get("perf", CNNL_NOT_PROPAGATE_NAN); // relu relu6 + auto nanProp = attr.get("perf", CNNL_NOT_PROPAGATE_NAN); // relu relu6 float coef = attr.get("coef", 0.0f); - int sliced_dim = attr.get("sliced_dim", 0); + int slicedDim = attr.get("sliced_dim", 0); float gamma = attr.get("gamma", 0.0f); float scale = attr.get("scale", 0.0f); - bool is_result = attr.get("is_result", true); + bool isResult = attr.get("is_result", true); bool approximate = attr.get("approximate", false); void* alpha = attr.get("alpha", nullptr); void* beta = attr.get("beta", nullptr); - CnnlResourceGuard activation_desc; - DIOPI_CALLCNNL(cnnlSetActivationDescriptor_v6(activation_desc.get(), mode, perf, nan_prop, coef, sliced_dim, gamma, scale, is_result, approximate)); - std::vector inputs{&grad_output}; + CnnlResourceGuard activationDesc; + DIOPI_CALLCNNL(cnnlSetActivationDescriptor_v6(activationDesc.get(), mode, perf, nanProp, coef, slicedDim, gamma, scale, isResult, approximate)); + std::vector inputs{&gradOutput}; if (input.defined()) { inputs.push_back(&input); } @@ -112,35 +112,35 @@ diopiError_t cnnl_activation_backward_internal(diopiContextHandle_t ctx, DiopiTe inputs.push_back(&output); } - std::set support_dtype{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, inputs, support_dtype)); - DiopiTensor temp_grad_input = grad_input; - DIOPI_CALL(dataTypeCast(ctx, temp_grad_input, grad_output.dtype())); + std::set supportDtype{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, inputs, supportDtype)); + DiopiTensor tempGradInput = gradInput; + DIOPI_CALL(dataTypeCast(ctx, tempGradInput, gradOutput.dtype())); - CnnlTensorDesc grad_input_desc(temp_grad_input, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc grad_output_desc(grad_output, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradInputDesc(tempGradInput, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradOutputDesc(gradOutput, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc input_desc, output_desc; + CnnlTensorDesc inputDesc, outputDesc; if (input.defined()) { - DIOPI_CALL(input_desc.set(input, CNNL_LAYOUT_ARRAY)); + DIOPI_CALL(inputDesc.set(input, CNNL_LAYOUT_ARRAY)); } if (output.defined()) { - DIOPI_CALL(output_desc.set(output, CNNL_LAYOUT_ARRAY)); + DIOPI_CALL(outputDesc.set(output, CNNL_LAYOUT_ARRAY)); } DIOPI_CALLCNNL(cnnlActivationBackward(handle, - activation_desc.get(), + activationDesc.get(), alpha, - output_desc.get(), + outputDesc.get(), output.defined() ? output.data() : nullptr, - grad_output_desc.get(), - grad_output.data(), - input_desc.get(), + gradOutputDesc.get(), + gradOutput.data(), + inputDesc.get(), input.defined() ? 
input.data() : nullptr, beta, - grad_input_desc.get(), - temp_grad_input.data())); - DIOPI_CALL(dataTypeCast(ctx, grad_input, temp_grad_input)); + gradInputDesc.get(), + tempGradInput.data())); + DIOPI_CALL(dataTypeCast(ctx, gradInput, tempGradInput)); return diopiSuccess; } @@ -148,161 +148,161 @@ diopiError_t cnnl_activation_backward_internal(diopiContextHandle_t ctx, DiopiTe extern "C" diopiError_t diopiRelu(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_RELU); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, output_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, outputTensor, attr)); return diopiSuccess; } extern "C" diopiError_t diopiReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); + DiopiTensor inputTensor(input); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_RELU); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, input_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, inputTensor, attr)); return diopiSuccess; } extern "C" diopiError_t diopiSigmoid(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_SIGMOID); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, output_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, outputTensor, attr)); return diopiSuccess; } extern "C" diopiError_t diopiSigmoidInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); + DiopiTensor inputTensor(input); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_SIGMOID); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, input_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, inputTensor, attr)); return diopiSuccess; } -extern "C" diopiError_t diopiSigmoidBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +extern "C" diopiError_t diopiSigmoidBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t output) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor output_tensor(output); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor outputTensor(output); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_SIGMOID); - DIOPI_CALL(cnnl_activation_backward_internal(ctx, grad_input_tensor, grad_output_tensor, {}, output_tensor, attr)); + DIOPI_CALL(cnnlActivationBackwardInternal(ctx, gradInputTensor, gradOutputTensor, {}, outputTensor, attr)); return diopiSuccess; } extern "C" diopiError_t diopiTanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); + DiopiTensor 
inputTensor(input); + DiopiTensor outputTensor(out); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_TANH); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, output_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, outputTensor, attr)); return diopiSuccess; } extern "C" diopiError_t diopiTanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); + DiopiTensor inputTensor(input); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_TANH); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, input_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, inputTensor, attr)); return diopiSuccess; } -extern "C" diopiError_t diopiTanhBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +extern "C" diopiError_t diopiTanhBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t output) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor output_tensor(output); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor outputTensor(output); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_TANH); - DIOPI_CALL(cnnl_activation_backward_internal(ctx, grad_input_tensor, grad_output_tensor, {}, output_tensor, attr)); + DIOPI_CALL(cnnlActivationBackwardInternal(ctx, gradInputTensor, gradOutputTensor, {}, outputTensor, attr)); return diopiSuccess; } extern "C" diopiError_t diopiGelu(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const char* approximate) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_GELU); - if (approximate == "tanh") { + if (strcmp(approximate, "tanh") == 0) { attr.set("approximate", true); } - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, output_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, outputTensor, attr)); return diopiSuccess; } -extern "C" diopiError_t diopiGeluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +extern "C" diopiError_t diopiGeluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, const char* approximate) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor input_tensor(input); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor inputTensor(input); CnnlAttribute attr; attr.set("mode", CNNL_ACTIVATION_GELU); - if (approximate == "tanh") { + if (strcmp(approximate, "tanh") == 0) { attr.set("approximate", true); } - DIOPI_CALL(cnnl_activation_backward_internal(ctx, grad_input_tensor, grad_output_tensor, input_tensor, {}, attr)); + DIOPI_CALL(cnnlActivationBackwardInternal(ctx, gradInputTensor, gradOutputTensor, inputTensor, {}, attr)); return diopiSuccess; } -extern "C" diopiError_t diopiLeakyRelu(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope) { +extern "C" 
diopiError_t diopiLeakyRelu(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* negativeSlope) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); CnnlAttribute attr; - float coef_val = DiopiDataType::isInteger(negative_slope->stype) ? negative_slope->ival : negative_slope->fval; - attr.set("coef", coef_val); + float coefVal = DiopiDataType::isInteger(negativeSlope->stype) ? negativeSlope->ival : negativeSlope->fval; + attr.set("coef", coefVal); attr.set("mode", CNNL_ACTIVATION_LEAKYRELU); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, output_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, outputTensor, attr)); return diopiSuccess; } -extern "C" diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* negative_slope) { +extern "C" diopiError_t diopiLeakyReluInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* negativeSlope) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); + DiopiTensor inputTensor(input); CnnlAttribute attr; - float coef_val = DiopiDataType::isInteger(negative_slope->stype) ? negative_slope->ival : negative_slope->fval; - attr.set("coef", coef_val); + float coefVal = DiopiDataType::isInteger(negativeSlope->stype) ? negativeSlope->ival : negativeSlope->fval; + attr.set("coef", coefVal); attr.set("mode", CNNL_ACTIVATION_LEAKYRELU); - DIOPI_CALL(cnnl_activation_internal(ctx, input_tensor, input_tensor, attr)); + DIOPI_CALL(cnnlActivationInternal(ctx, inputTensor, inputTensor, attr)); return diopiSuccess; } -extern "C" diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, const diopiScalar_t* negative_slope, bool input_is_result) { +extern "C" diopiError_t diopiLeakyReluBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, + diopiConstTensorHandle_t input, const diopiScalar_t* negativeSlope, bool inputIsResult) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor input_tensor(input); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor inputTensor(input); CnnlAttribute attr; - float coef_val = DiopiDataType::isInteger(negative_slope->stype) ? negative_slope->ival : negative_slope->fval; - attr.set("coef", coef_val); + float coefVal = DiopiDataType::isInteger(negativeSlope->stype) ? 
negativeSlope->ival : negativeSlope->fval; + attr.set("coef", coefVal); attr.set("mode", CNNL_ACTIVATION_LEAKYRELU); - DIOPI_CALL(cnnl_activation_backward_internal(ctx, grad_input_tensor, grad_output_tensor, input_tensor, {}, attr)); + DIOPI_CALL(cnnlActivationBackwardInternal(ctx, gradInputTensor, gradOutputTensor, inputTensor, {}, attr)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/adaptive_pooling.cpp b/DIOPI-IMPL/camb/functions/adaptive_pooling.cpp index d1fffdef8..6acbcdede 100644 --- a/DIOPI-IMPL/camb/functions/adaptive_pooling.cpp +++ b/DIOPI-IMPL/camb/functions/adaptive_pooling.cpp @@ -14,112 +14,112 @@ namespace camb { extern "C" { -diopiError_t diopiAdaptiveAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t output_size) { +diopiError_t diopiAdaptiveAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t outputSize) { /* Get handle and generate tensors */ cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); /* Some basic check */ - DIOPI_CHECK(input_tr.dim() == 3 || input_tr.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTr.dim() == 3 || inputTr.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector p_tensors{&input_tr}; - std::set supported_dtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes)); + std::vector pTensors{&inputTr}; + std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp_tr = output_tr; - if (input_tr.dtype() != output_tr.dtype()) { - output_tmp_tr = requiresTensor(ctx, output_tr.shape(), input_tr.dtype()); + DiopiTensor outputTmpTr = outputTr; + if (inputTr.dtype() != outputTr.dtype()) { + outputTmpTr = requiresTensor(ctx, outputTr.shape(), inputTr.dtype()); } - auto memory_format = MemoryFormat::ChannelsLast; - auto input_channel_last = input_tr.contiguous(ctx, memory_format); - DIOPI_CALL(cnnl_transpose(ctx, handle, input_tr, input_channel_last, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); + auto memoryFormat = MemoryFormat::ChannelsLast; + auto inputChannelLast = inputTr.contiguous(ctx, memoryFormat); + DIOPI_CALL(cnnlTranspose(ctx, handle, inputTr, inputChannelLast, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); - auto output_channel_last = output_tmp_tr; - if (!output_channel_last.is_contiguous(memory_format)) { + auto outputChannelLast = outputTmpTr; + if (!outputChannelLast.isContiguous(memoryFormat)) { // for some special case like shape = [2, 2048, 1, 1], it's already been ChannelsLast - output_channel_last = requiresTensor(ctx, output_tmp_tr.shape(), output_tmp_tr.dtype(), MemoryFormat::ChannelsLast); + outputChannelLast = requiresTensor(ctx, outputTmpTr.shape(), outputTmpTr.dtype(), MemoryFormat::ChannelsLast); } cnnlTensorLayout_t layout = CNNL_LAYOUT_NHWC; - CnnlTensorDesc input_desc(input_channel_last, layout); - CnnlTensorDesc output_desc(output_channel_last, layout); + CnnlTensorDesc inputDesc(inputChannelLast, layout); + CnnlTensorDesc outputDesc(outputChannelLast, layout); cnnlPoolingMode_t mode = CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetAdaptivePoolingForwardWorkspaceSize(handle, input_desc.get(), mode, output_desc.get(), 
&workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetAdaptivePoolingForwardWorkspaceSize(handle, inputDesc.get(), mode, outputDesc.get(), &workspaceSize)); - void* workspace_ptr = workspace_size == 0 ? nullptr : requiresBuffer(ctx, workspace_size).data(); + void* workspacePtr = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data(); /* call adaptive pooling */ DIOPI_CALLCNNL(cnnlAdaptivePoolingForward_v2(handle, - input_desc.get(), - input_channel_last.data(), + inputDesc.get(), + inputChannelLast.data(), mode, - workspace_ptr, - workspace_size, - output_desc.get(), - output_channel_last.data(), + workspacePtr, + workspaceSize, + outputDesc.get(), + outputChannelLast.data(), nullptr, nullptr)); // NHWC -> NCHW - DIOPI_CALL(cnnl_transpose(ctx, handle, output_channel_last, output_tmp_tr, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW)); + DIOPI_CALL(cnnlTranspose(ctx, handle, outputChannelLast, outputTmpTr, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW)); - if (output_tmp_tr.dtype() != output_tr.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output_tr, output_tmp_tr)); + if (outputTmpTr.dtype() != outputTr.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outputTr, outputTmpTr)); } return diopiSuccess; } -diopiError_t diopiAdaptiveAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +diopiError_t diopiAdaptiveAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input) { /* Get handle and generate tensors */ cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tr(input); - DiopiTensor grad_output_tr(grad_output); - DiopiTensor grad_input_tr(grad_input); + DiopiTensor inputTr(input); + DiopiTensor gradOutputTr(gradOutput); + DiopiTensor gradInputTr(gradInput); /* Some basic check */ - DIOPI_CHECK(input_tr.dim() == 3 || input_tr.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTr.dim() == 3 || inputTr.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector p_tensors{&grad_output_tr, &input_tr}; - std::set supported_dtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes)); + std::vector pTensors{&gradOutputTr, &inputTr}; + std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - auto memory_format = MemoryFormat::ChannelsLast; - auto grad_output_channel_last = grad_output_tr.contiguous(ctx, memory_format); - DIOPI_CALL(cnnl_transpose(ctx, handle, grad_output_tr, grad_output_channel_last, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); + auto memoryFormat = MemoryFormat::ChannelsLast; + auto gradOutputChannelLast = gradOutputTr.contiguous(ctx, memoryFormat); + DIOPI_CALL(cnnlTranspose(ctx, handle, gradOutputTr, gradOutputChannelLast, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); - DiopiTensor grad_input_tmp_tr = grad_input_tr; - if (grad_input_tr.dtype() != grad_output_tr.dtype()) { - grad_input_tmp_tr = requiresTensor(ctx, grad_input_tr.shape(), grad_output_tr.dtype()); + DiopiTensor gradInputTmpTr = gradInputTr; + if (gradInputTr.dtype() != gradOutputTr.dtype()) { + gradInputTmpTr = requiresTensor(ctx, gradInputTr.shape(), gradOutputTr.dtype()); } - auto grad_input_channel_last = grad_input_tmp_tr.contiguous(ctx, memory_format); + auto gradInputChannelLast = gradInputTmpTr.contiguous(ctx, memoryFormat); /* generate tensor desc */ 
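    // The pooling backward call below consumes channels-last (NHWC) data (both grad tensors were
    // transposed above), so the descriptors are created with CNNL_LAYOUT_NHWC to match.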
cnnlTensorLayout_t layout = CNNL_LAYOUT_NHWC; - CnnlTensorDesc grad_output_desc(grad_output_channel_last, layout); - CnnlTensorDesc grad_input_desc(grad_input_channel_last, layout); + CnnlTensorDesc gradOutputDesc(gradOutputChannelLast, layout); + CnnlTensorDesc gradInputDesc(gradInputChannelLast, layout); /* call adaptive pooling */ DIOPI_CALLCNNL(cnnlAdaptivePoolingBackward(handle, - grad_output_desc.get(), - grad_output_channel_last.data(), + gradOutputDesc.get(), + gradOutputChannelLast.data(), nullptr, nullptr, CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING, - grad_input_desc.get(), - grad_input_channel_last.data())); + gradInputDesc.get(), + gradInputChannelLast.data())); // NHWC -> NCHW - DIOPI_CALL(cnnl_transpose(ctx, handle, grad_input_channel_last, grad_input_tmp_tr, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW)); + DIOPI_CALL(cnnlTranspose(ctx, handle, gradInputChannelLast, gradInputTmpTr, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW)); - if (grad_input_tmp_tr.dtype() != grad_input_tr.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tr, grad_input_tmp_tr)); + if (gradInputTmpTr.dtype() != gradInputTr.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTr, gradInputTmpTr)); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/addcdiv.cpp b/DIOPI-IMPL/camb/functions/addcdiv.cpp index 82049c4b9..aa845e705 100644 --- a/DIOPI-IMPL/camb/functions/addcdiv.cpp +++ b/DIOPI-IMPL/camb/functions/addcdiv.cpp @@ -10,50 +10,50 @@ extern "C" { diopiError_t diopiAddcdiv(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t tensor1, diopiConstTensorHandle_t tensor2, const diopiScalar_t* value) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor1(tensor1); - DiopiTensor other_tensor2(tensor2); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor1(tensor1); + DiopiTensor otherTensor2(tensor2); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor, &other_tensor1, &other_tensor2}; + std::vector pTensors{&inputTensor, &otherTensor1, &otherTensor2}; std::set supportedDtypes{diopi_dtype_float32, diopi_dtype_float16}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (outTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_tensor_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_tensor1_desc(other_tensor1, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_tensor2_desc(other_tensor2, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_tensor_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputTensorDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherTensor1Desc(otherTensor1, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherTensor2Desc(otherTensor2, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outTensorDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetAddcdivWorkspaceSize(handle, input_tensor_desc.get(), other_tensor1_desc.get(), other_tensor2_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetAddcdivWorkspaceSize(handle, inputTensorDesc.get(), otherTensor1Desc.get(), otherTensor2Desc.get(), &workspaceSize)); void* workspace = nullptr; - float scalar_value; + float scalarValue; 
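    // diopiScalar_t is a tagged value: stype selects whether the payload lives in ival or fval,
    // so read the matching field and convert it to the float passed to the CNNL call below.
    // A reusable form of this conversion (hypothetical helper, not part of this patch) could be:
    //   inline float scalarToFloat(const diopiScalar_t* s) {
    //       return DiopiDataType::isInteger(s->stype) ? static_cast<float>(s->ival) : static_cast<float>(s->fval);
    //   }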
if (DiopiDataType::isInteger(value->stype)) { - scalar_value = value->ival; + scalarValue = value->ival; } else { - scalar_value = value->fval; + scalarValue = value->fval; } - workspace = requiresBuffer(ctx, workspace_size).data(); + workspace = requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlAddcdiv(handle, - input_tensor_desc.get(), - input_tensor.data(), - &(scalar_value), - other_tensor1_desc.get(), - other_tensor1.data(), - other_tensor2_desc.get(), - other_tensor2.data(), + inputTensorDesc.get(), + inputTensor.data(), + &(scalarValue), + otherTensor1Desc.get(), + otherTensor1.data(), + otherTensor2Desc.get(), + otherTensor2.data(), workspace, - workspace_size, - out_tensor_desc.get(), - out_tensor_temp.data())) - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + workspaceSize, + outTensorDesc.get(), + outTensorTemp.data())) + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/addcmul.cpp b/DIOPI-IMPL/camb/functions/addcmul.cpp index eda3c5e8e..f978e8257 100644 --- a/DIOPI-IMPL/camb/functions/addcmul.cpp +++ b/DIOPI-IMPL/camb/functions/addcmul.cpp @@ -10,50 +10,50 @@ extern "C" { diopiError_t diopiAddcmul(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t tensor1, diopiConstTensorHandle_t tensor2, const diopiScalar_t* value) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor1(tensor1); - DiopiTensor other_tensor2(tensor2); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor1(tensor1); + DiopiTensor otherTensor2(tensor2); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor, &other_tensor1, &other_tensor2}; + std::vector pTensors{&inputTensor, &otherTensor1, &otherTensor2}; std::set supportedDtypes{diopi_dtype_float32, diopi_dtype_float16}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (outTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_tensor_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_tensor1_desc(other_tensor1, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_tensor2_desc(other_tensor2, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_tensor_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputTensorDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherTensor1Desc(otherTensor1, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherTensor2Desc(otherTensor2, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outTensorDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetAddcmulWorkspaceSize(handle, input_tensor_desc.get(), other_tensor1_desc.get(), other_tensor2_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetAddcmulWorkspaceSize(handle, inputTensorDesc.get(), otherTensor1Desc.get(), otherTensor2Desc.get(), &workspaceSize)); void* workspace = nullptr; - float scalar_value; + float scalarValue; if (DiopiDataType::isInteger(value->stype)) { - scalar_value = value->ival; + scalarValue = value->ival; } else { - scalar_value = value->fval; + 
scalarValue = value->fval; } - workspace = requiresBuffer(ctx, workspace_size).data(); + workspace = requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlAddcmul(handle, - input_tensor_desc.get(), - input_tensor.data(), - &(scalar_value), - other_tensor1_desc.get(), - other_tensor1.data(), - other_tensor2_desc.get(), - other_tensor2.data(), + inputTensorDesc.get(), + inputTensor.data(), + &(scalarValue), + otherTensor1Desc.get(), + otherTensor1.data(), + otherTensor2Desc.get(), + otherTensor2.data(), workspace, - workspace_size, - out_tensor_desc.get(), - out_tensor_temp.data())) - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + workspaceSize, + outTensorDesc.get(), + outTensorTemp.data())) + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/addmm.cpp b/DIOPI-IMPL/camb/functions/addmm.cpp index 3c28b9c2b..fbcf87d0a 100644 --- a/DIOPI-IMPL/camb/functions/addmm.cpp +++ b/DIOPI-IMPL/camb/functions/addmm.cpp @@ -5,10 +5,12 @@ */ #include -#include + +#include #include #include #include + #include "../cnnl_helper.hpp" #include "../common/common.hpp" @@ -21,117 +23,117 @@ diopiError_t diopiAddmm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopi diopiConstTensorHandle_t mat2, const diopiScalar_t* beta, const diopiScalar_t* alpha) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor mat1_tensor(mat1); - DiopiTensor mat2_tensor(mat2); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor mat1Tensor(mat1); + DiopiTensor mat2Tensor(mat2); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor, &mat1_tensor, &mat2_tensor}; + std::vector pTensors{&inputTensor, &mat1Tensor, &mat2Tensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor mat1_tensor_tmp = *pTensors[1]; - DiopiTensor mat2_tensor_tmp = *pTensors[2]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); - - CnnlTensorDesc input_desc(input_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc mat1_desc(mat1_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc mat2_desc(mat2_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); - DiopiTensor mm_result_tensor = requiresTensor(ctx, vec2diopiSize_t(out_tensor.shape()), input_tensor_tmp.dtype()); - CnnlTensorDesc mm_result_desc(mm_result_tensor, CNNL_LAYOUT_ARRAY); - - CnnlResourceGuard CnnlMatMulDesc; - cnnlMatMulDescriptor_t matmul_desc = CnnlMatMulDesc.get(); - - int32_t is_transa = 0; - int32_t is_transb = 0; - int32_t allow_tf32_i32 = 1; - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc, CNNL_MATMUL_DESC_TRANSA, &(is_transa), sizeof(int32_t))); - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc, CNNL_MATMUL_DESC_TRANSB, &(is_transb), sizeof(int32_t))); - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc, CNNL_MATMUL_ALLOW_TF32, &(allow_tf32_i32), sizeof(int32_t))); - - size_t workspace_size = 0; + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor mat1TensorTmp = *pTensors[1]; + DiopiTensor mat2TensorTmp = *pTensors[2]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); + + CnnlTensorDesc 
inputDesc(inputTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc mat1Desc(mat1TensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc mat2Desc(mat2TensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); + DiopiTensor mmResultTensor = requiresTensor(ctx, vec2diopiSizeT(outTensor.shape()), inputTensorTmp.dtype()); + CnnlTensorDesc mmResultDesc(mmResultTensor, CNNL_LAYOUT_ARRAY); + + CnnlResourceGuard cnnlMatMulDesc; + cnnlMatMulDescriptor_t matmulDesc = cnnlMatMulDesc.get(); + + int32_t isTransa = 0; + int32_t isTransb = 0; + int32_t allowTf32I32 = 1; + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc, CNNL_MATMUL_DESC_TRANSA, &(isTransa), sizeof(int32_t))); + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc, CNNL_MATMUL_DESC_TRANSB, &(isTransb), sizeof(int32_t))); + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc, CNNL_MATMUL_ALLOW_TF32, &(allowTf32I32), sizeof(int32_t))); + + size_t workspaceSize = 0; int requestedAlgoCount = 1; int returnAlgoCount = 0; - CnnlResourceGuard CnnlMatMulHeuristic; - cnnlMatMulHeuristicResult_t heuristicResult = CnnlMatMulHeuristic.get(); - CnnlResourceGuard CnnlMatMulAlgo; - cnnlMatMulAlgo_t matmul_algo = CnnlMatMulAlgo.get(); + CnnlResourceGuard cnnlMatMulHeuristic; + cnnlMatMulHeuristicResult_t heuristicResult = cnnlMatMulHeuristic.get(); + CnnlResourceGuard cnnlMatMulAlgo; + cnnlMatMulAlgo_t matmulAlgo = cnnlMatMulAlgo.get(); DIOPI_CALLCNNL(cnnlGetMatMulAlgoHeuristic(handle, - matmul_desc, - mat1_desc.get(), - mat2_desc.get(), - mm_result_desc.get(), - mm_result_desc.get(), + matmulDesc, + mat1Desc.get(), + mat2Desc.get(), + mmResultDesc.get(), + mmResultDesc.get(), nullptr, requestedAlgoCount, &heuristicResult, &returnAlgoCount)); - DIOPI_CALLCNNL(cnnlGetMatMulHeuristicResult(heuristicResult, matmul_algo, &workspace_size)); + DIOPI_CALLCNNL(cnnlGetMatMulHeuristicResult(heuristicResult, matmulAlgo, &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - float alpha_; + float alphaTmp; if (alpha->stype <= 7) { - alpha_ = alpha->ival; + alphaTmp = alpha->ival; } else { - alpha_ = alpha->fval; + alphaTmp = alpha->fval; } - float beta_; + float betaTmp; if (beta->stype <= 7) { - beta_ = beta->ival; + betaTmp = beta->ival; } else { - beta_ = beta->fval; + betaTmp = beta->fval; } - float alpha_default = 1; - float beta_default = 0; + float alphaDefault = 1; + float betaDefault = 0; DIOPI_CALLCNNL(cnnlMatMul_v2(handle, - matmul_desc, - matmul_algo, - &alpha_default, - mat1_desc.get(), - mat1_tensor_tmp.data(), - mat2_desc.get(), - mat2_tensor_tmp.data(), - &beta_default, - mm_result_desc.get(), - mm_result_tensor.data(), + matmulDesc, + matmulAlgo, + &alphaDefault, + mat1Desc.get(), + mat1TensorTmp.data(), + mat2Desc.get(), + mat2TensorTmp.data(), + &betaDefault, + mmResultDesc.get(), + mmResultTensor.data(), workspace, - workspace_size, - mm_result_desc.get(), - mm_result_tensor.data())); - - CnnlResourceGuard CnnlOpTensorDesc; - cnnlOpTensorDescriptor_t optensor_desc = CnnlOpTensorDesc.get(); - size_t workspace_size_ = 0; - DIOPI_CALLCNNL(cnnlGetOpTensorWorkspaceSize(handle, mm_result_desc.get(), input_desc.get(), out_desc.get(), &workspace_size_)); - void* workspace_ = nullptr; - if (0 != workspace_size_) { - workspace_ = requiresBuffer(ctx, workspace_size_).data(); + workspaceSize, + mmResultDesc.get(), + mmResultTensor.data())); + + CnnlResourceGuard cnnlOpTensorDesc; + 
cnnlOpTensorDescriptor_t optensorDesc = cnnlOpTensorDesc.get(); + workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetOpTensorWorkspaceSize(handle, mmResultDesc.get(), inputDesc.get(), outDesc.get(), &workspaceSize)); + workspace = nullptr; + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlOpTensor(handle, - optensor_desc, - &alpha_, - mm_result_desc.get(), - mm_result_tensor.data(), - &beta_, - input_desc.get(), - input_tensor_tmp.data(), - workspace_, - workspace_size_, - &beta_default, - out_desc.get(), - out_tensor_tmp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + optensorDesc, + &alphaTmp, + mmResultDesc.get(), + mmResultTensor.data(), + &betaTmp, + inputDesc.get(), + inputTensorTmp.data(), + workspace, + workspaceSize, + &betaDefault, + outDesc.get(), + outTensorTmp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/arange.cpp b/DIOPI-IMPL/camb/functions/arange.cpp index 8ccde3016..778373ccc 100644 --- a/DIOPI-IMPL/camb/functions/arange.cpp +++ b/DIOPI-IMPL/camb/functions/arange.cpp @@ -17,24 +17,24 @@ extern "C" { diopiError_t diopiArange(diopiContextHandle_t ctx, diopiTensorHandle_t out, const diopiScalar_t* start, const diopiScalar_t* end, const diopiScalar_t* step) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor out_tensor(out); - DiopiTensor out32_tensor = out_tensor; - if (diopi_dtype_int64 == out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out32_tensor, diopi_dtype_int32)); + DiopiTensor outTensor(out); + DiopiTensor out32Tensor = outTensor; + if (diopi_dtype_int64 == outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, out32Tensor, diopi_dtype_int32)); } - CnnlTensorDesc outDesc(out32_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(out32Tensor, CNNL_LAYOUT_ARRAY); cnnlDataType_t dtype; - CnnlDataType::convertToCnnlType(&dtype, out32_tensor.dtype()); + CnnlDataType::convertToCnnlType(&dtype, out32Tensor.dtype()); if (CnnlDataType::isInteger(dtype)) { - DIOPI_CALLCNNL(cnnlArange_v2(handle, CNNL_COMPUTATION_ULTRAHIGH_PRECISION, &(start->ival), &(step->ival), outDesc.get(), out32_tensor.data())); - if (out32_tensor.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out32_tensor)); + DIOPI_CALLCNNL(cnnlArange_v2(handle, CNNL_COMPUTATION_ULTRAHIGH_PRECISION, &(start->ival), &(step->ival), outDesc.get(), out32Tensor.data())); + if (out32Tensor.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, out32Tensor)); } } else if (CnnlDataType::isFloatPoint(dtype)) { - float start_val = start->fval; - float step_val = step->fval; - DIOPI_CALLCNNL(cnnlArange_v2(handle, CNNL_COMPUTATION_ULTRAHIGH_PRECISION, &(start_val), &(step_val), outDesc.get(), out32_tensor.data())); + float startVal = start->fval; + float stepVal = step->fval; + DIOPI_CALLCNNL(cnnlArange_v2(handle, CNNL_COMPUTATION_ULTRAHIGH_PRECISION, &(startVal), &(stepVal), outDesc.get(), out32Tensor.data())); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/avg_pool2d.cpp b/DIOPI-IMPL/camb/functions/avg_pool2d.cpp index 050702d2b..6128c26f8 100644 --- a/DIOPI-IMPL/camb/functions/avg_pool2d.cpp +++ b/DIOPI-IMPL/camb/functions/avg_pool2d.cpp @@ -14,12 +14,12 @@ namespace camb { namespace { std::vector getDim(DiopiTensor tensor) { - int shape_size = tensor.shape().size(); - std::vector dim; - for (int i = 0; i < shape_size; i++) { - dim.push_back(static_cast(tensor.shape()[i])); + int shapeSize = 
tensor.shape().size(); + std::vector dim(shapeSize); + for (int i = 0; i < shapeSize; i++) { + dim[i] = static_cast(tensor.shape()[i]); } - if (shape_size == 3) { + if (shapeSize == 3) { dim.insert(dim.begin(), 1); } return dim; @@ -28,131 +28,131 @@ std::vector getDim(DiopiTensor tensor) { } // namespace extern "C" { -diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, - diopiSize_t stride, diopiSize_t padding, bool ceil_mode, bool count_include_pad, const int64_t* divisor_override) { +diopiError_t diopiAvgPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernelSize, + diopiSize_t stride, diopiSize_t padding, bool ceilMode, bool countIncludePad, const int64_t* divisorOverride) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - DIOPI_CHECK(input_tensor.dim() == 3 || input_tensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTensor.dim() == 3 || inputTensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); - - std::vector input_dim = getDim(input_tensor_tmp); - std::vector out_dim = getDim(out_tensor_tmp); - CnnlTensorDesc input_desc; - CnnlTensorDesc out_desc; - input_desc.set(input_tensor_tmp, CNNL_LAYOUT_NCHW, input_dim); - out_desc.set(out_tensor_tmp, CNNL_LAYOUT_NCHW, out_dim); - - const int64_t kernel_h = kernel_size.data[0]; - const int64_t kernel_w = kernel_size.len == 1 ? kernel_h : kernel_size.data[1]; - int64_t stride_h = 0; - int64_t stride_w = 0; + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); + + std::vector inputDim = getDim(inputTensorTmp); + std::vector outDim = getDim(outTensorTmp); + CnnlTensorDesc inputDesc; + CnnlTensorDesc outDesc; + inputDesc.set(inputTensorTmp, CNNL_LAYOUT_NCHW, inputDim); + outDesc.set(outTensorTmp, CNNL_LAYOUT_NCHW, outDim); + + const int64_t kernelH = kernelSize.data[0]; + const int64_t kernelW = kernelSize.len == 1 ? kernelH : kernelSize.data[1]; + int64_t strideH = 0; + int64_t strideW = 0; if (stride.len == 0) { - stride_h = kernel_h; - stride_w = kernel_w; + strideH = kernelH; + strideW = kernelW; } else { - stride_h = stride.data[0]; - stride_w = stride.len == 1 ? stride_h : stride.data[1]; + strideH = stride.data[0]; + strideW = stride.len == 1 ? strideH : stride.data[1]; } - const int64_t pad_h = padding.data[0]; - const int64_t pad_w = padding.len == 1 ? pad_h : padding.data[1]; - const int64_t dilation_0 = 1; - const int64_t dilation_1 = 1; + const int64_t padH = padding.data[0]; + const int64_t padW = padding.len == 1 ? 
padH : padding.data[1]; + const int64_t dilation0 = 1; + const int64_t dilation1 = 1; // calculate padding coefficients auto pl = 0, pr = 0, pu = 0, pd = 0; - pu = pd = pad_h; - pl = pr = pad_w; - if (ceil_mode) { + pu = pd = padH; + pl = pr = padW; + if (ceilMode) { // diff = (out - 1) * stride + kernel_size - input - int diff_height = (out_tensor.shape()[2] - 1) * stride_h + kernel_h - input_tensor.shape()[2]; - int diff_width = (out_tensor.shape()[3] - 1) * stride_w + kernel_w - input_tensor.shape()[3]; + int diffHeight = (outTensor.shape()[2] - 1) * strideH + kernelH - inputTensor.shape()[2]; + int diffWidth = (outTensor.shape()[3] - 1) * strideW + kernelW - inputTensor.shape()[3]; // If ceil_mode is set to true, the pad needs to be filled up. // If the offset pad is redundant, it will be removed. - pd = diff_height > pad_h ? diff_height - pad_h : 0; - pr = diff_width > pad_w ? diff_width - pad_w : 0; + pd = diffHeight > padH ? diffHeight - padH : 0; + pr = diffWidth > padW ? diffWidth - padW : 0; } - CnnlResourceGuard CnnlPoolDesc; - cnnlPoolingDescriptor_t pool_desc = CnnlPoolDesc.get(); - cnnlPoolingMode_t mode = count_include_pad ? CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING : CNNL_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; + CnnlResourceGuard cnnlPoolDesc; + cnnlPoolingDescriptor_t poolDesc = cnnlPoolDesc.get(); + cnnlPoolingMode_t mode = countIncludePad ? CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING : CNNL_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; DIOPI_CALLCNNL(cnnlSetPooling2dDescriptor_v2( - pool_desc, mode, CNNL_PROPAGATE_NAN, kernel_h, kernel_w, pu, pd, pl, pr, stride_h, stride_w, dilation_0, dilation_1, ceil_mode)); + poolDesc, mode, CNNL_PROPAGATE_NAN, kernelH, kernelW, pu, pd, pl, pr, strideH, strideW, dilation0, dilation1, ceilMode)); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetPoolingWorkspaceSize(handle, mode, out_tensor.shape()[3], input_tensor.shape()[2], &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetPoolingWorkspaceSize(handle, mode, outTensor.shape()[3], inputTensor.shape()[2], &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } const void* alpha = nullptr; const void* beta = nullptr; DIOPI_CALLCNNL(cnnlPoolingForward( - handle, pool_desc, alpha, input_desc.get(), input_tensor_tmp.data(), beta, out_desc.get(), out_tensor_tmp.data(), workspace, workspace_size)); + handle, poolDesc, alpha, inputDesc.get(), inputTensorTmp.data(), beta, outDesc.get(), outTensorTmp.data(), workspace, workspaceSize)); - if (divisor_override != nullptr) { - diopiScalar_t mul_value; - mul_value.stype = diopi_dtype_float64; - mul_value.fval = static_cast(kernel_h * kernel_w) / (*divisor_override); - DIOPI_CALL(diopiMulInpScalar(ctx, static_cast(out_tensor_tmp), (const diopiScalar_t*)&mul_value)); + if (divisorOverride != nullptr) { + diopiScalar_t mulValue; + mulValue.stype = diopi_dtype_float64; + mulValue.fval = static_cast(kernelH * kernelW) / (*divisorOverride); + DIOPI_CALL(diopiMulInpScalar(ctx, static_cast(outTensorTmp), (const diopiScalar_t*)&mulValue)); } - dataTypeCast(ctx, out_tensor, out_tensor_tmp); + dataTypeCast(ctx, outTensor, outTensorTmp); return diopiSuccess; } -diopiError_t diopiAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t 
padding, bool ceil_mode, - bool count_include_pad, const int64_t* divisor_override) { +diopiError_t diopiAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, + diopiConstTensorHandle_t input, diopiSize_t kernelSize, diopiSize_t stride, diopiSize_t padding, bool ceilMode, + bool countIncludePad, const int64_t* divisorOverride) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); + DiopiTensor inputTensor(input); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); - DIOPI_CHECK(input_tensor.dim() == 3 || input_tensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTensor.dim() == 3 || inputTensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector pTensors{&input_tensor, &grad_output_tensor}; + std::vector pTensors{&inputTensor, &gradOutputTensor}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor grad_output_tensor_tmp = *pTensors[1]; - DiopiTensor grad_input_tensor_tmp = grad_input_tensor; - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor_tmp, input_tensor_tmp.dtype())); - - diopiTensorHandle_t input_t = nullptr; - diopiTensorHandle_t grad_input_t = nullptr; - diopiTensorHandle_t grad_output_t = nullptr; - - auto permute_to_nhwc = [&](auto src, auto& dst) { - DiopiTensor src_tensor(src); - std::vector src_shape_t_64(src_tensor.shape().size()); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor gradOutputTensorTmp = *pTensors[1]; + DiopiTensor gradInputTensorTmp = gradInputTensor; + DIOPI_CALL(dataTypeCast(ctx, gradInputTensorTmp, inputTensorTmp.dtype())); + + diopiTensorHandle_t inputT = nullptr; + diopiTensorHandle_t gradInputT = nullptr; + diopiTensorHandle_t gradOutputT = nullptr; + + auto permuteToNhwc = [&](auto src, auto& dst) { + DiopiTensor srcTensor(src); + std::vector srcShapeT64(srcTensor.shape().size()); std::vector axis{0, 2, 3, 1}; - if (src_tensor.shape().size() == 3) { + if (srcTensor.shape().size() == 3) { axis.clear(); } - if (src_tensor.shape().size() == 3) { + if (srcTensor.shape().size() == 3) { axis.push_back(1); axis.push_back(2); axis.push_back(0); } - for (int i = 0; i < src_tensor.shape().size(); ++i) { - src_shape_t_64[i] = src_tensor.shape()[axis[i]]; + for (int i = 0; i < srcTensor.shape().size(); ++i) { + srcShapeT64[i] = srcTensor.shape()[axis[i]]; } - diopiSize_t src_t_shape(src_shape_t_64.data(), src_shape_t_64.size()); - DIOPI_CALL(diopiRequireTensor(ctx, &dst, &src_t_shape, nullptr, src_tensor.dtype(), diopi_device)); - if (src_tensor.shape().size() == 4) { + diopiSize_t srcTShape(srcShapeT64.data(), srcShapeT64.size()); + DIOPI_CALL(diopiRequireTensor(ctx, &dst, &srcTShape, nullptr, srcTensor.dtype(), diopi_device)); + if (srcTensor.shape().size() == 4) { diopiSize_t nchw2nhwc(axis.data(), 4); DIOPI_CALL(diopiPermute(ctx, dst, src, nchw2nhwc)); - } else if (src_tensor.shape().size() == 3) { + } else if (srcTensor.shape().size() == 3) { diopiSize_t chw2hwc(axis.data(), 3); DIOPI_CALL(diopiPermute(ctx, dst, src, chw2hwc)); } else { @@ -161,95 +161,95 @@ diopiError_t diopiAvgPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; }; - DIOPI_CALL(permute_to_nhwc(static_cast(input_tensor_tmp), input_t)); - 
DIOPI_CALL(permute_to_nhwc(static_cast(grad_input_tensor_tmp), grad_input_t)); - DIOPI_CALL(permute_to_nhwc(static_cast(grad_output_tensor_tmp), grad_output_t)); - - DiopiTensor input_tensor_t(input_t); - DiopiTensor grad_input_tensor_t(grad_input_t); - DiopiTensor grad_output_tensor_t(grad_output_t); - - std::vector input_dim = getDim(input_tensor_t); - std::vector grad_input_dim = getDim(grad_input_tensor_t); - std::vector grad_output_dim = getDim(grad_output_tensor_t); - CnnlTensorDesc input_desc; - CnnlTensorDesc grad_input_desc; - CnnlTensorDesc grad_output_desc; - input_desc.set(input_tensor_t, CNNL_LAYOUT_NHWC, input_dim); - grad_input_desc.set(grad_input_tensor_t, CNNL_LAYOUT_NHWC, grad_input_dim); - grad_output_desc.set(grad_output_tensor_t, CNNL_LAYOUT_NHWC, grad_output_dim); - - const int64_t kernel_h = kernel_size.data[0]; - const int64_t kernel_w = kernel_size.len == 1 ? kernel_h : kernel_size.data[1]; - int64_t stride_h = 0; - int64_t stride_w = 0; + DIOPI_CALL(permuteToNhwc(static_cast(inputTensorTmp), inputT)); + DIOPI_CALL(permuteToNhwc(static_cast(gradInputTensorTmp), gradInputT)); + DIOPI_CALL(permuteToNhwc(static_cast(gradOutputTensorTmp), gradOutputT)); + + DiopiTensor inputTensorT(inputT); + DiopiTensor gradInputTensorT(gradInputT); + DiopiTensor gradOutputTensorT(gradOutputT); + + std::vector inputDim = getDim(inputTensorT); + std::vector gradInputDim = getDim(gradInputTensorT); + std::vector gradOutputDim = getDim(gradOutputTensorT); + CnnlTensorDesc inputDesc; + CnnlTensorDesc gradInputDesc; + CnnlTensorDesc gradOutputDesc; + inputDesc.set(inputTensorT, CNNL_LAYOUT_NHWC, inputDim); + gradInputDesc.set(gradInputTensorT, CNNL_LAYOUT_NHWC, gradInputDim); + gradOutputDesc.set(gradOutputTensorT, CNNL_LAYOUT_NHWC, gradOutputDim); + + const int64_t kernelH = kernelSize.data[0]; + const int64_t kernelW = kernelSize.len == 1 ? kernelH : kernelSize.data[1]; + int64_t strideH = 0; + int64_t strideW = 0; if (stride.len == 0) { - stride_h = kernel_h; - stride_w = kernel_w; + strideH = kernelH; + strideW = kernelW; } else { - stride_h = stride.data[0]; - stride_w = stride.len == 1 ? stride_h : stride.data[1]; + strideH = stride.data[0]; + strideW = stride.len == 1 ? strideH : stride.data[1]; } - const int64_t pad_h = padding.data[0]; - const int64_t pad_w = padding.len == 1 ? pad_h : padding.data[1]; - const int64_t dilation_0 = 1; - const int64_t dilation_1 = 1; + const int64_t padH = padding.data[0]; + const int64_t padW = padding.len == 1 ? padH : padding.data[1]; + const int64_t dilation0 = 1; + const int64_t dilation1 = 1; // calculate padding coefficients auto pl = 0, pr = 0, pu = 0, pd = 0; - pu = pd = pad_h; - pl = pr = pad_w; - int height = (grad_output_tensor.shape()[2] - 1) * stride_h + kernel_h; - int width = (grad_output_tensor.shape()[3] - 1) * stride_w + kernel_w; - if (pad_h + input_tensor.shape()[2] >= height) pd = 0; - if (pad_w + input_tensor.shape()[3] >= width) pr = 0; + pu = pd = padH; + pl = pr = padW; + int height = (gradOutputTensor.shape()[2] - 1) * strideH + kernelH; + int width = (gradOutputTensor.shape()[3] - 1) * strideW + kernelW; + if (padH + inputTensor.shape()[2] >= height) pd = 0; + if (padW + inputTensor.shape()[3] >= width) pr = 0; // if ceil_mode is set to true, the pad needs to be filled up. 
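    // Worked example (hypothetical sizes): input height 6, kernel 3, stride 2, pad 0 with ceil_mode
    // gives a 3-row output, so height = (3 - 1) * 2 + 3 = 7 and pd = 7 - 6 - 0 = 1, i.e. one extra
    // row of bottom padding to cover the window the ceil-mode forward pass read past the input edge.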
- if (ceil_mode) { - pd = height - input_tensor.shape()[2] - pad_h; - pr = width - input_tensor.shape()[3] - pad_w; + if (ceilMode) { + pd = height - inputTensor.shape()[2] - padH; + pr = width - inputTensor.shape()[3] - padW; } - CnnlResourceGuard CnnlPoolDesc; - cnnlPoolingDescriptor_t pool_desc = CnnlPoolDesc.get(); - cnnlPoolingMode_t mode = count_include_pad ? CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING : CNNL_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; + CnnlResourceGuard cnnlPoolDesc; + cnnlPoolingDescriptor_t poolDesc = cnnlPoolDesc.get(); + cnnlPoolingMode_t mode = countIncludePad ? CNNL_POOLING_AVERAGE_COUNT_INCLUDE_PADDING : CNNL_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; DIOPI_CALLCNNL(cnnlSetPooling2dDescriptor_v2( - pool_desc, mode, CNNL_PROPAGATE_NAN, kernel_h, kernel_w, pu, pd, pl, pr, stride_h, stride_w, dilation_0, dilation_1, ceil_mode)); + poolDesc, mode, CNNL_PROPAGATE_NAN, kernelH, kernelW, pu, pd, pl, pr, strideH, strideW, dilation0, dilation1, ceilMode)); const void* alpha = nullptr; const void* beta = nullptr; DIOPI_CALLCNNL(cnnlPoolingBackward(handle, - pool_desc, + poolDesc, alpha, - NULL, nullptr, - grad_output_desc.get(), - grad_output_tensor_t.data(), - input_desc.get(), - input_tensor_t.data(), + nullptr, + gradOutputDesc.get(), + gradOutputTensorT.data(), + inputDesc.get(), + inputTensorT.data(), beta, - grad_input_desc.get(), - grad_input_tensor_t.data())); - - if (grad_input_tensor_t.shape().size() == 4) { - std::vector perm_nhwc2nchw{0, 3, 1, 2}; - diopiSize_t nhwc2nchw(perm_nhwc2nchw.data(), 4); - DIOPI_CALL(diopiPermute(ctx, static_cast(grad_input_tensor_tmp), grad_input_t, nhwc2nchw)); - } else if (grad_input_tensor_t.shape().size() == 3) { - std::vector perm_hwc2chw{2, 0, 1}; - diopiSize_t hwc2chw(perm_hwc2chw.data(), 3); - DIOPI_CALL(diopiPermute(ctx, static_cast(grad_input_tensor_tmp), grad_input_t, hwc2chw)); + gradInputDesc.get(), + gradInputTensorT.data())); + + if (gradInputTensorT.shape().size() == 4) { + std::vector permNhwc2nchw{0, 3, 1, 2}; + diopiSize_t nhwc2nchw(permNhwc2nchw.data(), 4); + DIOPI_CALL(diopiPermute(ctx, static_cast(gradInputTensorTmp), gradInputT, nhwc2nchw)); + } else if (gradInputTensorT.shape().size() == 3) { + std::vector permHwc2chw{2, 0, 1}; + diopiSize_t hwc2chw(permHwc2chw.data(), 3); + DIOPI_CALL(diopiPermute(ctx, static_cast(gradInputTensorTmp), gradInputT, hwc2chw)); } else { DIOPI_CHECK(false, "non-empty 3D or 4D (batch mode) tensor expected for input"); } - if (divisor_override != nullptr) { - diopiScalar_t mul_value; - mul_value.stype = diopi_dtype_float64; - mul_value.fval = static_cast(kernel_h * kernel_w) / (*divisor_override); - DIOPI_CALL(diopiMulInpScalar(ctx, static_cast(grad_input_tensor_tmp), (const diopiScalar_t*)&mul_value)); + if (divisorOverride != nullptr) { + diopiScalar_t mulValue; + mulValue.stype = diopi_dtype_float64; + mulValue.fval = static_cast(kernelH * kernelW) / (*divisorOverride); + DIOPI_CALL(diopiMulInpScalar(ctx, static_cast(gradInputTensorTmp), (const diopiScalar_t*)&mulValue)); } - dataTypeCast(ctx, grad_input_tensor, grad_input_tensor_tmp); + dataTypeCast(ctx, gradInputTensor, gradInputTensorTmp); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/batch_norm.cpp b/DIOPI-IMPL/camb/functions/batch_norm.cpp index 1d74600a0..6a280e4b6 100644 --- a/DIOPI-IMPL/camb/functions/batch_norm.cpp +++ b/DIOPI-IMPL/camb/functions/batch_norm.cpp @@ -13,271 +13,271 @@ namespace camb { extern "C" { -diopiError_t diopiBatchNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t 
save_mean, diopiTensorHandle_t save_invstd, - diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiTensorHandle_t running_mean, - diopiTensorHandle_t running_var, bool training, double momentum, double eps) { +diopiError_t diopiBatchNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t saveMean, diopiTensorHandle_t saveInvstd, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiTensorHandle_t runningMean, + diopiTensorHandle_t runningVar, bool training, double momentum, double eps) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor save_mean_tr(save_mean); - DiopiTensor save_invstd_tr(save_invstd); - DiopiTensor input_tr(input); - DiopiTensor weight_tr(weight); - DiopiTensor bias_tr(bias); - DiopiTensor running_mean_tr(running_mean); - DiopiTensor running_var_tr(running_var); - DiopiTensor output_tr(out); + DiopiTensor saveMeanTr(saveMean); + DiopiTensor saveInvstdTr(saveInvstd); + DiopiTensor inputTr(input); + DiopiTensor weightTr(weight); + DiopiTensor biasTr(bias); + DiopiTensor runningMeanTr(runningMean); + DiopiTensor runningVarTr(runningVar); + DiopiTensor outputTr(out); /* Some basic check */ - if (running_mean_tr.defined() && running_var_tr.defined()) { - DIOPI_CHECK(running_mean_tr.dtype() == running_var_tr.dtype(), "running_mean and running_var need to have the same data types"); + if (runningMeanTr.defined() && runningVarTr.defined()) { + DIOPI_CHECK(runningMeanTr.dtype() == runningVarTr.dtype(), "running_mean and running_var need to have the same data types"); } - auto dim = input_tr.dim(); + auto dim = inputTr.dim(); DIOPI_CHECK(dim >= 2 && dim <= 5, "Input dim is out of range"); - DIOPI_CHECK(dim == output_tr.dim(), "Input dim != out dim"); + DIOPI_CHECK(dim == outputTr.dim(), "Input dim != out dim"); if (3 == dim) { - input_tr.unsqueeze(3); - output_tr.reshape(input_tr.shape()); + inputTr.unsqueeze(3); + outputTr.reshape(inputTr.shape()); } if (2 == dim) { - input_tr.unsqueeze(2); - input_tr.unsqueeze(3); - output_tr.reshape(input_tr.shape()); + inputTr.unsqueeze(2); + inputTr.unsqueeze(3); + outputTr.reshape(inputTr.shape()); } - std::vector p_tensors{&input_tr, &weight_tr, &bias_tr}; - if (running_mean_tr.defined()) { - p_tensors.push_back(&running_mean_tr); + std::vector pTensors{&inputTr, &weightTr, &biasTr}; + if (runningMeanTr.defined()) { + pTensors.push_back(&runningMeanTr); } - if (running_var_tr.defined()) { - p_tensors.push_back(&running_var_tr); + if (runningVarTr.defined()) { + pTensors.push_back(&runningVarTr); } - std::set supported_dtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes)); + std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); // Note: 1. output.dtype = input.dtype 2. channelsLast format - MemoryFormat memory_format = input_tr.dim() == 4 ? MemoryFormat::ChannelsLast : MemoryFormat::ChannelsLast3d; - DiopiTensor output_tmp_tr = requiresTensor(ctx, output_tr.shape(), input_tr.dtype(), memory_format); + MemoryFormat memoryFormat = inputTr.dim() == 4 ? 
MemoryFormat::ChannelsLast : MemoryFormat::ChannelsLast3d; + DiopiTensor outputTmpTr = requiresTensor(ctx, outputTr.shape(), inputTr.dtype(), memoryFormat); /* Transpose to channels last */ - DIOPI_CALL(contiguous_(ctx, input_tr, memory_format)); + DIOPI_CALL(contiguous(ctx, inputTr, memoryFormat)); - CnnlTensorDesc weight_bias_mean_var_desc(weight_tr, CNNL_LAYOUT_ARRAY); - cnnlTensorLayout_t layout = input_tr.dim() == 4 ? CNNL_LAYOUT_NHWC : CNNL_LAYOUT_NDHWC; - CnnlTensorDesc input_desc(input_tr, layout); - CnnlTensorDesc output_desc(output_tmp_tr, layout); + CnnlTensorDesc weightBiasMeanVarDesc(weightTr, CNNL_LAYOUT_ARRAY); + cnnlTensorLayout_t layout = inputTr.dim() == 4 ? CNNL_LAYOUT_NHWC : CNNL_LAYOUT_NDHWC; + CnnlTensorDesc inputDesc(inputTr, layout); + CnnlTensorDesc outputDesc(outputTmpTr, layout); if (training) { - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetBatchNormForwardWorkspaceSize(handle, input_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetBatchNormForwardWorkspaceSize(handle, inputDesc.get(), &workspaceSize)); - void* workspace_ptr = workspace_size == 0 ? nullptr : requiresBuffer(ctx, workspace_size).data(); + void* workspacePtr = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data(); // set activition part to default - cnnlActivationMode_t active_mode = CNNL_ACTIVATION_IDENTITY; - cnnlActivationDescriptor_t activation_desc = nullptr; - DIOPI_CALLCNNL(cnnlCreateActivationDescriptor(&activation_desc)); - cnnlSetActivationDescriptor_v5(activation_desc, active_mode, CNNL_ACTIVATION_HIGH_PRECISION, CNNL_NOT_PROPAGATE_NAN, 1.0, -1, 1.0, 1.0, false); + cnnlActivationMode_t activeMode = CNNL_ACTIVATION_IDENTITY; + cnnlActivationDescriptor_t activationDesc = nullptr; + DIOPI_CALLCNNL(cnnlCreateActivationDescriptor(&activationDesc)); + cnnlSetActivationDescriptor_v5(activationDesc, activeMode, CNNL_ACTIVATION_HIGH_PRECISION, CNNL_NOT_PROPAGATE_NAN, 1.0, -1, 1.0, 1.0, false); DIOPI_CALLCNNL(cnnlBatchNormForwardTraining_v2(handle, - activation_desc, + activationDesc, CNNL_BATCHNORM_SPATIAL, CNNL_BATCHNORM_OPS_BN, nullptr, nullptr, - input_desc.get(), - input_tr.data(), - NULL, - NULL, - weight_bias_mean_var_desc.get(), - weight_tr.data(), - bias_tr.data(), - running_mean_tr.defined() ? running_mean_tr.data() : nullptr, - running_var_tr.defined() ? running_var_tr.data() : nullptr, + inputDesc.get(), + inputTr.data(), + nullptr, + nullptr, + weightBiasMeanVarDesc.get(), + weightTr.data(), + biasTr.data(), + runningMeanTr.defined() ? runningMeanTr.data() : nullptr, + runningVarTr.defined() ? runningVarTr.data() : nullptr, static_cast(eps), static_cast(momentum), - output_desc.get(), - output_tmp_tr.data(), - save_mean_tr.data(), - save_invstd_tr.data(), - workspace_ptr, - workspace_size, - NULL, + outputDesc.get(), + outputTmpTr.data(), + saveMeanTr.data(), + saveInvstdTr.data(), + workspacePtr, + workspaceSize, + nullptr, 0)); } else { DIOPI_CALLCNNL(cnnlBatchNormForwardInference(handle, nullptr, nullptr, - input_desc.get(), - input_tr.data(), - weight_bias_mean_var_desc.get(), - weight_tr.data(), - bias_tr.data(), - running_mean_tr.defined() ? running_mean_tr.data() : nullptr, - running_var_tr.defined() ? running_var_tr.data() : nullptr, + inputDesc.get(), + inputTr.data(), + weightBiasMeanVarDesc.get(), + weightTr.data(), + biasTr.data(), + runningMeanTr.defined() ? runningMeanTr.data() : nullptr, + runningVarTr.defined() ? 
runningVarTr.data() : nullptr, static_cast(eps), - output_desc.get(), - output_tmp_tr.data())); + outputDesc.get(), + outputTmpTr.data())); } // channels last -> contiguous - DIOPI_CALL(contiguous_(ctx, output_tmp_tr, MemoryFormat::Contiguous)); + DIOPI_CALL(contiguous(ctx, outputTmpTr, MemoryFormat::Contiguous)); // Copy back to origin - DIOPI_CALL(diopiCopyInp(ctx, output_tmp_tr.tensorHandle(), output_tr.tensorHandle())); + DIOPI_CALL(diopiCopyInp(ctx, outputTmpTr.tensorHandle(), outputTr.tensorHandle())); return diopiSuccess; } -diopiError_t diopiBatchNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, - diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, - diopiConstTensorHandle_t running_mean, diopiConstTensorHandle_t running_var, diopiConstTensorHandle_t save_mean, - diopiConstTensorHandle_t save_invstd, bool training, double eps) { +diopiError_t diopiBatchNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiTensorHandle_t gradWeight, diopiTensorHandle_t gradBias, + diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t runningMean, diopiConstTensorHandle_t runningVar, diopiConstTensorHandle_t saveMean, + diopiConstTensorHandle_t saveInvstd, bool training, double eps) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tr(grad_input); - DiopiTensor grad_weight_tr(grad_weight); - DiopiTensor grad_bias_tr(grad_bias); - DiopiTensor input_tr(input); - DiopiTensor weight_tr(weight); - DiopiTensor running_mean_tr(running_mean); - DiopiTensor running_var_tr(running_var); - DiopiTensor save_mean_tr(save_mean); - DiopiTensor save_invstd_tr(save_invstd); - - DiopiTensor grad_output_tr(grad_output); - - if (running_mean_tr.defined() && running_var_tr.defined()) { - DIOPI_CHECK(running_mean_tr.dtype() == running_var_tr.dtype(), "running_mean and running_var need to have the same data types"); + DiopiTensor gradInputTr(gradInput); + DiopiTensor gradWeightTr(gradWeight); + DiopiTensor gradBiasTr(gradBias); + DiopiTensor inputTr(input); + DiopiTensor weightTr(weight); + DiopiTensor runningMeanTr(runningMean); + DiopiTensor runningVarTr(runningVar); + DiopiTensor saveMeanTr(saveMean); + DiopiTensor saveInvstdTr(saveInvstd); + + DiopiTensor gradOutputTr(gradOutput); + + if (runningMeanTr.defined() && runningVarTr.defined()) { + DIOPI_CHECK(runningMeanTr.dtype() == runningVarTr.dtype(), "running_mean and running_var need to have the same data types"); } - auto dim = input_tr.dim(); + auto dim = inputTr.dim(); DIOPI_CHECK(dim >= 2 && dim <= 5, "Input dim is out of range"); if (3 == dim) { - input_tr.unsqueeze(3); - grad_output_tr.unsqueeze(3); - grad_input_tr.reshape(input_tr.shape()); + inputTr.unsqueeze(3); + gradOutputTr.unsqueeze(3); + gradInputTr.reshape(inputTr.shape()); } if (2 == dim) { - input_tr.unsqueeze(2); - input_tr.unsqueeze(3); - grad_output_tr.unsqueeze(2); - grad_output_tr.unsqueeze(3); - grad_input_tr.reshape(input_tr.shape()); + inputTr.unsqueeze(2); + inputTr.unsqueeze(3); + gradOutputTr.unsqueeze(2); + gradOutputTr.unsqueeze(3); + gradInputTr.reshape(inputTr.shape()); } - std::vector p_tensors{&grad_output_tr, &input_tr, &weight_tr}; - if (running_mean_tr.defined()) { - p_tensors.push_back(&running_mean_tr); + std::vector pTensors{&gradOutputTr, &inputTr, &weightTr}; + if (runningMeanTr.defined()) { + 
pTensors.push_back(&runningMeanTr); } - if (running_var_tr.defined()) { - p_tensors.push_back(&running_var_tr); + if (runningVarTr.defined()) { + pTensors.push_back(&runningVarTr); } - std::set supported_dtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes)); + std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor grad_weight_tmp_tr = grad_weight_tr; - if (grad_weight_tr.dtype() != grad_output_tr.dtype()) { - grad_weight_tmp_tr = requiresTensor(ctx, grad_weight_tr.shape(), grad_output_tr.dtype()); + DiopiTensor gradWeightTmpTr = gradWeightTr; + if (gradWeightTr.dtype() != gradOutputTr.dtype()) { + gradWeightTmpTr = requiresTensor(ctx, gradWeightTr.shape(), gradOutputTr.dtype()); } - DiopiTensor grad_bias_tmp_tr = grad_bias_tr; - if (grad_bias_tr.dtype() != grad_output_tr.dtype()) { - grad_bias_tmp_tr = requiresTensor(ctx, grad_bias_tr.shape(), grad_output_tr.dtype()); + DiopiTensor gradBiasTmpTr = gradBiasTr; + if (gradBiasTr.dtype() != gradOutputTr.dtype()) { + gradBiasTmpTr = requiresTensor(ctx, gradBiasTr.shape(), gradOutputTr.dtype()); } /* Transpose */ - MemoryFormat memory_format = input_tr.dim() == 4 ? MemoryFormat::ChannelsLast : MemoryFormat::ChannelsLast3d; - DIOPI_CALL(contiguous_(ctx, input_tr, memory_format)); - DIOPI_CALL(contiguous_(ctx, grad_output_tr, memory_format)); + MemoryFormat memoryFormat = inputTr.dim() == 4 ? MemoryFormat::ChannelsLast : MemoryFormat::ChannelsLast3d; + DIOPI_CALL(contiguous(ctx, inputTr, memoryFormat)); + DIOPI_CALL(contiguous(ctx, gradOutputTr, memoryFormat)); // Note: 1. output.dtype = input.dtype 2. channelsLast format - DiopiTensor grad_input_tmp_tr = requiresTensor(ctx, grad_input_tr.shape(), grad_output_tr.dtype(), memory_format); + DiopiTensor gradInputTmpTr = requiresTensor(ctx, gradInputTr.shape(), gradOutputTr.dtype(), memoryFormat); - cnnlTensorLayout_t layout = input_tr.dim() == 4 ? CNNL_LAYOUT_NHWC : CNNL_LAYOUT_NDHWC; - CnnlTensorDesc input_desc(input_tr, layout); - CnnlTensorDesc grad_output_desc(grad_output_tr, layout); - CnnlTensorDesc grad_input_desc(grad_input_tmp_tr, layout); - CnnlTensorDesc weight_bias_mean_var_desc(weight_tr, CNNL_LAYOUT_ARRAY); + cnnlTensorLayout_t layout = inputTr.dim() == 4 ? 
CNNL_LAYOUT_NHWC : CNNL_LAYOUT_NDHWC; + CnnlTensorDesc inputDesc(inputTr, layout); + CnnlTensorDesc gradOutputDesc(gradOutputTr, layout); + CnnlTensorDesc gradInputDesc(gradInputTmpTr, layout); + CnnlTensorDesc weightBiasMeanVarDesc(weightTr, CNNL_LAYOUT_ARRAY); // set activition part cnnlBatchNormMode_t mode = CNNL_BATCHNORM_SPATIAL; cnnlBatchNormOps_t bnOps = CNNL_BATCHNORM_OPS_BN; - cnnlActivationMode_t active_mode = CNNL_ACTIVATION_IDENTITY; + cnnlActivationMode_t activeMode = CNNL_ACTIVATION_IDENTITY; - cnnlActivationDescriptor_t activation_desc = nullptr; - DIOPI_CALLCNNL(cnnlCreateActivationDescriptor(&activation_desc)); - cnnlSetActivationDescriptor_v5(activation_desc, active_mode, CNNL_ACTIVATION_HIGH_PRECISION, CNNL_NOT_PROPAGATE_NAN, 1.0, -1, 1.0, 1.0, false); + cnnlActivationDescriptor_t activationDesc = nullptr; + DIOPI_CALLCNNL(cnnlCreateActivationDescriptor(&activationDesc)); + cnnlSetActivationDescriptor_v5(activationDesc, activeMode, CNNL_ACTIVATION_HIGH_PRECISION, CNNL_NOT_PROPAGATE_NAN, 1.0, -1, 1.0, 1.0, false); if (training) { // get workspace - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetBatchNormBackwardWorkspaceSize(handle, input_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetBatchNormBackwardWorkspaceSize(handle, inputDesc.get(), &workspaceSize)); - void* workspace_ptr = workspace_size == 0 ? nullptr : requiresBuffer(ctx, workspace_size).data(); + void* workspacePtr = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlBatchNormBackward_v2(handle, - activation_desc, + activationDesc, mode, bnOps, nullptr, nullptr, nullptr, nullptr, - input_desc.get(), - input_tr.data(), - NULL, - NULL, - grad_output_desc.get(), - grad_output_tr.data(), - weight_bias_mean_var_desc.get(), - weight_tr.data(), - NULL, - save_mean_tr.defined() ? save_mean_tr.data() : nullptr, - save_invstd_tr.defined() ? save_invstd_tr.data() : nullptr, + inputDesc.get(), + inputTr.data(), + nullptr, + nullptr, + gradOutputDesc.get(), + gradOutputTr.data(), + weightBiasMeanVarDesc.get(), + weightTr.data(), + nullptr, + saveMeanTr.defined() ? saveMeanTr.data() : nullptr, + saveInvstdTr.defined() ? saveInvstdTr.data() : nullptr, static_cast(eps), - NULL, - NULL, - grad_input_desc.get(), - grad_input_tmp_tr.data(), - grad_weight_tmp_tr.data(), - grad_bias_tmp_tr.data(), - workspace_ptr, - workspace_size, - NULL, + nullptr, + nullptr, + gradInputDesc.get(), + gradInputTmpTr.data(), + gradWeightTmpTr.data(), + gradBiasTmpTr.data(), + workspacePtr, + workspaceSize, + nullptr, 0)); } else { - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetFrozenBatchNormBackwardWorkspaceSize(handle, input_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetFrozenBatchNormBackwardWorkspaceSize(handle, inputDesc.get(), &workspaceSize)); - void* workspace_ptr = workspace_size == 0 ? nullptr : requiresBuffer(ctx, workspace_size).data(); + void* workspacePtr = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlFrozenBatchNormBackward_v2(handle, - activation_desc, + activationDesc, mode, bnOps, - input_desc.get(), - input_tr.data(), - NULL, - NULL, - grad_output_desc.get(), - grad_output_tr.data(), - weight_bias_mean_var_desc.get(), - weight_tr.data(), - NULL, - running_mean_tr.defined() ? running_mean_tr.data() : nullptr, - running_var_tr.defined() ? 
running_var_tr.data() : nullptr, + inputDesc.get(), + inputTr.data(), + nullptr, + nullptr, + gradOutputDesc.get(), + gradOutputTr.data(), + weightBiasMeanVarDesc.get(), + weightTr.data(), + nullptr, + runningMeanTr.defined() ? runningMeanTr.data() : nullptr, + runningVarTr.defined() ? runningVarTr.data() : nullptr, static_cast(eps), - workspace_ptr, - workspace_size, - NULL, - NULL, - grad_input_desc.get(), - grad_input_tmp_tr.data(), - grad_weight_tmp_tr.data(), - grad_bias_tmp_tr.data())); + workspacePtr, + workspaceSize, + nullptr, + nullptr, + gradInputDesc.get(), + gradInputTmpTr.data(), + gradWeightTmpTr.data(), + gradBiasTmpTr.data())); } // Channels last -> contiguous - DIOPI_CALL(contiguous_(ctx, grad_input_tmp_tr, MemoryFormat::Contiguous)); - DIOPI_CALL(diopiCopyInp(ctx, grad_input_tmp_tr.tensorHandle(), grad_input_tr.tensorHandle())); - DIOPI_CALL(diopiCopyInp(ctx, grad_weight_tmp_tr.tensorHandle(), grad_weight_tr.tensorHandle())); - DIOPI_CALL(diopiCopyInp(ctx, grad_bias_tmp_tr.tensorHandle(), grad_bias_tr.tensorHandle())); + DIOPI_CALL(contiguous(ctx, gradInputTmpTr, MemoryFormat::Contiguous)); + DIOPI_CALL(diopiCopyInp(ctx, gradInputTmpTr.tensorHandle(), gradInputTr.tensorHandle())); + DIOPI_CALL(diopiCopyInp(ctx, gradWeightTmpTr.tensorHandle(), gradWeightTr.tensorHandle())); + DIOPI_CALL(diopiCopyInp(ctx, gradBiasTmpTr.tensorHandle(), gradBiasTr.tensorHandle())); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/binary_op.cpp b/DIOPI-IMPL/camb/functions/binary_op.cpp index 560c573ab..b094bf56c 100644 --- a/DIOPI-IMPL/camb/functions/binary_op.cpp +++ b/DIOPI-IMPL/camb/functions/binary_op.cpp @@ -43,7 +43,7 @@ diopiAdd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHand trOutTmp = trOut; descOut.set(trOut, layout); } else { - trOutTmp = requiresTensor(ctx, vec2diopiSize_t(trOut.shape()), trInput.dtype()); + trOutTmp = requiresTensor(ctx, vec2diopiSizeT(trOut.shape()), trInput.dtype()); descOut.set(trOutTmp, CNNL_LAYOUT_ARRAY); } diff --git a/DIOPI-IMPL/camb/functions/bitwise.cpp b/DIOPI-IMPL/camb/functions/bitwise.cpp index 0bd10007e..97a827bf3 100644 --- a/DIOPI-IMPL/camb/functions/bitwise.cpp +++ b/DIOPI-IMPL/camb/functions/bitwise.cpp @@ -16,45 +16,45 @@ diopiError_t bitwiseCommon( diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other, cnnlBitComputeOp_t optype) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor out_tensor(out); - auto out32_tensor = out_tensor; - if (diopi_dtype_int64 == out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out32_tensor, diopi_dtype_int32)); + DiopiTensor outTensor(out); + auto out32Tensor = outTensor; + if (diopi_dtype_int64 == outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, out32Tensor, diopi_dtype_int32)); } - CnnlTensorDesc outDesc(out32_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(out32Tensor, CNNL_LAYOUT_ARRAY); diopiTensorHandle_t input1 = const_cast(input); - DiopiTensor input1_tensor(input1); - if (input1_tensor.dtype() != out32_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, input1_tensor, out32_tensor.dtype())); + DiopiTensor input1Tensor(input1); + if (input1Tensor.dtype() != out32Tensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, input1Tensor, out32Tensor.dtype())); } - CnnlTensorDesc input1Desc(input1_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc input1Desc(input1Tensor, CNNL_LAYOUT_ARRAY); diopiTensorHandle_t input2 = const_cast(other); - const void* input2_ptr = nullptr; + const void* input2Ptr = nullptr; 
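// Several hunks in this patch, including the batch-norm backward and bitwise ones around this point,
// repeat the same CNNL calling sequence: query the required workspace size, allocate a device buffer
// through requiresBuffer() only when that size is non-zero, then hand both pointer and size to the
// kernel. A minimal sketch of that sequence follows; cnnlGetFooWorkspaceSize and cnnlFoo are
// hypothetical placeholders standing in for any real pair such as
// cnnlGetBitComputeWorkspaceSize / cnnlBitCompute_v2.
diopiError_t fooWithWorkspace(diopiContextHandle_t ctx, CnnlTensorDesc& inDesc, const void* inPtr,
                              CnnlTensorDesc& outDesc, void* outPtr) {
    cnnlHandle_t handle = cnnlHandlePool.get(ctx);
    size_t workspaceSize = 0;
    // 1. ask the library how much scratch memory the kernel needs (hypothetical query call)
    DIOPI_CALLCNNL(cnnlGetFooWorkspaceSize(handle, inDesc.get(), outDesc.get(), &workspaceSize));
    // 2. allocate only when something is actually required
    void* workspace = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data();
    // 3. pass both the buffer and its size to the compute call (hypothetical kernel call)
    DIOPI_CALLCNNL(cnnlFoo(handle, inDesc.get(), inPtr, outDesc.get(), outPtr, workspace, workspaceSize));
    return diopiSuccess;
}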
CnnlTensorDesc input2Desc; - cnnlTensorDescriptor_t input2_desc = nullptr; + cnnlTensorDescriptor_t input2DescTmp = nullptr; if (nullptr != other) { - DiopiTensor input2_tensor(input2); - if (input2_tensor.dtype() != out32_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, input2_tensor, out32_tensor.dtype())); + DiopiTensor input2Tensor(input2); + if (input2Tensor.dtype() != out32Tensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, input2Tensor, out32Tensor.dtype())); } - input2_ptr = input2_tensor.data(); - input2Desc.set(input2_tensor, CNNL_LAYOUT_ARRAY); - input2_desc = input2Desc.get(); + input2Ptr = input2Tensor.data(); + input2Desc.set(input2Tensor, CNNL_LAYOUT_ARRAY); + input2DescTmp = input2Desc.get(); } - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetBitComputeWorkspaceSize(handle, input1Desc.get(), input2_desc, outDesc.get(), &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetBitComputeWorkspaceSize(handle, input1Desc.get(), input2DescTmp, outDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlBitCompute_v2( - handle, optype, input1Desc.get(), input1_tensor.data(), input2_desc, input2_ptr, outDesc.get(), out32_tensor.data(), workspace, workspace_size)); - if (out_tensor.dtype() != out32_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out32_tensor)); + handle, optype, input1Desc.get(), input1Tensor.data(), input2DescTmp, input2Ptr, outDesc.get(), out32Tensor.data(), workspace, workspaceSize)); + if (outTensor.dtype() != out32Tensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, out32Tensor)); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/cast_dtype.cpp b/DIOPI-IMPL/camb/functions/cast_dtype.cpp index 5865ed488..8831e712c 100644 --- a/DIOPI-IMPL/camb/functions/cast_dtype.cpp +++ b/DIOPI-IMPL/camb/functions/cast_dtype.cpp @@ -15,10 +15,10 @@ namespace camb { extern "C" { diopiError_t diopiCastDtype(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); - DIOPI_CALL(dataTypeCast(ctx, output_tr, input_tr)); + DIOPI_CALL(dataTypeCast(ctx, outputTr, inputTr)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/cat.cpp b/DIOPI-IMPL/camb/functions/cat.cpp index ad74a1743..a154049a7 100644 --- a/DIOPI-IMPL/camb/functions/cat.cpp +++ b/DIOPI-IMPL/camb/functions/cat.cpp @@ -13,29 +13,29 @@ namespace camb { extern "C" { -diopiError_t diopiCat(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t* tensors, int64_t num_inputs, int64_t dim) { +diopiError_t diopiCat(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t* tensors, int64_t numInputs, int64_t dim) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - std::vector inputsDesc(num_inputs); - std::vector inputs_desc(num_inputs); - std::vector inputs(num_inputs); - for (int i = 0; i < num_inputs; i++) { - DiopiTensor temp_tensor(tensors[i]); - inputsDesc[i].set(temp_tensor, CNNL_LAYOUT_ARRAY); - inputs_desc[i] = inputsDesc[i].get(); - inputs[i] = temp_tensor.data(); + std::vector inputsDesc(numInputs); + std::vector inputsDescTmp(numInputs); + std::vector inputs(numInputs); + for (int i = 0; i < numInputs; i++) { + DiopiTensor tempTensor(tensors[i]); + inputsDesc[i].set(tempTensor, 
CNNL_LAYOUT_ARRAY); + inputsDescTmp[i] = inputsDesc[i].get(); + inputs[i] = tempTensor.data(); } - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetConcatWorkspaceSize(handle, num_inputs, &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetConcatWorkspaceSize(handle, numInputs, &workspaceSize)); void * workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - DiopiTensor out_tensor(out); - CnnlTensorDesc out_desc(out_tensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlConcat(handle, num_inputs, dim, inputs_desc.data(), inputs.data(), workspace, workspace_size, out_desc.get(), out_tensor.data())); + DiopiTensor outTensor(out); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlConcat(handle, numInputs, dim, inputsDescTmp.data(), inputs.data(), workspace, workspaceSize, outDesc.get(), outTensor.data())); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/clamp.cpp b/DIOPI-IMPL/camb/functions/clamp.cpp index 602596127..17ec7c71b 100644 --- a/DIOPI-IMPL/camb/functions/clamp.cpp +++ b/DIOPI-IMPL/camb/functions/clamp.cpp @@ -6,14 +6,14 @@ namespace impl { namespace camb { extern "C" { -diopiError_t getClampBoundPtr(diopiContextHandle_t ctx, diopiConstTensorHandle_t bound, diopiDtype_t desire_dtype, void** out) { +diopiError_t getClampBoundPtr(diopiContextHandle_t ctx, diopiConstTensorHandle_t bound, diopiDtype_t desireDtype, void** out) { if (nullptr != bound) { - DiopiTensor bound_tensor(bound); - DIOPI_CHECK(bound_tensor.numel() == 1, "only supported when min and max are scalar or one element Tensor currently"); - if (desire_dtype != bound_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, bound_tensor, desire_dtype)); + DiopiTensor boundTensor(bound); + DIOPI_CHECK(boundTensor.numel() == 1, "only supported when min and max are scalar or one element Tensor currently"); + if (desireDtype != boundTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, boundTensor, desireDtype)); } - *out = bound_tensor.data(); + *out = boundTensor.data(); return diopiSuccess; } *out = nullptr; @@ -24,47 +24,47 @@ diopiError_t clampCommon(diopiContextHandle_t ctx, diopiConstTensorHandle_t inpu diopiConstTensorHandle_t max) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CHECK(input_tensor.dtype() == output_tensor.dtype(), "the dtype of input and output must be the same") - - DiopiTensor output32_tensor = output_tensor; - if (DiopiDataType::isInteger(input_tensor.dtype())) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_int32)); - DIOPI_CALL(dataTypeCast(ctx, output32_tensor, diopi_dtype_int32)); - } else if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, output32_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CHECK(inputTensor.dtype() == outputTensor.dtype(), "the dtype of input and output must be the same") + + DiopiTensor output32Tensor = outputTensor; + if (DiopiDataType::isInteger(inputTensor.dtype())) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_int32)); + DIOPI_CALL(dataTypeCast(ctx, output32Tensor, diopi_dtype_int32)); + } else if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + 
DIOPI_CALL(dataTypeCast(ctx, output32Tensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output32Desc(output32_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc output32Desc(output32Tensor, CNNL_LAYOUT_ARRAY); - void* min_ptr = nullptr; - void* max_ptr = nullptr; - DIOPI_CALL(getClampBoundPtr(ctx, min, input_tensor.dtype(), &min_ptr)); - DIOPI_CALL(getClampBoundPtr(ctx, max, input_tensor.dtype(), &max_ptr)); + void* minPtr = nullptr; + void* maxPtr = nullptr; + DIOPI_CALL(getClampBoundPtr(ctx, min, inputTensor.dtype(), &minPtr)); + DIOPI_CALL(getClampBoundPtr(ctx, max, inputTensor.dtype(), &maxPtr)); DIOPI_CALLCNNL( - cnnlClip_v2(handle, CNNL_POINTER_MODE_DEVICE, inputDesc.get(), input_tensor.data(), min_ptr, max_ptr, output32Desc.get(), output32_tensor.data())); - if (output_tensor.dtype() != output32_tensor.dtype()) { - if (output_tensor.dtype() != diopi_dtype_uint8) { - DIOPI_CALL(dataTypeCast(ctx, output_tensor, output32_tensor)); + cnnlClip_v2(handle, CNNL_POINTER_MODE_DEVICE, inputDesc.get(), inputTensor.data(), minPtr, maxPtr, output32Desc.get(), output32Tensor.data())); + if (outputTensor.dtype() != output32Tensor.dtype()) { + if (outputTensor.dtype() != diopi_dtype_uint8) { + DIOPI_CALL(dataTypeCast(ctx, outputTensor, output32Tensor)); } else { - DIOPI_CALL(dataTypeCast(ctx, output32_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, output_tensor, output32_tensor)); + DIOPI_CALL(dataTypeCast(ctx, output32Tensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, outputTensor, output32Tensor)); } } return diopiSuccess; } diopiError_t diopiClampInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* min, const diopiScalar_t* max) { - DiopiTensor min_tensor_tmp; - DiopiTensor max_tensor_tmp; - makeTensorFromScalar(ctx, min, min_tensor_tmp); - makeTensorFromScalar(ctx, max, max_tensor_tmp); - diopiTensorHandle_t min_tensor = min_tensor_tmp.tensorHandle(); - diopiTensorHandle_t max_tensor = max_tensor_tmp.tensorHandle(); - return clampCommon(ctx, input, input, min_tensor, max_tensor); + DiopiTensor minTensorTmp; + DiopiTensor maxTensorTmp; + makeTensorFromScalar(ctx, min, minTensorTmp); + makeTensorFromScalar(ctx, max, maxTensorTmp); + diopiTensorHandle_t minTensor = minTensorTmp.tensorHandle(); + diopiTensorHandle_t maxTensor = maxTensorTmp.tensorHandle(); + return clampCommon(ctx, input, input, minTensor, maxTensor); } diopiError_t diopiClampInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t min, diopiConstTensorHandle_t max) { @@ -73,13 +73,13 @@ diopiError_t diopiClampInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiError_t diopiClampScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* min, const diopiScalar_t* max) { - DiopiTensor min_tensor_tmp; - DiopiTensor max_tensor_tmp; - makeTensorFromScalar(ctx, min, min_tensor_tmp); - makeTensorFromScalar(ctx, max, max_tensor_tmp); - diopiTensorHandle_t min_tensor = min_tensor_tmp.tensorHandle(); - diopiTensorHandle_t max_tensor = max_tensor_tmp.tensorHandle(); - return clampCommon(ctx, input, out, min_tensor, max_tensor); + DiopiTensor minTensorTmp; + DiopiTensor maxTensorTmp; + makeTensorFromScalar(ctx, min, minTensorTmp); + makeTensorFromScalar(ctx, max, maxTensorTmp); + diopiTensorHandle_t minTensor = minTensorTmp.tensorHandle(); + diopiTensorHandle_t maxTensor = 
maxTensorTmp.tensorHandle(); + return clampCommon(ctx, input, out, minTensor, maxTensor); } diopiError_t diopiClamp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t min, @@ -88,10 +88,10 @@ diopiError_t diopiClamp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopi } diopiError_t diopiClampMaxInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* max) { - DiopiTensor max_tensor_tmp; - makeTensorFromScalar(ctx, max, max_tensor_tmp); - diopiTensorHandle_t max_tensor = max_tensor_tmp.tensorHandle(); - return clampCommon(ctx, input, input, nullptr, max_tensor); + DiopiTensor maxTensorTmp; + makeTensorFromScalar(ctx, max, maxTensorTmp); + diopiTensorHandle_t maxTensor = maxTensorTmp.tensorHandle(); + return clampCommon(ctx, input, input, nullptr, maxTensor); } diopiError_t diopiClampMaxInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t max) { @@ -99,10 +99,10 @@ diopiError_t diopiClampMaxInp(diopiContextHandle_t ctx, diopiTensorHandle_t inpu } diopiError_t diopiClampMaxScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* max) { - DiopiTensor max_tensor_tmp; - makeTensorFromScalar(ctx, max, max_tensor_tmp); - diopiTensorHandle_t max_tensor = max_tensor_tmp.tensorHandle(); - return clampCommon(ctx, input, out, nullptr, max_tensor); + DiopiTensor maxTensorTmp; + makeTensorFromScalar(ctx, max, maxTensorTmp); + diopiTensorHandle_t maxTensor = maxTensorTmp.tensorHandle(); + return clampCommon(ctx, input, out, nullptr, maxTensor); } diopiError_t diopiClampMax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t max) { @@ -110,10 +110,10 @@ diopiError_t diopiClampMax(diopiContextHandle_t ctx, diopiTensorHandle_t out, di } diopiError_t diopiClampMinInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* min) { - DiopiTensor min_tensor_tmp; - makeTensorFromScalar(ctx, min, min_tensor_tmp); - diopiTensorHandle_t min_tensor = min_tensor_tmp.tensorHandle(); - return clampCommon(ctx, input, input, min_tensor, nullptr); + DiopiTensor minTensorTmp; + makeTensorFromScalar(ctx, min, minTensorTmp); + diopiTensorHandle_t minTensor = minTensorTmp.tensorHandle(); + return clampCommon(ctx, input, input, minTensor, nullptr); } diopiError_t diopiClampMinInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t min) { @@ -121,10 +121,10 @@ diopiError_t diopiClampMinInp(diopiContextHandle_t ctx, diopiTensorHandle_t inpu } diopiError_t diopiClampMinScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* min) { - DiopiTensor min_tensor_tmp; - makeTensorFromScalar(ctx, min, min_tensor_tmp); - diopiTensorHandle_t min_tensor = min_tensor_tmp.tensorHandle(); - return clampCommon(ctx, input, out, min_tensor, nullptr); + DiopiTensor minTensorTmp; + makeTensorFromScalar(ctx, min, minTensorTmp); + diopiTensorHandle_t minTensor = minTensorTmp.tensorHandle(); + return clampCommon(ctx, input, out, minTensor, nullptr); } diopiError_t diopiClampMin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t min) { diff --git a/DIOPI-IMPL/camb/functions/conv_2d.cpp b/DIOPI-IMPL/camb/functions/conv_2d.cpp index 9266787ae..9bb927256 100644 --- a/DIOPI-IMPL/camb/functions/conv_2d.cpp +++ b/DIOPI-IMPL/camb/functions/conv_2d.cpp @@ -15,32 +15,32 @@ 
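// The clamp hunks above all funnel into clampCommon(): each *Scalar variant first materializes the
// scalar bound as a one-element device tensor via makeTensorFromScalar and then forwards its handle,
// with nullptr standing for "no bound on this side". A minimal sketch of that dispatch, assuming the
// helper names used in this patch; clampMinOnly is a hypothetical wrapper added only for illustration.
diopiError_t clampMinOnly(diopiContextHandle_t ctx, diopiTensorHandle_t out,
                          diopiConstTensorHandle_t input, const diopiScalar_t* min) {
    DiopiTensor minTensorTmp;
    // turn the host scalar into a device tensor so clampCommon can take its data pointer
    DIOPI_CALL(makeTensorFromScalar(ctx, min, minTensorTmp));
    // max == nullptr: clampCommon forwards a null max pointer to cnnlClip_v2, i.e. no upper bound
    return clampCommon(ctx, input, out, minTensorTmp.tensorHandle(), nullptr);
}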
namespace impl { namespace camb { namespace { -diopiError_t tensorPermute(diopiContextHandle_t ctx, DiopiTensor &dst_tensor, DiopiTensor src_tensor, std::vector perm_axis) { - if (!dst_tensor.defined()) { - std::vector src_shape_t_64(src_tensor.shape().size()); - for (int i = 0; i < src_tensor.shape().size(); ++i) { - src_shape_t_64[i] = src_tensor.shape()[perm_axis[i]]; +diopiError_t tensorPermute(diopiContextHandle_t ctx, DiopiTensor &dstTensor, DiopiTensor srcTensor, std::vector permAxis) { + if (!dstTensor.defined()) { + std::vector srcShapeT64(srcTensor.shape().size()); + for (int i = 0; i < srcTensor.shape().size(); ++i) { + srcShapeT64[i] = srcTensor.shape()[permAxis[i]]; } - diopiSize_t src_t_shape(src_shape_t_64.data(), src_shape_t_64.size()); - auto dst_handle = dst_tensor.tensorHandle(); - DIOPI_CALL(diopiRequireTensor(ctx, &dst_handle, &src_t_shape, nullptr, src_tensor.dtype(), diopi_device)); - dst_tensor = DiopiTensor(dst_handle); + diopiSize_t srcTShape(srcShapeT64.data(), srcShapeT64.size()); + auto dstHandle = dstTensor.tensorHandle(); + DIOPI_CALL(diopiRequireTensor(ctx, &dstHandle, &srcTShape, nullptr, srcTensor.dtype(), diopi_device)); + dstTensor = DiopiTensor(dstHandle); } - diopiSize_t axis_size(perm_axis.data(), 4); - DIOPI_CALL(diopiPermute(ctx, dst_tensor.tensorHandle(), src_tensor.tensorHandle(), axis_size)); + diopiSize_t axisSize(permAxis.data(), 4); + DIOPI_CALL(diopiPermute(ctx, dstTensor.tensorHandle(), srcTensor.tensorHandle(), axisSize)); return diopiSuccess; } diopiError_t tensorPermute2D(diopiContextHandle_t ctx, DiopiTensor &dst, DiopiTensor src, MemoryFormat format) { - if (src.is_contiguous(format)) { + if (src.isContiguous(format)) { dst = src; return diopiSuccess; } - if (src.is_contiguous(MemoryFormat::Contiguous) && format == MemoryFormat::ChannelsLast) { + if (src.isContiguous(MemoryFormat::Contiguous) && format == MemoryFormat::ChannelsLast) { DIOPI_CALL(tensorPermute(ctx, dst, src, {0, 2, 3, 1})); return diopiSuccess; } - if (src.is_contiguous(MemoryFormat::ChannelsLast) && format == MemoryFormat::Contiguous) { + if (src.isContiguous(MemoryFormat::ChannelsLast) && format == MemoryFormat::Contiguous) { DIOPI_CALL(tensorPermute(ctx, dst, src, {0, 3, 1, 2})) } return diopiErrorOccurred; @@ -52,232 +52,232 @@ extern "C" diopiError_t diopiConvolution2d(diopiContextHandle_t ctx, diopiTensor diopiConstTensorHandle_t bias, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, int64_t groups) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor weight_tensor(weight); - DiopiTensor output_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor weightTensor(weight); + DiopiTensor outputTensor(out); - DIOPI_CHECK(input_tensor.is_contiguous() || input_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(inputTensor.isContiguous() || inputTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2d] the memory format is not supportted."); - DIOPI_CHECK(weight_tensor.is_contiguous() || weight_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(weightTensor.isContiguous() || weightTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2d] the memory format is not supportted."); - DIOPI_CHECK(output_tensor.is_contiguous() || output_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(outputTensor.isContiguous() || outputTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2d] the memory format is not 
supportted."); - DiopiTensor input_tensor_casted = input_tensor; - DiopiTensor weight_tensor_casted = weight_tensor; - DiopiTensor output_tensor_casted = output_tensor; + DiopiTensor inputTensorCasted = inputTensor; + DiopiTensor weightTensorCasted = weightTensor; + DiopiTensor outputTensorCasted = outputTensor; - std::vector tensors{&input_tensor_casted, &weight_tensor_casted, &output_tensor_casted}; + std::vector tensors{&inputTensorCasted, &weightTensorCasted, &outputTensorCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_t, weight_tensor_t, output_tensor_t; + DiopiTensor inputTensorT, weightTensorT, outputTensorT; - DIOPI_CALL(tensorPermute2D(ctx, input_tensor_t, input_tensor_casted, MemoryFormat::ChannelsLast)); - DIOPI_CALL(tensorPermute2D(ctx, output_tensor_t, output_tensor_casted, MemoryFormat::ChannelsLast)); - DIOPI_CALL(tensorPermute2D(ctx, weight_tensor_t, weight_tensor_casted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, inputTensorT, inputTensorCasted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, outputTensorT, outputTensorCasted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, weightTensorT, weightTensorCasted, MemoryFormat::ChannelsLast)); - std::vector input_t_shape{input_tensor_t.shape().begin(), input_tensor_t.shape().end()}; - std::vector weight_t_shape{weight_tensor_t.shape().begin(), weight_tensor_t.shape().end()}; - std::vector output_t_shape{output_tensor_t.shape().begin(), output_tensor_t.shape().end()}; + std::vector inputTShape{inputTensorT.shape().begin(), inputTensorT.shape().end()}; + std::vector weightTShape{weightTensorT.shape().begin(), weightTensorT.shape().end()}; + std::vector outputTShape{outputTensorT.shape().begin(), outputTensorT.shape().end()}; - CnnlTensorDesc input_desc(input_tensor_t, CNNL_LAYOUT_NHWC, input_t_shape); - CnnlTensorDesc weight_desc(weight_tensor_t, CNNL_LAYOUT_NHWC, weight_t_shape); - CnnlTensorDesc output_desc(output_tensor_t, CNNL_LAYOUT_NHWC, output_t_shape); + CnnlTensorDesc inputDesc(inputTensorT, CNNL_LAYOUT_NHWC, inputTShape); + CnnlTensorDesc weightDesc(weightTensorT, CNNL_LAYOUT_NHWC, weightTShape); + CnnlTensorDesc outputDesc(outputTensorT, CNNL_LAYOUT_NHWC, outputTShape); - DiopiTensor bias_tensor(bias); - DiopiTensor bias_tensor_casted = bias_tensor; - CnnlTensorDesc bias_desc; + DiopiTensor biasTensor(bias); + DiopiTensor biasTensorCasted = biasTensor; + CnnlTensorDesc biasDesc; if (nullptr != bias) { - std::vector tensors{&bias_tensor_casted}; + std::vector tensors{&biasTensorCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32})); - DIOPI_CALL(bias_desc.set(bias_tensor_casted, CNNL_LAYOUT_ARRAY)); + DIOPI_CALL(biasDesc.set(biasTensorCasted, CNNL_LAYOUT_ARRAY)); } - std::vector stride_vec{stride.data, stride.data + stride.len}; - std::vector padding_vec{padding.data, padding.data + padding.len}; - std::vector dilation_vec{dilation.data, dilation.data + dilation.len}; + std::vector strideVec{stride.data, stride.data + stride.len}; + std::vector paddingVec{padding.data, padding.data + padding.len}; + std::vector dilationVec{dilation.data, dilation.data + dilation.len}; - CnnlResourceGuard conv_desc; + CnnlResourceGuard convDesc; - int padding_[4] = {padding_vec[0], padding_vec[0], padding_vec[1], padding_vec[1]}; - int stride_[2] = {stride_vec[0], stride_vec[1]}; - int dilation_[2] = {dilation_vec[0], dilation_vec[1]}; + int paddingTmp[4] = 
{paddingVec[0], paddingVec[0], paddingVec[1], paddingVec[1]}; + int strideTmp[2] = {strideVec[0], strideVec[1]}; + int dilationTmp[2] = {dilationVec[0], dilationVec[1]}; - cnnlDataType_t compute_type; - DIOPI_CALL(CnnlDataType::convertToCnnlType(&compute_type, input_tensor_t.dtype())); - DIOPI_CALLCNNL(cnnlSetConvolutionDescriptor(conv_desc.get(), 4, padding_, stride_, dilation_, groups, compute_type)); + cnnlDataType_t computeType; + DIOPI_CALL(CnnlDataType::convertToCnnlType(&computeType, inputTensorT.dtype())); + DIOPI_CALLCNNL(cnnlSetConvolutionDescriptor(convDesc.get(), 4, paddingTmp, strideTmp, dilationTmp, groups, computeType)); - size_t workspace_size; + size_t workspaceSize; DIOPI_CALLCNNL(cnnlGetConvolutionForwardWorkspaceSize( - handle, input_desc.get(), weight_desc.get(), output_desc.get(), bias_desc.get(), conv_desc.get(), CNNL_CONVOLUTION_FWD_ALGO_DIRECT, &workspace_size)); + handle, inputDesc.get(), weightDesc.get(), outputDesc.get(), biasDesc.get(), convDesc.get(), CNNL_CONVOLUTION_FWD_ALGO_DIRECT, &workspaceSize)); void *workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlConvolutionForward(handle, - conv_desc.get(), + convDesc.get(), CNNL_CONVOLUTION_FWD_ALGO_DIRECT, - NULL, - input_desc.get(), - input_tensor_t.data(), - weight_desc.get(), - weight_tensor_t.data(), - bias_tensor.defined() ? bias_desc.get() : nullptr, - bias_tensor.defined() ? bias_tensor_casted.data() : nullptr, + nullptr, + inputDesc.get(), + inputTensorT.data(), + weightDesc.get(), + weightTensorT.data(), + biasTensor.defined() ? biasDesc.get() : nullptr, + biasTensor.defined() ? biasTensorCasted.data() : nullptr, workspace, - workspace_size, - NULL, - output_desc.get(), - output_tensor_t.data())); + workspaceSize, + nullptr, + outputDesc.get(), + outputTensorT.data())); - DIOPI_CALL(tensorPermute2D(ctx, output_tensor_casted, output_tensor_casted, MemoryFormat::Contiguous)); - DIOPI_CALL(dataTypeCast(ctx, output_tensor, output_tensor_casted)); + DIOPI_CALL(tensorPermute2D(ctx, outputTensorCasted, outputTensorCasted, MemoryFormat::Contiguous)); + DIOPI_CALL(dataTypeCast(ctx, outputTensor, outputTensorCasted)); return diopiSuccess; } -extern "C" diopiError_t diopiConvolution2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, - diopiTensorHandle_t grad3, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, - diopiConstTensorHandle_t weight, diopiSize_t *bias_sizes, diopiSize_t stride, diopiSize_t padding, - diopiSize_t dilation, bool transposed, diopiSize_t output_padding, int64_t groups) { +extern "C" diopiError_t diopiConvolution2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiTensorHandle_t gradWeight, + diopiTensorHandle_t grad3, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, + diopiConstTensorHandle_t weight, diopiSize_t *biasSizes, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, bool transposed, diopiSize_t outputPadding, int64_t groups) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor weight_tensor(weight); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_weight_tensor(grad_weight); + DiopiTensor inputTensor(input); + DiopiTensor weightTensor(weight); + DiopiTensor gradOutputTensor(gradOutput); + 
DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradWeightTensor(gradWeight); - DIOPI_CHECK(input_tensor.is_contiguous() || input_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(inputTensor.isContiguous() || inputTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2dBackward] the memory format is not supportted."); - DIOPI_CHECK(weight_tensor.is_contiguous() || weight_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(weightTensor.isContiguous() || weightTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2dBackward] the memory format is not supportted."); - DIOPI_CHECK(grad_output_tensor.is_contiguous() || grad_output_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(gradOutputTensor.isContiguous() || gradOutputTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2dBackward] the memory format is not supportted."); - DIOPI_CHECK(grad_input_tensor.is_contiguous() || grad_input_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(gradInputTensor.isContiguous() || gradInputTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2dBackward] the memory format is not supportted."); - DIOPI_CHECK(grad_weight_tensor.is_contiguous() || grad_weight_tensor.is_contiguous(MemoryFormat::ChannelsLast), "%s", + DIOPI_CHECK(gradWeightTensor.isContiguous() || gradWeightTensor.isContiguous(MemoryFormat::ChannelsLast), "%s", "[diopiConvolution2dBackward] the memory format is not supportted."); - DiopiTensor input_casted = input_tensor; - DiopiTensor weight_casted = weight_tensor; - DiopiTensor grad_output_casted = grad_output_tensor; - DiopiTensor grad_input_casted = grad_input_tensor; - DiopiTensor grad_weight_casted = grad_weight_tensor; + DiopiTensor inputCasted = inputTensor; + DiopiTensor weightCasted = weightTensor; + DiopiTensor gradOutputCasted = gradOutputTensor; + DiopiTensor gradInputCasted = gradInputTensor; + DiopiTensor gradWeightCasted = gradWeightTensor; - std::vector tensors{&input_casted, &weight_casted, &grad_output_casted, &grad_input_casted, &grad_weight_casted}; + std::vector tensors{&inputCasted, &weightCasted, &gradOutputCasted, &gradInputCasted, &gradWeightCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_t, weight_t, grad_output_t, grad_input_t, grad_weight_t; + DiopiTensor inputT, weightT, gradOutputT, gradInputT, gradWeightT; - DIOPI_CALL(tensorPermute2D(ctx, input_t, input_casted, MemoryFormat::ChannelsLast)); - DIOPI_CALL(tensorPermute2D(ctx, weight_t, weight_casted, MemoryFormat::ChannelsLast)); - DIOPI_CALL(tensorPermute2D(ctx, grad_input_t, grad_input_casted, MemoryFormat::ChannelsLast)); - DIOPI_CALL(tensorPermute2D(ctx, grad_output_t, grad_output_casted, MemoryFormat::ChannelsLast)); - DIOPI_CALL(tensorPermute2D(ctx, grad_weight_t, grad_weight_casted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, inputT, inputCasted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, weightT, weightCasted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, gradInputT, gradInputCasted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, gradOutputT, gradOutputCasted, MemoryFormat::ChannelsLast)); + DIOPI_CALL(tensorPermute2D(ctx, gradWeightT, gradWeightCasted, MemoryFormat::ChannelsLast)); - std::vector input_t_shape{input_t.shape().begin(), input_t.shape().end()}; - std::vector 
weight_t_shape{weight_t.shape().begin(), weight_t.shape().end()}; - std::vector grad_output_t_shape{grad_output_t.shape().begin(), grad_output_t.shape().end()}; - std::vector grad_input_t_shape{grad_input_t.shape().begin(), grad_input_t.shape().end()}; - std::vector grad_weight_shape{grad_weight_t.shape().begin(), grad_weight_t.shape().end()}; + std::vector inputTShape{inputT.shape().begin(), inputT.shape().end()}; + std::vector weightTShape{weightT.shape().begin(), weightT.shape().end()}; + std::vector gradOutputTShape{gradOutputT.shape().begin(), gradOutputT.shape().end()}; + std::vector gradInputTShape{gradInputT.shape().begin(), gradInputT.shape().end()}; + std::vector gradWeightShape{gradWeightT.shape().begin(), gradWeightT.shape().end()}; - CnnlTensorDesc input_desc(input_t, CNNL_LAYOUT_NHWC, input_t_shape); - CnnlTensorDesc weight_desc(weight_t, CNNL_LAYOUT_NHWC, weight_t_shape); - CnnlTensorDesc output_grad_desc(grad_output_t, CNNL_LAYOUT_NHWC, grad_output_t_shape); - CnnlTensorDesc input_grad_desc(grad_input_t, CNNL_LAYOUT_NHWC, grad_input_t_shape); - CnnlTensorDesc weight_grad_desc(grad_weight_t, CNNL_LAYOUT_NHWC, grad_weight_shape); + CnnlTensorDesc inputDesc(inputT, CNNL_LAYOUT_NHWC, inputTShape); + CnnlTensorDesc weightDesc(weightT, CNNL_LAYOUT_NHWC, weightTShape); + CnnlTensorDesc outputGradDesc(gradOutputT, CNNL_LAYOUT_NHWC, gradOutputTShape); + CnnlTensorDesc inputGradDesc(gradInputT, CNNL_LAYOUT_NHWC, gradInputTShape); + CnnlTensorDesc weightGradDesc(gradWeightT, CNNL_LAYOUT_NHWC, gradWeightShape); - CnnlResourceGuard conv_desc; + CnnlResourceGuard convDesc; - std::vector stride_vec{stride.data, stride.data + stride.len}; - std::vector padding_vec{padding.data, padding.data + padding.len}; - std::vector dilation_vec{dilation.data, dilation.data + dilation.len}; + std::vector strideVec{stride.data, stride.data + stride.len}; + std::vector paddingVec{padding.data, padding.data + padding.len}; + std::vector dilationVec{dilation.data, dilation.data + dilation.len}; - int padding_[4] = {padding_vec[0], padding_vec[1], padding_vec[0], padding_vec[1]}; - int stride_[2] = {stride_vec[0], stride_vec[1]}; - int dilation_[2] = {dilation_vec[0], dilation_vec[1]}; + int paddingTmp[4] = {paddingVec[0], paddingVec[1], paddingVec[0], paddingVec[1]}; + int strideTmp[2] = {strideVec[0], strideVec[1]}; + int dilationTmp[2] = {dilationVec[0], dilationVec[1]}; - cnnlDataType_t compute_type; - DIOPI_CALL(CnnlDataType::convertToCnnlType(&compute_type, input_t.dtype())); - DIOPI_CALLCNNL(cnnlSetConvolutionDescriptor(conv_desc.get(), 4, padding_, stride_, dilation_, groups, compute_type)); + cnnlDataType_t computeType; + DIOPI_CALL(CnnlDataType::convertToCnnlType(&computeType, inputT.dtype())); + DIOPI_CALLCNNL(cnnlSetConvolutionDescriptor(convDesc.get(), 4, paddingTmp, strideTmp, dilationTmp, groups, computeType)); - size_t workspace_size_filter = 0; + size_t workspaceSizeFilter = 0; DIOPI_CALLCNNL(cnnlGetConvolutionBackwardFilterWorkspaceSize( - handle, input_desc.get(), output_grad_desc.get(), weight_desc.get(), conv_desc.get(), CNNL_CONVOLUTION_BWD_FILTER_ALGO_DIRECT, &workspace_size_filter)); + handle, inputDesc.get(), outputGradDesc.get(), weightDesc.get(), convDesc.get(), CNNL_CONVOLUTION_BWD_FILTER_ALGO_DIRECT, &workspaceSizeFilter)); - void *workspace_filter = nullptr; - if (workspace_size_filter != 0) { - workspace_filter = requiresBuffer(ctx, workspace_size_filter).data(); + void *workspaceFilter = nullptr; + if (workspaceSizeFilter != 0) { + workspaceFilter = requiresBuffer(ctx, 
workspaceSizeFilter).data(); } DIOPI_CALLCNNL(cnnlConvolutionBackwardFilter(handle, - NULL, - input_desc.get(), - input_t.data(), - output_grad_desc.get(), - grad_output_t.data(), - conv_desc.get(), + nullptr, + inputDesc.get(), + inputT.data(), + outputGradDesc.get(), + gradOutputT.data(), + convDesc.get(), CNNL_CONVOLUTION_BWD_FILTER_ALGO_DIRECT, - workspace_filter, - workspace_size_filter, - NULL, - weight_grad_desc.get(), - grad_weight_t.data())); + workspaceFilter, + workspaceSizeFilter, + nullptr, + weightGradDesc.get(), + gradWeightT.data())); - size_t workspace_size_input; + size_t workspaceSizeInput; DIOPI_CALLCNNL(cnnlGetConvolutionBackwardDataWorkspaceSize(handle, - weight_desc.get(), - output_grad_desc.get(), - conv_desc.get(), - input_grad_desc.get(), + weightDesc.get(), + outputGradDesc.get(), + convDesc.get(), + inputGradDesc.get(), CNNL_CONVOLUTION_BWD_DATA_ALGO_DIRECT, - &workspace_size_input)); + &workspaceSizeInput)); - void *workspace_input; - if (workspace_size_input != 0) { - workspace_input = requiresBuffer(ctx, workspace_size_input).data(); + void *workspaceInput; + if (workspaceSizeInput != 0) { + workspaceInput = requiresBuffer(ctx, workspaceSizeInput).data(); } DIOPI_CALLCNNL(cnnlConvolutionBackwardData(handle, - NULL, - weight_desc.get(), - weight_t.data(), - output_grad_desc.get(), - grad_output_t.data(), - conv_desc.get(), + nullptr, + weightDesc.get(), + weightT.data(), + outputGradDesc.get(), + gradOutputT.data(), + convDesc.get(), CNNL_CONVOLUTION_BWD_DATA_ALGO_DIRECT, - workspace_input, - workspace_size_input, - NULL, - input_grad_desc.get(), - grad_input_t.data())); + workspaceInput, + workspaceSizeInput, + nullptr, + inputGradDesc.get(), + gradInputT.data())); - DIOPI_CALL(tensorPermute2D(ctx, grad_input_casted, grad_input_t, MemoryFormat::Contiguous)); - DIOPI_CALL(tensorPermute2D(ctx, grad_weight_casted, grad_weight_t, MemoryFormat::Contiguous)); + DIOPI_CALL(tensorPermute2D(ctx, gradInputCasted, gradInputT, MemoryFormat::Contiguous)); + DIOPI_CALL(tensorPermute2D(ctx, gradWeightCasted, gradWeightT, MemoryFormat::Contiguous)); - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, grad_input_casted)); - DIOPI_CALL(dataTypeCast(ctx, grad_weight_tensor, grad_weight_casted)); + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, gradInputCasted)); + DIOPI_CALL(dataTypeCast(ctx, gradWeightTensor, gradWeightCasted)); if (grad3 != nullptr) { - DiopiTensor bias_grad_tensor(grad3); - DiopiTensor grad_bias_casted = bias_grad_tensor; - std::vector tensors{&grad_bias_casted}; + DiopiTensor biasGradTensor(grad3); + DiopiTensor gradBiasCasted = biasGradTensor; + std::vector tensors{&gradBiasCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32})); - CnnlTensorDesc bias_grad_desc(grad_bias_casted, CNNL_LAYOUT_ARRAY); - std::vector bias_shape{bias_grad_tensor.shape().begin(), bias_grad_tensor.shape().end()}; - bias_sizes->data = bias_shape.data(); - bias_sizes->len = bias_shape.size(); - size_t workspace_size_bias; - DIOPI_CALLCNNL(cnnlGetBiasAddBackwardWorkspaceSize(handle, output_grad_desc.get(), bias_grad_desc.get(), 3, &workspace_size_bias)) - void *workspace_bias = nullptr; - if (0 != workspace_size_bias) { - workspace_bias = requiresBuffer(ctx, workspace_size_bias).data(); + CnnlTensorDesc biasGradDesc(gradBiasCasted, CNNL_LAYOUT_ARRAY); + std::vector biasShape{biasGradTensor.shape().begin(), biasGradTensor.shape().end()}; + biasSizes->data = biasShape.data(); + biasSizes->len = biasShape.size(); + size_t workspaceSizeBias; + 
DIOPI_CALLCNNL(cnnlGetBiasAddBackwardWorkspaceSize(handle, outputGradDesc.get(), biasGradDesc.get(), 3, &workspaceSizeBias)) + void *workspaceBias = nullptr; + if (0 != workspaceSizeBias) { + workspaceBias = requiresBuffer(ctx, workspaceSizeBias).data(); } DIOPI_CALLCNNL(cnnlBiasAddBackward_v2( - handle, output_grad_desc.get(), grad_output_t.data(), 3, bias_grad_desc.get(), grad_bias_casted.data(), workspace_bias, workspace_size_bias)); - DIOPI_CALL(dataTypeCast(ctx, bias_grad_tensor, grad_bias_casted)) + handle, outputGradDesc.get(), gradOutputT.data(), 3, biasGradDesc.get(), gradBiasCasted.data(), workspaceBias, workspaceSizeBias)); + DIOPI_CALL(dataTypeCast(ctx, biasGradTensor, gradBiasCasted)) } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/copy.cpp b/DIOPI-IMPL/camb/functions/copy.cpp index c9677b3b5..ab9c1f504 100644 --- a/DIOPI-IMPL/camb/functions/copy.cpp +++ b/DIOPI-IMPL/camb/functions/copy.cpp @@ -22,17 +22,17 @@ diopiError_t diopiCopyInp(diopiContextHandle_t ctx, diopiConstTensorHandle_t src // TODO(waiting for dispatch): support broadcast, dealing with uncontiguous cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor dest_tr(dest); - DiopiTensor src_tr(src); + DiopiTensor destTr(dest); + DiopiTensor srcTr(src); - if (src_tr.dtype() != dest_tr.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, src_tr, dest_tr.dtype())); + if (srcTr.dtype() != destTr.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, srcTr, destTr.dtype())); } - CnnlTensorDesc input_desc(dest_tr, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc src_desc(src_tr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(destTr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc srcDesc(srcTr, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlCopy(handle, src_desc.get(), src_tr.data(), input_desc.get(), dest_tr.data())); + DIOPI_CALLCNNL(cnnlCopy(handle, srcDesc.get(), srcTr.data(), inputDesc.get(), destTr.data())); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/cos.cpp b/DIOPI-IMPL/camb/functions/cos.cpp index 544362564..4f9989093 100644 --- a/DIOPI-IMPL/camb/functions/cos.cpp +++ b/DIOPI-IMPL/camb/functions/cos.cpp @@ -16,33 +16,33 @@ namespace camb { static diopiError_t cos(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor& output) { auto handle = cnnlHandlePool.get(ctx); - auto input_tensor = DiopiTensor(input); + auto inputTensor = DiopiTensor(input); std::vector pTensors{&input}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp = output; + DiopiTensor outputTmp = output; if (input.dtype() != output.dtype()) { - output_tmp = requiresTensor(ctx, output.shape(), input.dtype()); + outputTmp = requiresTensor(ctx, output.shape(), input.dtype()); } - CnnlTensorDesc input_desc(input, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_tmp_desc(output_tmp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlCos_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, input_desc.get(), input.data(), output_tmp_desc.get(), output_tmp.data())); - if (output_tmp.dtype() != output.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output, output_tmp)); + CnnlTensorDesc inputDesc(input, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputTmpDesc(outputTmp, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlCos_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, inputDesc.get(), input.data(), outputTmpDesc.get(), outputTmp.data())); + if (outputTmp.dtype() != output.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, output, outputTmp)); } return diopiSuccess; } extern "C" diopiError_t 
diopiCosInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DiopiTensor input_tensor(input); - DIOPI_CALL(cos(ctx, input_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DIOPI_CALL(cos(ctx, inputTensor, inputTensor)); return diopiSuccess; } extern "C" diopiError_t diopiCos(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(cos(ctx, input_tensor, output_tensor)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(cos(ctx, inputTensor, outputTensor)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/cumsum.cpp b/DIOPI-IMPL/camb/functions/cumsum.cpp index e3f1be78e..2311ddfda 100644 --- a/DIOPI-IMPL/camb/functions/cumsum.cpp +++ b/DIOPI-IMPL/camb/functions/cumsum.cpp @@ -8,12 +8,12 @@ namespace camb { namespace { int getDim(DiopiTensor tensor, int64_t dim) { - int shape_size = tensor.shape().size(); - int dim_ = static_cast(dim); - if (dim_ < 0) { - dim_ = dim_ + shape_size; + int shapeSize = tensor.shape().size(); + int dimTmp = static_cast(dim); + if (dimTmp < 0) { + dimTmp = dim + shapeSize; } - return dim_; + return dimTmp; } } // namespace @@ -23,21 +23,21 @@ extern "C" { diopiError_t diopiCumsum(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); - DIOPI_CALL(autoCastTensorType(ctx, {&input_tensor}, {diopi_dtype_int32, diopi_dtype_float32, diopi_dtype_float16})); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); + DIOPI_CALL(autoCastTensorType(ctx, {&inputTensor}, {diopi_dtype_int32, diopi_dtype_float32, diopi_dtype_float16})); - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor, CNNL_LAYOUT_ARRAY); - int axis = getDim(input_tensor, dim); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + int axis = getDim(inputTensor, dim); - if (input_tensor.dtype() == out_tensor.dtype()) { - DIOPI_CALLCNNL(cnnlCumsum(handle, input_desc.get(), input_tensor.data(), axis, false, false, CNNL_PROPAGATE_NAN, out_desc.get(), out_tensor.data())); + if (inputTensor.dtype() == outTensor.dtype()) { + DIOPI_CALLCNNL(cnnlCumsum(handle, inputDesc.get(), inputTensor.data(), axis, false, false, CNNL_PROPAGATE_NAN, outDesc.get(), outTensor.data())); } else { - DiopiTensor out_temp = requiresTensor(ctx, out_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_temp_desc(out_temp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlCumsum(handle, input_desc.get(), input_tensor.data(), axis, false, false, CNNL_PROPAGATE_NAN, out_temp_desc.get(), out_temp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp)); + DiopiTensor outTemp = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlCumsum(handle, inputDesc.get(), inputTensor.data(), axis, false, false, CNNL_PROPAGATE_NAN, outTempDesc.get(), outTemp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTemp)); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/div.cpp b/DIOPI-IMPL/camb/functions/div.cpp index 89a89f662..2b4776b14 100644 --- a/DIOPI-IMPL/camb/functions/div.cpp +++ b/DIOPI-IMPL/camb/functions/div.cpp @@ -14,64 +14,64 @@ namespace camb { extern "C" { diopiError_t diopiDiv(diopiContextHandle_t ctx, 
diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other, - diopiRoundMode_t rounding_mode) { + diopiRoundMode_t roundingMode) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor outTensor(out); - DiopiTensor out_tensor_temp = out_tensor; - if ((out_tensor.dtype() != diopi_dtype_float16) && (out_tensor.dtype() != diopi_dtype_float32)) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, diopi_dtype_float32)); + DiopiTensor outTensorTemp = outTensor; + if ((outTensor.dtype() != diopi_dtype_float16) && (outTensor.dtype() != diopi_dtype_float32)) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, diopi_dtype_float32)); } else { - out_tensor_temp = DiopiTensor(out); + outTensorTemp = DiopiTensor(out); } - DIOPI_CALL(dataTypeCast(ctx, input_tensor, out_tensor_temp.dtype())); - DIOPI_CALL(dataTypeCast(ctx, other_tensor, out_tensor_temp.dtype())); + DIOPI_CALL(dataTypeCast(ctx, inputTensor, outTensorTemp.dtype())); + DIOPI_CALL(dataTypeCast(ctx, otherTensor, outTensorTemp.dtype())); - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_desc(other_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetDivWorkspaceSize(handle, input_desc.get(), other_desc.get(), out_desc.get(), &workspace_size)); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherDesc(otherTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetDivWorkspaceSize(handle, inputDesc.get(), otherDesc.get(), outDesc.get(), &workspaceSize)); void* workspace = nullptr; - workspace = requiresBuffer(ctx, workspace_size).data(); + workspace = requiresBuffer(ctx, workspaceSize).data(); cnnlDiv_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, - input_desc.get(), - input_tensor.data(), - other_desc.get(), - other_tensor.data(), + inputDesc.get(), + inputTensor.data(), + otherDesc.get(), + otherTensor.data(), workspace, - workspace_size, - out_desc.get(), - out_tensor_temp.data()); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + workspaceSize, + outDesc.get(), + outTensorTemp.data()); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } -diopiError_t diopiDivInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other, diopiRoundMode_t rounding_mode) { - DIOPI_CALL(diopiDiv(ctx, input, input, other, rounding_mode)); +diopiError_t diopiDivInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other, diopiRoundMode_t roundingMode) { + DIOPI_CALL(diopiDiv(ctx, input, input, other, roundingMode)); return diopiSuccess; } diopiError_t diopiDivScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other, - diopiRoundMode_t rounding_mode) { + diopiRoundMode_t roundingMode) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor_tmp; - DIOPI_CALL(makeTensorFromScalar(ctx, other, other_tensor_tmp)); - auto other_tensor = other_tensor_tmp.tensorHandle(); - DiopiTensor out_tensor(out); - DIOPI_CALL(diopiDiv(ctx, out, 
input, diopiTensorHandle_t(other_tensor), rounding_mode)); + DiopiTensor inputTensor(input); + DiopiTensor otherTensorTmp; + DIOPI_CALL(makeTensorFromScalar(ctx, other, otherTensorTmp)); + auto otherTensor = otherTensorTmp.tensorHandle(); + DiopiTensor outTensor(out); + DIOPI_CALL(diopiDiv(ctx, out, input, diopiTensorHandle_t(otherTensor), roundingMode)); return diopiSuccess; } -diopiError_t diopiDivInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other, diopiRoundMode_t rounding_mode) { - DIOPI_CALL(diopiDivScalar(ctx, input, input, other, rounding_mode)); +diopiError_t diopiDivInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other, diopiRoundMode_t roundingMode) { + DIOPI_CALL(diopiDivScalar(ctx, input, input, other, roundingMode)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/dropout.cpp b/DIOPI-IMPL/camb/functions/dropout.cpp index 398b545c8..7f0813416 100644 --- a/DIOPI-IMPL/camb/functions/dropout.cpp +++ b/DIOPI-IMPL/camb/functions/dropout.cpp @@ -18,25 +18,25 @@ extern "C" { diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t mask, diopiConstTensorHandle_t input, double p, bool train) { if (train) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DiopiTensor mask_tensor(mask); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DiopiTensor maskTensor(mask); // Do this Check to use DIOPI-TEST because non-float data not supported in PyTorch unless p==0 - DIOPI_CHECK(((DiopiDataType::isFloatPoint(input_tensor.dtype()) || p == 0)), "result type Float can't be cast to the desired type"); - std::vector pTensors{&input_tensor}; + DIOPI_CHECK(((DiopiDataType::isFloatPoint(inputTensor.dtype()) || p == 0)), "result type Float can't be cast to the desired type"); + std::vector pTensors{&inputTensor}; std::set supportedDtypes{ diopi_dtype_int8, diopi_dtype_uint8, diopi_dtype_int16, diopi_dtype_int32, diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tensor_temp = output_tensor; - if ((output_tensor.dtype() != input_tensor.dtype())) { - DIOPI_CALL(dataTypeCast(ctx, output_tensor_temp, input_tensor.dtype())); + DiopiTensor outputTensorTemp = outputTensor; + if ((outputTensor.dtype() != inputTensor.dtype())) { + DIOPI_CALL(dataTypeCast(ctx, outputTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc(output_tensor_temp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc mask_desc(mask_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc(outputTensorTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc maskDesc(maskTensor, CNNL_LAYOUT_ARRAY); // create and set the rand_generator cnnlRandGenerator_t generator; @@ -45,51 +45,51 @@ diopiError_t diopiDropout(diopiContextHandle_t ctx, diopiTensorHandle_t out, dio // set the period to the generator DIOPI_CALLCNNL(cnnlRandSetMTGP32Period(generator, CNNL_RAND_MTGP32_P11213)); // create and set the state - size_t size_state = 0; - DIOPI_CALLCNNL(cnnlRandGetMTGP32StateSize(generator, &size_state)); + size_t sizeState = 0; + DIOPI_CALLCNNL(cnnlRandGetMTGP32StateSize(generator, &sizeState)); void* state = nullptr; - state = requiresBuffer(ctx, size_state).data(); + state = requiresBuffer(ctx, sizeState).data(); cnnlMTGP32FastParams_t params; 
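// diopiDiv above and diopiErf/diopiExp below follow the same dtype round trip that diopiDropout uses
// here: cast unsupported inputs to a CNNL-supported dtype with autoCastTensorType, compute into a
// temporary output tensor in that working dtype, then cast back into the caller's tensor only when
// the dtypes differ. A minimal sketch of that round trip, assuming the helpers named in this patch;
// computeInWorkingDtype is a hypothetical kernel call standing in for cnnlErf_v2, cnnlExp_v2, etc.
diopiError_t unaryWithDtypeRoundTrip(diopiContextHandle_t ctx, DiopiTensor& inputTensor, DiopiTensor& outTensor) {
    std::vector<DiopiTensor*> pTensors{&inputTensor};
    std::set<diopiDtype_t> supportedDtypes{diopi_dtype_float16, diopi_dtype_float32};
    DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes));  // cast the input up front

    DiopiTensor outTensorTemp = outTensor;
    if (outTensor.dtype() != inputTensor.dtype()) {
        // compute into a scratch tensor in the working dtype
        outTensorTemp = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype());
    }
    DIOPI_CALL(computeInWorkingDtype(ctx, inputTensor, outTensorTemp));  // hypothetical kernel call
    if (outTensorTemp.dtype() != outTensor.dtype()) {
        DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp));  // cast back for the caller
    }
    return diopiSuccess;
}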
DIOPI_CALLCNNL(cnnlRandGetMTGP32HostParam(generator, ¶ms)); - size_t size_kernel = 0; - DIOPI_CALLCNNL(cnnlRandGetMTGP32KernelParamSize(generator, &size_kernel)); - void* kernel_params = nullptr; - kernel_params = requiresBuffer(ctx, size_kernel).data(); - DIOPI_CALLCNNL(cnnlRandMakeMTGP32Constants(handle, params, kernel_params)); - int rand_seed = time(NULL); - DIOPI_CALLCNNL(cnnlRandMakeMTGP32KernelState(handle, state, params, kernel_params, rand_seed)); + size_t sizeKernel = 0; + DIOPI_CALLCNNL(cnnlRandGetMTGP32KernelParamSize(generator, &sizeKernel)); + void* kernelParams = nullptr; + kernelParams = requiresBuffer(ctx, sizeKernel).data(); + DIOPI_CALLCNNL(cnnlRandMakeMTGP32Constants(handle, params, kernelParams)); + int randSeed = time(nullptr); + DIOPI_CALLCNNL(cnnlRandMakeMTGP32KernelState(handle, state, params, kernelParams, randSeed)); // cases for dropout2d when input_shape != mask_shape - if (input_tensor.shape() != mask_tensor.shape()) { - DiopiTensor temp_tensor = ones(ctx, mask_tensor.shape(), diopi_dtype_float32); - CnnlTensorDesc temp_desc(temp_tensor, CNNL_LAYOUT_ARRAY); + if (inputTensor.shape() != maskTensor.shape()) { + DiopiTensor tempTensor = ones(ctx, maskTensor.shape(), diopi_dtype_float32); + CnnlTensorDesc tempDesc(tempTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlFusedDropout_v2( - handle, generator, temp_desc.get(), temp_tensor.data(), p, state, mask_desc.get(), mask_tensor.data(), temp_desc.get(), temp_tensor.data())); + handle, generator, tempDesc.get(), tempTensor.data(), p, state, maskDesc.get(), maskTensor.data(), tempDesc.get(), tempTensor.data())); - DiopiTensor bcast_temp_tensor; - DIOPI_CALL(dataTypeCast(ctx, temp_tensor, output_tensor_temp.dtype())); - broadcastHelper(ctx, temp_tensor, output_tensor_temp, &bcast_temp_tensor); - CnnlTensorDesc bcast_temp_desc(bcast_temp_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor bcastTempTensor; + DIOPI_CALL(dataTypeCast(ctx, tempTensor, outputTensorTemp.dtype())); + broadcastHelper(ctx, tempTensor, outputTensorTemp, &bcastTempTensor); + CnnlTensorDesc bcastTempDesc(bcastTempTensor, CNNL_LAYOUT_ARRAY); - cnnlTensorDescriptor_t input_descs[] = {input_desc.get(), bcast_temp_desc.get()}; - const void* inputs[] = {input_tensor.data(), bcast_temp_tensor.data()}; - DIOPI_CALLCNNL(cnnlMulN(handle, input_descs, inputs, 2, output_desc.get(), output_tensor_temp.data())) + cnnlTensorDescriptor_t inputDescs[] = {inputDesc.get(), bcastTempDesc.get()}; + const void* inputs[] = {inputTensor.data(), bcastTempTensor.data()}; + DIOPI_CALLCNNL(cnnlMulN(handle, inputDescs, inputs, 2, outputDesc.get(), outputTensorTemp.data())) } else { // cases for dropout DIOPI_CALLCNNL(cnnlFusedDropout_v2(handle, generator, - input_desc.get(), - input_tensor.data(), + inputDesc.get(), + inputTensor.data(), p, state, - mask_desc.get(), - mask_tensor.data(), - output_desc.get(), - output_tensor_temp.data())); + maskDesc.get(), + maskTensor.data(), + outputDesc.get(), + outputTensorTemp.data())); } - if (output_tensor_temp.dtype() != output_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output_tensor, output_tensor_temp)); + if (outputTensorTemp.dtype() != outputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outputTensor, outputTensorTemp)); } DIOPI_CALLCNNL(cnnlRandDestroyGenerator(generator)); diff --git a/DIOPI-IMPL/camb/functions/erf.cpp b/DIOPI-IMPL/camb/functions/erf.cpp index 5205aea81..296255c69 100644 --- a/DIOPI-IMPL/camb/functions/erf.cpp +++ b/DIOPI-IMPL/camb/functions/erf.cpp @@ -16,25 +16,25 @@ extern "C" { diopiError_t 
diopiErf(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (outTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); cnnlComputationPreference_t prefer = CNNL_COMPUTATION_HIGH_PRECISION; - DIOPI_CALLCNNL(cnnlErf_v2(handle, prefer, input_desc.get(), input_tensor.data(), out_desc.get(), out_tensor_temp.data())); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + DIOPI_CALLCNNL(cnnlErf_v2(handle, prefer, inputDesc.get(), inputTensor.data(), outDesc.get(), outTensorTemp.data())); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/error.cpp b/DIOPI-IMPL/camb/functions/error.cpp index faf8c29df..2844a18cc 100644 --- a/DIOPI-IMPL/camb/functions/error.cpp +++ b/DIOPI-IMPL/camb/functions/error.cpp @@ -15,7 +15,7 @@ char strLastError[8192] = {0}; char strLastErrorOther[4096] = {0}; std::mutex mtxLastError; -const char* camb_get_last_error_string() { +const char* cambGetLastErrorString() { // consider cnrt version cnrtGetLastErr or cnrtGetLaislhhstError ::cnrtRet_t err = ::cnrtGetLastError(); std::lock_guard lock(mtxLastError); @@ -67,5 +67,5 @@ const char* getDiopiErrorStr(diopiError_t err) { const char* diopiGetLastErrorString() { - return impl::camb::camb_get_last_error_string(); + return impl::camb::cambGetLastErrorString(); } diff --git a/DIOPI-IMPL/camb/functions/exp.cpp b/DIOPI-IMPL/camb/functions/exp.cpp index 8a04d7e44..305db386b 100644 --- a/DIOPI-IMPL/camb/functions/exp.cpp +++ b/DIOPI-IMPL/camb/functions/exp.cpp @@ -20,28 +20,28 @@ static diopiError_t exp(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor DIOPI_CHECK(input.shape() == output.shape(), "input shape should be same as output"); std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp = output; + DiopiTensor outputTmp = output; if (input.dtype() != output.dtype()) { - output_tmp = requiresTensor(ctx, output.shape(), input.dtype()); + outputTmp = requiresTensor(ctx, output.shape(), input.dtype()); } CnnlTensorDesc desc(input, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlExp_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), output_tmp.data())); - if (output.dtype() != output_tmp.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output, output_tmp)); + DIOPI_CALLCNNL(cnnlExp_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), outputTmp.data())); + if (output.dtype() != 
outputTmp.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, output, outputTmp)); } return diopiSuccess; } extern "C" diopiError_t diopiExpInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DiopiTensor input_tensor(input); - DIOPI_CALL(exp(ctx, input_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DIOPI_CALL(exp(ctx, inputTensor, inputTensor)); return diopiSuccess; } extern "C" diopiError_t diopiExp(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(exp(ctx, input_tensor, output_tensor)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(exp(ctx, inputTensor, outputTensor)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/expand.cpp b/DIOPI-IMPL/camb/functions/expand.cpp index 2e460a061..e40c33b48 100644 --- a/DIOPI-IMPL/camb/functions/expand.cpp +++ b/DIOPI-IMPL/camb/functions/expand.cpp @@ -38,7 +38,7 @@ extern "C" diopiError_t diopiExpand(diopiContextHandle_t ctx, diopiTensorHandle_ trOutTmp = trOut; descOut.set(trOut, layout); } else { - trOutTmp = requiresTensor(ctx, vec2diopiSize_t(trOut.shape()), trInput.dtype()); + trOutTmp = requiresTensor(ctx, vec2diopiSizeT(trOut.shape()), trInput.dtype()); descOut.set(trOutTmp, CNNL_LAYOUT_ARRAY); } diff --git a/DIOPI-IMPL/camb/functions/fill.cpp b/DIOPI-IMPL/camb/functions/fill.cpp index f85f86b21..bc3c90207 100644 --- a/DIOPI-IMPL/camb/functions/fill.cpp +++ b/DIOPI-IMPL/camb/functions/fill.cpp @@ -19,92 +19,95 @@ extern "C" { diopiError_t diopiFill(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* value) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor input_tensor_temp = input_tensor; + DiopiTensor inputTensor(input); + DiopiTensor inputTensorTemp = inputTensor; // float64 not supported yet - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor_temp, diopi_dtype_float32)); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensorTemp, diopi_dtype_float32)); } - CnnlTensorDesc input_tensor_desc(input_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputTensorDesc(inputTensorTemp, CNNL_LAYOUT_ARRAY); - double value_scalar = DiopiDataType::isInteger(value->stype) ? value->ival : value->fval; - void* value_ptr; - bool temp_bool = 0; - int8_t temp_i8 = 0; - uint8_t temp_u8 = 0; - int16_t temp_i16 = 0; - uint16_t temp_u16 = 0; - int32_t temp_i32 = 0; - uint32_t temp_u32 = 0; - int64_t temp_i64 = 0; - uint64_t temp_u64 = 0; - half_float::half temp_f16 = static_cast(0); - float temp_f32 = 0; + double valueScalar = DiopiDataType::isInteger(value->stype) ? 
value->ival : value->fval;
+    void* valuePtr = nullptr;
+    bool tempBool = false;
+    int8_t tempI8 = 0;
+    uint8_t tempU8 = 0;
+    int16_t tempI16 = 0;
+    uint16_t tempU16 = 0;
+    int32_t tempI32 = 0;
+    uint32_t tempU32 = 0;
+    int64_t tempI64 = 0;
+    uint64_t tempU64 = 0;
+    half_float::half tempF16 = static_cast<half_float::half>(0);
+    float tempF32 = 0;
-    switch (input_tensor_temp.dtype()) {
+    switch (inputTensorTemp.dtype()) {
         case diopi_dtype_bool: {
-            temp_bool = static_cast<bool>(value_scalar);
-            value_ptr = &temp_bool;
+            tempBool = static_cast<bool>(valueScalar);
+            valuePtr = &tempBool;
             break;
         }
         case diopi_dtype_int8: {
-            temp_i8 = int8_t(value_scalar);
-            value_ptr = &temp_i8;
+            tempI8 = int8_t(valueScalar);
+            valuePtr = &tempI8;
             break;
         }
         case diopi_dtype_uint8: {
-            temp_u8 = uint8_t(value_scalar);
-            value_ptr = &temp_u8;
+            tempU8 = uint8_t(valueScalar);
+            valuePtr = &tempU8;
             break;
         }
         case diopi_dtype_int16: {
-            temp_i16 = int16_t(value_scalar);
-            value_ptr = &temp_i16;
+            tempI16 = int16_t(valueScalar);
+            valuePtr = &tempI16;
             break;
         }
         case diopi_dtype_uint16: {
-            temp_u16 = uint16_t(value_scalar);
-            value_ptr = &temp_u16;
+            tempU16 = uint16_t(valueScalar);
+            valuePtr = &tempU16;
             break;
         }
         case diopi_dtype_int32: {
-            temp_i32 = int32_t(value_scalar);
-            value_ptr = &temp_i32;
+            tempI32 = int32_t(valueScalar);
+            valuePtr = &tempI32;
             break;
         }
         case diopi_dtype_uint32: {
-            temp_u32 = uint32_t(value_scalar);
-            value_ptr = &temp_u32;
+            tempU32 = uint32_t(valueScalar);
+            valuePtr = &tempU32;
             break;
         }
         case diopi_dtype_int64: {
-            temp_i64 = int64_t(value_scalar);
-            value_ptr = &temp_i64;
+            tempI64 = int64_t(valueScalar);
+            valuePtr = &tempI64;
             break;
         }
         case diopi_dtype_uint64: {
-            temp_u64 = uint64_t(value_scalar);
-            value_ptr = &temp_u64;
+            tempU64 = uint64_t(valueScalar);
+            valuePtr = &tempU64;
             break;
         }
         case diopi_dtype_float16: {
-            temp_f16 = half_float::half(value_scalar);
-            value_ptr = &temp_f16;
+            tempF16 = half_float::half(valueScalar);
+            valuePtr = &tempF16;
             break;
         }
         case diopi_dtype_float32: {
-            temp_f32 = static_cast<float>(value_scalar);
-            value_ptr = &temp_f32;
+            tempF32 = static_cast<float>(valueScalar);
+            valuePtr = &tempF32;
             break;
         }
+        default: {
+            DIOPI_CHECK(false, "the input tensor dtype %s is not allowed", DiopiDataType::dataTypeStr(inputTensorTemp.dtype()).c_str());
+        }
     }
-    DIOPI_CALLCNNL(cnnlFill_v3(handle, CNNL_POINTER_MODE_HOST, value_ptr, input_tensor_desc.get(), input_tensor_temp.data()));
+    DIOPI_CALLCNNL(cnnlFill_v3(handle, CNNL_POINTER_MODE_HOST, valuePtr, inputTensorDesc.get(), inputTensorTemp.data()));
-    if (input_tensor_temp.dtype() != input_tensor.dtype()) {
-        DIOPI_CALL(dataTypeCast(ctx, input_tensor, input_tensor_temp));
+    if (inputTensorTemp.dtype() != inputTensor.dtype()) {
+        DIOPI_CALL(dataTypeCast(ctx, inputTensor, inputTensorTemp));
     }
     return diopiSuccess;
 }
diff --git a/DIOPI-IMPL/camb/functions/flip.cpp b/DIOPI-IMPL/camb/functions/flip.cpp
index 573c15f2a..cdf09f346 100644
--- a/DIOPI-IMPL/camb/functions/flip.cpp
+++ b/DIOPI-IMPL/camb/functions/flip.cpp
@@ -10,29 +10,29 @@ extern "C" {
 diopiError_t diopiFlip(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dims) {
     cnnlHandle_t handle = cnnlHandlePool.get(ctx);
-    DiopiTensor input_tensor(input);
-    if (diopi_dtype_float64 == input_tensor.dtype()) {
-        DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32));
-    } else if (diopi_dtype_int64 == input_tensor.dtype()) {
-        DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_int32));
+    DiopiTensor inputTensor(input);
+    if (diopi_dtype_float64 ==
inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + } else if (diopi_dtype_int64 == inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_int32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); - DiopiTensor out_tensor(out); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor outTensor(out); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); std::vector dimension(dims.len); for (int i = 0; i < dims.len; i++) { dimension[i] = dims.data[i]; } - if (out_tensor.dtype() == input_tensor.dtype()) { - DIOPI_CALLCNNL(cnnlFlip(handle, dimension.data(), dims.len, inputDesc.get(), input_tensor.data(), outDesc.get(), out_tensor.data())); + if (outTensor.dtype() == inputTensor.dtype()) { + DIOPI_CALLCNNL(cnnlFlip(handle, dimension.data(), dims.len, inputDesc.get(), inputTensor.data(), outDesc.get(), outTensor.data())); } else { - DiopiTensor out_temp = requiresTensor(ctx, out_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_tempDesc(out_temp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlFlip(handle, dimension.data(), dims.len, inputDesc.get(), input_tensor.data(), out_tempDesc.get(), out_temp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp)); + DiopiTensor outTemp = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlFlip(handle, dimension.data(), dims.len, inputDesc.get(), inputTensor.data(), outTempDesc.get(), outTemp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/floor.cpp b/DIOPI-IMPL/camb/functions/floor.cpp index d33608366..5a069a13f 100644 --- a/DIOPI-IMPL/camb/functions/floor.cpp +++ b/DIOPI-IMPL/camb/functions/floor.cpp @@ -39,7 +39,7 @@ extern "C" diopiError_t diopiFloor(diopiContextHandle_t ctx, diopiTensorHandle_t trOutTmp = trOut; descOut.set(trOut, layout); } else { - trOutTmp = requiresTensor(ctx, vec2diopiSize_t(trOut.shape()), trInput.dtype()); + trOutTmp = requiresTensor(ctx, vec2diopiSizeT(trOut.shape()), trInput.dtype()); descOut.set(trOutTmp, CNNL_LAYOUT_ARRAY); } diff --git a/DIOPI-IMPL/camb/functions/gather.cpp b/DIOPI-IMPL/camb/functions/gather.cpp index 607dedf60..1353b603b 100644 --- a/DIOPI-IMPL/camb/functions/gather.cpp +++ b/DIOPI-IMPL/camb/functions/gather.cpp @@ -16,76 +16,76 @@ DIOPI_API diopiError_t diopiGather(diopiContextHandle_t ctx, diopiTensorHandle_t diopiConstTensorHandle_t index) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - } else if (input_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_int32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + } else if (inputTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_int32)); } - DiopiTensor index_tensor(index); - DIOPI_CALL(autoCastTensorType(ctx, {&index_tensor}, {diopi_dtype_int32, diopi_dtype_int64})); - DiopiTensor out_tensor(out); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc indexDesc(index_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + 
DiopiTensor indexTensor(index); + DIOPI_CALL(autoCastTensorType(ctx, {&indexTensor}, {diopi_dtype_int32, diopi_dtype_int64})); + DiopiTensor outTensor(out); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indexDesc(indexTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); if (dim < 0) { - dim += input_tensor.dim(); + dim += inputTensor.dim(); } - if (out_tensor.dtype() == input_tensor.dtype()) { - DIOPI_CALLCNNL(cnnlGather(handle, dim, inputDesc.get(), input_tensor.data(), indexDesc.get(), index_tensor.data(), outDesc.get(), out_tensor.data())); + if (outTensor.dtype() == inputTensor.dtype()) { + DIOPI_CALLCNNL(cnnlGather(handle, dim, inputDesc.get(), inputTensor.data(), indexDesc.get(), indexTensor.data(), outDesc.get(), outTensor.data())); } else { - DiopiTensor out_temp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_temp, input_tensor.dtype())); - CnnlTensorDesc out_tempDesc(out_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor outTemp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTemp, inputTensor.dtype())); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL( - cnnlGather(handle, dim, inputDesc.get(), input_tensor.data(), indexDesc.get(), index_tensor.data(), out_tempDesc.get(), out_temp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp)); + cnnlGather(handle, dim, inputDesc.get(), inputTensor.data(), indexDesc.get(), indexTensor.data(), outTempDesc.get(), outTemp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTemp)); } return diopiSuccess; } -DIOPI_API diopiError_t diopiGatherBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +DIOPI_API diopiError_t diopiGatherBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, int64_t dim, diopiConstTensorHandle_t index) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); diopiScalar_t zero = {diopi_dtype_float32, 0}; - DIOPI_CALL(diopiFill(ctx, grad_input, &zero)); + DIOPI_CALL(diopiFill(ctx, gradInput, &zero)); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - DiopiTensor index_tensor(index); - DIOPI_CALL(autoCastTensorType(ctx, {&index_tensor}, {diopi_dtype_int32, diopi_dtype_int64})); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor out_temp = grad_input_tensor; - if (out_temp.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, out_temp, diopi_dtype_float32)); + DiopiTensor indexTensor(index); + DIOPI_CALL(autoCastTensorType(ctx, {&indexTensor}, {diopi_dtype_int32, diopi_dtype_int64})); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor outTemp = gradInputTensor; + if (outTemp.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, outTemp, diopi_dtype_float32)); } - DiopiTensor grad_output_tensor(grad_output); - if (grad_output_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_output_tensor, diopi_dtype_float32)); + DiopiTensor gradOutputTensor(gradOutput); + if (gradOutputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradOutputTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc indexDesc(index_tensor, 
CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_tempDesc(out_temp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc grad_outputDesc(grad_output_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indexDesc(indexTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradOutputDesc(gradOutputTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlScatter(handle, dim, - out_tempDesc.get(), - out_temp.data(), + outTempDesc.get(), + outTemp.data(), indexDesc.get(), - index_tensor.data(), - grad_outputDesc.get(), - grad_output_tensor.data(), - out_tempDesc.get(), - out_temp.data(), + indexTensor.data(), + gradOutputDesc.get(), + gradOutputTensor.data(), + outTempDesc.get(), + outTemp.data(), CNNL_SCATTER_ADD)); - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, out_temp)); + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, outTemp)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/hardtanh.cpp b/DIOPI-IMPL/camb/functions/hardtanh.cpp index d0a2c19ee..d640cc2db 100644 --- a/DIOPI-IMPL/camb/functions/hardtanh.cpp +++ b/DIOPI-IMPL/camb/functions/hardtanh.cpp @@ -14,93 +14,93 @@ namespace camb { extern "C" { -diopiError_t diopiHardtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* min_val, - const diopiScalar_t* max_val) { +diopiError_t diopiHardtanh(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* minVal, + const diopiScalar_t* maxVal) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - DiopiTensor out_tensor(out); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + DiopiTensor outTensor(out); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); - float min = min_val->fval; - float max = max_val->fval; + float min = minVal->fval; + float max = maxVal->fval; if (min > max) { min = max; } - if (out_tensor.dtype() == diopi_dtype_float64) { - DiopiTensor out32_tensor = requiresTensor(ctx, out_tensor.shape(), diopi_dtype_float32); - CnnlTensorDesc out32Desc(out32_tensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), input_tensor.data(), max, min, out32Desc.get(), out32_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out32_tensor)); + if (outTensor.dtype() == diopi_dtype_float64) { + DiopiTensor out32Tensor = requiresTensor(ctx, outTensor.shape(), diopi_dtype_float32); + CnnlTensorDesc out32Desc(out32Tensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), inputTensor.data(), max, min, out32Desc.get(), out32Tensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, out32Tensor)); } else { - DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), input_tensor.data(), max, min, outDesc.get(), out_tensor.data())); + DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), inputTensor.data(), max, min, outDesc.get(), outTensor.data())); } return diopiSuccess; } -diopiError_t diopiHardtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* min_val, const diopiScalar_t* max_val) { +diopiError_t 
diopiHardtanhInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* minVal, const diopiScalar_t* maxVal) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - DiopiTensor out_tensor(input); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + DiopiTensor outTensor(input); - float min = min_val->fval; - float max = max_val->fval; + float min = minVal->fval; + float max = maxVal->fval; if (min > max) { min = max; } - if (out_tensor.dtype() == diopi_dtype_float64) { - DiopiTensor out32_tensor = requiresTensor(ctx, input_tensor.shape(), diopi_dtype_float32); - CnnlTensorDesc out32Desc(out32_tensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), input_tensor.data(), max, min, out32Desc.get(), out32_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out32_tensor)); + if (outTensor.dtype() == diopi_dtype_float64) { + DiopiTensor out32Tensor = requiresTensor(ctx, inputTensor.shape(), diopi_dtype_float32); + CnnlTensorDesc out32Desc(out32Tensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), inputTensor.data(), max, min, out32Desc.get(), out32Tensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, out32Tensor)); } else { - DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), input_tensor.data(), max, min, inputDesc.get(), input_tensor.data())); + DIOPI_CALLCNNL(cnnlHardtanh(handle, inputDesc.get(), inputTensor.data(), max, min, inputDesc.get(), inputTensor.data())); } return diopiSuccess; } -diopiError_t diopiHardtanhBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, const diopiScalar_t* min_val, const diopiScalar_t* max_val) { +diopiError_t diopiHardtanhBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, + diopiConstTensorHandle_t input, const diopiScalar_t* minVal, const diopiScalar_t* maxVal) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - DiopiTensor grad_out_tensor(grad_output); - if (grad_out_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_out_tensor, diopi_dtype_float32)); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + DiopiTensor gradOutTensor(gradOutput); + if (gradOutTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradOutTensor, diopi_dtype_float32)); } - CnnlTensorDesc gradoutDesc(grad_out_tensor, CNNL_LAYOUT_ARRAY); - DiopiTensor grad_in_tensor(grad_input); - CnnlTensorDesc gradinDesc(grad_in_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradoutDesc(gradOutTensor, CNNL_LAYOUT_ARRAY); + DiopiTensor gradInTensor(gradInput); + CnnlTensorDesc gradinDesc(gradInTensor, CNNL_LAYOUT_ARRAY); - float min = min_val->fval; - float max = max_val->fval; + float min = 
minVal->fval; + float max = maxVal->fval; if (min > max) { min = max; } - if (grad_in_tensor.dtype() == diopi_dtype_float64) { - DiopiTensor grad_in32_tensor = requiresTensor(ctx, grad_in_tensor.shape(), diopi_dtype_float32); - CnnlTensorDesc gradin32Desc(grad_in32_tensor, CNNL_LAYOUT_ARRAY); + if (gradInTensor.dtype() == diopi_dtype_float64) { + DiopiTensor gradIn32Tensor = requiresTensor(ctx, gradInTensor.shape(), diopi_dtype_float32); + CnnlTensorDesc gradin32Desc(gradIn32Tensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlHardtanhBackward( - handle, inputDesc.get(), input_tensor.data(), gradoutDesc.get(), grad_out_tensor.data(), max, min, gradin32Desc.get(), grad_in32_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, grad_in_tensor, grad_in32_tensor)); + handle, inputDesc.get(), inputTensor.data(), gradoutDesc.get(), gradOutTensor.data(), max, min, gradin32Desc.get(), gradIn32Tensor.data())); + DIOPI_CALL(dataTypeCast(ctx, gradInTensor, gradIn32Tensor)); } else { DIOPI_CALLCNNL(cnnlHardtanhBackward( - handle, inputDesc.get(), input_tensor.data(), gradoutDesc.get(), grad_out_tensor.data(), max, min, gradinDesc.get(), grad_in_tensor.data())); + handle, inputDesc.get(), inputTensor.data(), gradoutDesc.get(), gradOutTensor.data(), max, min, gradinDesc.get(), gradInTensor.data())); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/layernorm.cpp b/DIOPI-IMPL/camb/functions/layernorm.cpp index 58ea98154..5d6542515 100644 --- a/DIOPI-IMPL/camb/functions/layernorm.cpp +++ b/DIOPI-IMPL/camb/functions/layernorm.cpp @@ -8,176 +8,176 @@ namespace camb { extern "C" { -diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t save_mean, diopiTensorHandle_t save_invstd, - diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiSize_t normalized_shape, +diopiError_t diopiLayerNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t saveMean, diopiTensorHandle_t saveInvstd, + diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias, diopiSize_t normalizedShape, double eps) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); - DiopiTensor save_mean_tensor(save_mean); - DiopiTensor save_invstd_tensor(save_invstd); - - diopiDtype_t out_dtype = out_tensor.dtype(); - if (out_dtype != diopi_dtype_float32 && out_dtype != diopi_dtype_float16) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, save_mean_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, save_invstd_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); + DiopiTensor saveMeanTensor(saveMean); + DiopiTensor saveInvstdTensor(saveInvstd); + + diopiDtype_t outDtype = outTensor.dtype(); + if (outDtype != diopi_dtype_float32 && outDtype != diopi_dtype_float16) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, outTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, saveMeanTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, saveInvstdTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc save_meanDesc(save_mean_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + 
CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc saveMeanDesc(saveMeanTensor, CNNL_LAYOUT_ARRAY); - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetLayerNormOpWorkspaceSize(handle, normalized_shape.len, inputDesc.get(), &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetLayerNormOpWorkspaceSize(handle, normalizedShape.len, inputDesc.get(), &workspaceSize)); void *workspace = nullptr; - if (workspace_size > 0) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (workspaceSize > 0) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - void *weight_ptr = nullptr; - void *bias_ptr = nullptr; - CnnlTensorDesc weight_biasDesc; - cnnlTensorDescriptor_t weight_bias_desc = nullptr; + void *weightPtr = nullptr; + void *biasPtr = nullptr; + CnnlTensorDesc weightBiasDesc; + cnnlTensorDescriptor_t weightBiasDescTmp = nullptr; if (weight != nullptr && bias != nullptr) { - DiopiTensor weight_tensor(weight); - DiopiTensor bias_tensor(bias); - if (out_dtype != diopi_dtype_float32 && out_dtype != diopi_dtype_float16) { - DIOPI_CALL(dataTypeCast(ctx, weight_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, bias_tensor, diopi_dtype_float32)); + DiopiTensor weightTensor(weight); + DiopiTensor biasTensor(bias); + if (outDtype != diopi_dtype_float32 && outDtype != diopi_dtype_float16) { + DIOPI_CALL(dataTypeCast(ctx, weightTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, biasTensor, diopi_dtype_float32)); } - weight_ptr = weight_tensor.data(); - bias_ptr = bias_tensor.data(); - weight_biasDesc.set(weight_tensor, CNNL_LAYOUT_ARRAY); - weight_bias_desc = weight_biasDesc.get(); + weightPtr = weightTensor.data(); + biasPtr = biasTensor.data(); + weightBiasDesc.set(weightTensor, CNNL_LAYOUT_ARRAY); + weightBiasDescTmp = weightBiasDesc.get(); } - int axis = input_tensor.dim() - normalized_shape.len; + int axis = inputTensor.dim() - normalizedShape.len; DIOPI_CALLCNNL(cnnlLayerNormForward(handle, inputDesc.get(), - input_tensor.data(), + inputTensor.data(), axis, - weight_bias_desc, - weight_ptr, - bias_ptr, + weightBiasDescTmp, + weightPtr, + biasPtr, eps, workspace, - workspace_size, + workspaceSize, outDesc.get(), - out_tensor.data(), - save_meanDesc.get(), - save_mean_tensor.data(), - save_invstd_tensor.data())); - - if (out_dtype != diopi_dtype_float32 && out_dtype != diopi_dtype_float16) { - DiopiTensor out_tensor_(out); - DiopiTensor save_mean_tensor_(save_mean); - DiopiTensor save_invstd_tensor_(save_invstd); - DIOPI_CALL(dataTypeCast(ctx, out_tensor_, out_tensor)); - DIOPI_CALL(dataTypeCast(ctx, save_mean_tensor_, save_mean_tensor)); - DIOPI_CALL(dataTypeCast(ctx, save_invstd_tensor_, save_invstd_tensor)); + outTensor.data(), + saveMeanDesc.get(), + saveMeanTensor.data(), + saveInvstdTensor.data())); + + if (outDtype != diopi_dtype_float32 && outDtype != diopi_dtype_float16) { + DiopiTensor outTensorTmp(out); + DiopiTensor saveMeanTensorTmp(saveMean); + DiopiTensor saveInvstdTensorTmp(saveInvstd); + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, outTensor)); + DIOPI_CALL(dataTypeCast(ctx, saveMeanTensorTmp, saveMeanTensor)); + DIOPI_CALL(dataTypeCast(ctx, saveInvstdTensorTmp, saveInvstdTensor)); } return diopiSuccess; } -diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, diopiTensorHandle_t grad_bias, - diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, - diopiConstTensorHandle_t bias, 
diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalized_shape) { +diopiError_t diopiLayerNormBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiTensorHandle_t gradWeight, diopiTensorHandle_t gradBias, + diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, + diopiConstTensorHandle_t bias, diopiConstTensorHandle_t mean, diopiConstTensorHandle_t rstd, diopiSize_t normalizedShape) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor input_tensor(input); - DiopiTensor mean_tensor(mean); - DiopiTensor rstd_tensor(rstd); - DiopiTensor weight_tensor(weight); - DiopiTensor bias_tensor(bias); - DiopiTensor grad_weight_tensor(grad_weight); - DiopiTensor grad_bias_tensor(grad_bias); - - diopiDtype_t out_dtype = grad_input_tensor.dtype(); - if (out_dtype != diopi_dtype_float16 && out_dtype != diopi_dtype_float32) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, grad_output_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, mean_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, rstd_tensor, diopi_dtype_float32)); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor inputTensor(input); + DiopiTensor meanTensor(mean); + DiopiTensor rstdTensor(rstd); + DiopiTensor weightTensor(weight); + DiopiTensor biasTensor(bias); + DiopiTensor gradWeightTensor(gradWeight); + DiopiTensor gradBiasTensor(gradBias); + + diopiDtype_t outDtype = gradInputTensor.dtype(); + if (outDtype != diopi_dtype_float16 && outDtype != diopi_dtype_float32) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, gradOutputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, meanTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, rstdTensor, diopi_dtype_float32)); } - CnnlTensorDesc grad_inputDesc(grad_input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc grad_outputDesc(grad_output_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc meanDesc(mean_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradInputDesc(gradInputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradOutputDesc(gradOutputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc meanDesc(meanTensor, CNNL_LAYOUT_ARRAY); - void *weight_ptr = nullptr; - CnnlTensorDesc weight_biasDesc; - cnnlTensorDescriptor_t weight_bias_desc = nullptr; - void *grad_weight_ptr = nullptr; - void *grad_bias_ptr = nullptr; + void *weightPtr = nullptr; + CnnlTensorDesc weightBiasDesc; + cnnlTensorDescriptor_t weightBiasDescTmp = nullptr; + void *gradWeightPtr = nullptr; + void *gradBiasPtr = nullptr; if (weight != nullptr && bias != nullptr) { - if (out_dtype != diopi_dtype_float16 && out_dtype != diopi_dtype_float32) { - DIOPI_CALL(dataTypeCast(ctx, weight_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, grad_weight_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, grad_bias_tensor, diopi_dtype_float32)); + if (outDtype != diopi_dtype_float16 && outDtype != diopi_dtype_float32) { + DIOPI_CALL(dataTypeCast(ctx, weightTensor, diopi_dtype_float32)); + 
DIOPI_CALL(dataTypeCast(ctx, gradWeightTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, gradBiasTensor, diopi_dtype_float32)); } - weight_ptr = weight_tensor.data(); - grad_weight_ptr = grad_weight_tensor.data(); - grad_bias_ptr = grad_bias_tensor.data(); - weight_biasDesc.set(weight_tensor, CNNL_LAYOUT_ARRAY); - weight_bias_desc = weight_biasDesc.get(); + weightPtr = weightTensor.data(); + gradWeightPtr = gradWeightTensor.data(); + gradBiasPtr = gradBiasTensor.data(); + weightBiasDesc.set(weightTensor, CNNL_LAYOUT_ARRAY); + weightBiasDescTmp = weightBiasDesc.get(); } else { - weight_tensor = requiresTensor(ctx, normalized_shape, input_tensor.dtype()); - grad_weight_tensor = requiresTensor(ctx, normalized_shape, input_tensor.dtype()); - grad_bias_tensor = requiresTensor(ctx, normalized_shape, input_tensor.dtype()); + weightTensor = requiresTensor(ctx, normalizedShape, inputTensor.dtype()); + gradWeightTensor = requiresTensor(ctx, normalizedShape, inputTensor.dtype()); + gradBiasTensor = requiresTensor(ctx, normalizedShape, inputTensor.dtype()); diopiScalar_t one = {diopi_dtype_float32, 1}; diopiScalar_t zero = {diopi_dtype_float32, 0}; - DIOPI_CALL(diopiFill(ctx, diopiTensorHandle_t(weight_tensor), &one)); - DIOPI_CALL(diopiFill(ctx, diopiTensorHandle_t(grad_weight_tensor), &zero)); - DIOPI_CALL(diopiFill(ctx, diopiTensorHandle_t(grad_bias_tensor), &zero)); - weight_ptr = weight_tensor.data(); - weight_biasDesc.set(weight_tensor, CNNL_LAYOUT_ARRAY); - weight_bias_desc = weight_biasDesc.get(); - grad_weight_ptr = grad_weight_tensor.data(); - grad_bias_ptr = grad_bias_tensor.data(); + DIOPI_CALL(diopiFill(ctx, diopiTensorHandle_t(weightTensor), &one)); + DIOPI_CALL(diopiFill(ctx, diopiTensorHandle_t(gradWeightTensor), &zero)); + DIOPI_CALL(diopiFill(ctx, diopiTensorHandle_t(gradBiasTensor), &zero)); + weightPtr = weightTensor.data(); + weightBiasDesc.set(weightTensor, CNNL_LAYOUT_ARRAY); + weightBiasDescTmp = weightBiasDesc.get(); + gradWeightPtr = gradWeightTensor.data(); + gradBiasPtr = gradBiasTensor.data(); } - int axis = input_tensor.dim() - normalized_shape.len; + int axis = inputTensor.dim() - normalizedShape.len; - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetLayerNormBackwardWorkspaceSize(handle, inputDesc.get(), axis, &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetLayerNormBackwardWorkspaceSize(handle, inputDesc.get(), axis, &workspaceSize)); void *workspace; - if (workspace_size > 0) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (workspaceSize > 0) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlLayerNormBackward_v2(handle, inputDesc.get(), - input_tensor.data(), + inputTensor.data(), axis, - grad_outputDesc.get(), - grad_output_tensor.data(), - weight_bias_desc, - weight_ptr, + gradOutputDesc.get(), + gradOutputTensor.data(), + weightBiasDescTmp, + weightPtr, meanDesc.get(), - mean_tensor.data(), - rstd_tensor.data(), + meanTensor.data(), + rstdTensor.data(), workspace, - workspace_size, - grad_inputDesc.get(), - grad_input_tensor.data(), - grad_weight_ptr, - grad_bias_ptr)); - if (out_dtype != diopi_dtype_float16 && out_dtype != diopi_dtype_float32) { - DiopiTensor grad_input_tensor_(grad_input); - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor_, grad_input_tensor)); - if (grad_bias != nullptr && grad_weight != nullptr) { - DiopiTensor grad_weight_tensor_(grad_weight); - DiopiTensor grad_bias_tensor_(grad_bias); - DIOPI_CALL(dataTypeCast(ctx, grad_weight_tensor_, grad_weight_tensor)); - 
DIOPI_CALL(dataTypeCast(ctx, grad_bias_tensor_, grad_bias_tensor)); + workspaceSize, + gradInputDesc.get(), + gradInputTensor.data(), + gradWeightPtr, + gradBiasPtr)); + if (outDtype != diopi_dtype_float16 && outDtype != diopi_dtype_float32) { + DiopiTensor gradInputTensorTmp(gradInput); + DIOPI_CALL(dataTypeCast(ctx, gradInputTensorTmp, gradInputTensor)); + if (gradBias != nullptr && gradWeight != nullptr) { + DiopiTensor gradWeightTensorTmp(gradWeight); + DiopiTensor gradBiasTensorTmp(gradBias); + DIOPI_CALL(dataTypeCast(ctx, gradWeightTensorTmp, gradWeightTensor)); + DIOPI_CALL(dataTypeCast(ctx, gradBiasTensorTmp, gradBiasTensor)); } } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/linear.cpp b/DIOPI-IMPL/camb/functions/linear.cpp index b9a50e3ff..b0696ce97 100644 --- a/DIOPI-IMPL/camb/functions/linear.cpp +++ b/DIOPI-IMPL/camb/functions/linear.cpp @@ -16,109 +16,109 @@ namespace impl { namespace camb { namespace { -diopiError_t flatten_to_2d(std::vector in_dims, std::vector& out_dims) { - out_dims.resize(2); - if (in_dims.size() >= 2) { - out_dims[0] = std::accumulate(in_dims.begin(), in_dims.end() - 1, 1, std::multiplies()); - out_dims[1] = in_dims[in_dims.size() - 1]; +diopiError_t flattenTo2d(std::vector inDims, std::vector& outDims) { + outDims.resize(2); + if (inDims.size() >= 2) { + outDims[0] = std::accumulate(inDims.begin(), inDims.end() - 1, 1, std::multiplies<>()); + outDims[1] = inDims[inDims.size() - 1]; } else { return diopiErrorOccurred; } return diopiSuccess; } -diopiError_t matmul(diopiContextHandle_t ctx, DiopiTensor input_a, DiopiTensor input_b, DiopiTensor input_bias, DiopiTensor output, bool trans_a, - bool trans_b) { +diopiError_t matmul(diopiContextHandle_t ctx, DiopiTensor inputA, DiopiTensor inputB, DiopiTensor inputBias, DiopiTensor output, bool transA, + bool transB) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - std::vector input_shape, weight_shape, output_shape; - DIOPI_CALL(flatten_to_2d(input_a.shape(), input_shape)); - DIOPI_CALL(flatten_to_2d(input_b.shape(), weight_shape)); - DIOPI_CALL(flatten_to_2d(output.shape(), output_shape)); + std::vector inputShape, weightShape, outputShape; + DIOPI_CALL(flattenTo2d(inputA.shape(), inputShape)); + DIOPI_CALL(flattenTo2d(inputB.shape(), weightShape)); + DIOPI_CALL(flattenTo2d(output.shape(), outputShape)); - CnnlTensorDesc a_desc, b_desc, bias_desc, output_desc; - DIOPI_CALL(a_desc.set(input_a, CNNL_LAYOUT_ARRAY, input_shape)); - DIOPI_CALL(b_desc.set(input_b, CNNL_LAYOUT_ARRAY, weight_shape)); - DIOPI_CALL(output_desc.set(output, CNNL_LAYOUT_ARRAY, output_shape)); + CnnlTensorDesc aDesc, bDesc, biasDesc, outputDesc; + DIOPI_CALL(aDesc.set(inputA, CNNL_LAYOUT_ARRAY, inputShape)); + DIOPI_CALL(bDesc.set(inputB, CNNL_LAYOUT_ARRAY, weightShape)); + DIOPI_CALL(outputDesc.set(output, CNNL_LAYOUT_ARRAY, outputShape)); - CnnlResourceGuard matmul_desc; + CnnlResourceGuard matmulDesc; - cnnlDataType_t comp_type; + cnnlDataType_t compType; if (output.dtype() == diopi_dtype_float32) { - comp_type = CNNL_DTYPE_FLOAT; + compType = CNNL_DTYPE_FLOAT; } else if (output.dtype() == diopi_dtype_float16) { - comp_type = CNNL_DTYPE_HALF; + compType = CNNL_DTYPE_HALF; } else { - set_last_error_string("%s", "matmul on support float or half."); + setLastErrorString("%s", "matmul on support float or half."); return diopiDtypeNotSupported; } - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc.get(), CNNL_MATMUL_DESC_COMPUTE_TYPE, &(comp_type), sizeof(cnnlDataType_t))); + 
DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc.get(), CNNL_MATMUL_DESC_COMPUTE_TYPE, &(compType), sizeof(cnnlDataType_t))); - int32_t is_transa = 0; - if (trans_a) { - is_transa = 1; + int32_t isTransa = 0; + if (transA) { + isTransa = 1; } - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc.get(), CNNL_MATMUL_DESC_TRANSA, &(is_transa), sizeof(int32_t))); + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc.get(), CNNL_MATMUL_DESC_TRANSA, &(isTransa), sizeof(int32_t))); - int32_t is_transb = 0; - if (trans_b) { - is_transb = 1; + int32_t isTransb = 0; + if (transB) { + isTransb = 1; } - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc.get(), CNNL_MATMUL_DESC_TRANSB, &(is_transb), sizeof(int32_t))); + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc.get(), CNNL_MATMUL_DESC_TRANSB, &(isTransb), sizeof(int32_t))); - int32_t allow_tf32_i32 = 0; - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc.get(), CNNL_MATMUL_ALLOW_TF32, &(allow_tf32_i32), sizeof(int32_t))); + int32_t allowTf32I32 = 0; + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc.get(), CNNL_MATMUL_ALLOW_TF32, &(allowTf32I32), sizeof(int32_t))); - int32_t use_beta = 0; + int32_t useBeta = 0; float beta = 0.0; - void* bias_ptr = nullptr; - if (input_bias.defined()) { - use_beta = 1; + void* biasPtr = nullptr; + if (inputBias.defined()) { + useBeta = 1; beta = 1.0; - bias_ptr = input_bias.data(); - DIOPI_CALL(bias_desc.set(input_bias, CNNL_LAYOUT_ARRAY)); - DIOPI_CALLCNNL(cnnlExpand(handle, bias_desc.get(), input_bias.data(), output_desc.get(), output.data())); + biasPtr = inputBias.data(); + DIOPI_CALL(biasDesc.set(inputBias, CNNL_LAYOUT_ARRAY)); + DIOPI_CALLCNNL(cnnlExpand(handle, biasDesc.get(), inputBias.data(), outputDesc.get(), output.data())); } - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc.get(), CNNL_MATMUL_USE_BETA, &(use_beta), sizeof(int32_t))); + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc.get(), CNNL_MATMUL_USE_BETA, &(useBeta), sizeof(int32_t))); - size_t workspace_size = 0; + size_t workspaceSize = 0; int requestedAlgoCount = 1; int returnAlgoCount = 0; - CnnlResourceGuard heuristic_result; + CnnlResourceGuard heuristicResult; CnnlResourceGuard algo; DIOPI_CALLCNNL(cnnlGetMatMulAlgoHeuristic(handle, - matmul_desc.get(), - a_desc.get(), - b_desc.get(), - output_desc.get(), - output_desc.get(), + matmulDesc.get(), + aDesc.get(), + bDesc.get(), + outputDesc.get(), + outputDesc.get(), nullptr, requestedAlgoCount, - &heuristic_result.get(), + &heuristicResult.get(), &returnAlgoCount)); - DIOPI_CALLCNNL(cnnlGetMatMulHeuristicResult(heuristic_result.get(), algo.get(), &workspace_size)); + DIOPI_CALLCNNL(cnnlGetMatMulHeuristicResult(heuristicResult.get(), algo.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - float alpha_default = 1.0; + float alphaDefault = 1.0; DIOPI_CALLCNNL(cnnlMatMul_v2(handle, - matmul_desc.get(), + matmulDesc.get(), algo.get(), - &alpha_default, - a_desc.get(), - input_a.data(), - b_desc.get(), - input_b.data(), + &alphaDefault, + aDesc.get(), + inputA.data(), + bDesc.get(), + inputB.data(), &beta, - output_desc.get(), + outputDesc.get(), output.data(), workspace, - workspace_size, - output_desc.get(), + workspaceSize, + outputDesc.get(), output.data())); return diopiSuccess; @@ -128,81 +128,81 @@ diopiError_t matmul(diopiContextHandle_t ctx, DiopiTensor input_a, DiopiTensor i extern "C" diopiError_t 
diopiLinear(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight, diopiConstTensorHandle_t bias) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor weight_tensor(weight); - DiopiTensor bias_tensor(bias); - DiopiTensor output_tensor(out); - DiopiTensor out_temp = output_tensor; - - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, out_temp, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, weight_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + DiopiTensor weightTensor(weight); + DiopiTensor biasTensor(bias); + DiopiTensor outputTensor(out); + DiopiTensor outTemp = outputTensor; + + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, outTemp, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, weightTensor, diopi_dtype_float32)); if (bias != nullptr) { - DIOPI_CALL(dataTypeCast(ctx, bias_tensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, biasTensor, diopi_dtype_float32)); } } - DIOPI_CALL(matmul(ctx, input_tensor, weight_tensor, bias_tensor, out_temp, false, true)); - if (out_temp.dtype() != output_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output_tensor, out_temp)); + DIOPI_CALL(matmul(ctx, inputTensor, weightTensor, biasTensor, outTemp, false, true)); + if (outTemp.dtype() != outputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outputTensor, outTemp)); } return diopiSuccess; } -extern "C" diopiError_t diopiLinearBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiTensorHandle_t grad_weight, - diopiTensorHandle_t grad_bias, diopiConstTensorHandle_t grad_output, diopiConstTensorHandle_t input, +extern "C" diopiError_t diopiLinearBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiTensorHandle_t gradWeight, + diopiTensorHandle_t gradBias, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, diopiConstTensorHandle_t weight) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_weight_tensor(grad_weight); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor input_tensor(input); - DiopiTensor weight_tensor(weight); - DiopiTensor grad_input_temp = grad_input_tensor; - DiopiTensor grad_weight_temp = grad_weight_tensor; - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_temp, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, grad_weight_temp, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, grad_output_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, weight_tensor, diopi_dtype_float32)); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradWeightTensor(gradWeight); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor inputTensor(input); + DiopiTensor weightTensor(weight); + DiopiTensor gradInputTemp = gradInputTensor; + DiopiTensor gradWeightTemp = gradWeightTensor; + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTemp, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, gradWeightTemp, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, gradOutputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + 
DIOPI_CALL(dataTypeCast(ctx, weightTensor, diopi_dtype_float32)); } - DiopiTensor bias_tensor((diopiTensorHandle_t) nullptr); + DiopiTensor biasTensor((diopiTensorHandle_t) nullptr); - DIOPI_CALL(matmul(ctx, grad_output_tensor, input_tensor, bias_tensor, grad_weight_temp, true, false)); - if (grad_weight_temp.dtype() != grad_weight_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, grad_weight_tensor, grad_weight_temp)); + DIOPI_CALL(matmul(ctx, gradOutputTensor, inputTensor, biasTensor, gradWeightTemp, true, false)); + if (gradWeightTemp.dtype() != gradWeightTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, gradWeightTensor, gradWeightTemp)); } - DIOPI_CALL(matmul(ctx, grad_output_tensor, weight_tensor, bias_tensor, grad_input_temp, false, false)); - if (grad_input_temp.dtype() != grad_input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, grad_input_temp)); + DIOPI_CALL(matmul(ctx, gradOutputTensor, weightTensor, biasTensor, gradInputTemp, false, false)); + if (gradInputTemp.dtype() != gradInputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, gradInputTemp)); } - if (grad_bias != nullptr) { - DiopiTensor bias_grad_tensor(grad_bias); - DiopiTensor bias_grad_temp = bias_grad_tensor; - if (bias_grad_temp.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, bias_grad_temp, diopi_dtype_float32)); + if (gradBias != nullptr) { + DiopiTensor biasGradTensor(gradBias); + DiopiTensor biasGradTemp = biasGradTensor; + if (biasGradTemp.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, biasGradTemp, diopi_dtype_float32)); } - CnnlTensorDesc bias_grad_desc; - DIOPI_CALL(bias_grad_desc.set(bias_grad_temp, CNNL_LAYOUT_ARRAY)); + CnnlTensorDesc biasGradDesc; + DIOPI_CALL(biasGradDesc.set(biasGradTemp, CNNL_LAYOUT_ARRAY)); - std::vector output_shape; - DIOPI_CALL(flatten_to_2d(grad_output_tensor.shape(), output_shape)); - CnnlTensorDesc grad_output_desc; - DIOPI_CALL(grad_output_desc.set(grad_output_tensor, CNNL_LAYOUT_ARRAY, output_shape)); + std::vector outputShape; + DIOPI_CALL(flattenTo2d(gradOutputTensor.shape(), outputShape)); + CnnlTensorDesc gradOutputDesc; + DIOPI_CALL(gradOutputDesc.set(gradOutputTensor, CNNL_LAYOUT_ARRAY, outputShape)); - size_t workspace_size_bias; - DIOPI_CALLCNNL(cnnlGetBiasAddBackwardWorkspaceSize(handle, grad_output_desc.get(), bias_grad_desc.get(), 3, &workspace_size_bias)) + size_t workspaceSizeBias; + DIOPI_CALLCNNL(cnnlGetBiasAddBackwardWorkspaceSize(handle, gradOutputDesc.get(), biasGradDesc.get(), 3, &workspaceSizeBias)) - void* workspace_bias = nullptr; - if (0 != workspace_size_bias) { - workspace_bias = requiresBuffer(ctx, workspace_size_bias).data(); + void* workspaceBias = nullptr; + if (0 != workspaceSizeBias) { + workspaceBias = requiresBuffer(ctx, workspaceSizeBias).data(); } DIOPI_CALLCNNL(cnnlBiasAddBackward_v2( - handle, grad_output_desc.get(), grad_output_tensor.data(), 1, bias_grad_desc.get(), bias_grad_temp.data(), workspace_bias, workspace_size_bias)); - if (bias_grad_tensor.dtype() != bias_grad_temp.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, bias_grad_tensor, bias_grad_temp)); + handle, gradOutputDesc.get(), gradOutputTensor.data(), 1, biasGradDesc.get(), biasGradTemp.data(), workspaceBias, workspaceSizeBias)); + if (biasGradTensor.dtype() != biasGradTemp.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, biasGradTensor, biasGradTemp)); } } diff --git a/DIOPI-IMPL/camb/functions/linspace.cpp b/DIOPI-IMPL/camb/functions/linspace.cpp index 0c8bf0093..446ad9b67 100644 --- 
a/DIOPI-IMPL/camb/functions/linspace.cpp +++ b/DIOPI-IMPL/camb/functions/linspace.cpp @@ -5,32 +5,32 @@ namespace camb { extern "C" diopiError_t diopiLinspace(diopiContextHandle_t ctx, diopiTensorHandle_t out, const diopiScalar_t* start, const diopiScalar_t* end, int64_t steps) { auto handle = cnnlHandlePool.get(ctx); - DiopiTensor out_tensor(out); + DiopiTensor outTensor(out); - float start_value, end_value; + float startValue, endValue; - cnnlDataType_t start_type, end_type; - DIOPI_CALL(CnnlDataType::convertToCnnlType(&start_type, start->stype)); - DIOPI_CALL(CnnlDataType::convertToCnnlType(&end_type, end->stype)); + cnnlDataType_t startType, endType; + DIOPI_CALL(CnnlDataType::convertToCnnlType(&startType, start->stype)); + DIOPI_CALL(CnnlDataType::convertToCnnlType(&endType, end->stype)); - if (CnnlDataType::isFloatPoint(start_type)) { - start_value = start->fval; - } else if (CnnlDataType::isInteger(start_type)) { - start_value = start->ival; + if (CnnlDataType::isFloatPoint(startType)) { + startValue = start->fval; + } else if (CnnlDataType::isInteger(startType)) { + startValue = start->ival; } else { return diopiDtypeNotSupported; } - if (CnnlDataType::isFloatPoint(end_type)) { - end_value = end->fval; - } else if (CnnlDataType::isInteger(start_type)) { - end_value = end->ival; + if (CnnlDataType::isFloatPoint(endType)) { + endValue = end->fval; + } else if (CnnlDataType::isInteger(startType)) { + endValue = end->ival; } else { return diopiDtypeNotSupported; } - CnnlTensorDesc out_desc(out_tensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlLinspace(handle, start_value, end_value, out_desc.get(), out_tensor.data())); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlLinspace(handle, startValue, endValue, outDesc.get(), outTensor.data())); return diopiSuccess; } } // namespace camb diff --git a/DIOPI-IMPL/camb/functions/log.cpp b/DIOPI-IMPL/camb/functions/log.cpp index 39cf7be85..44a5db173 100644 --- a/DIOPI-IMPL/camb/functions/log.cpp +++ b/DIOPI-IMPL/camb/functions/log.cpp @@ -1,66 +1,68 @@ #include -#include + +#include #include + #include "../cnnl_helper.hpp" #include "../common/common.hpp" namespace impl { namespace camb { -extern "C" { -diopiError_t Log(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, cnnlLogBase_t log_base) { +diopiError_t logInternal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, cnnlLogBase_t logBase) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); - CnnlTensorDesc input_desc(input_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlLog(handle, log_base, input_desc.get(), input_tensor_tmp.data(), out_desc.get(), 
out_tensor_tmp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + DIOPI_CALLCNNL(cnnlLog(handle, logBase, inputDesc.get(), inputTensorTmp.data(), outDesc.get(), outTensorTmp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } -diopiError_t LogInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, cnnlLogBase_t log_base) { - DIOPI_CALL(Log(ctx, input, input, log_base)); +diopiError_t logInpInternal(diopiContextHandle_t ctx, diopiTensorHandle_t input, cnnlLogBase_t logBase) { + DIOPI_CALL(logInternal(ctx, input, input, logBase)); return diopiSuccess; } +extern "C" { diopiError_t diopiLogInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DIOPI_CALL(LogInp(ctx, input, CNNL_LOG_E)); + DIOPI_CALL(logInpInternal(ctx, input, CNNL_LOG_E)); return diopiSuccess; } diopiError_t diopiLog(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DIOPI_CALL(Log(ctx, out, input, CNNL_LOG_E)); + DIOPI_CALL(logInternal(ctx, out, input, CNNL_LOG_E)); return diopiSuccess; } diopiError_t diopiLog2Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DIOPI_CALL(LogInp(ctx, input, CNNL_LOG_2)); + DIOPI_CALL(logInpInternal(ctx, input, CNNL_LOG_2)); return diopiSuccess; } diopiError_t diopiLog2(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DIOPI_CALL(Log(ctx, out, input, CNNL_LOG_2)); + DIOPI_CALL(logInternal(ctx, out, input, CNNL_LOG_2)); return diopiSuccess; } diopiError_t diopiLog10Inp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DIOPI_CALL(LogInp(ctx, input, CNNL_LOG_10)); + DIOPI_CALL(logInpInternal(ctx, input, CNNL_LOG_10)); return diopiSuccess; } diopiError_t diopiLog10(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DIOPI_CALL(Log(ctx, out, input, CNNL_LOG_10)); + DIOPI_CALL(logInternal(ctx, out, input, CNNL_LOG_10)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/logic.cpp b/DIOPI-IMPL/camb/functions/logic.cpp index e6a52dfda..3eca1987b 100644 --- a/DIOPI-IMPL/camb/functions/logic.cpp +++ b/DIOPI-IMPL/camb/functions/logic.cpp @@ -5,8 +5,8 @@ */ #include -#include +#include #include #include "../cnnl_helper.hpp" @@ -17,263 +17,263 @@ namespace camb { extern "C" { -diopiError_t Logic(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other, - cnnlLogicOp_t logic_op) { +diopiError_t logic(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other, + cnnlLogicOp_t logicOp) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor, &other_tensor}; + std::vector pTensors{&inputTensor, &otherTensor}; std::set supportedDtypes{diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (outTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_desc(other_tensor, CNNL_LAYOUT_ARRAY); - 
CnnlTensorDesc out_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherDesc(otherTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetLogicOpWorkspaceSize(handle, input_desc.get(), other_desc.get(), out_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetLogicOpWorkspaceSize(handle, inputDesc.get(), otherDesc.get(), outDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlLogicOp(handle, - logic_op, - input_desc.get(), - input_tensor.data(), - other_desc.get(), - other_tensor.data(), + logicOp, + inputDesc.get(), + inputTensor.data(), + otherDesc.get(), + otherTensor.data(), workspace, - workspace_size, - out_desc.get(), - out_tensor_temp.data())); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + workspaceSize, + outDesc.get(), + outTensorTemp.data())); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } -diopiError_t LogicInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other, cnnlLogicOp_t logic_op) { - DIOPI_CALL(Logic(ctx, input, input, other, logic_op)); +diopiError_t logicInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other, cnnlLogicOp_t logicOp) { + DIOPI_CALL(logic(ctx, input, input, other, logicOp)); return diopiSuccess; } -diopiError_t LogicScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other, - cnnlLogicOp_t logic_op) { +diopiError_t logicScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other, + cnnlLogicOp_t logicOp) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - diopiTensorHandle_t other_t; - diopiSize_t input_shape; - DIOPI_CALL(diopiGetTensorShape(input, &input_shape)); - DIOPI_CALL(diopiRequireTensor(ctx, &other_t, &input_shape, nullptr, input_tensor.dtype(), diopi_device)); - DIOPI_CALL(diopiFill(ctx, other_t, other)); - DiopiTensor other_t_tensor(other_t); + diopiTensorHandle_t otherT; + diopiSize_t inputShape; + DIOPI_CALL(diopiGetTensorShape(input, &inputShape)); + DIOPI_CALL(diopiRequireTensor(ctx, &otherT, &inputShape, nullptr, inputTensor.dtype(), diopi_device)); + DIOPI_CALL(diopiFill(ctx, otherT, other)); + DiopiTensor otherTTensor(otherT); - std::vector pTensors{&input_tensor, &other_t_tensor}; + std::vector pTensors{&inputTensor, &otherTTensor}; std::set supportedDtypes{diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (outTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_t_desc(other_t_tensor, CNNL_LAYOUT_ARRAY); - 
CnnlTensorDesc out_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherTDesc(otherTTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetLogicOpWorkspaceSize(handle, input_desc.get(), other_t_desc.get(), out_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetLogicOpWorkspaceSize(handle, inputDesc.get(), otherTDesc.get(), outDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlLogicOp(handle, - logic_op, - input_desc.get(), - input_tensor.data(), - other_t_desc.get(), - other_t_tensor.data(), + logicOp, + inputDesc.get(), + inputTensor.data(), + otherTDesc.get(), + otherTTensor.data(), workspace, - workspace_size, - out_desc.get(), - out_tensor_temp.data())); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + workspaceSize, + outDesc.get(), + outTensorTemp.data())); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } -diopiError_t LogicInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other, cnnlLogicOp_t logic_op) { - DIOPI_CALL(LogicScalar(ctx, input, input, other, logic_op)); +diopiError_t logicInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other, cnnlLogicOp_t logicOp) { + DIOPI_CALL(logicScalar(ctx, input, input, other, logicOp)); return diopiSuccess; } // ge diopiError_t diopiGeScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicScalar(ctx, out, input, other, CNNL_LOGIC_OP_GE)); + DIOPI_CALL(logicScalar(ctx, out, input, other, CNNL_LOGIC_OP_GE)); return diopiSuccess; } diopiError_t diopiGeInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicInpScalar(ctx, input, other, CNNL_LOGIC_OP_GE)); + DIOPI_CALL(logicInpScalar(ctx, input, other, CNNL_LOGIC_OP_GE)); return diopiSuccess; } diopiError_t diopiGe(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_GE)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_GE)); return diopiSuccess; } diopiError_t diopiGeInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_GE)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_GE)); return diopiSuccess; } // gt diopiError_t diopiGtScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicScalar(ctx, out, input, other, CNNL_LOGIC_OP_GT)); + DIOPI_CALL(logicScalar(ctx, out, input, other, CNNL_LOGIC_OP_GT)); return diopiSuccess; } diopiError_t diopiGtInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicInpScalar(ctx, input, other, CNNL_LOGIC_OP_GT)); + DIOPI_CALL(logicInpScalar(ctx, input, other, CNNL_LOGIC_OP_GT)); return diopiSuccess; } diopiError_t diopiGt(diopiContextHandle_t ctx, diopiTensorHandle_t 
out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_GT)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_GT)); return diopiSuccess; } diopiError_t diopiGtInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_GT)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_GT)); return diopiSuccess; } // le diopiError_t diopiLeScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicScalar(ctx, out, input, other, CNNL_LOGIC_OP_LE)); + DIOPI_CALL(logicScalar(ctx, out, input, other, CNNL_LOGIC_OP_LE)); return diopiSuccess; } diopiError_t diopiLeInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicInpScalar(ctx, input, other, CNNL_LOGIC_OP_LE)); + DIOPI_CALL(logicInpScalar(ctx, input, other, CNNL_LOGIC_OP_LE)); return diopiSuccess; } diopiError_t diopiLe(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_LE)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_LE)); return diopiSuccess; } diopiError_t diopiLeInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_LE)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_LE)); return diopiSuccess; } // lt diopiError_t diopiLtScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicScalar(ctx, out, input, other, CNNL_LOGIC_OP_LT)); + DIOPI_CALL(logicScalar(ctx, out, input, other, CNNL_LOGIC_OP_LT)); return diopiSuccess; } diopiError_t diopiLtInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicInpScalar(ctx, input, other, CNNL_LOGIC_OP_LT)); + DIOPI_CALL(logicInpScalar(ctx, input, other, CNNL_LOGIC_OP_LT)); return diopiSuccess; } diopiError_t diopiLt(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_LT)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_LT)); return diopiSuccess; } diopiError_t diopiLtInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_LT)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_LT)); return diopiSuccess; } // ne diopiError_t diopiNeScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicScalar(ctx, out, input, other, CNNL_LOGIC_OP_NE)); + DIOPI_CALL(logicScalar(ctx, out, input, other, CNNL_LOGIC_OP_NE)); return diopiSuccess; } diopiError_t diopiNeInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicInpScalar(ctx, input, other, CNNL_LOGIC_OP_NE)); + DIOPI_CALL(logicInpScalar(ctx, input, other, CNNL_LOGIC_OP_NE)); return diopiSuccess; } diopiError_t diopiNe(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_NE)); + DIOPI_CALL(logic(ctx, out, input, other, 
CNNL_LOGIC_OP_NE)); return diopiSuccess; } diopiError_t diopiNeInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_NE)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_NE)); return diopiSuccess; } // eq diopiError_t diopiEqScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicScalar(ctx, out, input, other, CNNL_LOGIC_OP_EQ)); + DIOPI_CALL(logicScalar(ctx, out, input, other, CNNL_LOGIC_OP_EQ)); return diopiSuccess; } diopiError_t diopiEqInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other) { - DIOPI_CALL(LogicInpScalar(ctx, input, other, CNNL_LOGIC_OP_EQ)); + DIOPI_CALL(logicInpScalar(ctx, input, other, CNNL_LOGIC_OP_EQ)); return diopiSuccess; } diopiError_t diopiEq(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_EQ)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_EQ)); return diopiSuccess; } diopiError_t diopiEqInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_EQ)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_EQ)); return diopiSuccess; } // logical_and diopiError_t diopiLogicalAnd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_AND)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_AND)); return diopiSuccess; } diopiError_t diopiLogicalAndInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_AND)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_AND)); return diopiSuccess; } // logical_or diopiError_t diopiLogicalOr(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(Logic(ctx, out, input, other, CNNL_LOGIC_OP_OR)); + DIOPI_CALL(logic(ctx, out, input, other, CNNL_LOGIC_OP_OR)); return diopiSuccess; } diopiError_t diopiLogicalOrInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other) { - DIOPI_CALL(LogicInp(ctx, input, other, CNNL_LOGIC_OP_OR)); + DIOPI_CALL(logicInp(ctx, input, other, CNNL_LOGIC_OP_OR)); return diopiSuccess; } // logical_not diopiError_t diopiLogicalNot(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DIOPI_CALL(Logic(ctx, out, input, input, CNNL_LOGIC_OP_NOT)); + DIOPI_CALL(logic(ctx, out, input, input, CNNL_LOGIC_OP_NOT)); return diopiSuccess; } diopiError_t diopiLogicalNotInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DIOPI_CALL(LogicInp(ctx, input, input, CNNL_LOGIC_OP_NOT)); + DIOPI_CALL(logicInp(ctx, input, input, CNNL_LOGIC_OP_NOT)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/loss.cpp b/DIOPI-IMPL/camb/functions/loss.cpp index f7a1622ec..0506c0a53 100644 --- a/DIOPI-IMPL/camb/functions/loss.cpp +++ b/DIOPI-IMPL/camb/functions/loss.cpp @@ -16,281 +16,281 @@ namespace camb { extern "C" { diopiError_t diopiNLLLoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, - diopiConstTensorHandle_t weight, diopiReduction_t reduction, int64_t 
ignore_index) { + diopiConstTensorHandle_t weight, diopiReduction_t reduction, int64_t ignoreIndex) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); - DiopiTensor target_tr(target); - DiopiTensor weight_tr(weight); - if (!weight_tr.defined()) { - weight_tr = ones(ctx, {input_tr.shape()[1]}, input_tr.dtype()); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); + DiopiTensor targetTr(target); + DiopiTensor weightTr(weight); + if (!weightTr.defined()) { + weightTr = ones(ctx, {inputTr.shape()[1]}, inputTr.dtype()); } - DIOPI_CHECK(input_tr.numel() != 0, "input tensor is empty") - DIOPI_CHECK(input_tr.is_contiguous(), "input tensor should be contiguous"); - DIOPI_CHECK(weight_tr.is_contiguous(), "weight tensor should be contiguous"); - DIOPI_CHECK(target_tr.is_contiguous(), "input tensor should be contiguous"); + DIOPI_CHECK(inputTr.numel() != 0, "input tensor is empty") + DIOPI_CHECK(inputTr.isContiguous(), "input tensor should be contiguous"); + DIOPI_CHECK(weightTr.isContiguous(), "weight tensor should be contiguous"); + DIOPI_CHECK(targetTr.isContiguous(), "input tensor should be contiguous"); if (ReductionMean == reduction || ReductionSum == reduction) { - DIOPI_CHECK(output_tr.dim() <= 1, "output.dim should be <= 1 when the redcution is %s.", reductionStr(reduction)); + DIOPI_CHECK(outputTr.dim() <= 1, "output.dim should be <= 1 when the redcution is %s.", reductionStr(reduction)); } - std::vector p_tensors{&input_tr, &weight_tr}; - std::set supported_dtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes)); + std::vector pTensors{&inputTr, &weightTr}; + std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp_tr = output_tr; - if (input_tr.dtype() != output_tr.dtype()) { - output_tmp_tr = requiresTensor(ctx, output_tr.shape(), input_tr.dtype()); + DiopiTensor outputTmpTr = outputTr; + if (inputTr.dtype() != outputTr.dtype()) { + outputTmpTr = requiresTensor(ctx, outputTr.shape(), inputTr.dtype()); } - if (target_tr.dtype() != diopi_dtype_int32) { - DIOPI_CALL(dataTypeCast(ctx, target_tr, diopi_dtype_int32)); + if (targetTr.dtype() != diopi_dtype_int32) { + DIOPI_CALL(dataTypeCast(ctx, targetTr, diopi_dtype_int32)); } - auto input_contiguous = input_tr; + auto inputContiguous = inputTr; - auto dim = input_tr.dim(); + auto dim = inputTr.dim(); if (dim == 2 || dim == 1) { - DIOPI_CHECK(target_tr.dim() == 1, "1D target_tr tensor expected, multi-target_tr not supported"); - DIOPI_CHECK(input_tr.shape()[0] == target_tr.shape()[0], "size mismatch "); - DIOPI_CHECK(!weight_tr.defined() || weight_tr.numel() == input_tr.shape()[1], + DIOPI_CHECK(targetTr.dim() == 1, "1D target_tr tensor expected, multi-target_tr not supported"); + DIOPI_CHECK(inputTr.shape()[0] == targetTr.shape()[0], "size mismatch "); + DIOPI_CHECK(!weightTr.defined() || weightTr.numel() == inputTr.shape()[1], "weight_tr tensor should be defined either for all classes or no classes"); } else if (dim == 4) { - input_contiguous = input_tr.contiguous(ctx, MemoryFormat::ChannelsLast); - DIOPI_CALL(cnnl_transpose(ctx, handle, input_tr, input_contiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); + inputContiguous = inputTr.contiguous(ctx, MemoryFormat::ChannelsLast); + DIOPI_CALL(cnnlTranspose(ctx, handle, inputTr, inputContiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); } else if (dim == 3) { - int64_t 
input_last_size = 1; - for (int i = 2; i < input_tr.dim(); ++i) { - input_last_size *= input_tr.shape()[i]; + int64_t inputLastSize = 1; + for (int i = 2; i < inputTr.dim(); ++i) { + inputLastSize *= inputTr.shape()[i]; } - input_tr.reshape({input_tr.shape()[0], input_tr.shape()[1], 1, input_last_size}); + inputTr.reshape({inputTr.shape()[0], inputTr.shape()[1], 1, inputLastSize}); - input_contiguous = input_tr.contiguous(ctx, MemoryFormat::ChannelsLast); - DIOPI_CALL(cnnl_transpose(ctx, handle, input_tr, input_contiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); + inputContiguous = inputTr.contiguous(ctx, MemoryFormat::ChannelsLast); + DIOPI_CALL(cnnlTranspose(ctx, handle, inputTr, inputContiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); } else { DIOPI_CHECK(false, "unexpected input tensor dim") } - auto input_size = input_contiguous.shape(); - int C = input_size[1]; - int N = std::accumulate(input_size.begin(), input_size.end(), 1, std::multiplies()) / C; - DIOPI_CHECK(N == target_tr.numel(), "Target size need be equal as input N*H*W."); - DIOPI_CHECK(C == weight_tr.numel(), "Weight size need be equal as input C."); - std::vector output_size(input_size.begin(), input_size.end()); + auto inputSize = inputContiguous.shape(); + int c = inputSize[1]; + int n = std::accumulate(inputSize.begin(), inputSize.end(), 1, std::multiplies<>()) / c; + DIOPI_CHECK(n == targetTr.numel(), "Target size need be equal as input N*H*W."); + DIOPI_CHECK(c == weightTr.numel(), "Weight size need be equal as input C."); + std::vector outputSize(inputSize.begin(), inputSize.end()); - cnnlNlllossAlgorithm_t reduction_mode; + cnnlNlllossAlgorithm_t reductionMode; switch (reduction) { case 0: { - reduction_mode = CNNL_REDUCTION_NONE; - output_size.erase(output_size.begin() + 1); + reductionMode = CNNL_REDUCTION_NONE; + outputSize.erase(outputSize.begin() + 1); break; } case 1: { - reduction_mode = CNNL_REDUCTION_MEAN; - output_size = {1}; + reductionMode = CNNL_REDUCTION_MEAN; + outputSize = {1}; break; } case 2: { - reduction_mode = CNNL_REDUCTION_SUM; - output_size = {1}; + reductionMode = CNNL_REDUCTION_SUM; + outputSize = {1}; break; } default: DIOPI_CHECK(false, "unexpected nll_loss reduciton mode"); } - auto total_weight_tr = requiresTensor(ctx, {1}, weight_tr.dtype()); - diopiScalar_t scalar({weight_tr.dtype(), static_cast(target_tr.numel())}); - DIOPI_CALL(diopiFill(ctx, total_weight_tr.tensorHandle(), &scalar)); - - CnnlTensorDesc input_desc; - CnnlTensorDesc target_desc; - CnnlTensorDesc weight_desc(weight_tr, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc tw_desc(total_weight_tr, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc; - input_desc.set(input_contiguous, CNNL_LAYOUT_ARRAY, {N, C}); - target_desc.set(target_tr, CNNL_LAYOUT_ARRAY, {N}); - output_desc.set(output_tmp_tr, CNNL_LAYOUT_ARRAY, output_size); - - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetNlllossWorkspaceSize(handle, input_desc.get(), &workspace_size)); - void* workspace_ptr = workspace_size == 0 ? 
nullptr : requiresBuffer(ctx, workspace_size).data(); + auto totalWeightTr = requiresTensor(ctx, {1}, weightTr.dtype()); + diopiScalar_t scalar({weightTr.dtype(), static_cast(targetTr.numel())}); + DIOPI_CALL(diopiFill(ctx, totalWeightTr.tensorHandle(), &scalar)); + + CnnlTensorDesc inputDesc; + CnnlTensorDesc targetDesc; + CnnlTensorDesc weightDesc(weightTr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc twDesc(totalWeightTr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc; + inputDesc.set(inputContiguous, CNNL_LAYOUT_ARRAY, {n, c}); + targetDesc.set(targetTr, CNNL_LAYOUT_ARRAY, {n}); + outputDesc.set(outputTmpTr, CNNL_LAYOUT_ARRAY, outputSize); + + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetNlllossWorkspaceSize(handle, inputDesc.get(), &workspaceSize)); + void* workspacePtr = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlNlllossForward(handle, - reduction_mode, - workspace_ptr, - workspace_size, - input_desc.get(), - input_contiguous.data(), - target_desc.get(), - target_tr.data(), - static_cast(ignore_index), - weight_desc.get(), - weight_tr.data(), - tw_desc.get(), - total_weight_tr.data(), - output_desc.get(), - output_tmp_tr.data())); - - if (output_tmp_tr.dtype() != output_tr.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output_tr, output_tmp_tr)); + reductionMode, + workspacePtr, + workspaceSize, + inputDesc.get(), + inputContiguous.data(), + targetDesc.get(), + targetTr.data(), + static_cast(ignoreIndex), + weightDesc.get(), + weightTr.data(), + twDesc.get(), + totalWeightTr.data(), + outputDesc.get(), + outputTmpTr.data())); + + if (outputTmpTr.dtype() != outputTr.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outputTr, outputTmpTr)); } return diopiSuccess; } -diopiError_t diopiNLLLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +diopiError_t diopiNLLLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight, diopiReduction_t reduction, - int64_t ignore_index) { + int64_t ignoreIndex) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tr(input); - DiopiTensor grad_input_tr(grad_input); - DiopiTensor grad_output_tr(grad_output); - DiopiTensor target_tr(target); - DiopiTensor weight_tr(weight); - - if (!weight_tr.defined()) { - weight_tr = ones(ctx, {input_tr.shape()[1]}, input_tr.dtype()); + DiopiTensor inputTr(input); + DiopiTensor gradInputTr(gradInput); + DiopiTensor gradOutputTr(gradOutput); + DiopiTensor targetTr(target); + DiopiTensor weightTr(weight); + + if (!weightTr.defined()) { + weightTr = ones(ctx, {inputTr.shape()[1]}, inputTr.dtype()); } - DIOPI_CHECK(input_tr.numel() != 0, "input tensor is empty") - DIOPI_CHECK(input_tr.is_contiguous(), "input tensor should be contiguous"); - DIOPI_CHECK(weight_tr.is_contiguous(), "weight tensor should be contiguous"); - DIOPI_CHECK(target_tr.is_contiguous(), "input tensor should be contiguous"); + DIOPI_CHECK(inputTr.numel() != 0, "input tensor is empty") + DIOPI_CHECK(inputTr.isContiguous(), "input tensor should be contiguous"); + DIOPI_CHECK(weightTr.isContiguous(), "weight tensor should be contiguous"); + DIOPI_CHECK(targetTr.isContiguous(), "input tensor should be contiguous"); if (ReductionMean == reduction || ReductionSum == reduction) { - DIOPI_CHECK(grad_output_tr.dim() <= 1, "grad_output.dim should be <= 1 when the redcution is %s.", reductionStr(reduction)); + 
DIOPI_CHECK(gradOutputTr.dim() <= 1, "grad_output.dim should be <= 1 when the redcution is %s.", reductionStr(reduction)); } - std::vector p_tensors{&grad_output_tr, &weight_tr, &input_tr}; - std::set supported_dtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes)); + std::vector pTensors{&gradOutputTr, &weightTr, &inputTr}; + std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - if (target_tr.dtype() != diopi_dtype_int32) { - DIOPI_CALL(dataTypeCast(ctx, target_tr, diopi_dtype_int32)); + if (targetTr.dtype() != diopi_dtype_int32) { + DIOPI_CALL(dataTypeCast(ctx, targetTr, diopi_dtype_int32)); } - auto input_contiguous = input_tr; + auto inputContiguous = inputTr; - auto dim = input_tr.dim(); + auto dim = inputTr.dim(); if (dim == 2 || dim == 1) { - DIOPI_CHECK(target_tr.dim() == 1, "1D target_tr tensor expected, multi-target_tr not supported"); - DIOPI_CHECK(input_tr.shape()[0] == target_tr.shape()[0], "size mismatch "); - DIOPI_CHECK(!weight_tr.defined() || weight_tr.numel() == input_tr.shape()[1], + DIOPI_CHECK(targetTr.dim() == 1, "1D target_tr tensor expected, multi-target_tr not supported"); + DIOPI_CHECK(inputTr.shape()[0] == targetTr.shape()[0], "size mismatch "); + DIOPI_CHECK(!weightTr.defined() || weightTr.numel() == inputTr.shape()[1], "weight_tr tensor should be defined either for all classes or no classes"); } else if (dim == 4) { - input_contiguous = input_tr.contiguous(ctx, MemoryFormat::ChannelsLast); - DIOPI_CALL(cnnl_transpose(ctx, handle, input_tr, input_contiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); + inputContiguous = inputTr.contiguous(ctx, MemoryFormat::ChannelsLast); + DIOPI_CALL(cnnlTranspose(ctx, handle, inputTr, inputContiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); } else if (dim == 3) { - int64_t input_last_size = 1; - for (int i = 2; i < input_tr.dim(); ++i) { - input_last_size *= input_tr.shape()[i]; + int64_t inputLastSize = 1; + for (int i = 2; i < inputTr.dim(); ++i) { + inputLastSize *= inputTr.shape()[i]; } - input_tr.reshape({input_tr.shape()[0], input_tr.shape()[1], 1, input_last_size}); + inputTr.reshape({inputTr.shape()[0], inputTr.shape()[1], 1, inputLastSize}); - input_contiguous = input_tr.contiguous(ctx, MemoryFormat::ChannelsLast); - DIOPI_CALL(cnnl_transpose(ctx, handle, input_tr, input_contiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); + inputContiguous = inputTr.contiguous(ctx, MemoryFormat::ChannelsLast); + DIOPI_CALL(cnnlTranspose(ctx, handle, inputTr, inputContiguous, CNNL_LAYOUT_NCHW, CNNL_LAYOUT_NHWC)); } else { DIOPI_CHECK(false, "unexpected input tensor dim") } - auto input_size = input_contiguous.shape(); - int C = input_size[1]; - int N = std::accumulate(input_size.begin(), input_size.end(), 1, std::multiplies()) / C; - DIOPI_CHECK(N == target_tr.numel(), "Target size need be equal as input N*H*W."); - DIOPI_CHECK(C == weight_tr.numel(), "Weight size need be equal as input C."); + auto inputSize = inputContiguous.shape(); + int c = inputSize[1]; + int n = std::accumulate(inputSize.begin(), inputSize.end(), 1, std::multiplies<>()) / c; + DIOPI_CHECK(n == targetTr.numel(), "Target size need be equal as input N*H*W."); + DIOPI_CHECK(c == weightTr.numel(), "Weight size need be equal as input C."); - cnnlNlllossAlgorithm_t reduction_mode; + cnnlNlllossAlgorithm_t reductionMode; switch (reduction) { case 0: - reduction_mode = CNNL_REDUCTION_NONE; + reductionMode = CNNL_REDUCTION_NONE; break; 
case 1: - reduction_mode = CNNL_REDUCTION_MEAN; + reductionMode = CNNL_REDUCTION_MEAN; break; case 2: - reduction_mode = CNNL_REDUCTION_SUM; + reductionMode = CNNL_REDUCTION_SUM; break; default: DIOPI_CHECK(false, "unexpected nll_loss reduciton mode"); } - auto grad_input_real_tr = requiresTensor(ctx, {N, C}, input_contiguous.dtype()); + auto gradInputRealTr = requiresTensor(ctx, {n, c}, inputContiguous.dtype()); - auto total_weight_tr = requiresTensor(ctx, {1}, weight_tr.dtype()); - diopiScalar_t scalar({weight_tr.dtype(), static_cast(target_tr.numel())}); - DIOPI_CALL(diopiFill(ctx, total_weight_tr.tensorHandle(), &scalar)); + auto totalWeightTr = requiresTensor(ctx, {1}, weightTr.dtype()); + diopiScalar_t scalar({weightTr.dtype(), static_cast(targetTr.numel())}); + DIOPI_CALL(diopiFill(ctx, totalWeightTr.tensorHandle(), &scalar)); - CnnlTensorDesc grad_output_desc(grad_output_tr, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc target_desc; - CnnlTensorDesc weight_desc(weight_tr, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc tw_desc(total_weight_tr, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc grad_input_desc(grad_input_real_tr, CNNL_LAYOUT_ARRAY); - target_desc.set(target_tr, CNNL_LAYOUT_ARRAY, {N}); + CnnlTensorDesc gradOutputDesc(gradOutputTr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc targetDesc; + CnnlTensorDesc weightDesc(weightTr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc twDesc(totalWeightTr, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradInputDesc(gradInputRealTr, CNNL_LAYOUT_ARRAY); + targetDesc.set(targetTr, CNNL_LAYOUT_ARRAY, {n}); DIOPI_CALLCNNL(cnnlNlllossBackward(handle, - reduction_mode, - grad_output_desc.get(), - grad_output_tr.data(), - target_desc.get(), - target_tr.data(), - static_cast(ignore_index), - weight_desc.get(), - weight_tr.data(), - tw_desc.get(), - total_weight_tr.data(), - grad_input_desc.get(), - grad_input_real_tr.data())); + reductionMode, + gradOutputDesc.get(), + gradOutputTr.data(), + targetDesc.get(), + targetTr.data(), + static_cast(ignoreIndex), + weightDesc.get(), + weightTr.data(), + twDesc.get(), + totalWeightTr.data(), + gradInputDesc.get(), + gradInputRealTr.data())); if (dim > 2) { // NHWC -> NCHW and dealing with data type - grad_input_real_tr.reshape(input_contiguous.shape()); - grad_input_tr.reshape(input_contiguous.shape()); + gradInputRealTr.reshape(inputContiguous.shape()); + gradInputTr.reshape(inputContiguous.shape()); - DiopiTensor grad_input_tmp_tr = grad_input_tr; - if (grad_input_tr.dtype() != grad_input_real_tr.dtype()) { - grad_input_tmp_tr = requiresTensor(ctx, grad_input_tr.shape(), grad_input_real_tr.dtype()); + DiopiTensor gradInputTmpTr = gradInputTr; + if (gradInputTr.dtype() != gradInputRealTr.dtype()) { + gradInputTmpTr = requiresTensor(ctx, gradInputTr.shape(), gradInputRealTr.dtype()); } - DIOPI_CALL(cnnl_transpose(ctx, handle, grad_input_real_tr, grad_input_tmp_tr, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW)); + DIOPI_CALL(cnnlTranspose(ctx, handle, gradInputRealTr, gradInputTmpTr, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NCHW)); - if (grad_input_tmp_tr.dtype() != grad_input_tr.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tr, grad_input_tmp_tr)); + if (gradInputTmpTr.dtype() != gradInputTr.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTr, gradInputTmpTr)); } } else { - DIOPI_CALL(diopiCopyInp(ctx, grad_input_real_tr.tensorHandle(), grad_input_tr.tensorHandle())); + DIOPI_CALL(diopiCopyInp(ctx, gradInputRealTr.tensorHandle(), gradInputTr.tensorHandle())); } return diopiSuccess; } diopiError_t diopiCrossEntropyLoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, 
diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, - diopiConstTensorHandle_t weight, diopiReduction_t reduction, int64_t ignore_index, double label_smoothing) { - DiopiTensor input_tr(input); - DiopiTensor target_tr(target); + diopiConstTensorHandle_t weight, diopiReduction_t reduction, int64_t ignoreIndex, double labelSmoothing) { + DiopiTensor inputTr(input); + DiopiTensor targetTr(target); - DIOPI_CHECK(label_smoothing == 0, "Param label_smoothing is not supported by cnnl") - DIOPI_CHECK(target_tr.dim() == input_tr.dim() - 1, "Probabilities for each class are not supported by cnnl"); + DIOPI_CHECK(labelSmoothing == 0, "Param label_smoothing is not supported by cnnl") + DIOPI_CHECK(targetTr.dim() == inputTr.dim() - 1, "Probabilities for each class are not supported by cnnl"); - auto log_tr = requiresTensor(ctx, input_tr.shape(), input_tr.dtype()); - DIOPI_CALL(diopiLogSoftmax(ctx, log_tr.tensorHandle(), input, 1)); - DIOPI_CALL(diopiNLLLoss(ctx, out, log_tr.tensorHandle(), target, weight, reduction, ignore_index)); + auto logTr = requiresTensor(ctx, inputTr.shape(), inputTr.dtype()); + DIOPI_CALL(diopiLogSoftmax(ctx, logTr.tensorHandle(), input, 1)); + DIOPI_CALL(diopiNLLLoss(ctx, out, logTr.tensorHandle(), target, weight, reduction, ignoreIndex)); return diopiSuccess; } -diopiError_t diopiCrossEntropyLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +diopiError_t diopiCrossEntropyLossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiConstTensorHandle_t weight, - diopiReduction_t reduction, int64_t ignore_index, double label_smoothing) { - DiopiTensor input_tr(input); - DiopiTensor target_tr(target); - DiopiTensor grad_input_tr(grad_input); + diopiReduction_t reduction, int64_t ignoreIndex, double labelSmoothing) { + DiopiTensor inputTr(input); + DiopiTensor targetTr(target); + DiopiTensor gradInputTr(gradInput); - DIOPI_CHECK(label_smoothing == 0, "param label_smoothing is not supported") - DIOPI_CHECK(target_tr.dim() == input_tr.dim() - 1, "Probabilities for each class are not supported"); + DIOPI_CHECK(labelSmoothing == 0, "param label_smoothing is not supported") + DIOPI_CHECK(targetTr.dim() == inputTr.dim() - 1, "Probabilities for each class are not supported"); - auto log_tr = requiresTensor(ctx, input_tr.shape(), input_tr.dtype()); - auto grad_tmp_tr = requiresTensor(ctx, grad_input_tr.shape(), grad_input_tr.dtype()); + auto logTr = requiresTensor(ctx, inputTr.shape(), inputTr.dtype()); + auto gradTmpTr = requiresTensor(ctx, gradInputTr.shape(), gradInputTr.dtype()); - DIOPI_CALL(diopiLogSoftmax(ctx, log_tr.tensorHandle(), input, 1)); + DIOPI_CALL(diopiLogSoftmax(ctx, logTr.tensorHandle(), input, 1)); // for nll loss backward, `input` should be logsoftmax out. 
- DIOPI_CALL(diopiNLLLossBackward(ctx, grad_tmp_tr.tensorHandle(), grad_output, log_tr.tensorHandle(), target, weight, reduction, ignore_index)); + DIOPI_CALL(diopiNLLLossBackward(ctx, gradTmpTr.tensorHandle(), gradOutput, logTr.tensorHandle(), target, weight, reduction, ignoreIndex)); // for softmax backward, `output` should be logsoftmax out - DIOPI_CALL(diopiLogSoftmaxBackward(ctx, grad_input, grad_tmp_tr.tensorHandle(), log_tr.tensorHandle(), 1)); + DIOPI_CALL(diopiLogSoftmaxBackward(ctx, gradInput, gradTmpTr.tensorHandle(), logTr.tensorHandle(), 1)); return diopiSuccess; } @@ -303,15 +303,15 @@ diopiError_t diopiMSELoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, dio std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - cnnlMSELossReduction_t cnnl_reduction; + cnnlMSELossReduction_t cnnlReduction; if (reduction == ReductionMean) { - cnnl_reduction = CNNL_MSE_LOSS_MEAN; + cnnlReduction = CNNL_MSE_LOSS_MEAN; DIOPI_CHECK(trOut.dim() == 0, "Output dim must be 0."); } else if (reduction == ReductionSum) { - cnnl_reduction = CNNL_MSE_LOSS_SUM; + cnnlReduction = CNNL_MSE_LOSS_SUM; DIOPI_CHECK(trOut.dim() == 0, "Output dim must be 0."); } else { - cnnl_reduction = CNNL_MSE_LOSS_NONE; + cnnlReduction = CNNL_MSE_LOSS_NONE; DIOPI_CHECK(trOut.dim() == trInput.dim(), "Output dim must be the same as input."); } @@ -328,37 +328,37 @@ diopiError_t diopiMSELoss(diopiContextHandle_t ctx, diopiTensorHandle_t out, dio trOutTmp = trOut; descOut.set(trOut, layout); } else { - trOutTmp = requiresTensor(ctx, vec2diopiSize_t(trOut.shape()), trInput.dtype()); + trOutTmp = requiresTensor(ctx, vec2diopiSizeT(trOut.shape()), trInput.dtype()); descOut.set(trOutTmp, CNNL_LAYOUT_ARRAY); } - DIOPI_CALLCNNL(cnnlMSELoss(handle, cnnl_reduction, descInput.get(), trInput.data(), descTarget.get(), trTarget.data(), descOut.get(), trOutTmp.data())); + DIOPI_CALLCNNL(cnnlMSELoss(handle, cnnlReduction, descInput.get(), trInput.data(), descTarget.get(), trTarget.data(), descOut.get(), trOutTmp.data())); if (trOutTmp.dtype() != trOut.dtype()) { DIOPI_CALL(dataTypeCast(ctx, trOut, trOutTmp)); } return diopiSuccess; } -diopiError_t diopiMSELossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +diopiError_t diopiMSELossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, diopiConstTensorHandle_t target, diopiReduction_t reduction) { DiopiTensor trInput(input); - DiopiTensor trGradOutput(grad_output); + DiopiTensor trGradOutput(gradOutput); DiopiTensor trTarget(target); - DiopiTensor trGradInput(grad_input); + DiopiTensor trGradInput(gradInput); std::vector pTensors{&trInput, &trGradOutput, &trTarget}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - cnnlMSELossReduction_t cnnl_reduction; + cnnlMSELossReduction_t cnnlReduction; if (reduction == ReductionMean) { - cnnl_reduction = CNNL_MSE_LOSS_MEAN; + cnnlReduction = CNNL_MSE_LOSS_MEAN; DIOPI_CHECK(trGradOutput.dim() == 0, "Grad output dim must be 0."); } else if (reduction == ReductionSum) { - cnnl_reduction = CNNL_MSE_LOSS_SUM; + cnnlReduction = CNNL_MSE_LOSS_SUM; DIOPI_CHECK(trGradOutput.dim() == 0, "Grad output dim must be 0."); } else { - cnnl_reduction = CNNL_MSE_LOSS_NONE; + cnnlReduction = CNNL_MSE_LOSS_NONE; DIOPI_CHECK(trGradOutput.dim() == trInput.dim(), "Output dim 
must be the same as input."); } @@ -377,12 +377,12 @@ diopiError_t diopiMSELossBackward(diopiContextHandle_t ctx, diopiTensorHandle_t trGradInputTmp = trGradInput; descGradInput.set(trGradInput, layout); } else { - trGradInputTmp = requiresTensor(ctx, vec2diopiSize_t(trGradInput.shape()), trInput.dtype()); + trGradInputTmp = requiresTensor(ctx, vec2diopiSizeT(trGradInput.shape()), trInput.dtype()); descGradInput.set(trGradInputTmp, CNNL_LAYOUT_ARRAY); } DIOPI_CALLCNNL(cnnlMSELossBackward(handle, - cnnl_reduction, + cnnlReduction, descInput.get(), trInput.data(), descTarget.get(), diff --git a/DIOPI-IMPL/camb/functions/masked_fill.cpp b/DIOPI-IMPL/camb/functions/masked_fill.cpp index 1b4b8cb8f..6135d2512 100644 --- a/DIOPI-IMPL/camb/functions/masked_fill.cpp +++ b/DIOPI-IMPL/camb/functions/masked_fill.cpp @@ -1,7 +1,8 @@ -#include +#include #include #include #include + #include "../cnnl_helper.hpp" #include "../common/common.hpp" @@ -14,72 +15,72 @@ diopiError_t diopiMaskedFill(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t value) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor mask_tensor(mask); - DiopiTensor value_tensor(value); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor maskTensor(mask); + DiopiTensor valueTensor(value); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor, &value_tensor}; + std::vector pTensors{&inputTensor, &valueTensor}; std::set supportedDtypes{diopi_dtype_int8, diopi_dtype_int16, diopi_dtype_int32, diopi_dtype_float16, diopi_dtype_float32, diopi_dtype_bool}; - std::vector MTensors{&mask_tensor}; - std::set supportedDtypes_mask{diopi_dtype_int8, diopi_dtype_uint8, diopi_dtype_bool}; + std::vector mTensors{&maskTensor}; + std::set supportedDtypesMask{diopi_dtype_int8, diopi_dtype_uint8, diopi_dtype_bool}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DIOPI_CALL(autoCastTensorType(ctx, MTensors, supportedDtypes_mask)); + DIOPI_CALL(autoCastTensorType(ctx, mTensors, supportedDtypesMask)); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor value_tensor_tmp = *pTensors[1]; - DiopiTensor mask_tensor_tmp = *MTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor valueTensorTmp = *pTensors[1]; + DiopiTensor maskTensorTmp = *mTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); - CnnlTensorDesc input_desc(input_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc mask_desc(mask_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc maskDesc(maskTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc value_desc; - if (value_tensor_tmp.shape().size() > 0) { - DIOPI_CALL(value_desc.set(value_tensor_tmp, CNNL_LAYOUT_ARRAY)); + CnnlTensorDesc valueDesc; + if (!valueTensorTmp.shape().empty()) { + DIOPI_CALL(valueDesc.set(valueTensorTmp, CNNL_LAYOUT_ARRAY)); } else { - std::vector value_dims = {1}; - DIOPI_CALL(value_desc.set(value_tensor_tmp, CNNL_LAYOUT_ARRAY, value_dims)); + std::vector valueDims = {1}; + DIOPI_CALL(valueDesc.set(valueTensorTmp, CNNL_LAYOUT_ARRAY, valueDims)); } - DiopiTensor value_cast_tensor; - CnnlTensorDesc value_cast_desc; + DiopiTensor 
valueCastTensor; + CnnlTensorDesc valueCastDesc; - bool value_cast = false; - if (input_tensor_tmp.dtype() != value_tensor_tmp.dtype()) { - value_cast = true; - value_cast_tensor = value_tensor_tmp; - DIOPI_CALL(dataTypeCast(ctx, value_tensor, input_tensor_tmp.dtype())); - value_cast_desc.set(value_cast_tensor, CNNL_LAYOUT_ARRAY); + bool valueCast = false; + if (inputTensorTmp.dtype() != valueTensorTmp.dtype()) { + valueCast = true; + valueCastTensor = valueTensorTmp; + DIOPI_CALL(dataTypeCast(ctx, valueTensor, inputTensorTmp.dtype())); + valueCastDesc.set(valueCastTensor, CNNL_LAYOUT_ARRAY); } - size_t workspace_size = 0; + size_t workspaceSize = 0; DIOPI_CALLCNNL(cnnlGetMaskedWorkspaceSize( - handle, CNNL_MASKED_FILL, input_desc.get(), mask_desc.get(), value_cast ? value_cast_desc.get() : value_desc.get(), out_desc.get(), &workspace_size)); + handle, CNNL_MASKED_FILL, inputDesc.get(), maskDesc.get(), valueCast ? valueCastDesc.get() : valueDesc.get(), outDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlMasked_v3(handle, CNNL_MASKED_FILL, - input_desc.get(), - input_tensor_tmp.data(), - mask_desc.get(), - mask_tensor_tmp.data(), - value_cast ? value_cast_desc.get() : value_desc.get(), - value_cast ? value_cast_tensor.data() : value_tensor_tmp.data(), + inputDesc.get(), + inputTensorTmp.data(), + maskDesc.get(), + maskTensorTmp.data(), + valueCast ? valueCastDesc.get() : valueDesc.get(), + valueCast ? valueCastTensor.data() : valueTensorTmp.data(), workspace, - workspace_size, - out_desc.get(), - out_tensor_tmp.data(), + workspaceSize, + outDesc.get(), + outTensorTmp.data(), nullptr)); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } @@ -90,17 +91,17 @@ diopiError_t diopiMaskedFillInp(diopiContextHandle_t ctx, diopiTensorHandle_t in diopiError_t diopiMaskedFillScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t mask, const diopiScalar_t* value) { - DiopiTensor value_tensor; - makeTensorFromScalar(ctx, value, value_tensor); - DIOPI_CALL(diopiMaskedFill(ctx, out, input, mask, static_cast(value_tensor))); + DiopiTensor valueTensor; + makeTensorFromScalar(ctx, value, valueTensor); + DIOPI_CALL(diopiMaskedFill(ctx, out, input, mask, static_cast(valueTensor))); return diopiSuccess; } diopiError_t diopiMaskedFillInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t mask, const diopiScalar_t* value) { - DiopiTensor value_tensor; - makeTensorFromScalar(ctx, value, value_tensor); - DIOPI_CALL(diopiMaskedFill(ctx, input, input, mask, static_cast(value_tensor))); + DiopiTensor valueTensor; + makeTensorFromScalar(ctx, value, valueTensor); + DIOPI_CALL(diopiMaskedFill(ctx, input, input, mask, static_cast(valueTensor))); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/matmul.cpp b/DIOPI-IMPL/camb/functions/matmul.cpp index f378527c5..8adab81c2 100644 --- a/DIOPI-IMPL/camb/functions/matmul.cpp +++ b/DIOPI-IMPL/camb/functions/matmul.cpp @@ -10,32 +10,32 @@ namespace camb { extern "C" { static std::vector getPerm(DiopiTensor tensor, int64_t dim0, int64_t dim1) { - int input_size = tensor.shape().size(); + int inputSize = tensor.shape().size(); if (dim0 < 0) { - dim0 = dim0 + input_size; + dim0 = dim0 + 
inputSize; } if (dim1 < 0) { - dim1 = dim1 + input_size; + dim1 = dim1 + inputSize; } - std::vector perms(input_size); + std::vector perms(inputSize); std::iota(perms.begin(), perms.end(), 0); perms[dim0] = dim1; perms[dim1] = dim0; return perms; } -static std::vector inferSize(std::vector batch_tensor1, std::vector batch_tensor2) { - if (batch_tensor1.size() < batch_tensor2.size()) { - batch_tensor1.insert(batch_tensor1.begin(), batch_tensor2.size() - batch_tensor1.size(), 1); - } else if (batch_tensor1.size() > batch_tensor2.size()) { - batch_tensor2.insert(batch_tensor2.begin(), batch_tensor1.size() - batch_tensor2.size(), 1); +static std::vector inferSize(std::vector batchTensor1, std::vector batchTensor2) { + if (batchTensor1.size() < batchTensor2.size()) { + batchTensor1.insert(batchTensor1.begin(), batchTensor2.size() - batchTensor1.size(), 1); + } else if (batchTensor1.size() > batchTensor2.size()) { + batchTensor2.insert(batchTensor2.begin(), batchTensor1.size() - batchTensor2.size(), 1); } - std::vector res(batch_tensor1); - for (int i = 0; i < batch_tensor1.size(); i++) { - if (1 == batch_tensor1[i]) { - res[i] = batch_tensor2[i]; + std::vector res(batchTensor1); + for (int i = 0; i < batchTensor1.size(); i++) { + if (1 == batchTensor1[i]) { + res[i] = batchTensor2[i]; } } @@ -44,45 +44,45 @@ static std::vector inferSize(std::vector batch_tensor1, std::v static int64_t multiplyIntegers(std::vector tensor) { int64_t out = 1; - for (int i = 0; i < tensor.size(); i++) { - out = out * tensor[i]; + for (long i : tensor) { + out = out * i; } return out; } -static diopiError_t vectorMulVector(diopiContextHandle_t ctx, DiopiTensor out_tensor, DiopiTensor vector1_tensor, DiopiTensor vector2_tensor) { +static diopiError_t vectorMulVector(diopiContextHandle_t ctx, DiopiTensor outTensor, DiopiTensor vector1Tensor, DiopiTensor vector2Tensor) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - if (vector1_tensor.dtype() != diopi_dtype_float32 && vector1_tensor.dtype() != diopi_dtype_float16) { - DIOPI_CALL(dataTypeCast(ctx, vector1_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, vector2_tensor, diopi_dtype_float32)); + if (vector1Tensor.dtype() != diopi_dtype_float32 && vector1Tensor.dtype() != diopi_dtype_float16) { + DIOPI_CALL(dataTypeCast(ctx, vector1Tensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, vector2Tensor, diopi_dtype_float32)); } - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc vector1Desc(vector1_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc vector2Desc(vector2_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc vector1Desc(vector1Tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc vector2Desc(vector2Tensor, CNNL_LAYOUT_ARRAY); - DiopiTensor temp_out = requiresTensor(ctx, vector1_tensor.shape(), vector1_tensor.dtype()); - CnnlTensorDesc temp_outDesc(temp_out, CNNL_LAYOUT_ARRAY); + DiopiTensor tempOut = requiresTensor(ctx, vector1Tensor.shape(), vector1Tensor.dtype()); + CnnlTensorDesc tempOutDesc(tempOut, CNNL_LAYOUT_ARRAY); - std::vector inputs_desc(2); - inputs_desc[0] = vector1Desc.get(); - inputs_desc[1] = vector2Desc.get(); + std::vector inputsDesc(2); + inputsDesc[0] = vector1Desc.get(); + inputsDesc[1] = vector2Desc.get(); std::vector inputs(2); - inputs[0] = vector1_tensor.data(); - inputs[1] = vector2_tensor.data(); + inputs[0] = vector1Tensor.data(); + inputs[1] = vector2Tensor.data(); - DIOPI_CALLCNNL(cnnlMulN(handle, inputs_desc.data(), inputs.data(), 2, temp_outDesc.get(), 
temp_out.data())); - int64_t dim_data = 0; - diopiSize_t dim = {&dim_data, 1}; + DIOPI_CALLCNNL(cnnlMulN(handle, inputsDesc.data(), inputs.data(), 2, tempOutDesc.get(), tempOut.data())); + int64_t dimData = 0; + diopiSize_t dim = {&dimData, 1}; - if (out_tensor.dtype() == vector1_tensor.dtype()) { - DIOPI_CALL(diopiSum(ctx, (diopiTensorHandle_t)out_tensor, (diopiTensorHandle_t)temp_out, dim)); + if (outTensor.dtype() == vector1Tensor.dtype()) { + DIOPI_CALL(diopiSum(ctx, (diopiTensorHandle_t)outTensor, (diopiTensorHandle_t)tempOut, dim)); } else { - DiopiTensor out32_tensor = requiresTensor(ctx, out_tensor.shape(), vector1_tensor.dtype()); - DIOPI_CALL(diopiSum(ctx, (diopiTensorHandle_t)out32_tensor, (diopiTensorHandle_t)temp_out, dim)); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out32_tensor)); + DiopiTensor out32Tensor = requiresTensor(ctx, outTensor.shape(), vector1Tensor.dtype()); + DIOPI_CALL(diopiSum(ctx, (diopiTensorHandle_t)out32Tensor, (diopiTensorHandle_t)tempOut, dim)); + DIOPI_CALL(dataTypeCast(ctx, outTensor, out32Tensor)); } return diopiSuccess; } @@ -99,10 +99,10 @@ static diopiError_t matMulMat(diopiContextHandle_t ctx, DiopiTensor out, DiopiTe CnnlTensorDesc otherDesc(other, CNNL_LAYOUT_ARRAY); CnnlTensorDesc outDesc(out, CNNL_LAYOUT_ARRAY); - CnnlResourceGuard matmulDesc; - cnnlMatMulDescriptor_t matmul_desc = matmulDesc.get(); - int32_t allow_tf32_i32 = 1; - DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmul_desc, CNNL_MATMUL_ALLOW_TF32, &(allow_tf32_i32), sizeof(int32_t))); + CnnlResourceGuard matmulDescGuard; + cnnlMatMulDescriptor_t matmulDesc = matmulDescGuard.get(); + int32_t allowTf32I32 = 1; + DIOPI_CALLCNNL(cnnlSetMatMulDescAttr(matmulDesc, CNNL_MATMUL_ALLOW_TF32, &(allowTf32I32), sizeof(int32_t))); CnnlResourceGuard matmulAlgo; cnnlMatMulAlgo_t algo = matmulAlgo.get(); @@ -110,19 +110,19 @@ static diopiError_t matMulMat(diopiContextHandle_t ctx, DiopiTensor out, DiopiTe cnnlMatMulHeuristicResult_t heuristicResult = matMulHeuristic.get(); int returnAlgoCount = 0; DIOPI_CALLCNNL(cnnlGetMatMulAlgoHeuristic( - handle, matmul_desc, inputDesc.get(), otherDesc.get(), outDesc.get(), outDesc.get(), nullptr, 1, &heuristicResult, &returnAlgoCount)); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetMatMulHeuristicResult(heuristicResult, algo, &workspace_size)); + handle, matmulDesc, inputDesc.get(), otherDesc.get(), outDesc.get(), outDesc.get(), nullptr, 1, &heuristicResult, &returnAlgoCount)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetMatMulHeuristicResult(heuristicResult, algo, &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } float alpha = 1; float beta = 0; if (out.dtype() == input.dtype()) { DIOPI_CALLCNNL(cnnlMatMul_v2(handle, - matmul_desc, + matmulDesc, algo, &alpha, inputDesc.get(), @@ -133,14 +133,14 @@ static diopiError_t matMulMat(diopiContextHandle_t ctx, DiopiTensor out, DiopiTe outDesc.get(), out.data(), workspace, - workspace_size, + workspaceSize, outDesc.get(), out.data())); } else { - DiopiTensor out_temp = requiresTensor(ctx, out.shape(), input.dtype()); - CnnlTensorDesc out_tempDesc(out_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor outTemp = requiresTensor(ctx, out.shape(), input.dtype()); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlMatMul_v2(handle, - matmul_desc, + matmulDesc, algo, &alpha, inputDesc.get(), @@ -148,243 +148,243 @@ static diopiError_t 
matMulMat(diopiContextHandle_t ctx, DiopiTensor out, DiopiTe otherDesc.get(), other.data(), &beta, - out_tempDesc.get(), - out_temp.data(), + outTempDesc.get(), + outTemp.data(), workspace, - workspace_size, - out_tempDesc.get(), - out_temp.data())); - DIOPI_CALL(dataTypeCast(ctx, out, out_temp)); + workspaceSize, + outTempDesc.get(), + outTemp.data())); + DIOPI_CALL(dataTypeCast(ctx, out, outTemp)); } return diopiSuccess; } -static diopiError_t matMulVector(diopiContextHandle_t ctx, DiopiTensor out_tensor, DiopiTensor input_tensor, DiopiTensor vector_tensor) { - if (input_tensor.shape()[1] != vector_tensor.shape()[0]) { - vector_tensor.reshape({1, vector_tensor.shape()[0]}); - out_tensor.reshape({vector_tensor.shape()[0], 1}); +static diopiError_t matMulVector(diopiContextHandle_t ctx, DiopiTensor outTensor, DiopiTensor inputTensor, DiopiTensor vectorTensor) { + if (inputTensor.shape()[1] != vectorTensor.shape()[0]) { + vectorTensor.reshape({1, vectorTensor.shape()[0]}); + outTensor.reshape({vectorTensor.shape()[0], 1}); } else { - vector_tensor.reshape({vector_tensor.shape()[0], 1}); - out_tensor.reshape({input_tensor.shape()[0], 1}); + vectorTensor.reshape({vectorTensor.shape()[0], 1}); + outTensor.reshape({inputTensor.shape()[0], 1}); } - DIOPI_CALL(matMulMat(ctx, out_tensor, input_tensor, vector_tensor)); + DIOPI_CALL(matMulMat(ctx, outTensor, inputTensor, vectorTensor)); return diopiSuccess; } -static diopiError_t transpose(diopiContextHandle_t ctx, DiopiTensor out_tensor, DiopiTensor input, int64_t dim0, int64_t dim1) { +static diopiError_t transpose(diopiContextHandle_t ctx, DiopiTensor outTensor, DiopiTensor input, int64_t dim0, int64_t dim1) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - diopiTensorHandle_t out = (diopiTensorHandle_t)out_tensor; + diopiTensorHandle_t out = (diopiTensorHandle_t)outTensor; - CnnlResourceGuard CnnlTransposeDesc; - cnnlTransposeDescriptor_t transpose_desc = CnnlTransposeDesc.get(); + CnnlResourceGuard cnnlTransposeDesc; + cnnlTransposeDescriptor_t transposeDesc = cnnlTransposeDesc.get(); std::vector perms = getPerm(input, dim0, dim1); - cnnlSetTransposeDescriptor(transpose_desc, perms.size(), perms.data()); + cnnlSetTransposeDescriptor(transposeDesc, perms.size(), perms.data()); CnnlTensorDesc inputDesc(input, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - cnnlGetTransposeWorkspaceSize(handle, inputDesc.get(), transpose_desc, &workspace_size); + size_t workspaceSize = 0; + cnnlGetTransposeWorkspaceSize(handle, inputDesc.get(), transposeDesc, &workspaceSize); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - cnnlTranspose_v2(handle, transpose_desc, inputDesc.get(), input.data(), outDesc.get(), out_tensor.data(), workspace, workspace_size); + cnnlTranspose_v2(handle, transposeDesc, inputDesc.get(), input.data(), outDesc.get(), outTensor.data(), workspace, workspaceSize); return diopiSuccess; } -static diopiError_t batchMatmul(diopiContextHandle_t ctx, DiopiTensor out_tensor, DiopiTensor input_tensor, DiopiTensor other_tensor) { +static diopiError_t batchMatmul(diopiContextHandle_t ctx, DiopiTensor outTensor, DiopiTensor inputTensor, DiopiTensor otherTensor) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - if (input_tensor.dtype() == diopi_dtype_float64) { - 
DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, other_tensor, diopi_dtype_float32)); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, otherTensor, diopi_dtype_float32)); } - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc otherDesc(other_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc otherDesc(otherTensor, CNNL_LAYOUT_ARRAY); - int32_t allow_tf32_int = 1; - CnnlDescBase bmmDesc; - cnnlSetMatMulDescAttr(bmmDesc.get(), CNNL_MATMUL_ALLOW_TF32, &allow_tf32_int, sizeof(allow_tf32_int)); + int32_t allowTf32Int = 1; + CnnlDescBase bmmDescGuard; + cnnlSetMatMulDescAttr(bmmDescGuard.get(), CNNL_MATMUL_ALLOW_TF32, &allowTf32Int, sizeof(allowTf32Int)); CnnlDescBase bmmAlgo; CnnlDescBase bmmHeuristicResult; - int return_algo_count = 0; + int returnAlgoCount = 0; cnnlGetBatchMatMulAlgoHeuristic( - handle, bmmDesc.get(), inputDesc.get(), otherDesc.get(), outDesc.get(), nullptr, 1, &(bmmHeuristicResult.get()), &return_algo_count); + handle, bmmDescGuard.get(), inputDesc.get(), otherDesc.get(), outDesc.get(), nullptr, 1, &(bmmHeuristicResult.get()), &returnAlgoCount); - size_t workspace_size(0); - cnnlGetBatchMatMulHeuristicResult(bmmHeuristicResult.get(), bmmAlgo.get(), &workspace_size); + size_t workspaceSize(0); + cnnlGetBatchMatMulHeuristicResult(bmmHeuristicResult.get(), bmmAlgo.get(), &workspaceSize); void* workspace = nullptr; - if (workspace > 0) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (workspaceSize > 0) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - if (out_tensor.dtype() == input_tensor.dtype()) { + if (outTensor.dtype() == inputTensor.dtype()) { DIOPI_CALLCNNL(cnnlBatchMatMulBCast_v2(handle, - bmmDesc.get(), + bmmDescGuard.get(), bmmAlgo.get(), nullptr, inputDesc.get(), - input_tensor.data(), + inputTensor.data(), otherDesc.get(), - other_tensor.data(), + otherTensor.data(), nullptr, outDesc.get(), - out_tensor.data(), + outTensor.data(), workspace, - workspace_size)); + workspaceSize)); } else { - DiopiTensor out_temp = requiresTensor(ctx, out_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_tempDesc(out_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor outTemp = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlBatchMatMulBCast_v2(handle, - bmmDesc.get(), + bmmDescGuard.get(), bmmAlgo.get(), nullptr, inputDesc.get(), - input_tensor.data(), + inputTensor.data(), otherDesc.get(), - other_tensor.data(), + otherTensor.data(), nullptr, - out_tempDesc.get(), - out_temp.data(), + outTempDesc.get(), + outTemp.data(), workspace, - workspace_size)); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp)); + workspaceSize)); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTemp)); } return diopiSuccess; } -static diopiError_t tensorMatmulTensor(diopiContextHandle_t ctx, DiopiTensor out_tensor, DiopiTensor input_tensor, DiopiTensor other_tensor) { - if (input_tensor.dim() == 1 && other_tensor.dim() == 1) { - DIOPI_CALL(vectorMulVector(ctx, out_tensor, input_tensor, other_tensor)); +static diopiError_t tensorMatmulTensor(diopiContextHandle_t ctx, DiopiTensor outTensor, DiopiTensor inputTensor, DiopiTensor otherTensor) { + if 
(inputTensor.dim() == 1 && otherTensor.dim() == 1) { + DIOPI_CALL(vectorMulVector(ctx, outTensor, inputTensor, otherTensor)); return diopiSuccess; - } else if (input_tensor.dim() == 2 && other_tensor.dim() == 1) { - DIOPI_CALL(matMulVector(ctx, out_tensor, input_tensor, other_tensor)); + } else if (inputTensor.dim() == 2 && otherTensor.dim() == 1) { + DIOPI_CALL(matMulVector(ctx, outTensor, inputTensor, otherTensor)); return diopiSuccess; - } else if (input_tensor.dim() == 1 && other_tensor.dim() == 2) { - std::vector shape(other_tensor.shape()); - shape[0] = other_tensor.shape()[1]; - shape[1] = other_tensor.shape()[0]; - DiopiTensor other_T = requiresTensor(ctx, shape, other_tensor.dtype()); - DIOPI_CALL(transpose(ctx, other_T, other_tensor, 0, 1)) - DIOPI_CALL(matMulVector(ctx, out_tensor, other_T, input_tensor)); + } else if (inputTensor.dim() == 1 && otherTensor.dim() == 2) { + std::vector shape(otherTensor.shape()); + shape[0] = otherTensor.shape()[1]; + shape[1] = otherTensor.shape()[0]; + DiopiTensor otherT = requiresTensor(ctx, shape, otherTensor.dtype()); + DIOPI_CALL(transpose(ctx, otherT, otherTensor, 0, 1)) + DIOPI_CALL(matMulVector(ctx, outTensor, otherT, inputTensor)); return diopiSuccess; - } else if (input_tensor.dim() == 2 && other_tensor.dim() == 2) { - DIOPI_CALL(matMulMat(ctx, out_tensor, input_tensor, other_tensor)); + } else if (inputTensor.dim() == 2 && otherTensor.dim() == 2) { + DIOPI_CALL(matMulMat(ctx, outTensor, inputTensor, otherTensor)); return diopiSuccess; - } else if (input_tensor.dim() >= 3 && (other_tensor.dim() == 1 || other_tensor.dim() == 2)) { - std::vector output_size; - output_size.insert(output_size.end(), input_tensor.shape().begin(), input_tensor.shape().end() - 1); - if (other_tensor.dim() == 1) { - std::vector temp_shape(2); - temp_shape[0] = other_tensor.shape()[0]; - temp_shape[1] = 1; - other_tensor.reshape(temp_shape); + } else if (inputTensor.dim() >= 3 && (otherTensor.dim() == 1 || otherTensor.dim() == 2)) { + std::vector outputSize; + outputSize.insert(outputSize.end(), inputTensor.shape().begin(), inputTensor.shape().end() - 1); + if (otherTensor.dim() == 1) { + std::vector tempShape(2); + tempShape[0] = otherTensor.shape()[0]; + tempShape[1] = 1; + otherTensor.reshape(tempShape); } else { - output_size.push_back(other_tensor.shape()[1]); + outputSize.push_back(otherTensor.shape()[1]); } std::vector shape(2); - shape[1] = input_tensor.shape()[input_tensor.dim() - 1]; - shape[0] = input_tensor.numel() / shape[1]; - input_tensor.reshape(shape); - shape[1] = other_tensor.shape()[1]; - out_tensor.reshape(shape); - DIOPI_CALL(matMulMat(ctx, out_tensor, input_tensor, other_tensor)); + shape[1] = inputTensor.shape()[inputTensor.dim() - 1]; + shape[0] = inputTensor.numel() / shape[1]; + inputTensor.reshape(shape); + shape[1] = otherTensor.shape()[1]; + outTensor.reshape(shape); + DIOPI_CALL(matMulMat(ctx, outTensor, inputTensor, otherTensor)); return diopiSuccess; - } else if ((input_tensor.dim() == 1 || input_tensor.dim() == 2) && other_tensor.dim() >= 3) { - int input_dim = input_tensor.dim(); - int64_t n = input_tensor.dim() == 2 ? input_tensor.shape()[0] : 1; - int64_t m = input_tensor.shape()[input_tensor.dim() - 1]; - int64_t p = other_tensor.shape()[other_tensor.dim() - 1]; - if (input_dim == 1) { - input_tensor.reshape({n, m}); + } else if ((inputTensor.dim() == 1 || inputTensor.dim() == 2) && otherTensor.dim() >= 3) { + int inputDim = inputTensor.dim(); + int64_t n = inputTensor.dim() == 2 ? 
inputTensor.shape()[0] : 1; + int64_t m = inputTensor.shape()[inputTensor.dim() - 1]; + int64_t p = otherTensor.shape()[otherTensor.dim() - 1]; + if (inputDim == 1) { + inputTensor.reshape({n, m}); } - std::vector other_shape(other_tensor.shape()); - other_shape[other_tensor.shape().size() - 1] = other_tensor.shape()[other_tensor.shape().size() - 2]; - other_shape[other_tensor.shape().size() - 2] = other_tensor.shape()[other_tensor.shape().size() - 1]; - DiopiTensor other_T_tensor = requiresTensor(ctx, other_shape, other_tensor.dtype()); - DIOPI_CALL(transpose(ctx, other_T_tensor, other_tensor, -1, -2)) - std::vector input_shape(input_tensor.shape()); - input_shape[0] = input_tensor.shape()[1]; - input_shape[1] = input_tensor.shape()[0]; - DiopiTensor input_T_tensor = requiresTensor(ctx, input_shape, input_tensor.dtype()); - DIOPI_CALL(transpose(ctx, input_T_tensor, input_tensor, 0, 1)) - - if (input_dim == 1) { - DIOPI_CALL(tensorMatmulTensor(ctx, out_tensor, other_T_tensor, input_T_tensor)); + std::vector otherShape(otherTensor.shape()); + otherShape[otherTensor.shape().size() - 1] = otherTensor.shape()[otherTensor.shape().size() - 2]; + otherShape[otherTensor.shape().size() - 2] = otherTensor.shape()[otherTensor.shape().size() - 1]; + DiopiTensor otherTTensor = requiresTensor(ctx, otherShape, otherTensor.dtype()); + DIOPI_CALL(transpose(ctx, otherTTensor, otherTensor, -1, -2)) + std::vector inputShape(inputTensor.shape()); + inputShape[0] = inputTensor.shape()[1]; + inputShape[1] = inputTensor.shape()[0]; + DiopiTensor inputTTensor = requiresTensor(ctx, inputShape, inputTensor.dtype()); + DIOPI_CALL(transpose(ctx, inputTTensor, inputTensor, 0, 1)) + + if (inputDim == 1) { + DIOPI_CALL(tensorMatmulTensor(ctx, outTensor, otherTTensor, inputTTensor)); } else { - std::vector shape(other_T_tensor.shape().begin(), other_T_tensor.shape().end() - 1); - shape.push_back(input_tensor.shape()[0]); - DiopiTensor out_temp = requiresTensor(ctx, shape, out_tensor.dtype()); + std::vector shape(otherTTensor.shape().begin(), otherTTensor.shape().end() - 1); + shape.push_back(inputTensor.shape()[0]); + DiopiTensor outTemp = requiresTensor(ctx, shape, outTensor.dtype()); - DIOPI_CALL(tensorMatmulTensor(ctx, out_temp, other_T_tensor, input_T_tensor)); - DIOPI_CALL(transpose(ctx, out_tensor, out_temp, -1, -2)); + DIOPI_CALL(tensorMatmulTensor(ctx, outTemp, otherTTensor, inputTTensor)); + DIOPI_CALL(transpose(ctx, outTensor, outTemp, -1, -2)); } return diopiSuccess; - } else if ((input_tensor.dim() >= 1 && other_tensor.dim() >= 1) && (input_tensor.dim() >= 3 || other_tensor.dim() >= 3)) { - int64_t n = input_tensor.dim() > 1 ? input_tensor.shape()[input_tensor.dim() - 2] : 1; - int64_t m1 = input_tensor.shape()[input_tensor.dim() - 1]; - int64_t data_len = input_tensor.dim() > 2 ? input_tensor.shape().size() - 2 : 0; - std::vector batch_tensor1(input_tensor.shape().begin(), input_tensor.shape().begin() + data_len); - - int64_t m2 = other_tensor.dim() > 1 ? other_tensor.shape()[input_tensor.dim() - 2] : 1; - int64_t p = other_tensor.shape()[other_tensor.dim() - 1]; - data_len = other_tensor.dim() > 2 ? 
other_tensor.shape().size() - 2 : 0; - std::vector batch_tensor2(other_tensor.shape().begin(), other_tensor.shape().begin() + data_len); - - std::vector expand_batch_portion = inferSize(batch_tensor1, batch_tensor2); - std::vector tensor1_expand_size(expand_batch_portion); - tensor1_expand_size.insert(tensor1_expand_size.end(), {n, m1}); - std::vector tensor2_expand_size(expand_batch_portion); - tensor2_expand_size.insert(tensor2_expand_size.end(), {m2, p}); - - int64_t expand_batch_product = multiplyIntegers(expand_batch_portion); - std::vector tensor1_bmm_view({expand_batch_product}); - tensor1_bmm_view.insert(tensor1_bmm_view.end(), {n, m1}); - std::vector tensor2_bmm_view({expand_batch_product}); - tensor2_bmm_view.insert(tensor2_bmm_view.end(), {m2, p}); - - DiopiTensor input_expand = requiresTensor(ctx, tensor1_expand_size, input_tensor.dtype()); - DiopiTensor other_expand = requiresTensor(ctx, tensor2_expand_size, other_tensor.dtype()); - broadcast(ctx, input_expand, input_tensor); - broadcast(ctx, other_expand, other_tensor); - input_expand.reshape(tensor1_bmm_view); - other_expand.reshape(tensor2_bmm_view); - - std::vector output_shape({expand_batch_product}); - if (input_tensor.dim() > 1) { - output_shape.push_back(n); + } else if ((inputTensor.dim() >= 1 && otherTensor.dim() >= 1) && (inputTensor.dim() >= 3 || otherTensor.dim() >= 3)) { + int64_t n = inputTensor.dim() > 1 ? inputTensor.shape()[inputTensor.dim() - 2] : 1; + int64_t m1 = inputTensor.shape()[inputTensor.dim() - 1]; + int64_t dataLen = inputTensor.dim() > 2 ? inputTensor.shape().size() - 2 : 0; + std::vector batchTensor1(inputTensor.shape().begin(), inputTensor.shape().begin() + dataLen); + + int64_t m2 = otherTensor.dim() > 1 ? otherTensor.shape()[inputTensor.dim() - 2] : 1; + int64_t p = otherTensor.shape()[otherTensor.dim() - 1]; + dataLen = otherTensor.dim() > 2 ? 
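// Broadcast-batched path: the trailing two dims of each operand are the matrix dims
// (n x m1 and m2 x p); the remaining leading dims are broadcast against each other with
// inferSize, each operand is expanded to that batch shape and viewed as
// (expandBatchProduct, rows, cols) for cnnlBatchMatMulBCast, and the output is reshaped to
// (expandBatchProduct[, n][, p]). Note that m2 is read with index inputTensor.dim() - 2
// rather than otherTensor.dim() - 2, which only matches when both operands have the same rank.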
otherTensor.shape().size() - 2 : 0; + std::vector batchTensor2(otherTensor.shape().begin(), otherTensor.shape().begin() + dataLen); + + std::vector expandBatchPortion = inferSize(batchTensor1, batchTensor2); + std::vector tensor1ExpandSize(expandBatchPortion); + tensor1ExpandSize.insert(tensor1ExpandSize.end(), {n, m1}); + std::vector tensor2ExpandSize(expandBatchPortion); + tensor2ExpandSize.insert(tensor2ExpandSize.end(), {m2, p}); + + int64_t expandBatchProduct = multiplyIntegers(expandBatchPortion); + std::vector tensor1BmmView({expandBatchProduct}); + tensor1BmmView.insert(tensor1BmmView.end(), {n, m1}); + std::vector tensor2BmmView({expandBatchProduct}); + tensor2BmmView.insert(tensor2BmmView.end(), {m2, p}); + + DiopiTensor inputExpand = requiresTensor(ctx, tensor1ExpandSize, inputTensor.dtype()); + DiopiTensor otherExpand = requiresTensor(ctx, tensor2ExpandSize, otherTensor.dtype()); + broadcast(ctx, inputExpand, inputTensor); + broadcast(ctx, otherExpand, otherTensor); + inputExpand.reshape(tensor1BmmView); + otherExpand.reshape(tensor2BmmView); + + std::vector outputShape({expandBatchProduct}); + if (inputTensor.dim() > 1) { + outputShape.push_back(n); } - if (other_tensor.dim() > 1) { - output_shape.push_back(p); + if (otherTensor.dim() > 1) { + outputShape.push_back(p); } - out_tensor.reshape(output_shape); - DIOPI_CALL(batchMatmul(ctx, out_tensor, input_expand, other_expand)); + outTensor.reshape(outputShape); + DIOPI_CALL(batchMatmul(ctx, outTensor, inputExpand, otherExpand)); return diopiSuccess; } - set_last_error_string("both arguments to matmul need to be at least 1D, but they are ", input_tensor.dim(), "D and ", other_tensor.dim(), "D"); + setLastErrorString("both arguments to matmul need to be at least 1D, but they are ", inputTensor.dim(), "D and ", otherTensor.dim(), "D"); return diopiErrorOccurred; } diopiError_t diopiMatmul(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor outTensor(out); - DIOPI_CALL(tensorMatmulTensor(ctx, out_tensor, input_tensor, other_tensor)); + DIOPI_CALL(tensorMatmulTensor(ctx, outTensor, inputTensor, otherTensor)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/max_pool2d.cpp b/DIOPI-IMPL/camb/functions/max_pool2d.cpp index 79755f93a..b0e14ccd0 100644 --- a/DIOPI-IMPL/camb/functions/max_pool2d.cpp +++ b/DIOPI-IMPL/camb/functions/max_pool2d.cpp @@ -5,8 +5,10 @@ */ #include -#include + +#include #include + #include "../cnnl_helper.hpp" #include "../common/common.hpp" @@ -16,12 +18,12 @@ namespace camb { namespace { std::vector getDim(const DiopiTensor& tensor) { - int shape_size = tensor.shape().size(); - std::vector dim; - for (int i = 0; i < shape_size; i++) { - dim.push_back(static_cast(tensor.shape()[i])); + int shapeSize = tensor.shape().size(); + std::vector dim(shapeSize); + for (int i = 0; i < shapeSize; i++) { + dim[i] = static_cast(tensor.shape()[i]); } - if (shape_size == 3) { + if (shapeSize == 3) { dim.insert(dim.begin(), 1); } return dim; @@ -30,247 +32,247 @@ std::vector getDim(const DiopiTensor& tensor) { } // namespace extern "C" { -diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernel_size, - diopiSize_t stride, diopiSize_t padding, diopiSize_t 
dilation, bool ceil_mode) { +diopiError_t diopiMaxPool2d(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t kernelSize, + diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceilMode) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - DIOPI_CHECK(input_tensor.dim() == 3 || input_tensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTensor.dim() == 3 || inputTensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); - - std::vector input_dim = getDim(input_tensor_tmp); - std::vector out_dim = getDim(out_tensor_tmp); - CnnlTensorDesc input_desc; - CnnlTensorDesc out_desc; - input_desc.set(input_tensor_tmp, CNNL_LAYOUT_NCHW, input_dim); - out_desc.set(out_tensor_tmp, CNNL_LAYOUT_NCHW, out_dim); - - const int64_t kernel_h = kernel_size.data[0]; - const int64_t kernel_w = kernel_size.len == 1 ? kernel_h : kernel_size.data[1]; - int64_t stride_h = 0; - int64_t stride_w = 0; + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); + + std::vector inputDim = getDim(inputTensorTmp); + std::vector outDim = getDim(outTensorTmp); + CnnlTensorDesc inputDesc; + CnnlTensorDesc outDesc; + inputDesc.set(inputTensorTmp, CNNL_LAYOUT_NCHW, inputDim); + outDesc.set(outTensorTmp, CNNL_LAYOUT_NCHW, outDim); + + const int64_t kernelH = kernelSize.data[0]; + const int64_t kernelW = kernelSize.len == 1 ? kernelH : kernelSize.data[1]; + int64_t strideH = 0; + int64_t strideW = 0; if (stride.len == 0) { - stride_h = kernel_h; - stride_w = kernel_w; + strideH = kernelH; + strideW = kernelW; } else { - stride_h = stride.data[0]; - stride_w = stride.len == 1 ? stride_h : stride.data[1]; + strideH = stride.data[0]; + strideW = stride.len == 1 ? strideH : stride.data[1]; } - const int64_t pad_h = padding.data[0]; - const int64_t pad_w = padding.len == 1 ? pad_h : padding.data[1]; - const int64_t dilation_0 = dilation.data[0]; - const int64_t dilation_1 = dilation.len == 1 ? dilation_0 : dilation.data[1]; + const int64_t padH = padding.data[0]; + const int64_t padW = padding.len == 1 ? padH : padding.data[1]; + const int64_t dilation0 = dilation.data[0]; + const int64_t dilation1 = dilation.len == 1 ? dilation0 : dilation.data[1]; // calculate padding coefficients auto pl = 0, pr = 0, pu = 0, pd = 0; - pu = pd = pad_h; - pl = pr = pad_w; - if (ceil_mode) { + pu = pd = padH; + pl = pr = padW; + if (ceilMode) { // diff = (out - 1) * stride + kernel_size - input - int diff_height = (out_dim[2] - 1) * stride_h + kernel_h - input_dim[2]; - int diff_width = (out_dim[3] - 1) * stride_w + kernel_w - input_dim[3]; + int diffHeight = (outDim[2] - 1) * strideH + kernelH - inputDim[2]; + int diffWidth = (outDim[3] - 1) * strideW + kernelW - inputDim[3]; // If ceil_mode is set to true, the pad needs to be filled up. // If the offset pad is redundant, it will be removed. - pd = diff_height > pad_h ? diff_height - pad_h : 0; - pr = diff_width > pad_w ? 
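// Worked example of the ceil_mode compensation above (assumed values, for illustration):
//   H_in = 6, kernel_h = 3, stride_h = 2, pad_h = 0, ceil_mode = true
//   H_out = ceil((6 + 2*0 - 3) / 2) + 1 = 3
//   diff_height = (3 - 1) * 2 + 3 - 6 = 1 > pad_h, so pd = 1 extra row of bottom padding
//   is added so that the last pooling window fits inside the padded input.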
diff_width - pad_w : 0; + pd = diffHeight > padH ? diffHeight - padH : 0; + pr = diffWidth > padW ? diffWidth - padW : 0; } - CnnlResourceGuard CnnlPoolDesc; - cnnlPoolingDescriptor_t pool_desc = CnnlPoolDesc.get(); + CnnlResourceGuard cnnlPoolDesc; + cnnlPoolingDescriptor_t poolDesc = cnnlPoolDesc.get(); DIOPI_CALLCNNL(cnnlSetPooling2dDescriptor_v2( - pool_desc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, kernel_h, kernel_w, pu, pd, pl, pr, stride_h, stride_w, dilation_0, dilation_1, ceil_mode)); + poolDesc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, kernelH, kernelW, pu, pd, pl, pr, strideH, strideW, dilation0, dilation1, ceilMode)); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetPoolingWorkspaceSize(handle, CNNL_POOLING_MAX, out_tensor.shape()[3], input_tensor.shape()[2], &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetPoolingWorkspaceSize(handle, CNNL_POOLING_MAX, outTensor.shape()[3], inputTensor.shape()[2], &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } const void* alpha = nullptr; const void* beta = nullptr; DIOPI_CALLCNNL(cnnlPoolingForward( - handle, pool_desc, alpha, input_desc.get(), input_tensor_tmp.data(), beta, out_desc.get(), out_tensor_tmp.data(), workspace, workspace_size)); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + handle, poolDesc, alpha, inputDesc.get(), inputTensorTmp.data(), beta, outDesc.get(), outTensorTmp.data(), workspace, workspaceSize)); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } diopiError_t diopiMaxPool2dWithIndices(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, - diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceil_mode) { + diopiSize_t kernelSize, diopiSize_t stride, diopiSize_t padding, diopiSize_t dilation, bool ceilMode) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); - DiopiTensor indices_tensor(indices); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); + DiopiTensor indicesTensor(indices); - DIOPI_CHECK(input_tensor.dim() == 3 || input_tensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTensor.dim() == 3 || inputTensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); - DiopiTensor indices_tensor_tmp = indices_tensor; - if (indices_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int32)); + DiopiTensor indicesTensorTmp = indicesTensor; + if (indicesTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int32)); } - if (input_tensor_tmp.dtype() == diopi_dtype_float16) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int16)); - } else if 
(input_tensor_tmp.dtype() == diopi_dtype_float32) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int32)); + if (inputTensorTmp.dtype() == diopi_dtype_float16) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int16)); + } else if (inputTensorTmp.dtype() == diopi_dtype_float32) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int32)); } else { DIOPI_CHECK(false, "non-empty 3D or 4D (batch mode) tensor expected for input"); } - std::vector input_dim = getDim(input_tensor_tmp); - std::vector indices_dim = getDim(indices_tensor_tmp); - std::vector out_dim = getDim(out_tensor_tmp); - CnnlTensorDesc input_desc; - CnnlTensorDesc indices_desc; - CnnlTensorDesc out_desc; - input_desc.set(input_tensor_tmp, CNNL_LAYOUT_NCHW, input_dim); - indices_desc.set(indices_tensor_tmp, CNNL_LAYOUT_NCHW, indices_dim); - out_desc.set(out_tensor_tmp, CNNL_LAYOUT_NCHW, out_dim); - - const int64_t kernel_h = kernel_size.data[0]; - const int64_t kernel_w = kernel_size.len == 1 ? kernel_h : kernel_size.data[1]; - int64_t stride_h = 0; - int64_t stride_w = 0; + std::vector inputDim = getDim(inputTensorTmp); + std::vector indicesDim = getDim(indicesTensorTmp); + std::vector outDim = getDim(outTensorTmp); + CnnlTensorDesc inputDesc; + CnnlTensorDesc indicesDesc; + CnnlTensorDesc outDesc; + inputDesc.set(inputTensorTmp, CNNL_LAYOUT_NCHW, inputDim); + indicesDesc.set(indicesTensorTmp, CNNL_LAYOUT_NCHW, indicesDim); + outDesc.set(outTensorTmp, CNNL_LAYOUT_NCHW, outDim); + + const int64_t kernelH = kernelSize.data[0]; + const int64_t kernelW = kernelSize.len == 1 ? kernelH : kernelSize.data[1]; + int64_t strideH = 0; + int64_t strideW = 0; if (stride.len == 0) { - stride_h = kernel_h; - stride_w = kernel_w; + strideH = kernelH; + strideW = kernelW; } else { - stride_h = stride.data[0]; - stride_w = stride.len == 1 ? stride_h : stride.data[1]; + strideH = stride.data[0]; + strideW = stride.len == 1 ? strideH : stride.data[1]; } - const int64_t pad_h = padding.data[0]; - const int64_t pad_w = padding.len == 1 ? pad_h : padding.data[1]; - const int64_t dilation_0 = dilation.data[0]; - const int64_t dilation_1 = dilation.len == 1 ? dilation_0 : dilation.data[1]; + const int64_t padH = padding.data[0]; + const int64_t padW = padding.len == 1 ? padH : padding.data[1]; + const int64_t dilation0 = dilation.data[0]; + const int64_t dilation1 = dilation.len == 1 ? dilation0 : dilation.data[1]; // calculate padding coefficients auto pl = 0, pr = 0, pu = 0, pd = 0; - pu = pd = pad_h; - pl = pr = pad_w; - if (ceil_mode) { + pu = pd = padH; + pl = pr = padW; + if (ceilMode) { // diff = (out - 1) * stride + kernel_size - input - int diff_height = (out_dim[2] - 1) * stride_h + kernel_h - input_dim[2]; - int diff_width = (out_dim[3] - 1) * stride_w + kernel_w - input_dim[3]; + int diffHeight = (outDim[2] - 1) * strideH + kernelH - inputDim[2]; + int diffWidth = (outDim[3] - 1) * strideW + kernelW - inputDim[3]; // If ceil_mode is set to true, the pad needs to be filled up. // If the offset pad is redundant, it will be removed. - pd = diff_height > pad_h ? diff_height - pad_h : 0; - pr = diff_width > pad_w ? diff_width - pad_w : 0; + pd = diffHeight > padH ? diffHeight - padH : 0; + pr = diffWidth > padW ? 
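// The temporary indices tensor mirrors the value dtype: int16 indices for float16 values,
// int32 indices for float32 values; an int64 `indices` output is staged through this temp
// and cast back after cnnlPoolingForwardWithIndex returns.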
diffWidth - padW : 0; } - CnnlResourceGuard CnnlPoolDesc; - cnnlPoolingDescriptor_t pool_desc = CnnlPoolDesc.get(); - int pool_rank_ = kernel_size.len; - if (pool_rank_ == 3) { - std::vector window_{kernel_size.data, kernel_size.data + kernel_size.len}; - std::vector padding_{padding.data, padding.data + padding.len}; - std::vector stride_{stride.data, stride.data + stride.len}; - std::vector dilation_{dilation.data, dilation.data + dilation.len}; + CnnlResourceGuard cnnlPoolDesc; + cnnlPoolingDescriptor_t poolDesc = cnnlPoolDesc.get(); + int poolRank = kernelSize.len; + if (poolRank == 3) { + std::vector window{kernelSize.data, kernelSize.data + kernelSize.len}; + std::vector paddingTmp{padding.data, padding.data + padding.len}; + std::vector strideTmp{stride.data, stride.data + stride.len}; + std::vector dilationTmp{dilation.data, dilation.data + dilation.len}; DIOPI_CALLCNNL(cnnlSetPoolingNdDescriptor_v2( - pool_desc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, pool_rank_ + 2, window_.data(), padding_.data(), stride_.data(), dilation_.data(), ceil_mode)); + poolDesc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, poolRank + 2, window.data(), paddingTmp.data(), strideTmp.data(), dilationTmp.data(), ceilMode)); } else { DIOPI_CALLCNNL(cnnlSetPooling2dDescriptor_v2( - pool_desc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, kernel_h, kernel_w, pu, pd, pl, pr, stride_h, stride_w, dilation_0, dilation_1, ceil_mode)); + poolDesc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, kernelH, kernelW, pu, pd, pl, pr, strideH, strideW, dilation0, dilation1, ceilMode)); } - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetPoolingWithIndexWorkspaceSize(handle, input_desc.get(), out_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetPoolingWithIndexWorkspaceSize(handle, inputDesc.get(), outDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlPoolingForwardWithIndex(handle, - pool_desc, + poolDesc, nullptr, - input_desc.get(), - input_tensor_tmp.data(), + inputDesc.get(), + inputTensorTmp.data(), nullptr, - out_desc.get(), - out_tensor_tmp.data(), - indices_desc.get(), - indices_tensor_tmp.data(), + outDesc.get(), + outTensorTmp.data(), + indicesDesc.get(), + indicesTensorTmp.data(), workspace, - workspace_size)); + workspaceSize)); - if (indices_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int32)); + if (indicesTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int32)); } - DIOPI_CALL(dataTypeCast(ctx, indices_tensor, indices_tensor_tmp)); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + DIOPI_CALL(dataTypeCast(ctx, indicesTensor, indicesTensorTmp)); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } -diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, - diopiConstTensorHandle_t input, diopiSize_t kernel_size, diopiSize_t stride, diopiSize_t padding, - diopiSize_t dilation, bool ceil_mode, diopiConstTensorHandle_t indices) { +diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, + diopiConstTensorHandle_t input, diopiSize_t kernelSize, diopiSize_t stride, diopiSize_t padding, + diopiSize_t dilation, bool 
ceilMode, diopiConstTensorHandle_t indices) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor indices_tensor(indices); + DiopiTensor inputTensor(input); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor indicesTensor(indices); - DIOPI_CHECK(input_tensor.dim() == 3 || input_tensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); + DIOPI_CHECK(inputTensor.dim() == 3 || inputTensor.dim() == 4, "non-empty 3D or 4D (batch mode) tensor expected for input"); - std::vector pTensors{&input_tensor, &grad_output_tensor}; + std::vector pTensors{&inputTensor, &gradOutputTensor}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, {diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor grad_output_tensor_tmp = *pTensors[1]; - DiopiTensor grad_input_tensor_tmp = grad_input_tensor; - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor_tmp, input_tensor_tmp.dtype())); - - DiopiTensor indices_tensor_tmp = indices_tensor; - if (indices_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int32)); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor gradOutputTensorTmp = *pTensors[1]; + DiopiTensor gradInputTensorTmp = gradInputTensor; + DIOPI_CALL(dataTypeCast(ctx, gradInputTensorTmp, inputTensorTmp.dtype())); + + DiopiTensor indicesTensorTmp = indicesTensor; + if (indicesTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int32)); } - if (input_tensor_tmp.dtype() == diopi_dtype_float16) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int16)); - } else if (input_tensor_tmp.dtype() == diopi_dtype_float32) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_tmp, diopi_dtype_int32)); + if (inputTensorTmp.dtype() == diopi_dtype_float16) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int16)); + } else if (inputTensorTmp.dtype() == diopi_dtype_float32) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTmp, diopi_dtype_int32)); } else { DIOPI_CHECK(false, "non-empty 3D or 4D (batch mode) tensor expected for input"); } - diopiTensorHandle_t input_t = nullptr; - diopiTensorHandle_t grad_input_t = nullptr; - diopiTensorHandle_t grad_output_t = nullptr; - diopiTensorHandle_t indices_t = nullptr; + diopiTensorHandle_t inputT = nullptr; + diopiTensorHandle_t gradInputT = nullptr; + diopiTensorHandle_t gradOutputT = nullptr; + diopiTensorHandle_t indicesT = nullptr; - auto permute_to_nhwc = [&](auto src, auto& dst) { - DiopiTensor src_tensor(src); - std::vector src_shape_t_64(src_tensor.shape().size()); + auto permuteToNhwc = [&](auto src, auto& dst) { + DiopiTensor srcTensor(src); + std::vector srcShapeT64(srcTensor.shape().size()); std::vector axis{0, 2, 3, 1}; - if (src_tensor.shape().size() == 3) { + if (srcTensor.shape().size() == 3) { axis.clear(); axis.push_back(1); axis.push_back(2); axis.push_back(0); } - for (int i = 0; i < src_tensor.shape().size(); ++i) { - src_shape_t_64[i] = src_tensor.shape()[axis[i]]; + for (int i = 0; i < srcTensor.shape().size(); ++i) { + srcShapeT64[i] = srcTensor.shape()[axis[i]]; } - diopiSize_t src_t_shape(src_shape_t_64.data(), src_shape_t_64.size()); - DIOPI_CALL(diopiRequireTensor(ctx, &dst, &src_t_shape, nullptr, src_tensor.dtype(), diopi_device)); - if (src_tensor.shape().size() == 4) { + diopiSize_t 
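// The backward pass works in NHWC: input, grad_output, indices and grad_input are each
// permuted NCHW -> NHWC (axis {0, 2, 3, 1}, or CHW -> HWC {1, 2, 0} for 3-D tensors) to match
// the CNNL_LAYOUT_NHWC descriptors passed to cnnlPoolingBackward, and grad_input is permuted
// back to NCHW/CHW at the end.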
srcTShape(srcShapeT64.data(), srcShapeT64.size()); + DIOPI_CALL(diopiRequireTensor(ctx, &dst, &srcTShape, nullptr, srcTensor.dtype(), diopi_device)); + if (srcTensor.shape().size() == 4) { diopiSize_t nchw2nhwc(axis.data(), 4); DIOPI_CALL(diopiPermute(ctx, dst, src, nchw2nhwc)); - } else if (src_tensor.shape().size() == 3) { + } else if (srcTensor.shape().size() == 3) { diopiSize_t chw2hwc(axis.data(), 3); DIOPI_CALL(diopiPermute(ctx, dst, src, chw2hwc)); } else { @@ -279,93 +281,93 @@ diopiError_t diopiMaxPool2dBackward(diopiContextHandle_t ctx, diopiTensorHandle_ return diopiSuccess; }; - DIOPI_CALL(permute_to_nhwc(static_cast(input_tensor_tmp), input_t)); - DIOPI_CALL(permute_to_nhwc(static_cast(grad_input_tensor_tmp), grad_input_t)); - DIOPI_CALL(permute_to_nhwc(static_cast(grad_output_tensor_tmp), grad_output_t)); - DIOPI_CALL(permute_to_nhwc(static_cast(indices_tensor_tmp), indices_t)); - - DiopiTensor input_tensor_t(input_t); - DiopiTensor grad_input_tensor_t(grad_input_t); - DiopiTensor grad_output_tensor_t(grad_output_t); - DiopiTensor indices_tensor_t(indices_t); - - std::vector input_dim = getDim(input_tensor_t); - std::vector grad_input_dim = getDim(grad_input_tensor_t); - std::vector grad_output_dim = getDim(grad_output_tensor_t); - std::vector indices_dim = getDim(indices_tensor_t); - CnnlTensorDesc input_desc; - CnnlTensorDesc grad_input_desc; - CnnlTensorDesc grad_output_desc; - CnnlTensorDesc indices_desc; - input_desc.set(input_tensor_t, CNNL_LAYOUT_NHWC, input_dim); - grad_input_desc.set(grad_input_tensor_t, CNNL_LAYOUT_NHWC, grad_input_dim); - grad_output_desc.set(grad_output_tensor_t, CNNL_LAYOUT_NHWC, grad_output_dim); - indices_desc.set(indices_tensor_t, CNNL_LAYOUT_NHWC, indices_dim); - - const int64_t kernel_h = kernel_size.data[0]; - const int64_t kernel_w = kernel_size.len == 1 ? kernel_h : kernel_size.data[1]; - int64_t stride_h = 0; - int64_t stride_w = 0; + DIOPI_CALL(permuteToNhwc(static_cast(inputTensorTmp), inputT)); + DIOPI_CALL(permuteToNhwc(static_cast(gradInputTensorTmp), gradInputT)); + DIOPI_CALL(permuteToNhwc(static_cast(gradOutputTensorTmp), gradOutputT)); + DIOPI_CALL(permuteToNhwc(static_cast(indicesTensorTmp), indicesT)); + + DiopiTensor inputTensorT(inputT); + DiopiTensor gradInputTensorT(gradInputT); + DiopiTensor gradOutputTensorT(gradOutputT); + DiopiTensor indicesTensorT(indicesT); + + std::vector inputDim = getDim(inputTensorT); + std::vector gradInputDim = getDim(gradInputTensorT); + std::vector gradOutputDim = getDim(gradOutputTensorT); + std::vector indicesDim = getDim(indicesTensorT); + CnnlTensorDesc inputDesc; + CnnlTensorDesc gradInputDesc; + CnnlTensorDesc gradOutputDesc; + CnnlTensorDesc indicesDesc; + inputDesc.set(inputTensorT, CNNL_LAYOUT_NHWC, inputDim); + gradInputDesc.set(gradInputTensorT, CNNL_LAYOUT_NHWC, gradInputDim); + gradOutputDesc.set(gradOutputTensorT, CNNL_LAYOUT_NHWC, gradOutputDim); + indicesDesc.set(indicesTensorT, CNNL_LAYOUT_NHWC, indicesDim); + + const int64_t kernelH = kernelSize.data[0]; + const int64_t kernelW = kernelSize.len == 1 ? kernelH : kernelSize.data[1]; + int64_t strideH = 0; + int64_t strideW = 0; if (stride.len == 0) { - stride_h = kernel_h; - stride_w = kernel_w; + strideH = kernelH; + strideW = kernelW; } else { - stride_h = stride.data[0]; - stride_w = stride.len == 1 ? stride_h : stride.data[1]; + strideH = stride.data[0]; + strideW = stride.len == 1 ? strideH : stride.data[1]; } - const int64_t pad_h = padding.data[0]; - const int64_t pad_w = padding.len == 1 ? 
pad_h : padding.data[1]; - const int64_t dilation_0 = dilation.data[0]; - const int64_t dilation_1 = dilation.len == 1 ? dilation_0 : dilation.data[1]; + const int64_t padH = padding.data[0]; + const int64_t padW = padding.len == 1 ? padH : padding.data[1]; + const int64_t dilation0 = dilation.data[0]; + const int64_t dilation1 = dilation.len == 1 ? dilation0 : dilation.data[1]; // calculate padding coefficients auto pl = 0, pr = 0, pu = 0, pd = 0; - pu = pd = pad_h; - pl = pr = pad_w; - int height = (grad_output_dim[1] - 1) * stride_h + kernel_h; - int width = (grad_output_dim[2] - 1) * stride_w + kernel_w; - if (pad_h + input_dim[1] >= height) { + pu = pd = padH; + pl = pr = padW; + int height = (gradOutputDim[1] - 1) * strideH + kernelH; + int width = (gradOutputDim[2] - 1) * strideW + kernelW; + if (padH + inputDim[1] >= height) { pd = 0; } - if (pad_w + input_dim[2] >= width) { + if (padW + inputDim[2] >= width) { pr = 0; } // if ceil_mode is set to true, the pad needs to be filled up. - if (ceil_mode) { - pd = height - input_dim[1] - pad_h; - pr = width - input_dim[2] - pad_w; + if (ceilMode) { + pd = height - inputDim[1] - padH; + pr = width - inputDim[2] - padW; } - CnnlResourceGuard CnnlPoolDesc; - cnnlPoolingDescriptor_t pool_desc = CnnlPoolDesc.get(); + CnnlResourceGuard cnnlPoolDesc; + cnnlPoolingDescriptor_t poolDesc = cnnlPoolDesc.get(); DIOPI_CALLCNNL(cnnlSetPooling2dDescriptor_v2( - pool_desc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, kernel_h, kernel_w, pu, pd, pl, pr, stride_h, stride_w, dilation_0, dilation_1, ceil_mode)); + poolDesc, CNNL_POOLING_MAX, CNNL_PROPAGATE_NAN, kernelH, kernelW, pu, pd, pl, pr, strideH, strideW, dilation0, dilation1, ceilMode)); DIOPI_CALLCNNL(cnnlPoolingBackward(handle, - pool_desc, + poolDesc, nullptr, - indices_desc.get(), - indices_tensor_t.data(), - grad_output_desc.get(), - grad_output_tensor_t.data(), - input_desc.get(), - input_tensor_t.data(), + indicesDesc.get(), + indicesTensorT.data(), + gradOutputDesc.get(), + gradOutputTensorT.data(), + inputDesc.get(), + inputTensorT.data(), nullptr, - grad_input_desc.get(), - grad_input_tensor_t.data())); - - if (grad_input_tensor_t.shape().size() == 4) { - std::vector perm_nhwc2nchw{0, 3, 1, 2}; - diopiSize_t nhwc2nchw(perm_nhwc2nchw.data(), 4); - DIOPI_CALL(diopiPermute(ctx, static_cast(grad_input_tensor_tmp), grad_input_t, nhwc2nchw)); - } else if (grad_input_tensor_t.shape().size() == 3) { - std::vector perm_hwc2chw{2, 0, 1}; - diopiSize_t hwc2chw(perm_hwc2chw.data(), 3); - DIOPI_CALL(diopiPermute(ctx, static_cast(grad_input_tensor_tmp), grad_input_t, hwc2chw)); + gradInputDesc.get(), + gradInputTensorT.data())); + + if (gradInputTensorT.shape().size() == 4) { + std::vector permNhwc2nchw{0, 3, 1, 2}; + diopiSize_t nhwc2nchw(permNhwc2nchw.data(), 4); + DIOPI_CALL(diopiPermute(ctx, static_cast(gradInputTensorTmp), gradInputT, nhwc2nchw)); + } else if (gradInputTensorT.shape().size() == 3) { + std::vector permHwc2chw{2, 0, 1}; + diopiSize_t hwc2chw(permHwc2chw.data(), 3); + DIOPI_CALL(diopiPermute(ctx, static_cast(gradInputTensorTmp), gradInputT, hwc2chw)); } else { DIOPI_CHECK(false, "non-empty 3D or 4D (batch mode) tensor expected for input"); } - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, grad_input_tensor_tmp)); + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, gradInputTensorTmp)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/meshgrid.cpp b/DIOPI-IMPL/camb/functions/meshgrid.cpp index 2683badd7..b46fe5472 100644 --- a/DIOPI-IMPL/camb/functions/meshgrid.cpp +++ 
b/DIOPI-IMPL/camb/functions/meshgrid.cpp @@ -5,8 +5,10 @@ */ #include -#include + +#include #include + #include "../cnnl_helper.hpp" #include "../common/common.hpp" @@ -18,35 +20,35 @@ extern "C" { diopiError_t diopiMeshGrid(diopiContextHandle_t ctx, diopiTensorHandle_t* outs, diopiConstTensorHandle_t* inputs, int64_t inputsNum) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); for (int i = 0; i < inputsNum; i++) { - DiopiTensor input_tensor(inputs[i]); - DiopiTensor out_tensor(outs[i]); + DiopiTensor inputTensor(inputs[i]); + DiopiTensor outTensor(outs[i]); - auto input_dim = input_tensor.shape(); - auto output_dims = out_tensor.shape(); + auto inputDim = inputTensor.shape(); + auto outputDims = outTensor.shape(); - int tmp_output_dims[8] = {1, 1, 1, 1, 1, 1, 1, 1}; - int tmp_input_dims[8] = {1, 1, 1, 1, 1, 1, 1, 1}; - int repeat_dim0 = 1; - int repeat_dim1 = 1; + int tmpOutputDims[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int tmpInputDims[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int repeatDim0 = 1; + int repeatDim1 = 1; for (int j = 0; j < i; j++) { - repeat_dim0 *= output_dims[j]; + repeatDim0 *= outputDims[j]; } for (int k = i + 1; k < inputsNum; k++) { - repeat_dim1 *= output_dims[k]; + repeatDim1 *= outputDims[k]; } - tmp_output_dims[0] = repeat_dim0 * output_dims[i]; - tmp_output_dims[1] = repeat_dim1; - tmp_input_dims[0] = output_dims[i]; - tmp_input_dims[1] = 1; - - CnnlTensorDesc input_desc; - CnnlTensorDesc out_desc; - std::vector in_dims = {tmp_input_dims[0], tmp_input_dims[1]}; - std::vector out_dims = {tmp_output_dims[0], tmp_output_dims[1]}; - input_desc.set(input_tensor, CNNL_LAYOUT_ARRAY, in_dims); - out_desc.set(out_tensor, CNNL_LAYOUT_ARRAY, out_dims); - - DIOPI_CALLCNNL(cnnlTile(handle, input_desc.get(), input_tensor.data(), out_desc.get(), out_tensor.data())); + tmpOutputDims[0] = repeatDim0 * outputDims[i]; + tmpOutputDims[1] = repeatDim1; + tmpInputDims[0] = outputDims[i]; + tmpInputDims[1] = 1; + + CnnlTensorDesc inputDesc; + CnnlTensorDesc outDesc; + std::vector inDims = {tmpInputDims[0], tmpInputDims[1]}; + std::vector outDims = {tmpOutputDims[0], tmpOutputDims[1]}; + inputDesc.set(inputTensor, CNNL_LAYOUT_ARRAY, inDims); + outDesc.set(outTensor, CNNL_LAYOUT_ARRAY, outDims); + + DIOPI_CALLCNNL(cnnlTile(handle, inputDesc.get(), inputTensor.data(), outDesc.get(), outTensor.data())); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/mul.cpp b/DIOPI-IMPL/camb/functions/mul.cpp index da6ce14e1..76b9171e1 100644 --- a/DIOPI-IMPL/camb/functions/mul.cpp +++ b/DIOPI-IMPL/camb/functions/mul.cpp @@ -15,33 +15,33 @@ extern "C" { diopiError_t diopiMul(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor outTensor(out); - DiopiTensor out_tensor_tmp = out_tensor; - if ((out_tensor.dtype() != diopi_dtype_float16) && (out_tensor.dtype() != diopi_dtype_float32)) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, diopi_dtype_float32)); + DiopiTensor outTensorTmp = outTensor; + if ((outTensor.dtype() != diopi_dtype_float16) && (outTensor.dtype() != diopi_dtype_float32)) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, diopi_dtype_float32)); } - DIOPI_CALL(dataTypeCast(ctx, input_tensor, out_tensor_tmp.dtype())); - DIOPI_CALL(dataTypeCast(ctx, other_tensor, out_tensor_tmp.dtype())); + 
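// diopiMul flow: the output temp is forced to float16/float32, both operands are cast to that
// dtype, broadcastHelper expands them to the output shape, cnnlMulN multiplies the two
// broadcast tensors, and the result is cast back when the real output dtype differs.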
DIOPI_CALL(dataTypeCast(ctx, inputTensor, outTensorTmp.dtype())); + DIOPI_CALL(dataTypeCast(ctx, otherTensor, outTensorTmp.dtype())); - DiopiTensor bcast_input_tensor; - broadcastHelper(ctx, input_tensor, out_tensor_tmp, &bcast_input_tensor); - DiopiTensor bcast_other_tensor; - broadcastHelper(ctx, other_tensor, out_tensor_tmp, &bcast_other_tensor); + DiopiTensor bcastInputTensor; + broadcastHelper(ctx, inputTensor, outTensorTmp, &bcastInputTensor); + DiopiTensor bcastOtherTensor; + broadcastHelper(ctx, otherTensor, outTensorTmp, &bcastOtherTensor); - CnnlTensorDesc bcast_input_desc(bcast_input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc bcast_other_desc(bcast_other_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc bcastInputDesc(bcastInputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc bcastOtherDesc(bcastOtherTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); - cnnlTensorDescriptor_t input_descs[] = {bcast_input_desc.get(), bcast_other_desc.get()}; - const void* inputs[] = {bcast_input_tensor.data(), bcast_other_tensor.data()}; + cnnlTensorDescriptor_t inputDescs[] = {bcastInputDesc.get(), bcastOtherDesc.get()}; + const void* inputs[] = {bcastInputTensor.data(), bcastOtherTensor.data()}; - DIOPI_CALLCNNL(cnnlMulN(handle, input_descs, inputs, 2, out_desc.get(), out_tensor_tmp.data())) - if (out_tensor_tmp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + DIOPI_CALLCNNL(cnnlMulN(handle, inputDescs, inputs, 2, outDesc.get(), outTensorTmp.data())) + if (outTensorTmp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); } return diopiSuccess; } @@ -53,12 +53,12 @@ diopiError_t diopiMulInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, di diopiError_t diopiMulScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); - DiopiTensor other_tensor_tmp; - makeTensorFromScalar(ctx, other, other_tensor_tmp); - auto other_tensor = other_tensor_tmp.tensorHandle(); - DIOPI_CALL(diopiMul(ctx, out, input, diopiTensorHandle_t(other_tensor))); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); + DiopiTensor otherTensorTmp; + makeTensorFromScalar(ctx, other, otherTensorTmp); + auto otherTensor = otherTensorTmp.tensorHandle(); + DIOPI_CALL(diopiMul(ctx, out, input, diopiTensorHandle_t(otherTensor))); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/multinomial.cpp b/DIOPI-IMPL/camb/functions/multinomial.cpp index ea69207af..32584ecb3 100644 --- a/DIOPI-IMPL/camb/functions/multinomial.cpp +++ b/DIOPI-IMPL/camb/functions/multinomial.cpp @@ -20,34 +20,34 @@ class CnnlRandGenerator final { cnnlRandGenerator_t& get() { return resource_; } private: - cnnlRandGenerator_t resource_{0}; + cnnlRandGenerator_t resource_{nullptr}; }; -diopiError_t diopiMultinomial(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t num_samples, bool replacement) { +diopiError_t diopiMultinomial(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t numSamples, bool replacement) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); CnnlRandGenerator cnnlGenerator; cnnlRandGenerator_t generator = cnnlGenerator.get(); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); - DiopiTensor 
out_temp = out_tensor; - DIOPI_CALL(autoCastTensorType(ctx, {&input_tensor}, {diopi_dtype_float16, diopi_dtype_float32})); - DIOPI_CALL(autoCastTensorType(ctx, {&out_temp}, {diopi_dtype_int32})); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); + DiopiTensor outTemp = outTensor; + DIOPI_CALL(autoCastTensorType(ctx, {&inputTensor}, {diopi_dtype_float16, diopi_dtype_float32})); + DIOPI_CALL(autoCastTensorType(ctx, {&outTemp}, {diopi_dtype_int32})); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size; - DIOPI_CALLCNNL(cnnlGetRandGenerateMultinomialWorkspaceSize(handle, inputDesc.get(), &workspace_size)); + size_t workspaceSize; + DIOPI_CALLCNNL(cnnlGetRandGenerateMultinomialWorkspaceSize(handle, inputDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (workspace_size > 0) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (workspaceSize > 0) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlRandGenerateMultinomial_v2( - handle, generator, inputDesc.get(), input_tensor.data(), replacement, false, nullptr, workspace, workspace_size, outDesc.get(), out_temp.data())); - if (out_tensor.dtype() != out_temp.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp)); + handle, generator, inputDesc.get(), inputTensor.data(), replacement, false, nullptr, workspace, workspaceSize, outDesc.get(), outTemp.data())); + if (outTensor.dtype() != outTemp.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTemp)); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/neg.cpp b/DIOPI-IMPL/camb/functions/neg.cpp index dae41d061..76a0ccbb2 100644 --- a/DIOPI-IMPL/camb/functions/neg.cpp +++ b/DIOPI-IMPL/camb/functions/neg.cpp @@ -1,7 +1,9 @@ #include -#include + +#include #include + #include "../cnnl_helper.hpp" #include "../common/common.hpp" @@ -12,20 +14,20 @@ extern "C" { diopiError_t diopiNeg(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); - - CnnlTensorDesc input_desc(input_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlNegTensor(handle, input_desc.get(), input_tensor_tmp.data(), out_desc.get(), out_tensor_tmp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); + + CnnlTensorDesc inputDesc(inputTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlNegTensor(handle, inputDesc.get(), inputTensorTmp.data(), outDesc.get(), outTensorTmp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } diff --git 
a/DIOPI-IMPL/camb/functions/nonzero.cpp b/DIOPI-IMPL/camb/functions/nonzero.cpp index e4946b188..60ffc8f61 100644 --- a/DIOPI-IMPL/camb/functions/nonzero.cpp +++ b/DIOPI-IMPL/camb/functions/nonzero.cpp @@ -9,53 +9,53 @@ namespace camb { extern "C" { -diopiError_t nonzeroCount(diopiContextHandle_t ctx, DiopiTensor input_tensor, DiopiTensor* num_true) { +diopiError_t nonzeroCount(diopiContextHandle_t ctx, DiopiTensor inputTensor, DiopiTensor* numTrue) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); std::vector shape = {1}; - *num_true = requiresTensor(ctx, shape, diopi_dtype_int32); - CnnlTensorDesc num_trueDesc(*num_true, CNNL_LAYOUT_ARRAY); + *numTrue = requiresTensor(ctx, shape, diopi_dtype_int32); + CnnlTensorDesc numTrueDesc(*numTrue, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlNumTrue_v2(handle, inputDesc.get(), input_tensor.data(), num_trueDesc.get(), num_true->data())); + DIOPI_CALLCNNL(cnnlNumTrue_v2(handle, inputDesc.get(), inputTensor.data(), numTrueDesc.get(), numTrue->data())); return diopiSuccess; } diopiError_t diopiNonzero(diopiContextHandle_t ctx, diopiTensorHandle_t* out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (DiopiDataType::isInteger(input_tensor.dtype())) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_int32)); - } else if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (DiopiDataType::isInteger(inputTensor.dtype())) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_int32)); + } else if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); - DiopiTensor num_true; - nonzeroCount(ctx, input_tensor, &num_true); - CnnlTensorDesc num_trueDesc(num_true, CNNL_LAYOUT_ARRAY); + DiopiTensor numTrue; + nonzeroCount(ctx, inputTensor, &numTrue); + CnnlTensorDesc numTrueDesc(numTrue, CNNL_LAYOUT_ARRAY); - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetWhereWorkspaceSize(handle, num_trueDesc.get(), &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetWhereWorkspaceSize(handle, numTrueDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } syncStreamInCtx(ctx); int32_t count = 0; - cnrtMemcpy(&count, num_true.data(), sizeof(int32_t), CNRT_MEM_TRANS_DIR_DEV2HOST); + cnrtMemcpy(&count, numTrue.data(), sizeof(int32_t), CNRT_MEM_TRANS_DIR_DEV2HOST); std::vector shape(2); shape[0] = count; - shape[1] = input_tensor.dim(); - auto out_tensor = requiresTensor(ctx, shape, diopi_dtype_int32); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + shape[1] = inputTensor.dim(); + auto outTensor = requiresTensor(ctx, shape, diopi_dtype_int32); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlWhere_v2( - handle, inputDesc.get(), input_tensor.data(), num_trueDesc.get(), num_true.data(), false, workspace, workspace_size, outDesc.get(), out_tensor.data())); - *out = diopiTensorHandle_t(out_tensor); + handle, inputDesc.get(), inputTensor.data(), numTrueDesc.get(), numTrue.data(), 
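// diopiNonzero flow: cnnlNumTrue_v2 counts the non-zero elements on device, the stream is
// synchronized and the count copied to host with cnrtMemcpy, a (count, input.dim()) int32
// result tensor is allocated, and cnnlWhere_v2 writes the coordinates into it.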
false, workspace, workspaceSize, outDesc.get(), outTensor.data())); + *out = diopiTensorHandle_t(outTensor); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/one_hot.cpp b/DIOPI-IMPL/camb/functions/one_hot.cpp index f33b964e7..c76542977 100644 --- a/DIOPI-IMPL/camb/functions/one_hot.cpp +++ b/DIOPI-IMPL/camb/functions/one_hot.cpp @@ -15,58 +15,58 @@ extern "C" { diopiError_t maxAll(diopiContextHandle_t ctx, diopiTensorHandle_t max, diopiTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(max); - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc(output_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(max); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc(outputTensor, CNNL_LAYOUT_ARRAY); - DIOPI_CHECK(input_tensor.dtype() == output_tensor.dtype(), "input->dtype should equal to output->dtype"); + DIOPI_CHECK(inputTensor.dtype() == outputTensor.dtype(), "input->dtype should equal to output->dtype"); cnnlDataType_t dtype; - DIOPI_CALL(CnnlDataType::convertToCnnlType(&dtype, input_tensor.dtype())); - std::vector dims(input_tensor.dim()); - for (int i = 0; i < input_tensor.dim(); i++) { + DIOPI_CALL(CnnlDataType::convertToCnnlType(&dtype, inputTensor.dtype())); + std::vector dims(inputTensor.dim()); + for (int i = 0; i < inputTensor.dim(); i++) { dims[i] = i; } - diopiSize_t dim = {dims.data(), input_tensor.dim()}; - CnnlReduceDescriptor reduce_desc; - reduce_desc.set(input_tensor, dims, CNNL_REDUCE_MAX, CNNL_REDUCE_NO_INDICES, CNNL_32BIT_INDICES, dtype); + diopiSize_t dim = {dims.data(), inputTensor.dim()}; + CnnlReduceDescriptor reduceDesc; + reduceDesc.set(inputTensor, dims, CNNL_REDUCE_MAX, CNNL_REDUCE_NO_INDICES, CNNL_32BIT_INDICES, dtype); - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetReduceOpWorkspaceSize(handle, input_desc.get(), output_desc.get(), reduce_desc.get(), &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetReduceOpWorkspaceSize(handle, inputDesc.get(), outputDesc.get(), reduceDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - size_t indices_size_inbytes(0); + size_t indicesSizeInbytes(0); void* indices = nullptr; void* alpha = nullptr; void* beta = nullptr; DIOPI_CALLCNNL(cnnlReduce(handle, - reduce_desc.get(), + reduceDesc.get(), workspace, - workspace_size, + workspaceSize, alpha, - input_desc.get(), - input_tensor.data(), - indices_size_inbytes, + inputDesc.get(), + inputTensor.data(), + indicesSizeInbytes, indices, beta, - output_desc.get(), - output_tensor.data())); + outputDesc.get(), + outputTensor.data())); return diopiSuccess; } -diopiError_t getClassNum(diopiContextHandle_t ctx, DiopiTensor input_tensor, int32_t* cls_num) { +diopiError_t getClassNum(diopiContextHandle_t ctx, DiopiTensor inputTensor, int32_t* clsNum) { std::vector dims(1, 1); - DiopiTensor max = requiresTensor(ctx, dims, input_tensor.dtype()); - DIOPI_CALL(maxAll(ctx, (diopiTensorHandle_t)max, (diopiTensorHandle_t)input_tensor)); + DiopiTensor max = requiresTensor(ctx, dims, inputTensor.dtype()); + DIOPI_CALL(maxAll(ctx, (diopiTensorHandle_t)max, (diopiTensorHandle_t)inputTensor)); syncStreamInCtx(ctx); int32_t* ptr = reinterpret_cast(malloc(max.numel() * sizeof(int32_t))); cnrtMemcpy(ptr, 
max.data(), max.numel() * sizeof(int32_t), cnrtMemcpyDevToHost); - *cls_num = *ptr + 1; + *clsNum = *ptr + 1; free(ptr); return diopiSuccess; @@ -75,59 +75,59 @@ diopiError_t getClassNum(diopiContextHandle_t ctx, DiopiTensor input_tensor, int diopiError_t diopiOneHot(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t numClasses) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); // input must be int32 - if (diopi_dtype_int32 != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_int32)); + if (diopi_dtype_int32 != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_int32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); - int32_t cls_num = 0; + int32_t clsNum = 0; if (-1 == numClasses) { - DIOPI_CALL(getClassNum(ctx, input_tensor, &cls_num)); + DIOPI_CALL(getClassNum(ctx, inputTensor, &clsNum)); } else { - cls_num = numClasses; + clsNum = numClasses; } - diopiTensorHandle_t on_value, off_value; + diopiTensorHandle_t onValue, offValue; std::vector dims(1, 1); diopiSize_t shape(dims.data(), 1); - DIOPI_CALL(diopiRequireTensor(ctx, &on_value, &shape, nullptr, diopi_dtype_int32, diopi_device)); - DIOPI_CALL(diopiRequireTensor(ctx, &off_value, &shape, nullptr, diopi_dtype_int32, diopi_device)); - DiopiTensor on_value_tensor(on_value); - DiopiTensor off_value_tensor(off_value); - CnnlTensorDesc on_tensor_desc(on_value_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc off_tensor_desc(off_value_tensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALL(diopiRequireTensor(ctx, &onValue, &shape, nullptr, diopi_dtype_int32, diopi_device)); + DIOPI_CALL(diopiRequireTensor(ctx, &offValue, &shape, nullptr, diopi_dtype_int32, diopi_device)); + DiopiTensor onValueTensor(onValue); + DiopiTensor offValueTensor(offValue); + CnnlTensorDesc onTensorDesc(onValueTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc offTensorDesc(offValueTensor, CNNL_LAYOUT_ARRAY); int32_t one = 1; int32_t zero = 0; - DIOPI_CALLCNNL(cnnlFill_v3(handle, CNNL_POINTER_MODE_HOST, &one, on_tensor_desc.get(), on_value_tensor.data())); - DIOPI_CALLCNNL(cnnlFill_v3(handle, CNNL_POINTER_MODE_HOST, &zero, off_tensor_desc.get(), off_value_tensor.data())); + DIOPI_CALLCNNL(cnnlFill_v3(handle, CNNL_POINTER_MODE_HOST, &one, onTensorDesc.get(), onValueTensor.data())); + DIOPI_CALLCNNL(cnnlFill_v3(handle, CNNL_POINTER_MODE_HOST, &zero, offTensorDesc.get(), offValueTensor.data())); int axis = -1; // output must be int32, float16, float32 - if (diopi_dtype_int32 != out_tensor.dtype()) { - DiopiTensor out32_tensor = requiresTensor(ctx, out_tensor.shape(), diopi_dtype_int32); + if (diopi_dtype_int32 != outTensor.dtype()) { + DiopiTensor out32Tensor = requiresTensor(ctx, outTensor.shape(), diopi_dtype_int32); DIOPI_CALLCNNL(cnnlOneHot(handle, inputDesc.get(), - input_tensor.data(), - cls_num, - on_value_tensor.data(), - off_value_tensor.data(), + inputTensor.data(), + clsNum, + onValueTensor.data(), + offValueTensor.data(), axis, CNNL_DTYPE_INT32, - out32_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out32_tensor)); + out32Tensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, out32Tensor)); } else { DIOPI_CALLCNNL(cnnlOneHot(handle, inputDesc.get(), - input_tensor.data(), - cls_num, - on_value_tensor.data(), - off_value_tensor.data(), + inputTensor.data(), + clsNum, + 
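// When numClasses == -1, getClassNum derives the class count on the fly: maxAll reduces the
// input over all dims, the scalar max is copied to host, and max + 1 is used as clsNum.
// The on/off value tensors are filled with 1 and 0 via cnnlFill_v3, and the one-hot result is
// computed as int32 and cast back when out has a different dtype.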
onValueTensor.data(), + offValueTensor.data(), axis, CNNL_DTYPE_INT32, - out_tensor.data())); + outTensor.data())); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/permute.cpp b/DIOPI-IMPL/camb/functions/permute.cpp index 6afbf9796..a3f990214 100644 --- a/DIOPI-IMPL/camb/functions/permute.cpp +++ b/DIOPI-IMPL/camb/functions/permute.cpp @@ -18,43 +18,43 @@ namespace camb { extern "C" diopiError_t diopiPermute(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dims) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - if (diopi_dtype_float64 == input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + if (diopi_dtype_float64 == inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc(output_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc(outputTensor, CNNL_LAYOUT_ARRAY); - std::vector perm_data{dims.data, dims.data + dims.len}; - for (int i = 0; i < perm_data.size(); i++) { - if (perm_data[i] < 0) { - perm_data[i] += input_tensor.dim(); + std::vector permData{dims.data, dims.data + dims.len}; + for (int & i : permData) { + if (i < 0) { + i += inputTensor.dim(); } } - CnnlResourceGuard trans_desc; + CnnlResourceGuard transDesc; - const std::vector src_input_shape = input_tensor.shape(); - int dim_num = src_input_shape.size(); - DIOPI_CALLCNNL(cnnlSetTransposeDescriptor(trans_desc.get(), dim_num, perm_data.data())); - size_t workspace_size; - DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize(handle, input_desc.get(), trans_desc.get(), &workspace_size)); + const std::vector srcInputShape = inputTensor.shape(); + int dimNum = srcInputShape.size(); + DIOPI_CALLCNNL(cnnlSetTransposeDescriptor(transDesc.get(), dimNum, permData.data())); + size_t workspaceSize; + DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize(handle, inputDesc.get(), transDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - if (input_tensor.dtype() == output_tensor.dtype()) { + if (inputTensor.dtype() == outputTensor.dtype()) { DIOPI_CALLCNNL(cnnlTranspose_v2( - handle, trans_desc.get(), input_desc.get(), input_tensor.data(), output_desc.get(), output_tensor.data(), workspace, workspace_size)); + handle, transDesc.get(), inputDesc.get(), inputTensor.data(), outputDesc.get(), outputTensor.data(), workspace, workspaceSize)); } else { - DiopiTensor out_temp = requiresTensor(ctx, output_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_temp_desc(out_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor outTemp = requiresTensor(ctx, outputTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTemp, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL( - cnnlTranspose_v2(handle, trans_desc.get(), input_desc.get(), input_tensor.data(), out_temp_desc.get(), out_temp.data(), workspace, workspace_size)); - DIOPI_CALL(dataTypeCast(ctx, output_tensor, out_temp)); + cnnlTranspose_v2(handle, transDesc.get(), inputDesc.get(), inputTensor.data(), outTempDesc.get(), outTemp.data(), workspace, workspaceSize)); + DIOPI_CALL(dataTypeCast(ctx, outputTensor, outTemp)); } return 
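// diopiPermute normalizes negative axes in permData, builds a cnnlTransposeDescriptor from
// them, queries the transpose workspace, and runs cnnlTranspose_v2 either directly into out
// or, when dtypes differ, into a temp tensor of the input dtype followed by dataTypeCast.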
diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/pow.cpp b/DIOPI-IMPL/camb/functions/pow.cpp index c57c1eb73..29f712df4 100644 --- a/DIOPI-IMPL/camb/functions/pow.cpp +++ b/DIOPI-IMPL/camb/functions/pow.cpp @@ -9,50 +9,50 @@ extern "C" { diopiError_t diopiPowTensor(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t exponent) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor exponent_tensor(exponent); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor exponentTensor(exponent); + DiopiTensor outTensor(out); - std::vector pTensors_in{&input_tensor}; + std::vector pTensorsIn{&inputTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, pTensors_in, supportedDtypes)); - DiopiTensor input_tensor_tmp = *pTensors_in[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); + DIOPI_CALL(autoCastTensorType(ctx, pTensorsIn, supportedDtypes)); + DiopiTensor inputTensorTmp = *pTensorsIn[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); - CnnlTensorDesc input_desc(input_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); - std::vector pTensors_exp{&exponent_tensor}; - if (input_tensor.dtype() == diopi_dtype_float16) { - DIOPI_CALL(autoCastTensorType(ctx, pTensors_exp, {diopi_dtype_float16, diopi_dtype_int16})); - } else if (input_tensor.dtype() == diopi_dtype_float32) { - DIOPI_CALL(autoCastTensorType(ctx, pTensors_exp, {diopi_dtype_float32, diopi_dtype_int16})); + std::vector pTensorsExp{&exponentTensor}; + if (inputTensor.dtype() == diopi_dtype_float16) { + DIOPI_CALL(autoCastTensorType(ctx, pTensorsExp, {diopi_dtype_float16, diopi_dtype_int16})); + } else if (inputTensor.dtype() == diopi_dtype_float32) { + DIOPI_CALL(autoCastTensorType(ctx, pTensorsExp, {diopi_dtype_float32, diopi_dtype_int16})); } else { DIOPI_CHECK(false, "input datatype not supported, only float16, float32 supported"); } - DiopiTensor exponent_tensor_tmp = *pTensors_exp[0]; - CnnlTensorDesc exponent_desc(exponent_tensor_tmp, CNNL_LAYOUT_ARRAY); + DiopiTensor exponentTensorTmp = *pTensorsExp[0]; + CnnlTensorDesc exponentDesc(exponentTensorTmp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetPowWorkspaceSize(handle, input_desc.get(), exponent_desc.get(), out_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetPowWorkspaceSize(handle, inputDesc.get(), exponentDesc.get(), outDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlPow(handle, CNNL_COMPUTATION_HIGH_PRECISION, - input_desc.get(), - input_tensor_tmp.data(), - exponent_desc.get(), - exponent_tensor_tmp.data(), + inputDesc.get(), + inputTensorTmp.data(), + exponentDesc.get(), + exponentTensorTmp.data(), workspace, - workspace_size, - out_desc.get(), - out_tensor_tmp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + workspaceSize, + outDesc.get(), + outTensorTmp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); 
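// Reviewer note (illustration only, not part of the patch): diopiPowTensor above follows the
// dtype-widening pattern used across this backend -- compute in a CNNL-supported dtype, then
// cast back into the caller's out tensor. A minimal sketch reusing the existing helpers
// (the pointer-vector element type is an assumption here):
//   std::vector<DiopiTensor*> inputs{&inputTensor};
//   DIOPI_CALL(autoCastTensorType(ctx, inputs, {diopi_dtype_float16, diopi_dtype_float32}));
//   DiopiTensor outTmp = outTensor;                        // temporary result in the computed dtype
//   DIOPI_CALL(dataTypeCast(ctx, outTmp, inputs[0]->dtype()));
//   /* ... run the CNNL kernel into outTmp ... */
//   DIOPI_CALL(dataTypeCast(ctx, outTensor, outTmp));      // cast back to the requested dtype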
return diopiSuccess; } @@ -62,9 +62,9 @@ diopiError_t diopiPowInpTensor(diopiContextHandle_t ctx, diopiTensorHandle_t inp } diopiError_t diopiPow(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* exponent) { - DiopiTensor exponent_tensor; - makeTensorFromScalar(ctx, exponent, exponent_tensor); - DIOPI_CALL(diopiPowTensor(ctx, out, input, static_cast(exponent_tensor))); + DiopiTensor exponentTensor; + makeTensorFromScalar(ctx, exponent, exponentTensor); + DIOPI_CALL(diopiPowTensor(ctx, out, input, static_cast(exponentTensor))); return diopiSuccess; } @@ -74,9 +74,9 @@ diopiError_t diopiPowInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, co } diopiError_t diopiPowScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, const diopiScalar_t* input, diopiConstTensorHandle_t exponent) { - DiopiTensor input_tensor; - makeTensorFromScalar(ctx, input, input_tensor); - DIOPI_CALL(diopiPowTensor(ctx, out, static_cast(input_tensor), exponent)); + DiopiTensor inputTensor; + makeTensorFromScalar(ctx, input, inputTensor); + DIOPI_CALL(diopiPowTensor(ctx, out, static_cast(inputTensor), exponent)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/random.cpp b/DIOPI-IMPL/camb/functions/random.cpp index 52ea2dada..1447ae658 100644 --- a/DIOPI-IMPL/camb/functions/random.cpp +++ b/DIOPI-IMPL/camb/functions/random.cpp @@ -5,8 +5,8 @@ */ #include -#include +#include #include #include "../cnnl_helper.hpp" @@ -33,7 +33,7 @@ extern "C" diopiError_t diopiRandomInp(diopiContextHandle_t ctx, diopiTensorHand } DIOPI_CALLCNNL(cnnlRandGenerateUniform(handle, generator, dtype, nullptr, tensor.numel(), min, max, tensor.data())); } else { - set_last_error_string("%s%d", "cnnl random not support datatype: ", dtype); + setLastErrorString("%s%d", "cnnl random not support datatype: ", dtype); return diopiDtypeNotSupported; } DIOPI_CALLCNNL(cnnlRandDestroyGenerator(generator)); diff --git a/DIOPI-IMPL/camb/functions/randperm.cpp b/DIOPI-IMPL/camb/functions/randperm.cpp index 160f48503..4d097d885 100644 --- a/DIOPI-IMPL/camb/functions/randperm.cpp +++ b/DIOPI-IMPL/camb/functions/randperm.cpp @@ -8,6 +8,7 @@ #include #include +#include #include "../cnnl_helper.hpp" @@ -16,13 +17,13 @@ namespace camb { namespace { template -diopiError_t randperm_func(DiopiTensor tensor, int64_t n, int64_t idx) { +diopiError_t randpermFunc(DiopiTensor tensor, int64_t n, int64_t idx) { std::vector vec(n); std::iota(vec.begin(), vec.end(), 0); - std::random_shuffle(vec.begin(), vec.end()); + std::shuffle(vec.begin(), vec.end(), std::mt19937(std::random_device()())); auto ret = cnrtMemcpy(tensor.data(), vec.data(), sizeof(T) * n, cnrtMemcpyHostToDev); if (ret != cnrtSuccess) { - set_last_error_string("%s%d", "cnrt memcpy error, ret = ", ret); + setLastErrorString("%s%d", "cnrt memcpy error, ret = ", ret); return diopiErrorOccurred; } return diopiSuccess; @@ -30,13 +31,13 @@ diopiError_t randperm_func(DiopiTensor tensor, int64_t n, int64_t idx) { } // namespace extern "C" diopiError_t diopiRandperm(diopiContextHandle_t ctx, diopiTensorHandle_t out, int64_t n, int64_t idx) { - DiopiTensor out_tensor(out); - if (out_tensor.dtype() == diopi_dtype_int32) { - DIOPI_CALL(randperm_func(out_tensor, n, idx)); - } else if (out_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(randperm_func(out_tensor, n, idx)); + DiopiTensor outTensor(out); + if (outTensor.dtype() == diopi_dtype_int32) { + DIOPI_CALL(randpermFunc(outTensor, n, idx)); + } else if (outTensor.dtype() == diopi_dtype_int64) 
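// Reviewer note (illustration only, not part of the patch): randpermFunc above swaps
// std::random_shuffle (removed in C++17) for std::shuffle with an explicit URBG, in line with
// clang-tidy's modernize checks. A standalone sketch (requires <random>, <algorithm>, <numeric>):
//   std::vector<int32_t> vec(n);
//   std::iota(vec.begin(), vec.end(), 0);
//   std::shuffle(vec.begin(), vec.end(), std::mt19937(std::random_device()()));
// Seeding a fresh std::mt19937 per call is simple but not reproducible; a cached generator
// could be substituted if deterministic permutations are ever needed.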
{ + DIOPI_CALL(randpermFunc(outTensor, n, idx)); } else { - set_last_error_string("randperm not support datatype %d.\n", out_tensor.dtype()); + setLastErrorString("randperm not support datatype %d.\n", outTensor.dtype()); return diopi5DNotSupported; } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/reciprocal.cpp b/DIOPI-IMPL/camb/functions/reciprocal.cpp index 2758d3f5e..0aff30290 100644 --- a/DIOPI-IMPL/camb/functions/reciprocal.cpp +++ b/DIOPI-IMPL/camb/functions/reciprocal.cpp @@ -15,20 +15,20 @@ extern "C" { diopiError_t diopiReciprocal(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - diopiDtype_t origin_dtype = input_tensor.dtype(); - std::vector pTensors{&input_tensor, &out_tensor}; + diopiDtype_t originDtype = inputTensor.dtype(); + std::vector pTensors{&inputTensor, &outTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlReciprocal(handle, input_desc.get(), input_tensor.data(), out_desc.get(), out_tensor.data())); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlReciprocal(handle, inputDesc.get(), inputTensor.data(), outDesc.get(), outTensor.data())); - if (origin_dtype == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, origin_dtype)); + if (originDtype == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, originDtype)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/reduce.cpp b/DIOPI-IMPL/camb/functions/reduce.cpp index ec3dc706e..55b36b483 100644 --- a/DIOPI-IMPL/camb/functions/reduce.cpp +++ b/DIOPI-IMPL/camb/functions/reduce.cpp @@ -18,41 +18,41 @@ namespace camb { Get real sorted dim eg: getRealDims({-1,1,2,3,-2}, 5) -> {1,2,3,4} */ -std::vector getRealDims(std::vector input_dim, int64_t t_dim) { +std::vector getRealDims(std::vector inputDim, int64_t tDim) { // handle negative dims - for (int64_t i = 0; i < input_dim.size(); ++i) { - if (input_dim[i] < 0) { - input_dim[i] = input_dim[i] + t_dim; + for (int64_t & i : inputDim) { + if (i < 0) { + i = i + tDim; } } // remove duplicate dims and sort them - std::vector dim_vec(input_dim); - std::set s(dim_vec.begin(), dim_vec.end()); - dim_vec.assign(s.begin(), s.end()); - return dim_vec; + std::vector dimVec(inputDim); + std::set s(dimVec.begin(), dimVec.end()); + dimVec.assign(s.begin(), s.end()); + return dimVec; } -std::vector infer_desc_shape(std::vector input_dim, std::vector reduce_dim, bool keepdim) { - std::vector output_dim(input_dim.begin(), input_dim.end()); - if (input_dim.size() == 0) { - return output_dim; +std::vector inferDescShape(std::vector inputDim, std::vector reduceDim, bool keepdim) { + std::vector outputDim(inputDim.begin(), inputDim.end()); + if (inputDim.empty()) { + return outputDim; } int num = 0; - for (auto i : reduce_dim) { + for (auto i : reduceDim) { if (keepdim) { - output_dim[i] = 1; + outputDim[i] = 1; } else { - auto it = output_dim.begin() + i - num; - output_dim.erase(it); + auto it = outputDim.begin() + i - num; + outputDim.erase(it); num++; } } - return output_dim; + return outputDim; } struct 
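// Reviewer note (illustration only, not part of the patch): worked examples for the two shape
// helpers above, following the semantics shown in their bodies:
//   getRealDims({-1, 1, 2, 3, -2}, 5)                 -> {1, 2, 3, 4}   // negatives wrapped, sorted, deduplicated
//   inferDescShape({2, 3, 4}, {1}, /*keepdim=*/true)  -> {2, 1, 4}
//   inferDescShape({2, 3, 4}, {1}, /*keepdim=*/false) -> {2, 4}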
HashCnnlReduceOp { int64_t operator()(const cnnlReduceOp_t& reduceOp) const { return static_cast(reduceOp); } }; -static std::unordered_map, HashCnnlReduceOp> supported_type_table = { +static std::unordered_map, HashCnnlReduceOp> supportedTypeTable = { {CNNL_REDUCE_ADD, {diopi_dtype_float16, diopi_dtype_float32}}, {CNNL_REDUCE_AVG, {diopi_dtype_float16, diopi_dtype_float32}}, {CNNL_REDUCE_MUL, {diopi_dtype_int32, diopi_dtype_float16, diopi_dtype_float32}}, @@ -63,98 +63,98 @@ static std::unordered_map, HashCnnlReduce {CNNL_REDUCE_NORM1, {diopi_dtype_float16, diopi_dtype_float32}}, {CNNL_REDUCE_NORM2, {diopi_dtype_float16, diopi_dtype_float32}}}; -diopiError_t reduce_internal(diopiContextHandle_t ctx, DiopiTensor& input_tr, DiopiTensor& output_tr, DiopiTensor& index_tr, - const std::vector reduce_dim, cnnlReduceOp_t reduce_op) { +diopiError_t reduceInternal(diopiContextHandle_t ctx, DiopiTensor& inputTr, DiopiTensor& outputTr, DiopiTensor& indexTr, + const std::vector reduceDim, cnnlReduceOp_t reduceOp) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DIOPI_CHECK(input_tr.is_contiguous(), "input tensor should be contiguous"); + DIOPI_CHECK(inputTr.isContiguous(), "input tensor should be contiguous"); - CnnlReduceDescriptor reduce_desc; - CnnlTensorDesc input_desc; - CnnlTensorDesc output_desc; - CnnlTensorDesc index_desc; + CnnlReduceDescriptor reduceDesc; + CnnlTensorDesc inputDesc; + CnnlTensorDesc outputDesc; + CnnlTensorDesc indexDesc; - cnnlDataType_t cnnl_dtype; - DIOPI_CALL(CnnlDataType::convertToCnnlType(&cnnl_dtype, input_tr.dtype())); - if (reduce_op == CNNL_REDUCE_AVG && input_tr.dtype() == diopi_dtype_float16) { - cnnl_dtype = CNNL_DTYPE_FLOAT; + cnnlDataType_t cnnlDtype; + DIOPI_CALL(CnnlDataType::convertToCnnlType(&cnnlDtype, inputTr.dtype())); + if (reduceOp == CNNL_REDUCE_AVG && inputTr.dtype() == diopi_dtype_float16) { + cnnlDtype = CNNL_DTYPE_FLOAT; } // Only Min and Max Ops have indices as result.when reduce_dim > 1, - auto reduce_indices = - ((reduce_op == CNNL_REDUCE_MAX || reduce_op == CNNL_REDUCE_MIN) && reduce_dim.size() >= 1) ? CNNL_REDUCE_FLATTENED_INDICES : CNNL_REDUCE_NO_INDICES; + auto reduceIndices = + ((reduceOp == CNNL_REDUCE_MAX || reduceOp == CNNL_REDUCE_MIN) && !reduceDim.empty()) ? 
CNNL_REDUCE_FLATTENED_INDICES : CNNL_REDUCE_NO_INDICES; - if (reduce_dim.size() == 0 || reduce_dim.size() == input_tr.dim() + 1) { + if (reduceDim.empty() || reduceDim.size() == inputTr.dim() + 1) { /* FULL-REDUCE: axis = [-1] instead of [0, 1, 2, ..., n] */ - std::vector full_reduce(1, -1); - std::vector fake_size(input_tr.dim(), 1); - reduce_desc.set(input_tr, full_reduce, reduce_op, reduce_indices, CNNL_32BIT_INDICES, cnnl_dtype); - input_desc.set(input_tr, CNNL_LAYOUT_ARRAY); - DiopiTensor fake_tensor = requiresTensor(ctx, fake_size, output_tr.dtype()); - output_desc.set(fake_tensor, CNNL_LAYOUT_ARRAY); - DiopiTensor fake_tensor2 = requiresTensor(ctx, fake_size, index_tr.dtype()); + std::vector fullReduce(1, -1); + std::vector fakeSize(inputTr.dim(), 1); + reduceDesc.set(inputTr, fullReduce, reduceOp, reduceIndices, CNNL_32BIT_INDICES, cnnlDtype); + inputDesc.set(inputTr, CNNL_LAYOUT_ARRAY); + DiopiTensor fakeTensor = requiresTensor(ctx, fakeSize, outputTr.dtype()); + outputDesc.set(fakeTensor, CNNL_LAYOUT_ARRAY); + DiopiTensor fakeTensor2 = requiresTensor(ctx, fakeSize, indexTr.dtype()); // index_desc.set_reduce(fake_tensor2); - index_desc.set(fake_tensor2, CNNL_LAYOUT_ARRAY); + indexDesc.set(fakeTensor2, CNNL_LAYOUT_ARRAY); } else { - reduce_desc.set(input_tr, reduce_dim, reduce_op, reduce_indices, CNNL_32BIT_INDICES, cnnl_dtype); - input_desc.set(input_tr, CNNL_LAYOUT_ARRAY); - auto desc_shape = infer_desc_shape(input_tr.shape(), reduce_dim, true); - output_desc.set(output_tr, CNNL_LAYOUT_ARRAY, desc_shape); - index_desc.set(index_tr, CNNL_LAYOUT_ARRAY, desc_shape); + reduceDesc.set(inputTr, reduceDim, reduceOp, reduceIndices, CNNL_32BIT_INDICES, cnnlDtype); + inputDesc.set(inputTr, CNNL_LAYOUT_ARRAY); + auto descShape = inferDescShape(inputTr.shape(), reduceDim, true); + outputDesc.set(outputTr, CNNL_LAYOUT_ARRAY, descShape); + indexDesc.set(indexTr, CNNL_LAYOUT_ARRAY, descShape); } - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetReduceOpWorkspaceSize(handle, input_desc.get(), output_desc.get(), reduce_desc.get(), &workspace_size)); - void* workspace_ptr = workspace_size == 0 ? nullptr : requiresBuffer(ctx, workspace_size).data(); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetReduceOpWorkspaceSize(handle, inputDesc.get(), outputDesc.get(), reduceDesc.get(), &workspaceSize)); + void* workspacePtr = workspaceSize == 0 ? nullptr : requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlReduce(handle, - reduce_desc.get(), - workspace_ptr, - workspace_size, + reduceDesc.get(), + workspacePtr, + workspaceSize, nullptr, - input_desc.get(), - input_tr.data(), - sizeof(int) * index_tr.numel(), - reduce_indices != CNNL_REDUCE_NO_INDICES ? index_tr.data() : nullptr, + inputDesc.get(), + inputTr.data(), + sizeof(int) * indexTr.numel(), + reduceIndices != CNNL_REDUCE_NO_INDICES ? 
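// Reviewer note (illustration only, not part of the patch): for a full reduction the branch
// above passes axis = {-1} to the reduce descriptor instead of listing every dimension, and
// sizes the output/index descriptors with an all-ones shape of the same rank, e.g. for a
// {2, 3, 4} input:
//   std::vector<int64_t> fullReduce{-1};                 // "reduce everything"
//   std::vector<int64_t> fakeSize(inputTr.dim(), 1);     // {1, 1, 1}
// so a single cnnlReduce call collapses every axis while the result is still written into
// outputTr's buffer.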
indexTr.data() : nullptr, nullptr, - output_desc.get(), - output_tr.data())); + outputDesc.get(), + outputTr.data())); return diopiSuccess; } -diopiError_t reduce_impl(diopiContextHandle_t ctx, DiopiTensor& output_tr, DiopiTensor& index_tr, DiopiTensor& input_tr, cnnlReduceOp_t reduce_op) { - std::vector reduce_dim; - for (int64_t i = 0; i <= input_tr.dim(); i++) { - reduce_dim.push_back(i); +diopiError_t reduceImpl(diopiContextHandle_t ctx, DiopiTensor& outputTr, DiopiTensor& indexTr, DiopiTensor& inputTr, cnnlReduceOp_t reduceOp) { + std::vector reduceDim; + for (int64_t i = 0; i <= inputTr.dim(); i++) { + reduceDim.push_back(i); } - auto supported_dtypes = supported_type_table.find(reduce_op); - std::vector p_tensors{&input_tr}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes->second)); - - if (output_tr.dtype() != input_tr.dtype()) { - auto output_tmp_tr = requiresTensor(ctx, output_tr.shape(), input_tr.dtype()); - DIOPI_CALL(reduce_internal(ctx, input_tr, output_tmp_tr, index_tr, reduce_dim, reduce_op)); - DIOPI_CALL(dataTypeCast(ctx, output_tr, output_tmp_tr)); + auto supportedDtypes = supportedTypeTable.find(reduceOp); + std::vector pTensors{&inputTr}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes->second)); + + if (outputTr.dtype() != inputTr.dtype()) { + auto outputTmpTr = requiresTensor(ctx, outputTr.shape(), inputTr.dtype()); + DIOPI_CALL(reduceInternal(ctx, inputTr, outputTmpTr, indexTr, reduceDim, reduceOp)); + DIOPI_CALL(dataTypeCast(ctx, outputTr, outputTmpTr)); } else { - DIOPI_CALL(reduce_internal(ctx, input_tr, output_tr, index_tr, reduce_dim, reduce_op)); + DIOPI_CALL(reduceInternal(ctx, inputTr, outputTr, indexTr, reduceDim, reduceOp)); } return diopiSuccess; } -diopiError_t reduce_dim_impl(diopiContextHandle_t ctx, DiopiTensor& output_tr, DiopiTensor& index_tr, DiopiTensor& input_tr, const std::vector dim_vec, - const bool keepdim, cnnlReduceOp_t reduce_op) { - std::vector reduce_dim = getRealDims(dim_vec, input_tr.dim()); - auto supported_dtypes = supported_type_table.find(reduce_op); - std::vector p_tensors{&input_tr}; - DIOPI_CALL(autoCastTensorType(ctx, p_tensors, supported_dtypes->second)); - - if (output_tr.dtype() != input_tr.dtype()) { - auto output_tmp_tr = requiresTensor(ctx, output_tr.shape(), input_tr.dtype()); - DIOPI_CALL(reduce_internal(ctx, input_tr, output_tmp_tr, index_tr, reduce_dim, reduce_op)); - DIOPI_CALL(dataTypeCast(ctx, output_tr, output_tmp_tr)); +diopiError_t reduceDimImpl(diopiContextHandle_t ctx, DiopiTensor& outputTr, DiopiTensor& indexTr, DiopiTensor& inputTr, const std::vector dimVec, + const bool keepdim, cnnlReduceOp_t reduceOp) { + std::vector reduceDim = getRealDims(dimVec, inputTr.dim()); + auto supportedDtypes = supportedTypeTable.find(reduceOp); + std::vector pTensors{&inputTr}; + DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes->second)); + + if (outputTr.dtype() != inputTr.dtype()) { + auto outputTmpTr = requiresTensor(ctx, outputTr.shape(), inputTr.dtype()); + DIOPI_CALL(reduceInternal(ctx, inputTr, outputTmpTr, indexTr, reduceDim, reduceOp)); + DIOPI_CALL(dataTypeCast(ctx, outputTr, outputTmpTr)); } else { - DIOPI_CALL(reduce_internal(ctx, input_tr, output_tr, index_tr, reduce_dim, reduce_op)); + DIOPI_CALL(reduceInternal(ctx, inputTr, outputTr, indexTr, reduceDim, reduceOp)); } return diopiSuccess; } @@ -162,78 +162,78 @@ diopiError_t reduce_dim_impl(diopiContextHandle_t ctx, DiopiTensor& output_tr, D extern "C" { diopiError_t diopiSum(diopiContextHandle_t ctx, 
diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dim) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); - auto index_tr = requiresTensor(ctx, {1}, diopi_dtype_int32); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); + auto indexTr = requiresTensor(ctx, {1}, diopi_dtype_int32); - std::vector dim_vec(dim.data, dim.data + dim.len); + std::vector dimVec(dim.data, dim.data + dim.len); - DIOPI_CALL(reduce_dim_impl(ctx, output_tr, index_tr, input_tr, dim_vec, false, CNNL_REDUCE_ADD)); + DIOPI_CALL(reduceDimImpl(ctx, outputTr, indexTr, inputTr, dimVec, false, CNNL_REDUCE_ADD)); return diopiSuccess; } diopiError_t diopiMean(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t dim) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); - auto index_tr = requiresTensor(ctx, {1}, diopi_dtype_int32); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); + auto indexTr = requiresTensor(ctx, {1}, diopi_dtype_int32); - std::vector dim_vec(dim.data, dim.data + dim.len); + std::vector dimVec(dim.data, dim.data + dim.len); - DIOPI_CALL(reduce_dim_impl(ctx, output_tr, index_tr, input_tr, dim_vec, false, CNNL_REDUCE_AVG)); + DIOPI_CALL(reduceDimImpl(ctx, outputTr, indexTr, inputTr, dimVec, false, CNNL_REDUCE_AVG)); return diopiSuccess; } diopiError_t diopiProd(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const int64_t* dim) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); - auto index_tr = requiresTensor(ctx, {1}, diopi_dtype_int32); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); + auto indexTr = requiresTensor(ctx, {1}, diopi_dtype_int32); - DIOPI_CALL(reduce_dim_impl(ctx, output_tr, index_tr, input_tr, {*dim}, false, CNNL_REDUCE_MUL)); + DIOPI_CALL(reduceDimImpl(ctx, outputTr, indexTr, inputTr, {*dim}, false, CNNL_REDUCE_MUL)); return diopiSuccess; } -diopiError_t diopiMin(diopiContextHandle_t ctx, diopiTensorHandle_t min, diopiTensorHandle_t min_indices, diopiConstTensorHandle_t input, int64_t dim) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(min); - DiopiTensor index_tr(min_indices); +diopiError_t diopiMin(diopiContextHandle_t ctx, diopiTensorHandle_t min, diopiTensorHandle_t minIndices, diopiConstTensorHandle_t input, int64_t dim) { + DiopiTensor inputTr(input); + DiopiTensor outputTr(min); + DiopiTensor indexTr(minIndices); // Note: camb index out is int32 dtype - auto index_tmp_tr = requiresTensor(ctx, index_tr.shape(), diopi_dtype_int32); + auto indexTmpTr = requiresTensor(ctx, indexTr.shape(), diopi_dtype_int32); - DIOPI_CALL(reduce_dim_impl(ctx, output_tr, index_tmp_tr, input_tr, {dim}, false, CNNL_REDUCE_MIN)); + DIOPI_CALL(reduceDimImpl(ctx, outputTr, indexTmpTr, inputTr, {dim}, false, CNNL_REDUCE_MIN)); - DIOPI_CALL(dataTypeCast(ctx, index_tr, index_tmp_tr)); + DIOPI_CALL(dataTypeCast(ctx, indexTr, indexTmpTr)); return diopiSuccess; } diopiError_t diopiMinAll(diopiContextHandle_t ctx, diopiTensorHandle_t min, diopiConstTensorHandle_t input) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(min); - auto index_tr = requiresTensor(ctx, {1}, diopi_dtype_int32); + DiopiTensor inputTr(input); + DiopiTensor outputTr(min); + auto indexTr = requiresTensor(ctx, {1}, diopi_dtype_int32); - DIOPI_CALL(reduce_impl(ctx, output_tr, index_tr, input_tr, CNNL_REDUCE_MIN)); + DIOPI_CALL(reduceImpl(ctx, outputTr, indexTr, inputTr, CNNL_REDUCE_MIN)); return diopiSuccess; } -diopiError_t diopiMax(diopiContextHandle_t ctx, diopiTensorHandle_t 
max, diopiTensorHandle_t max_indices, diopiConstTensorHandle_t input, int64_t dim) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(max); - DiopiTensor index_tr(max_indices); - auto index_tmp_tr = requiresTensor(ctx, index_tr.shape(), diopi_dtype_int32); +diopiError_t diopiMax(diopiContextHandle_t ctx, diopiTensorHandle_t max, diopiTensorHandle_t maxIndices, diopiConstTensorHandle_t input, int64_t dim) { + DiopiTensor inputTr(input); + DiopiTensor outputTr(max); + DiopiTensor indexTr(maxIndices); + auto indexTmpTr = requiresTensor(ctx, indexTr.shape(), diopi_dtype_int32); - DIOPI_CALL(reduce_dim_impl(ctx, output_tr, index_tmp_tr, input_tr, {dim}, false, CNNL_REDUCE_MAX)); + DIOPI_CALL(reduceDimImpl(ctx, outputTr, indexTmpTr, inputTr, {dim}, false, CNNL_REDUCE_MAX)); - DIOPI_CALL(dataTypeCast(ctx, index_tr, index_tmp_tr)); + DIOPI_CALL(dataTypeCast(ctx, indexTr, indexTmpTr)); return diopiSuccess; } diopiError_t diopiMaxAll(diopiContextHandle_t ctx, diopiTensorHandle_t max, diopiConstTensorHandle_t input) { - DiopiTensor input_tr(input); - DiopiTensor output_tr(max); - auto index_tr = requiresTensor(ctx, {1}, diopi_dtype_int32); + DiopiTensor inputTr(input); + DiopiTensor outputTr(max); + auto indexTr = requiresTensor(ctx, {1}, diopi_dtype_int32); - DIOPI_CALL(reduce_impl(ctx, output_tr, index_tr, input_tr, CNNL_REDUCE_MAX)); + DIOPI_CALL(reduceImpl(ctx, outputTr, indexTr, inputTr, CNNL_REDUCE_MAX)); return diopiSuccess; } @@ -243,12 +243,12 @@ diopiError_t diopiNorm(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiC if (DiopiDataType().isInteger(p->stype)) norm = p->ival; DIOPI_CHECK(norm == 1.0 || norm == 2.0, "camb only support L1-Norm as p=1.0 and L2-Norm as p=2.0"); - DiopiTensor input_tr(input); - DiopiTensor output_tr(out); - auto index_tr = requiresTensor(ctx, {1}, diopi_dtype_int32); + DiopiTensor inputTr(input); + DiopiTensor outputTr(out); + auto indexTr = requiresTensor(ctx, {1}, diopi_dtype_int32); - std::vector dim_vec(dim.data, dim.data + dim.len); - DIOPI_CALL(reduce_dim_impl(ctx, output_tr, index_tr, input_tr, dim_vec, false, norm == 1.0 ? CNNL_REDUCE_NORM1 : CNNL_REDUCE_NORM2)); + std::vector dimVec(dim.data, dim.data + dim.len); + DIOPI_CALL(reduceDimImpl(ctx, outputTr, indexTr, inputTr, dimVec, false, norm == 1.0 ? 
CNNL_REDUCE_NORM1 : CNNL_REDUCE_NORM2)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/repeat.cpp b/DIOPI-IMPL/camb/functions/repeat.cpp index 94077718c..fa5dab934 100644 --- a/DIOPI-IMPL/camb/functions/repeat.cpp +++ b/DIOPI-IMPL/camb/functions/repeat.cpp @@ -14,15 +14,15 @@ namespace camb { extern "C" { -diopiError_t diopiRepeat(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t repeats_size) { +diopiError_t diopiRepeat(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t repeatsSize) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlTile(handle, inputDesc.get(), input_tensor.data(), outDesc.get(), out_tensor.data())); + DIOPI_CALLCNNL(cnnlTile(handle, inputDesc.get(), inputTensor.data(), outDesc.get(), outTensor.data())); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/roll.cpp b/DIOPI-IMPL/camb/functions/roll.cpp index 5e09515ac..f0b6216d2 100644 --- a/DIOPI-IMPL/camb/functions/roll.cpp +++ b/DIOPI-IMPL/camb/functions/roll.cpp @@ -10,32 +10,32 @@ extern "C" { diopiError_t diopiRoll(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiSize_t shifts, diopiSize_t dims) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); - std::vector shifts_{shifts.data, shifts.data + shifts.len}; - std::vector dims_{dims.data, dims.data + dims.len}; - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetRollWorkspaceSize(handle, input_desc.get(), &workspace_size)); + std::vector shiftsTmp{shifts.data, shifts.data + shifts.len}; + std::vector dimsTmp{dims.data, dims.data + dims.len}; + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetRollWorkspaceSize(handle, inputDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlRoll(handle, - input_desc.get(), - input_tensor.data(), - shifts_.data(), - shifts_.size(), - dims_.size() > 0 ? dims_.data() : nullptr, - dims_.size(), + inputDesc.get(), + inputTensor.data(), + shiftsTmp.data(), + shiftsTmp.size(), + !dimsTmp.empty() ? 
dimsTmp.data() : nullptr, + dimsTmp.size(), workspace, - workspace_size, - out_desc.get(), - out_tensor.data())); + workspaceSize, + outDesc.get(), + outTensor.data())); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/rsqrt.cpp b/DIOPI-IMPL/camb/functions/rsqrt.cpp index b9333b4fd..529e9911b 100644 --- a/DIOPI-IMPL/camb/functions/rsqrt.cpp +++ b/DIOPI-IMPL/camb/functions/rsqrt.cpp @@ -22,29 +22,29 @@ static diopiError_t rsqrt(diopiContextHandle_t ctx, DiopiTensor& output, DiopiTe DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp = output; + DiopiTensor outputTmp = output; if (input.dtype() != output.dtype()) { - output_tmp = requiresTensor(ctx, output.shape(), input.dtype()); + outputTmp = requiresTensor(ctx, output.shape(), input.dtype()); } CnnlTensorDesc desc(input, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlRsqrt_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), output_tmp.data())); - if (output_tmp.dtype() != output.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output, output_tmp)); + DIOPI_CALLCNNL(cnnlRsqrt_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), outputTmp.data())); + if (outputTmp.dtype() != output.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, output, outputTmp)); } return diopiSuccess; } extern "C" diopiError_t diopiRsqrtInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DiopiTensor input_tensor(input); - DIOPI_CALL(rsqrt(ctx, input_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DIOPI_CALL(rsqrt(ctx, inputTensor, inputTensor)); return diopiSuccess; } extern "C" diopiError_t diopiRsqrt(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(rsqrt(ctx, output_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(rsqrt(ctx, outputTensor, inputTensor)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/sgd.cpp b/DIOPI-IMPL/camb/functions/sgd.cpp index 97d4734a7..e9c26718a 100644 --- a/DIOPI-IMPL/camb/functions/sgd.cpp +++ b/DIOPI-IMPL/camb/functions/sgd.cpp @@ -16,79 +16,79 @@ namespace impl { namespace camb { extern "C" diopiError_t diopiSgd(diopiContextHandle_t ctx, diopiTensorHandle_t w, diopiTensorHandle_t dw, diopiTensorHandle_t buf, double lr, - double momentum, double dampening, double weight_decay, bool nesterov) { + double momentum, double dampening, double weightDecay, bool nesterov) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor w_tensor(w); - DiopiTensor dw_tensor(dw); - DiopiTensor buf_tensor; - DiopiTensor buf_tensor_tmp; + DiopiTensor wTensor(w); + DiopiTensor dwTensor(dw); + DiopiTensor bufTensor; + DiopiTensor bufTensorTmp; std::vector pTensors; if (buf != nullptr) { - buf_tensor = DiopiTensor(buf); - buf_tensor_tmp = buf_tensor; - pTensors = std::vector{&dw_tensor, &buf_tensor_tmp}; + bufTensor = DiopiTensor(buf); + bufTensorTmp = bufTensor; + pTensors = std::vector{&dwTensor, &bufTensorTmp}; } else { - pTensors = std::vector{&dw_tensor}; + pTensors = std::vector{&dwTensor}; } std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor w_tensor_tmp = w_tensor; - if (dw_tensor.dtype() != w_tensor_tmp.dtype()) { - w_tensor_tmp = requiresTensor(ctx, w_tensor.shape(), dw_tensor.dtype()); - DIOPI_CALL(dataTypeCast(ctx, w_tensor_tmp, w_tensor)); + DiopiTensor 
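// Reviewer note (illustration only, not part of the patch): the SGD update assembled in this
// function via the addMulFunc lambda (cnnlBiasAdd) and cnnlGradientDescent follows the
// standard PyTorch-style recurrence; in scalar form, per parameter:
//   dw  = dw + weightDecay * w;                      // weight decay
//   buf = momentum * buf + (1 - dampening) * dw;     // momentum buffer (when buf is provided)
//   dw  = nesterov ? dw + momentum * buf : buf;      // nesterov lookahead or plain momentum
//   w   = w - lr * dw;                               // final descent step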
wTensorTmp = wTensor; + if (dwTensor.dtype() != wTensorTmp.dtype()) { + wTensorTmp = requiresTensor(ctx, wTensor.shape(), dwTensor.dtype()); + DIOPI_CALL(dataTypeCast(ctx, wTensorTmp, wTensor)); } - CnnlTensorDesc w_desc_tmp(w_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc dw_desc(dw_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc wDescTmp(wTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc dwDesc(dwTensor, CNNL_LAYOUT_ARRAY); // a = a * scale_a + b * scale_b; - auto add_mul_func = [&](auto &a, float scale_a, auto b, float scale_b) { - size_t workspace_size; + auto addMulFunc = [&](auto &a, float scaleA, auto b, float scaleB) { + size_t workspaceSize; std::vector shape; shape.push_back(a.numel()); - CnnlTensorDesc a_desc, b_desc; - DIOPI_CALL(a_desc.set(a, CNNL_LAYOUT_ARRAY, shape)); - DIOPI_CALL(b_desc.set(b, CNNL_LAYOUT_ARRAY, shape)); + CnnlTensorDesc aDesc, bDesc; + DIOPI_CALL(aDesc.set(a, CNNL_LAYOUT_ARRAY, shape)); + DIOPI_CALL(bDesc.set(b, CNNL_LAYOUT_ARRAY, shape)); - DIOPI_CALLCNNL(cnnlGetBiasAddWorkspaceSize(handle, b_desc.get(), a_desc.get(), &workspace_size)); + DIOPI_CALLCNNL(cnnlGetBiasAddWorkspaceSize(handle, bDesc.get(), aDesc.get(), &workspaceSize)); void *workspace = nullptr; - if (workspace_size != 0) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (workspaceSize != 0) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - DIOPI_CALLCNNL(cnnlBiasAdd(handle, &scale_b, b_desc.get(), b.data(), workspace, workspace_size, &scale_a, a_desc.get(), a.data())); + DIOPI_CALLCNNL(cnnlBiasAdd(handle, &scaleB, bDesc.get(), b.data(), workspace, workspaceSize, &scaleA, aDesc.get(), a.data())); return diopiSuccess; }; - if (weight_decay != 0) { - DIOPI_CALL(add_mul_func(dw_tensor, 1.0, w_tensor_tmp, weight_decay)); + if (weightDecay != 0) { + DIOPI_CALL(addMulFunc(dwTensor, 1.0, wTensorTmp, weightDecay)); } if (momentum != 0) { if (buf == nullptr) { - buf_tensor_tmp = dw_tensor; + bufTensorTmp = dwTensor; } else { - DIOPI_CALL(add_mul_func(buf_tensor_tmp, momentum, dw_tensor, (1.0 - dampening))); + DIOPI_CALL(addMulFunc(bufTensorTmp, momentum, dwTensor, (1.0 - dampening))); } if (nesterov) { - DIOPI_CALL(add_mul_func(dw_tensor, 1.0, buf_tensor_tmp, momentum)); + DIOPI_CALL(addMulFunc(dwTensor, 1.0, bufTensorTmp, momentum)); } else { - dw_tensor = buf_tensor_tmp; + dwTensor = bufTensorTmp; } } std::vector shape{1}; diopiSize_t size(shape.data(), shape.size()); - DiopiTensor lr_tensor; - diopiScalar_t lr_scalar{diopi_dtype_float64, {lr}}; - DIOPI_CALL(makeTensorFromScalar(ctx, &lr_scalar, lr_tensor)); - DIOPI_CALL(dataTypeCast(ctx, lr_tensor, dw_tensor.dtype())); - DIOPI_CALLCNNL(cnnlGradientDescent(handle, dw_desc.get(), dw_tensor.data(), lr_tensor.data(), w_desc_tmp.get(), w_tensor_tmp.data())); - DIOPI_CALL(dataTypeCast(ctx, w_tensor, w_tensor_tmp)); + DiopiTensor lrTensor; + diopiScalar_t lrScalar{diopi_dtype_float64, {lr}}; + DIOPI_CALL(makeTensorFromScalar(ctx, &lrScalar, lrTensor)); + DIOPI_CALL(dataTypeCast(ctx, lrTensor, dwTensor.dtype())); + DIOPI_CALLCNNL(cnnlGradientDescent(handle, dwDesc.get(), dwTensor.data(), lrTensor.data(), wDescTmp.get(), wTensorTmp.data())); + DIOPI_CALL(dataTypeCast(ctx, wTensor, wTensorTmp)); if (buf != nullptr) { - DIOPI_CALL(dataTypeCast(ctx, buf_tensor, buf_tensor_tmp)); + DIOPI_CALL(dataTypeCast(ctx, bufTensor, bufTensorTmp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/sign.cpp b/DIOPI-IMPL/camb/functions/sign.cpp index 244611166..2baf9861e 100644 --- a/DIOPI-IMPL/camb/functions/sign.cpp +++ 
b/DIOPI-IMPL/camb/functions/sign.cpp @@ -15,24 +15,24 @@ extern "C" { diopiError_t diopiSign(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (input_tensor.dtype() != out_tensor_temp.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (inputTensor.dtype() != outTensorTemp.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlSign(handle, input_desc.get(), input_tensor.data(), out_desc.get(), out_tensor_temp.data())); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlSign(handle, inputDesc.get(), inputTensor.data(), outDesc.get(), outTensorTemp.data())); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/sin.cpp b/DIOPI-IMPL/camb/functions/sin.cpp index 91fff01c4..9834c4fbc 100644 --- a/DIOPI-IMPL/camb/functions/sin.cpp +++ b/DIOPI-IMPL/camb/functions/sin.cpp @@ -20,28 +20,28 @@ static diopiError_t sin(diopiContextHandle_t ctx, DiopiTensor& output, DiopiTens std::vector pTensors{&input}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp = output; + DiopiTensor outputTmp = output; if (input.dtype() != output.dtype()) { - output_tmp = requiresTensor(ctx, output.shape(), input.dtype()); + outputTmp = requiresTensor(ctx, output.shape(), input.dtype()); } CnnlTensorDesc desc(input, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlSin_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), output_tmp.data())); - if (output_tmp.dtype() != output.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output, output_tmp)); + DIOPI_CALLCNNL(cnnlSin_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), outputTmp.data())); + if (outputTmp.dtype() != output.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, output, outputTmp)); } return diopiSuccess; } extern "C" diopiError_t diopiSinInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DiopiTensor input_tensor(input); - DIOPI_CALL(sin(ctx, input_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DIOPI_CALL(sin(ctx, inputTensor, inputTensor)); return diopiSuccess; } extern "C" diopiError_t diopiSin(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(sin(ctx, output_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(sin(ctx, outputTensor, inputTensor)); return diopiSuccess; } diff --git 
a/DIOPI-IMPL/camb/functions/slice.cpp b/DIOPI-IMPL/camb/functions/slice.cpp index 0e636f604..22035907c 100644 --- a/DIOPI-IMPL/camb/functions/slice.cpp +++ b/DIOPI-IMPL/camb/functions/slice.cpp @@ -17,88 +17,88 @@ extern "C" { diopiError_t diopiIndexSelect(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim, diopiConstTensorHandle_t index) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - DiopiTensor index_tensor(index); - DiopiTensor out_tensor(out); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc indexDesc(index_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor indexTensor(index); + DiopiTensor outTensor(out); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indexDesc(indexTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); if (dim < 0) { - dim = dim + input_tensor.dim(); + dim = dim + inputTensor.dim(); } - if (out_tensor.dtype() == input_tensor.dtype()) { + if (outTensor.dtype() == inputTensor.dtype()) { DIOPI_CALLCNNL( - cnnlIndexSelect(handle, dim, inputDesc.get(), input_tensor.data(), indexDesc.get(), index_tensor.data(), outDesc.get(), out_tensor.data())); + cnnlIndexSelect(handle, dim, inputDesc.get(), inputTensor.data(), indexDesc.get(), indexTensor.data(), outDesc.get(), outTensor.data())); } else { - DiopiTensor out_temp_tensor = requiresTensor(ctx, out_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_tempDesc(out_temp_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor outTempTensor = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTempTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlIndexSelect( - handle, dim, inputDesc.get(), input_tensor.data(), indexDesc.get(), index_tensor.data(), out_tempDesc.get(), out_temp_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp_tensor)); + handle, dim, inputDesc.get(), inputTensor.data(), indexDesc.get(), indexTensor.data(), outTempDesc.get(), outTempTensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTempTensor)); } return diopiSuccess; } -diopiError_t diopiIndexSelectBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad, diopiSize_t input_sizes, +diopiError_t diopiIndexSelectBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t grad, diopiSize_t inputSizes, int64_t dim, diopiConstTensorHandle_t index) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); diopiScalar_t zero = {diopi_dtype_int64, 0}; - DIOPI_CALL(diopiFill(ctx, grad_input, &zero)); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_tensor(grad); - DiopiTensor out_tensor(grad_input); - diopiDtype_t out_dtype = grad_input_tensor.dtype(); - if (grad_input_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, diopi_dtype_int32)); - } else if (grad_input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, diopi_dtype_float32)); + DIOPI_CALL(diopiFill(ctx, gradInput, &zero)); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradTensor(grad); + DiopiTensor 
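// Reviewer note (illustration only, not part of the patch): diopiIndexSelectBackward is a
// scatter-add -- the input gradient is zero everywhere except the selected slices, which
// accumulate the incoming gradient. Conceptually:
//   fill(gradInput, 0);
//   for (i = 0; i < index.numel(); ++i)
//       gradInput.select(dim, index[i]) += grad.select(dim, i);
// which is what the zero diopiFill above followed by cnnlIndexAdd below performs.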
outTensor(gradInput); + diopiDtype_t outDtype = gradInputTensor.dtype(); + if (gradInputTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, diopi_dtype_int32)); + } else if (gradInputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, diopi_dtype_float32)); } - if (grad_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, grad_tensor, diopi_dtype_int32)); - } else if (grad_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_tensor, diopi_dtype_float32)); + if (gradTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, gradTensor, diopi_dtype_int32)); + } else if (gradTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradTensor, diopi_dtype_float32)); } - CnnlTensorDesc grad_inputDesc(grad_input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc gradDesc(grad_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradInputDesc(gradInputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradDesc(gradTensor, CNNL_LAYOUT_ARRAY); - DiopiTensor index_tensor(index); - if (index_tensor.dtype() != diopi_dtype_int32) { - DIOPI_CALL(dataTypeCast(ctx, index_tensor, diopi_dtype_int32)); + DiopiTensor indexTensor(index); + if (indexTensor.dtype() != diopi_dtype_int32) { + DIOPI_CALL(dataTypeCast(ctx, indexTensor, diopi_dtype_int32)); } - CnnlTensorDesc indexDesc(index_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indexDesc(indexTensor, CNNL_LAYOUT_ARRAY); if (dim < 0) { - dim = dim + input_sizes.len; + dim = dim + inputSizes.len; } - if (grad_input_tensor.dtype() == out_dtype) { + if (gradInputTensor.dtype() == outDtype) { DIOPI_CALLCNNL(cnnlIndexAdd(handle, dim, - grad_inputDesc.get(), - grad_input_tensor.data(), + gradInputDesc.get(), + gradInputTensor.data(), indexDesc.get(), - index_tensor.data(), + indexTensor.data(), gradDesc.get(), - grad_tensor.data(), - grad_inputDesc.get(), - grad_input_tensor.data())); + gradTensor.data(), + gradInputDesc.get(), + gradInputTensor.data())); } else { DIOPI_CALLCNNL(cnnlIndexAdd(handle, dim, - grad_inputDesc.get(), - grad_input_tensor.data(), + gradInputDesc.get(), + gradInputTensor.data(), indexDesc.get(), - index_tensor.data(), + indexTensor.data(), gradDesc.get(), - grad_tensor.data(), - grad_inputDesc.get(), - grad_input_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, grad_input_tensor)); + gradTensor.data(), + gradInputDesc.get(), + gradInputTensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, gradInputTensor)); } return diopiSuccess; } @@ -106,157 +106,157 @@ diopiError_t diopiIndexSelectBackward(diopiContextHandle_t ctx, diopiTensorHandl diopiError_t diopiSelect(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim, int64_t index) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(input); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - diopiScalar_t index_scalar; - index_scalar.stype = diopi_dtype_int64; - index_scalar.ival = index; - DiopiTensor index_tensor; - DIOPI_CALL(makeTensorFromScalar(ctx, &index_scalar, index_tensor)); - DiopiTensor out_tensor(out); + diopiScalar_t indexScalar; + indexScalar.stype = diopi_dtype_int64; + indexScalar.ival = index; + DiopiTensor indexTensor; + DIOPI_CALL(makeTensorFromScalar(ctx, 
&indexScalar, indexTensor)); + DiopiTensor outTensor(out); if (dim < 0) { - dim = dim + input_tensor.dim(); + dim = dim + inputTensor.dim(); } - std::vector shape(out_tensor.shape()); + std::vector shape(outTensor.shape()); shape.insert(shape.begin() + dim, 1); - out_tensor.reshape(shape); + outTensor.reshape(shape); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc indexDesc(index_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indexDesc(indexTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); - if (out_tensor.dtype() == input_tensor.dtype()) { + if (outTensor.dtype() == inputTensor.dtype()) { DIOPI_CALLCNNL( - cnnlIndexSelect(handle, dim, inputDesc.get(), input_tensor.data(), indexDesc.get(), index_tensor.data(), outDesc.get(), out_tensor.data())); + cnnlIndexSelect(handle, dim, inputDesc.get(), inputTensor.data(), indexDesc.get(), indexTensor.data(), outDesc.get(), outTensor.data())); } else { - DiopiTensor out_temp_tensor = requiresTensor(ctx, out_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_tempDesc(out_temp_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor outTempTensor = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTempTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlIndexSelect( - handle, dim, inputDesc.get(), input_tensor.data(), indexDesc.get(), index_tensor.data(), out_tempDesc.get(), out_temp_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp_tensor)); + handle, dim, inputDesc.get(), inputTensor.data(), indexDesc.get(), indexTensor.data(), outTempDesc.get(), outTempTensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTempTensor)); } return diopiSuccess; } -diopiError_t diopiSelectBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiSize_t input_sizes, +diopiError_t diopiSelectBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiSize_t inputSizes, int64_t dim, int64_t index) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); diopiScalar_t zero = {diopi_dtype_int64, 0}; - DIOPI_CALL(diopiFill(ctx, grad_input, &zero)); - DiopiTensor grad_input_tensor(grad_input); - diopiDtype_t out_dtype = grad_input_tensor.dtype(); + DIOPI_CALL(diopiFill(ctx, gradInput, &zero)); + DiopiTensor gradInputTensor(gradInput); + diopiDtype_t outDtype = gradInputTensor.dtype(); if (dim < 0) { - dim = dim + input_sizes.len; + dim = dim + inputSizes.len; } - DiopiTensor grad_tensor(grad_output); - std::vector shape(grad_tensor.shape()); + DiopiTensor gradTensor(gradOutput); + std::vector shape(gradTensor.shape()); shape.insert(shape.begin() + dim, 1); - grad_tensor.reshape(shape); + gradTensor.reshape(shape); - if (grad_input_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, diopi_dtype_int32)); - } else if (grad_input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, diopi_dtype_float32)); + if (gradInputTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, diopi_dtype_int32)); + } else if (gradInputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, diopi_dtype_float32)); } - if (grad_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, grad_tensor, diopi_dtype_int32)); - } else if 
(grad_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, grad_tensor, diopi_dtype_float32)); + if (gradTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, gradTensor, diopi_dtype_int32)); + } else if (gradTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, gradTensor, diopi_dtype_float32)); } - CnnlTensorDesc grad_inputDesc(grad_input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc gradDesc(grad_tensor, CNNL_LAYOUT_ARRAY); - - diopiScalar_t index_scalar; - index_scalar.stype = diopi_dtype_int64; - index_scalar.ival = index; - DiopiTensor index_tensor; - DIOPI_CALL(makeTensorFromScalar(ctx, &index_scalar, index_tensor)); - if (index_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, index_tensor, diopi_dtype_int32)); + CnnlTensorDesc gradInputDesc(gradInputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradDesc(gradTensor, CNNL_LAYOUT_ARRAY); + + diopiScalar_t indexScalar; + indexScalar.stype = diopi_dtype_int64; + indexScalar.ival = index; + DiopiTensor indexTensor; + DIOPI_CALL(makeTensorFromScalar(ctx, &indexScalar, indexTensor)); + if (indexTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, indexTensor, diopi_dtype_int32)); } - CnnlTensorDesc indexDesc(index_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indexDesc(indexTensor, CNNL_LAYOUT_ARRAY); - if (grad_input_tensor.dtype() == out_dtype) { + if (gradInputTensor.dtype() == outDtype) { DIOPI_CALLCNNL(cnnlIndexAdd(handle, dim, - grad_inputDesc.get(), - grad_input_tensor.data(), + gradInputDesc.get(), + gradInputTensor.data(), indexDesc.get(), - index_tensor.data(), + indexTensor.data(), gradDesc.get(), - grad_tensor.data(), - grad_inputDesc.get(), - grad_input_tensor.data())); + gradTensor.data(), + gradInputDesc.get(), + gradInputTensor.data())); } else { DIOPI_CALLCNNL(cnnlIndexAdd(handle, dim, - grad_inputDesc.get(), - grad_input_tensor.data(), + gradInputDesc.get(), + gradInputTensor.data(), indexDesc.get(), - index_tensor.data(), + indexTensor.data(), gradDesc.get(), - grad_tensor.data(), - grad_inputDesc.get(), - grad_input_tensor.data())); - DiopiTensor out_tensor(grad_input); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, grad_input_tensor)); + gradTensor.data(), + gradInputDesc.get(), + gradInputTensor.data())); + DiopiTensor outTensor(gradInput); + DIOPI_CALL(dataTypeCast(ctx, outTensor, gradInputTensor)); } return diopiSuccess; } -diopiError_t diopiSlice(diopiContextHandle_t ctx, diopiTensorHandle_t null_out, diopiConstTensorHandle_t input, int64_t dim, int64_t start, int64_t end, +diopiError_t diopiSlice(diopiContextHandle_t ctx, diopiTensorHandle_t nullOut, diopiConstTensorHandle_t input, int64_t dim, int64_t start, int64_t end, int64_t step) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(null_out); - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); - - std::vector start_32(input_tensor.dim(), 0); - std::vector step_32(input_tensor.dim(), 1); - std::vector end_32(input_tensor.shape().begin(), input_tensor.shape().end()); - start_32[dim] = start; - step_32[dim] = step; - end_32[dim] = end; + DiopiTensor inputTensor(input); + DiopiTensor outTensor(nullOut); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + + std::vector start32(inputTensor.dim(), 0); + std::vector step32(inputTensor.dim(), 1); + std::vector end32(inputTensor.shape().begin(), 
inputTensor.shape().end()); + start32[dim] = start; + step32[dim] = step; + end32[dim] = end; DIOPI_CALLCNNL( - cnnlStridedSlice(handle, inputDesc.get(), input_tensor.data(), start_32.data(), end_32.data(), step_32.data(), outDesc.get(), out_tensor.data())); + cnnlStridedSlice(handle, inputDesc.get(), inputTensor.data(), start32.data(), end32.data(), step32.data(), outDesc.get(), outTensor.data())); return diopiSuccess; } -diopiError_t diopiSliceBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, diopiSize_t input_sizes, +diopiError_t diopiSliceBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiSize_t inputSizes, int64_t dim, int64_t start, int64_t end, int64_t step) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(grad_output); - DiopiTensor out_tensor(grad_input); - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor, diopi_dtype_float32)); + DiopiTensor inputTensor(gradOutput); + DiopiTensor outTensor(gradInput); + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensor, diopi_dtype_float32)); } - CnnlTensorDesc inputDesc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc outDesc(out_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); - std::vector start_32(input_tensor.dim(), 0); - std::vector step_32(input_tensor.dim(), 1); - std::vector end_32(input_tensor.shape().begin(), input_tensor.shape().end()); - start_32[dim] = start; - step_32[dim] = step; - end_32[dim] = end; + std::vector start32(inputTensor.dim(), 0); + std::vector step32(inputTensor.dim(), 1); + std::vector end32(inputTensor.shape().begin(), inputTensor.shape().end()); + start32[dim] = start; + step32[dim] = step; + end32[dim] = end; - if (out_tensor.dtype() == input_tensor.dtype()) { + if (outTensor.dtype() == inputTensor.dtype()) { DIOPI_CALLCNNL(cnnlStridedSliceBackward( - handle, start_32.data(), end_32.data(), step_32.data(), inputDesc.get(), input_tensor.data(), outDesc.get(), out_tensor.data())); + handle, start32.data(), end32.data(), step32.data(), inputDesc.get(), inputTensor.data(), outDesc.get(), outTensor.data())); } else { - DiopiTensor out_temp_tensor = requiresTensor(ctx, out_tensor.shape(), input_tensor.dtype()); - CnnlTensorDesc out_tempDesc(out_temp_tensor, CNNL_LAYOUT_ARRAY); + DiopiTensor outTempTensor = requiresTensor(ctx, outTensor.shape(), inputTensor.dtype()); + CnnlTensorDesc outTempDesc(outTempTensor, CNNL_LAYOUT_ARRAY); DIOPI_CALLCNNL(cnnlStridedSliceBackward( - handle, start_32.data(), end_32.data(), step_32.data(), inputDesc.get(), input_tensor.data(), out_tempDesc.get(), out_temp_tensor.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_temp_tensor)); + handle, start32.data(), end32.data(), step32.data(), inputDesc.get(), inputTensor.data(), outTempDesc.get(), outTempTensor.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTempTensor)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/softmax.cpp b/DIOPI-IMPL/camb/functions/softmax.cpp index ee577d791..8400c62de 100644 --- a/DIOPI-IMPL/camb/functions/softmax.cpp +++ b/DIOPI-IMPL/camb/functions/softmax.cpp @@ -16,167 +16,167 @@ namespace impl { namespace camb { namespace { -diopiError_t softmax_forward(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor output, int64_t dim, bool is_log = false) { 
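// Reviewer note (illustration only, not part of the patch): softmaxForward below folds an
// arbitrary-rank input into a 3-D (outer, axis, inner) view before calling
// cnnlSoftmaxForward_v2, then picks HIGH/MEDIUM/LOW_DIMENSION mode depending on where the
// softmax axis lands. Worked example for shape {2, 3, 4, 5} with dim = 2:
//   outer = 2 * 3 = 6, axis = 4, inner = 5  ->  inputShape = {6, 4, 5}, CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION
// The reduceDim lambda simply multiplies the sizes over the inclusive range [from, to].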
+diopiError_t softmaxForward(diopiContextHandle_t ctx, DiopiTensor input, DiopiTensor output, int64_t dim, bool isLog = false) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_casted = input; - DiopiTensor output_casted = output; + DiopiTensor inputCasted = input; + DiopiTensor outputCasted = output; - std::vector tensors{&input_casted, &output_casted}; + std::vector tensors{&inputCasted, &outputCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32})); - std::vector src_input_shape{input_casted.shape().begin(), input_casted.shape().end()}; - std::vector src_output_shape{output_casted.shape().begin(), output_casted.shape().end()}; + std::vector srcInputShape{inputCasted.shape().begin(), inputCasted.shape().end()}; + std::vector srcOutputShape{outputCasted.shape().begin(), outputCasted.shape().end()}; - const int input_rank = input_casted.shape().size(); + const int inputRank = inputCasted.shape().size(); int mode = dim; - mode = (mode < 0) ? (mode + input_rank) : mode; - const size_t input_dim = 3; - std::vector input_shape(input_dim, 1); - if (input_rank != 0) { - if (input_rank <= 3) { - input_shape[2] = src_input_shape[input_rank - 1]; - input_shape[1] = (input_rank == 1) ? 1 : src_input_shape[input_rank - 2]; - input_shape[0] = (input_rank == 3) ? src_input_shape[0] : 1; + mode = (mode < 0) ? (mode + inputRank) : mode; + const size_t inputDim = 3; + std::vector inputShape(inputDim, 1); + if (inputRank != 0) { + if (inputRank <= 3) { + inputShape[2] = srcInputShape[inputRank - 1]; + inputShape[1] = (inputRank == 1) ? 1 : srcInputShape[inputRank - 2]; + inputShape[0] = (inputRank == 3) ? srcInputShape[0] : 1; } else { - auto reduce_dim = [](const std::vector& data, int from, int to) -> int { + auto reduceDim = [](const std::vector& data, int from, int to) -> int { to = std::min(to, data.size()); from = std::max(0, from); - return std::accumulate(data.cbegin() + from, data.cbegin() + to + 1, 1LL, std::multiplies()); + return std::accumulate(data.cbegin() + from, data.cbegin() + to + 1, 1LL, std::multiplies<>()); }; - const bool flag = (mode == input_rank - 1); - input_shape[0] = reduce_dim(src_input_shape, 0, flag ? (mode - 2) : (mode - 1)); - input_shape[1] = src_input_shape[flag ? (mode - 1) : mode]; - input_shape[2] = reduce_dim(src_input_shape, flag ? mode : (mode + 1), (input_rank - 1)); + const bool flag = (mode == inputRank - 1); + inputShape[0] = reduceDim(srcInputShape, 0, flag ? (mode - 2) : (mode - 1)); + inputShape[1] = srcInputShape[flag ? (mode - 1) : mode]; + inputShape[2] = reduceDim(srcInputShape, flag ? 
mode : (mode + 1), (inputRank - 1)); } } - cnnlSoftmaxMode_t mode_; - if (input_rank == 3 && mode == 0) { - mode_ = CNNL_SOFTMAX_MODE_HIGH_DIMENSION; - } else if (mode == input_rank - 1) { - mode_ = CNNL_SOFTMAX_MODE_LOW_DIMENSION; + cnnlSoftmaxMode_t modeTmp; + if (inputRank == 3 && mode == 0) { + modeTmp = CNNL_SOFTMAX_MODE_HIGH_DIMENSION; + } else if (mode == inputRank - 1) { + modeTmp = CNNL_SOFTMAX_MODE_LOW_DIMENSION; } else { - mode_ = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; + modeTmp = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; } const float alpha = 1; const float beta = 0; - CnnlTensorDesc x_desc, y_desc; - DIOPI_CALL(x_desc.set(input_casted, CNNL_LAYOUT_ARRAY, input_shape)); - DIOPI_CALL(y_desc.set(output_casted, CNNL_LAYOUT_ARRAY, input_shape)); + CnnlTensorDesc xDesc, yDesc; + DIOPI_CALL(xDesc.set(inputCasted, CNNL_LAYOUT_ARRAY, inputShape)); + DIOPI_CALL(yDesc.set(outputCasted, CNNL_LAYOUT_ARRAY, inputShape)); DIOPI_CALLCNNL(cnnlSoftmaxForward_v2(handle, - is_log ? CNNL_SOFTMAX_LOG : CNNL_SOFTMAX_ACCURATE, - mode_, + isLog ? CNNL_SOFTMAX_LOG : CNNL_SOFTMAX_ACCURATE, + modeTmp, CNNL_COMPUTATION_FAST, &alpha, - x_desc.get(), - input_casted.data(), + xDesc.get(), + inputCasted.data(), &beta, - y_desc.get(), - output_casted.data())); + yDesc.get(), + outputCasted.data())); - DIOPI_CALL(dataTypeCast(ctx, output, output_casted)); + DIOPI_CALL(dataTypeCast(ctx, output, outputCasted)); return diopiSuccess; } -diopiError_t softmax_backward(diopiContextHandle_t ctx, DiopiTensor grad_input_tensor, DiopiTensor grad_output_tensor, DiopiTensor output_tensor, int64_t dim, - bool is_log = false) { +diopiError_t softmaxBackward(diopiContextHandle_t ctx, DiopiTensor gradInputTensor, DiopiTensor gradOutputTensor, DiopiTensor outputTensor, int64_t dim, + bool isLog = false) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor grad_input_casted = grad_input_tensor; - DiopiTensor grad_output_casted = grad_output_tensor; - DiopiTensor output_casted = output_tensor; + DiopiTensor gradInputCasted = gradInputTensor; + DiopiTensor gradOutputCasted = gradOutputTensor; + DiopiTensor outputCasted = outputTensor; - std::vector tensors{&grad_input_casted, &grad_output_casted, &output_casted}; + std::vector tensors{&gradInputCasted, &gradOutputCasted, &outputCasted}; DIOPI_CALL(autoCastTensorType(ctx, tensors, {diopi_dtype_float16, diopi_dtype_float32})); - std::vector src_output_shape{output_casted.shape().begin(), output_casted.shape().end()}; + std::vector srcOutputShape{outputCasted.shape().begin(), outputCasted.shape().end()}; - const int input_rank = grad_input_casted.shape().size(); + const int inputRank = gradInputCasted.shape().size(); - const size_t input_dim = 3; + const size_t inputDim = 3; int mode = dim; - std::vector output_shape(input_dim, 1); - if (input_rank != 0) { - if (input_rank <= 3) { - output_shape[2] = src_output_shape[input_rank - 1]; - output_shape[1] = (input_rank == 1) ? 1 : src_output_shape[input_rank - 2]; - output_shape[0] = (input_rank == 3) ? src_output_shape[0] : 1; + std::vector outputShape(inputDim, 1); + if (inputRank != 0) { + if (inputRank <= 3) { + outputShape[2] = srcOutputShape[inputRank - 1]; + outputShape[1] = (inputRank == 1) ? 1 : srcOutputShape[inputRank - 2]; + outputShape[0] = (inputRank == 3) ? 
srcOutputShape[0] : 1; } else { - auto reduce_dim = [](const std::vector& data, int from, int to) -> int { + auto reduceDim = [](const std::vector& data, int from, int to) -> int { to = std::min(to, data.size()); from = std::max(0, from); - return std::accumulate(data.cbegin() + from, data.cbegin() + to + 1, 1LL, std::multiplies()); + return std::accumulate(data.cbegin() + from, data.cbegin() + to + 1, 1LL, std::multiplies<>()); }; - const bool flag = (mode == input_rank - 1); - output_shape[0] = reduce_dim(src_output_shape, 0, flag ? (mode - 2) : (mode - 1)); - output_shape[1] = src_output_shape[flag ? (mode - 1) : mode]; - output_shape[2] = reduce_dim(src_output_shape, flag ? mode : (mode + 1), (input_rank - 1)); + const bool flag = (mode == inputRank - 1); + outputShape[0] = reduceDim(srcOutputShape, 0, flag ? (mode - 2) : (mode - 1)); + outputShape[1] = srcOutputShape[flag ? (mode - 1) : mode]; + outputShape[2] = reduceDim(srcOutputShape, flag ? mode : (mode + 1), (inputRank - 1)); } } - mode = (mode < 0) ? (mode + input_rank) : mode; + mode = (mode < 0) ? (mode + inputRank) : mode; - cnnlSoftmaxMode_t mode_; - if (input_rank == 3 && mode == 0) { - mode_ = CNNL_SOFTMAX_MODE_HIGH_DIMENSION; - } else if (mode == input_rank - 1) { - mode_ = CNNL_SOFTMAX_MODE_LOW_DIMENSION; + cnnlSoftmaxMode_t modeTmp; + if (inputRank == 3 && mode == 0) { + modeTmp = CNNL_SOFTMAX_MODE_HIGH_DIMENSION; + } else if (mode == inputRank - 1) { + modeTmp = CNNL_SOFTMAX_MODE_LOW_DIMENSION; } else { - mode_ = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; + modeTmp = CNNL_SOFTMAX_MODE_MEDIUM_DIMENSION; } - CnnlTensorDesc grad_input_desc, grad_output_desc, output_desc; - DIOPI_CALL(grad_input_desc.set(grad_input_casted, CNNL_LAYOUT_ARRAY, output_shape)); - DIOPI_CALL(grad_output_desc.set(grad_output_casted, CNNL_LAYOUT_ARRAY, output_shape)); - DIOPI_CALL(output_desc.set(output_casted, CNNL_LAYOUT_ARRAY, output_shape)); + CnnlTensorDesc gradInputDesc, gradOutputDesc, outputDesc; + DIOPI_CALL(gradInputDesc.set(gradInputCasted, CNNL_LAYOUT_ARRAY, outputShape)); + DIOPI_CALL(gradOutputDesc.set(gradOutputCasted, CNNL_LAYOUT_ARRAY, outputShape)); + DIOPI_CALL(outputDesc.set(outputCasted, CNNL_LAYOUT_ARRAY, outputShape)); DIOPI_CALLCNNL(cnnlSoftmaxBackward(handle, - is_log ? CNNL_SOFTMAX_LOG : CNNL_SOFTMAX_ACCURATE, - mode_, - NULL, - output_desc.get(), - output_casted.data(), - grad_output_desc.get(), - grad_output_casted.data(), - NULL, - grad_input_desc.get(), - grad_input_casted.data())); - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, grad_input_casted)); + isLog ? 
CNNL_SOFTMAX_LOG : CNNL_SOFTMAX_ACCURATE, + modeTmp, + nullptr, + outputDesc.get(), + outputCasted.data(), + gradOutputDesc.get(), + gradOutputCasted.data(), + nullptr, + gradInputDesc.get(), + gradInputCasted.data())); + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, gradInputCasted)); return diopiSuccess; } } // namespace extern "C" diopiError_t diopiSoftmax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(softmax_forward(ctx, input_tensor, output_tensor, dim)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(softmaxForward(ctx, inputTensor, outputTensor, dim)); return diopiSuccess; } -extern "C" diopiError_t diopiSoftmaxBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +extern "C" diopiError_t diopiSoftmaxBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t output, int64_t dim) { - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor output_tensor(output); - DIOPI_CALL(softmax_backward(ctx, grad_input_tensor, grad_output_tensor, output_tensor, dim)); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor outputTensor(output); + DIOPI_CALL(softmaxBackward(ctx, gradInputTensor, gradOutputTensor, outputTensor, dim)); return diopiSuccess; } extern "C" diopiError_t diopiLogSoftmax(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t dim) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(softmax_forward(ctx, input_tensor, output_tensor, dim, true)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(softmaxForward(ctx, inputTensor, outputTensor, dim, true)); return diopiSuccess; } -extern "C" diopiError_t diopiLogSoftmaxBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +extern "C" diopiError_t diopiLogSoftmaxBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t output, int64_t dim) { - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); - DiopiTensor output_tensor(output); - DIOPI_CALL(softmax_backward(ctx, grad_input_tensor, grad_output_tensor, output_tensor, dim, true)); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); + DiopiTensor outputTensor(output); + DIOPI_CALL(softmaxBackward(ctx, gradInputTensor, gradOutputTensor, outputTensor, dim, true)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/sort.cpp b/DIOPI-IMPL/camb/functions/sort.cpp index 17af97e0f..f35b86261 100644 --- a/DIOPI-IMPL/camb/functions/sort.cpp +++ b/DIOPI-IMPL/camb/functions/sort.cpp @@ -16,64 +16,64 @@ extern "C" { diopiError_t diopiSort(diopiContextHandle_t ctx, diopiTensorHandle_t values, diopiTensorHandle_t indices, diopiConstTensorHandle_t input, int64_t dim, bool descending, const bool* stable) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - auto input_tensor = DiopiTensor(input); - auto indices_tensor = DiopiTensor(indices); - auto values_tensor = DiopiTensor(values); + auto inputTensor = DiopiTensor(input); + auto indicesTensor = DiopiTensor(indices); + auto valuesTensor = DiopiTensor(values); - DiopiTensor 
values_tensor_temp = values_tensor; - DiopiTensor input_tensor_temp = input_tensor; - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor_temp, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, values_tensor_temp, diopi_dtype_float32)); - } else if (input_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor_temp, diopi_dtype_int32)); - DIOPI_CALL(dataTypeCast(ctx, values_tensor_temp, diopi_dtype_int32)); + DiopiTensor valuesTensorTemp = valuesTensor; + DiopiTensor inputTensorTemp = inputTensor; + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensorTemp, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, valuesTensorTemp, diopi_dtype_float32)); + } else if (inputTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensorTemp, diopi_dtype_int32)); + DIOPI_CALL(dataTypeCast(ctx, valuesTensorTemp, diopi_dtype_int32)); } else { - input_tensor_temp = DiopiTensor(input); - values_tensor_temp = DiopiTensor(values); + inputTensorTemp = DiopiTensor(input); + valuesTensorTemp = DiopiTensor(values); } - DiopiTensor indices_tensor_temp = indices_tensor; - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_temp, diopi_dtype_int32)); - CnnlTensorDesc input_desc(input_tensor_temp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc values_desc(values_tensor_temp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc indices_desc(indices_tensor_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor indicesTensorTemp = indicesTensor; + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTemp, diopi_dtype_int32)); + CnnlTensorDesc inputDesc(inputTensorTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc valuesDesc(valuesTensorTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indicesDesc(indicesTensorTemp, CNNL_LAYOUT_ARRAY); uint64_t k; - std::vector input_shape = input_tensor_temp.shape(); + std::vector inputShape = inputTensorTemp.shape(); if (dim < 0) { - dim += input_shape.size(); + dim += inputShape.size(); } - k = input_shape[dim]; + k = inputShape[dim]; - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetTopKTensorWorkspaceSize(handle, input_desc.get(), k, dim, descending, values_desc.get(), indices_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetTopKTensorWorkspaceSize(handle, inputDesc.get(), k, dim, descending, valuesDesc.get(), indicesDesc.get(), &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - const bool lower_index_first = true; + const bool lowerIndexFirst = true; DIOPI_CALLCNNL(cnnlTopKTensor_v3(handle, - input_desc.get(), - input_tensor_temp.data(), + inputDesc.get(), + inputTensorTemp.data(), k, dim, descending, true, stable, workspace, - workspace_size, - values_desc.get(), - values_tensor_temp.data(), - indices_desc.get(), - indices_tensor_temp.data())) - if (values_tensor_temp.dtype() != values_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, values_tensor, values_tensor_temp)); + workspaceSize, + valuesDesc.get(), + valuesTensorTemp.data(), + indicesDesc.get(), + indicesTensorTemp.data())) + if (valuesTensorTemp.dtype() != valuesTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, valuesTensor, valuesTensorTemp)); } - if (indices_tensor_temp.dtype() != indices_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor, indices_tensor_temp)); + if (indicesTensorTemp.dtype() != indicesTensor.dtype()) { + 
DIOPI_CALL(dataTypeCast(ctx, indicesTensor, indicesTensorTemp)); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/sqrt.cpp b/DIOPI-IMPL/camb/functions/sqrt.cpp index 8658bd5d2..9a03e2687 100644 --- a/DIOPI-IMPL/camb/functions/sqrt.cpp +++ b/DIOPI-IMPL/camb/functions/sqrt.cpp @@ -19,28 +19,28 @@ static diopiError_t sqrt(diopiContextHandle_t ctx, DiopiTensor& output, DiopiTen std::vector pTensors{&input}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor output_tmp = output; + DiopiTensor outputTmp = output; if (input.dtype() != output.dtype()) { - output_tmp = requiresTensor(ctx, output.shape(), input.dtype()); + outputTmp = requiresTensor(ctx, output.shape(), input.dtype()); } CnnlTensorDesc desc(input, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlSqrt_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), output_tmp.data())); - if (output_tmp.dtype() != output.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, output, output_tmp)); + DIOPI_CALLCNNL(cnnlSqrt_v2(handle, CNNL_COMPUTATION_HIGH_PRECISION, desc.get(), input.data(), desc.get(), outputTmp.data())); + if (outputTmp.dtype() != output.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, output, outputTmp)); } return diopiSuccess; } extern "C" diopiError_t diopiSqrtInp(diopiContextHandle_t ctx, diopiTensorHandle_t input) { - DiopiTensor input_tensor(input); - DIOPI_CALL(sqrt(ctx, input_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DIOPI_CALL(sqrt(ctx, inputTensor, inputTensor)); return diopiSuccess; } extern "C" diopiError_t diopiSqrt(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DIOPI_CALL(sqrt(ctx, output_tensor, input_tensor)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DIOPI_CALL(sqrt(ctx, outputTensor, inputTensor)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/stack.cpp b/DIOPI-IMPL/camb/functions/stack.cpp index 930f87fda..01ff6458f 100644 --- a/DIOPI-IMPL/camb/functions/stack.cpp +++ b/DIOPI-IMPL/camb/functions/stack.cpp @@ -8,35 +8,35 @@ extern "C" { diopiError_t diopiStack(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t* tensors, int64_t numTensors, int64_t dim) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); std::vector inputsDesc(numTensors); - std::vector inputs_desc(numTensors); - std::vector inputs_data(numTensors); + std::vector inputsDescTmp(numTensors); + std::vector inputsData(numTensors); // insert a new dim to input_tensors for (int i = 0; i < numTensors; i++) { - DiopiTensor temp_tensor(tensors[i]); - std::vector cat_shape(temp_tensor.shape().begin(), temp_tensor.shape().end()); + DiopiTensor tempTensor(tensors[i]); + std::vector catShape(tempTensor.shape().begin(), tempTensor.shape().end()); cnnlDataType_t dtype; - CnnlDataType::convertToCnnlType(&dtype, temp_tensor.dtype()); + CnnlDataType::convertToCnnlType(&dtype, tempTensor.dtype()); if (dim == -1) { - dim = temp_tensor.shape().size(); + dim = tempTensor.shape().size(); } - cat_shape.insert(cat_shape.begin() + dim, 1); - int cat_dimNb = cat_shape.size(); + catShape.insert(catShape.begin() + dim, 1); + int catDimNb = catShape.size(); - inputs_data[i] = temp_tensor.data(); - inputsDesc[i].set(temp_tensor, CNNL_LAYOUT_ARRAY); - inputs_desc[i] = inputsDesc[i].get(); - DIOPI_CALLCNNL(cnnlSetTensorDescriptor(inputs_desc[i], CNNL_LAYOUT_ARRAY, dtype, cat_dimNb, 
cat_shape.data())); + inputsData[i] = tempTensor.data(); + inputsDesc[i].set(tempTensor, CNNL_LAYOUT_ARRAY); + inputsDescTmp[i] = inputsDesc[i].get(); + DIOPI_CALLCNNL(cnnlSetTensorDescriptor(inputsDescTmp[i], CNNL_LAYOUT_ARRAY, dtype, catDimNb, catShape.data())); } - size_t workspace_size(0); - DIOPI_CALLCNNL(cnnlGetConcatWorkspaceSize(handle, numTensors, &workspace_size)); + size_t workspaceSize(0); + DIOPI_CALLCNNL(cnnlGetConcatWorkspaceSize(handle, numTensors, &workspaceSize)); void* workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - DiopiTensor out_tensor(out); - CnnlTensorDesc out_desc(out_tensor, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlConcat(handle, numTensors, dim, inputs_desc.data(), inputs_data.data(), workspace, workspace_size, out_desc.get(), out_tensor.data())); + DiopiTensor outTensor(out); + CnnlTensorDesc outDesc(outTensor, CNNL_LAYOUT_ARRAY); + DIOPI_CALLCNNL(cnnlConcat(handle, numTensors, dim, inputsDescTmp.data(), inputsData.data(), workspace, workspaceSize, outDesc.get(), outTensor.data())); return diopiSuccess; } } // extern "C" diff --git a/DIOPI-IMPL/camb/functions/sub.cpp b/DIOPI-IMPL/camb/functions/sub.cpp index 10db55110..1c300f573 100644 --- a/DIOPI-IMPL/camb/functions/sub.cpp +++ b/DIOPI-IMPL/camb/functions/sub.cpp @@ -16,41 +16,41 @@ namespace camb { extern "C" diopiError_t diopiSub(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other, const diopiScalar_t* alpha) { - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor output_tensor(out); - DIOPI_CALL(cnnl_op_tensor( - ctx, input_tensor, other_tensor, output_tensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor outputTensor(out); + DIOPI_CALL(cnnlOpTensor( + ctx, inputTensor, otherTensor, outputTensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); return diopiSuccess; } extern "C" diopiError_t diopiSubInp(diopiContextHandle_t ctx, diopiTensorHandle_t input, diopiConstTensorHandle_t other, const diopiScalar_t* alpha) { - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor output_tensor(input); - DIOPI_CALL(cnnl_op_tensor( - ctx, input_tensor, other_tensor, output_tensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor outputTensor(input); + DIOPI_CALL(cnnlOpTensor( + ctx, inputTensor, otherTensor, outputTensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); return diopiSuccess; } extern "C" diopiError_t diopiSubScalar(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* other, const diopiScalar_t* alpha) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - DiopiTensor other_tensor; - DIOPI_CALL(makeTensorFromScalar(ctx, other, other_tensor)); - DIOPI_CALL(cnnl_op_tensor( - ctx, input_tensor, other_tensor, output_tensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? 
alpha->fval : alpha->ival)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + DiopiTensor otherTensor; + DIOPI_CALL(makeTensorFromScalar(ctx, other, otherTensor)); + DIOPI_CALL(cnnlOpTensor( + ctx, inputTensor, otherTensor, outputTensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); return diopiSuccess; } extern "C" diopiError_t diopiSubInpScalar(diopiContextHandle_t ctx, diopiTensorHandle_t input, const diopiScalar_t* other, const diopiScalar_t* alpha) { - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(input); - DiopiTensor other_tensor; - DIOPI_CALL(makeTensorFromScalar(ctx, other, other_tensor)); - DIOPI_CALL(cnnl_op_tensor( - ctx, input_tensor, other_tensor, output_tensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(input); + DiopiTensor otherTensor; + DIOPI_CALL(makeTensorFromScalar(ctx, other, otherTensor)); + DIOPI_CALL(cnnlOpTensor( + ctx, inputTensor, otherTensor, outputTensor, CNNL_OP_TENSOR_SUB, 1.0, DiopiDataType::isFloatPoint(alpha->stype) ? alpha->fval : alpha->ival)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/threshold.cpp b/DIOPI-IMPL/camb/functions/threshold.cpp index dc31aae19..2b24491cf 100644 --- a/DIOPI-IMPL/camb/functions/threshold.cpp +++ b/DIOPI-IMPL/camb/functions/threshold.cpp @@ -1,6 +1,6 @@ #include -#include "../../third_party/half/include/half.hpp" +#include "../common/float16.hpp" #include "../cnnl_helper.hpp" #include "../common/common.hpp" @@ -11,74 +11,74 @@ extern "C" { diopiError_t diopiThreshold(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, const diopiScalar_t* threshold, const diopiScalar_t* value) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector pTensors{&input_tensor}; + std::vector pTensors{&inputTensor}; std::set supportedDtypes{diopi_dtype_int8, diopi_dtype_uint8, diopi_dtype_int16, diopi_dtype_int32, diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DiopiTensor outTensorTemp = outTensor; + if (outTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_tensor_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_tensor_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputTensorDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outTensorDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); - auto threshold_scalar = DiopiDataType::isInteger(threshold->stype) ? threshold->ival : threshold->fval; - auto value_scalar = DiopiDataType::isInteger(value->stype) ? value->ival : value->fval; + auto thresholdScalar = DiopiDataType::isInteger(threshold->stype) ? threshold->ival : threshold->fval; + auto valueScalar = DiopiDataType::isInteger(value->stype) ? 
value->ival : value->fval; - void* threshold_val; - void* value_val; - int8_t value_int8, threshold_val_int8; - uint8_t value_uint8, threshold_val_uint8; - int16_t value_int16, threshold_val_int16; - int32_t value_int32, threshold_val_int32; - half_float::half value_float16, threshold_val_float16; - float value_float32, threshold_val_float32; + void* thresholdVal; + void* valueVal; + int8_t valueInt8, thresholdValInt8; + uint8_t valueUint8, thresholdValUint8; + int16_t valueInt16, thresholdValInt16; + int32_t valueInt32, thresholdValInt32; + half_float::half valueFloat16, thresholdValFloat16; + float valueFloat32, thresholdValFloat32; - switch (input_tensor.dtype()) { + switch (inputTensor.dtype()) { case diopi_dtype_int8: { - threshold_val_int8 = int8_t(threshold_scalar); - value_int8 = int8_t(value_scalar); - threshold_val = &threshold_val_int8; - value_val = &value_int8; + thresholdValInt8 = int8_t(thresholdScalar); + valueInt8 = int8_t(valueScalar); + thresholdVal = &thresholdValInt8; + valueVal = &valueInt8; break; } case diopi_dtype_uint8: { - threshold_val_uint8 = uint8_t(threshold_scalar); - value_uint8 = uint(value_scalar); - threshold_val = &threshold_val_uint8; - value_val = &value_uint8; + thresholdValUint8 = uint8_t(thresholdScalar); + valueUint8 = uint8_t(valueScalar); + thresholdVal = &thresholdValUint8; + valueVal = &valueUint8; break; } case diopi_dtype_int16: { - threshold_val_int16 = int16_t(threshold_scalar); - value_int16 = int16_t(value_scalar); - threshold_val = &threshold_val_int16; - value_val = &value_int16; + thresholdValInt16 = int16_t(thresholdScalar); + valueInt16 = int16_t(valueScalar); + thresholdVal = &thresholdValInt16; + valueVal = &valueInt16; break; } case diopi_dtype_int32: { - threshold_val_int32 = int32_t(threshold_scalar); - value_int32 = int32_t(value_scalar); - threshold_val = &threshold_val_int32; - value_val = &value_int32; + thresholdValInt32 = int32_t(thresholdScalar); + valueInt32 = int32_t(valueScalar); + thresholdVal = &thresholdValInt32; + valueVal = &valueInt32; break; } case diopi_dtype_float16: { - threshold_val_float16 = half_float::half(threshold_scalar); - value_float16 = half_float::half(value_scalar); - threshold_val = &threshold_val_float16; - value_val = &value_float16; + thresholdValFloat16 = half_float::half(thresholdScalar); + valueFloat16 = half_float::half(valueScalar); + thresholdVal = &thresholdValFloat16; + valueVal = &valueFloat16; break; } case diopi_dtype_float32: { - threshold_val_float32 = static_cast<float>(threshold_scalar); - value_float32 = static_cast<float>(value_scalar); - threshold_val = &threshold_val_float32; - value_val = &value_float32; + thresholdValFloat32 = static_cast<float>(thresholdScalar); + valueFloat32 = static_cast<float>(valueScalar); + thresholdVal = &thresholdValFloat32; + valueVal = &valueFloat32; break; } default: @@ -86,52 +86,53 @@ diopiError_t diopiThreshold(diopiContextHandle_t ctx, diopiTensorHandle_t out, d } DIOPI_CALLCNNL( - cnnlThreshold(handle, input_tensor_desc.get(), input_tensor.data(), threshold_val, value_val, out_tensor_desc.get(), out_tensor_temp.data())); + cnnlThreshold(handle, inputTensorDesc.get(), inputTensor.data(), thresholdVal, valueVal, outTensorDesc.get(), outTensorTemp.data())); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } diopiError_t diopiThresholdInp(diopiContextHandle_t ctx,
diopiTensorHandle_t input, const diopiScalar_t* threshold, const diopiScalar_t* value) { - diopiThreshold(ctx, input, input, threshold, value); + DIOPI_CALL(diopiThreshold(ctx, input, input, threshold, value)); + return diopiSuccess; } -diopiError_t diopiThresholdBackward(diopiContextHandle_t ctx, diopiTensorHandle_t grad_input, diopiConstTensorHandle_t grad_output, +diopiError_t diopiThresholdBackward(diopiContextHandle_t ctx, diopiTensorHandle_t gradInput, diopiConstTensorHandle_t gradOutput, diopiConstTensorHandle_t input, const diopiScalar_t* threshold) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor grad_input_tensor(grad_input); - DiopiTensor grad_output_tensor(grad_output); + DiopiTensor inputTensor(input); + DiopiTensor gradInputTensor(gradInput); + DiopiTensor gradOutputTensor(gradOutput); - std::vector pTensors{&input_tensor, &grad_output_tensor}; + std::vector pTensors{&inputTensor, &gradOutputTensor}; std::set supportedDtypes{diopi_dtype_float16, diopi_dtype_float32}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, supportedDtypes)); - DiopiTensor grad_input_tensor_temp = grad_input_tensor; - if (grad_input_tensor.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor_temp, input_tensor.dtype())); + DiopiTensor gradInputTensorTemp = gradInputTensor; + if (gradInputTensor.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc grad_input_desc(grad_input_tensor_temp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc grad_output_desc(grad_output_tensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradInputDesc(gradInputTensorTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc gradOutputDesc(gradOutputTensor, CNNL_LAYOUT_ARRAY); - double threshold_scalar = DiopiDataType::isInteger(threshold->stype) ? threshold->ival : threshold->fval; + double thresholdScalar = DiopiDataType::isInteger(threshold->stype) ? 
threshold->ival : threshold->fval; - void* threshold_val; - half_float::half threshold_scalar_half; - float threshold_scalar_float; - switch (input_tensor.dtype()) { + void* thresholdVal; + half_float::half thresholdScalarHalf; + float thresholdScalarFloat; + switch (inputTensor.dtype()) { case diopi_dtype_float16: { - threshold_scalar_half = half_float::half(threshold_scalar); - threshold_val = &threshold_scalar_half; + thresholdScalarHalf = half_float::half(thresholdScalar); + thresholdVal = &thresholdScalarHalf; break; } case diopi_dtype_float32: { - threshold_scalar_float = static_cast(threshold_scalar); - threshold_val = &threshold_scalar_float; + thresholdScalarFloat = static_cast(thresholdScalar); + thresholdVal = &thresholdScalarFloat; break; } default: @@ -139,16 +140,16 @@ diopiError_t diopiThresholdBackward(diopiContextHandle_t ctx, diopiTensorHandle_ } DIOPI_CALLCNNL(cnnlThresholdBackward(handle, - input_desc.get(), - input_tensor.data(), - grad_output_desc.get(), - grad_output_tensor.data(), - threshold_val, - grad_input_desc.get(), - grad_input_tensor_temp.data())) - - if (grad_input_tensor_temp.dtype() != grad_input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, grad_input_tensor, grad_input_tensor_temp)); + inputDesc.get(), + inputTensor.data(), + gradOutputDesc.get(), + gradOutputTensor.data(), + thresholdVal, + gradInputDesc.get(), + gradInputTensorTemp.data())) + + if (gradInputTensorTemp.dtype() != gradInputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, gradInputTensor, gradInputTensorTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/topk.cpp b/DIOPI-IMPL/camb/functions/topk.cpp index f4c207176..4ae1bcbad 100644 --- a/DIOPI-IMPL/camb/functions/topk.cpp +++ b/DIOPI-IMPL/camb/functions/topk.cpp @@ -22,56 +22,56 @@ diopiError_t diopiTopk(diopiContextHandle_t ctx, bool largest, bool sorted) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor indices_tensor(indices); - DiopiTensor values_tensor(values); + DiopiTensor inputTensor(input); + DiopiTensor indicesTensor(indices); + DiopiTensor valuesTensor(values); - DiopiTensor values_tensor_temp = values_tensor; - DiopiTensor input_tensor_temp = input_tensor; - if (input_tensor.dtype() == diopi_dtype_float64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor_temp, diopi_dtype_float32)); - DIOPI_CALL(dataTypeCast(ctx, values_tensor_temp, diopi_dtype_float32)); - } else if (input_tensor.dtype() == diopi_dtype_int64) { - DIOPI_CALL(dataTypeCast(ctx, input_tensor_temp, diopi_dtype_int32)); - DIOPI_CALL(dataTypeCast(ctx, values_tensor_temp, diopi_dtype_int32)); + DiopiTensor valuesTensorTemp = valuesTensor; + DiopiTensor inputTensorTemp = inputTensor; + if (inputTensor.dtype() == diopi_dtype_float64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensorTemp, diopi_dtype_float32)); + DIOPI_CALL(dataTypeCast(ctx, valuesTensorTemp, diopi_dtype_float32)); + } else if (inputTensor.dtype() == diopi_dtype_int64) { + DIOPI_CALL(dataTypeCast(ctx, inputTensorTemp, diopi_dtype_int32)); + DIOPI_CALL(dataTypeCast(ctx, valuesTensorTemp, diopi_dtype_int32)); } else { - input_tensor_temp = DiopiTensor(input); - values_tensor_temp = DiopiTensor(values); + inputTensorTemp = DiopiTensor(input); + valuesTensorTemp = DiopiTensor(values); } - DiopiTensor indices_tensor_temp = indices_tensor; - DIOPI_CALL(dataTypeCast(ctx, indices_tensor_temp, diopi_dtype_int32)); - CnnlTensorDesc input_desc(input_tensor_temp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc values_desc(values_tensor_temp, CNNL_LAYOUT_ARRAY); 
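Both sort.cpp above and topk.cpp here rely on the same CNNL 64-bit fallback: float64 and int64 operands are cast down to float32/int32, the kernel runs on the 32-bit copies, and the results are cast back into the caller's tensors only when the dtypes differ. A condensed sketch of that flow, reusing the DiopiTensor/dataTypeCast/DIOPI_CALL helpers already present in this backend; runKernelOn32Bit is a hypothetical stand-in for the actual CNNL call (e.g. cnnlTopKTensor_v3), not a real API:

    // Sketch only: down-cast / compute / cast-back, the shape shared by diopiSort and diopiTopk.
    static diopiError_t computeWith32BitFallback(diopiContextHandle_t ctx, DiopiTensor& input, DiopiTensor& output) {
        DiopiTensor inputTemp = input;    // working copies so the caller's tensors keep their dtypes
        DiopiTensor outputTemp = output;
        if (input.dtype() == diopi_dtype_float64) {
            DIOPI_CALL(dataTypeCast(ctx, inputTemp, diopi_dtype_float32));   // CNNL kernels lack fp64 support
            DIOPI_CALL(dataTypeCast(ctx, outputTemp, diopi_dtype_float32));
        } else if (input.dtype() == diopi_dtype_int64) {
            DIOPI_CALL(dataTypeCast(ctx, inputTemp, diopi_dtype_int32));     // ... and int64 support
            DIOPI_CALL(dataTypeCast(ctx, outputTemp, diopi_dtype_int32));
        }
        DIOPI_CALL(runKernelOn32Bit(ctx, inputTemp, outputTemp));            // placeholder for the real kernel call
        if (outputTemp.dtype() != output.dtype()) {
            DIOPI_CALL(dataTypeCast(ctx, output, outputTemp));               // copy the result back in the requested dtype
        }
        return diopiSuccess;
    }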
- CnnlTensorDesc indices_desc(indices_tensor_temp, CNNL_LAYOUT_ARRAY); + DiopiTensor indicesTensorTemp = indicesTensor; + DIOPI_CALL(dataTypeCast(ctx, indicesTensorTemp, diopi_dtype_int32)); + CnnlTensorDesc inputDesc(inputTensorTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc valuesDesc(valuesTensorTemp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc indicesDesc(indicesTensorTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetTopKTensorWorkspaceSize(handle, input_desc.get(), k, dim, largest, values_desc.get(), indices_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetTopKTensorWorkspaceSize(handle, inputDesc.get(), k, dim, largest, valuesDesc.get(), indicesDesc.get(), &workspaceSize)); void *workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } - const bool lower_index_first = true; + const bool lowerIndexFirst = true; DIOPI_CALLCNNL(cnnlTopKTensor_v3(handle, - input_desc.get(), - input_tensor_temp.data(), + inputDesc.get(), + inputTensorTemp.data(), k, dim, largest, sorted, - lower_index_first, + lowerIndexFirst, workspace, - workspace_size, - values_desc.get(), - values_tensor_temp.data(), - indices_desc.get(), - indices_tensor_temp.data())) - if (values_tensor_temp.dtype() != values_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, values_tensor, values_tensor_temp)); + workspaceSize, + valuesDesc.get(), + valuesTensorTemp.data(), + indicesDesc.get(), + indicesTensorTemp.data())) + if (valuesTensorTemp.dtype() != valuesTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, valuesTensor, valuesTensorTemp)); } - if (indices_tensor_temp.dtype() != indices_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, indices_tensor, indices_tensor_temp)); + if (indicesTensorTemp.dtype() != indicesTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, indicesTensor, indicesTensorTemp)); } return diopiSuccess; diff --git a/DIOPI-IMPL/camb/functions/transpose.cpp b/DIOPI-IMPL/camb/functions/transpose.cpp index 5b47002c9..f368b1cc4 100644 --- a/DIOPI-IMPL/camb/functions/transpose.cpp +++ b/DIOPI-IMPL/camb/functions/transpose.cpp @@ -4,36 +4,35 @@ * @copyright (c) 2023, DeepLink. 
*/ -#include +#include #include + #include "../cnnl_helper.hpp" namespace impl { namespace camb { -std::vector getPerm(diopiConstTensorHandle_t tensor_handle, +std::vector getPerm(diopiConstTensorHandle_t tensorHandle, int64_t dim0, int64_t dim1) { - DiopiTensor tensor(tensor_handle); - int input_size_ = tensor.shape().size(); + DiopiTensor tensor(tensorHandle); + int inputSize = tensor.shape().size(); - int dim0_ = 0; - dim0_ = static_cast(dim0); - if (dim0_ < 0) { - dim0_ = dim0_ + input_size_; + int dim0Tmp = static_cast(dim0); + if (dim0Tmp < 0) { + dim0Tmp = dim0Tmp + inputSize; } - int dim1_ = 0; - dim1_ = static_cast(dim1); - if (dim1_ < 0) { - dim1_ = dim1_ + input_size_; + int dim1Tmp = static_cast(dim1); + if (dim1Tmp < 0) { + dim1Tmp = dim1Tmp + inputSize; } - std::vector perms(input_size_); + std::vector perms(inputSize); std::iota(perms.begin(), perms.end(), 0); - perms[dim0_] = dim1_; - perms[dim1_] = dim0_; + perms[dim0Tmp] = dim1Tmp; + perms[dim1Tmp] = dim0Tmp; return perms; } @@ -46,43 +45,43 @@ diopiError_t diopiTranspose(diopiContextHandle_t ctx, int64_t dim0, int64_t dim1) { auto stream = getStream(ctx); - CnnlResourceGuard CnnlHandle; - cnnlHandle_t handle = CnnlHandle.get(); + CnnlResourceGuard cnnlHandle; + cnnlHandle_t handle = cnnlHandle.get(); DIOPI_CALLCNNL(cnnlSetQueue(handle, stream)); CnnlResourceGuard - CnnlTransposeDesc; - cnnlTransposeDescriptor_t transpose_desc = CnnlTransposeDesc.get(); + cnnlTransposeDesc; + cnnlTransposeDescriptor_t transposeDesc = cnnlTransposeDesc.get(); std::vector perms = getPerm(input, dim0, dim1); - DIOPI_CALLCNNL(cnnlSetTransposeDescriptor(transpose_desc, perms.size(), perms.data())); + DIOPI_CALLCNNL(cnnlSetTransposeDescriptor(transposeDesc, perms.size(), perms.data())); - DiopiTensor input_tensor(input); - DiopiTensor output_tensor(out); - if (input_tensor.dtype() == diopi_dtype_float64) { + DiopiTensor inputTensor(input); + DiopiTensor outputTensor(out); + if (inputTensor.dtype() == diopi_dtype_float64) { return diopiDtypeNotSupported; } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc output_desc(output_tensor, CNNL_LAYOUT_ARRAY); - const void* input_ptr = input_tensor.data(); - void* out_ptr = output_tensor.data(); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outputDesc(outputTensor, CNNL_LAYOUT_ARRAY); + const void* inputPtr = inputTensor.data(); + void* outPtr = outputTensor.data(); - size_t workspace_size = 0; + size_t workspaceSize = 0; DIOPI_CALLCNNL(cnnlGetTransposeWorkspaceSize( - handle, input_desc.get(), transpose_desc, &workspace_size)); + handle, inputDesc.get(), transposeDesc, &workspaceSize)); void *workspace = nullptr; - if (0 != workspace_size) { - workspace = requiresBuffer(ctx, workspace_size).data(); + if (0 != workspaceSize) { + workspace = requiresBuffer(ctx, workspaceSize).data(); } DIOPI_CALLCNNL(cnnlTranspose_v2(handle, - transpose_desc, - input_desc.get(), - input_ptr, - output_desc.get(), - out_ptr, + transposeDesc, + inputDesc.get(), + inputPtr, + outputDesc.get(), + outPtr, workspace, - workspace_size)); + workspaceSize)); return diopiSuccess; } } // extern "C" diff --git a/DIOPI-IMPL/camb/functions/tril.cpp b/DIOPI-IMPL/camb/functions/tril.cpp index 299baff5a..3a75af1b8 100644 --- a/DIOPI-IMPL/camb/functions/tril.cpp +++ b/DIOPI-IMPL/camb/functions/tril.cpp @@ -15,20 +15,20 @@ extern "C" { DIOPI_API diopiError_t diopiTril(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t input, int64_t diagonal) { cnnlHandle_t 
handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor outTensor(out); - std::vector<DiopiTensor*> pTensors{&input_tensor}; + std::vector<DiopiTensor*> pTensors{&inputTensor}; DIOPI_CALL(autoCastTensorType(ctx, pTensors, {diopi_dtype_int8, diopi_dtype_int16, diopi_dtype_int32, diopi_dtype_float16, diopi_dtype_float32})); - DiopiTensor input_tensor_tmp = *pTensors[0]; - DiopiTensor out_tensor_tmp = out_tensor; - DIOPI_CALL(dataTypeCast(ctx, out_tensor_tmp, input_tensor_tmp.dtype())); + DiopiTensor inputTensorTmp = *pTensors[0]; + DiopiTensor outTensorTmp = outTensor; + DIOPI_CALL(dataTypeCast(ctx, outTensorTmp, inputTensorTmp.dtype())); - CnnlTensorDesc input_desc(input_tensor_tmp, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_tmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensorTmp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTmp, CNNL_LAYOUT_ARRAY); - DIOPI_CALLCNNL(cnnlTri(handle, static_cast<int>(diagonal), false, input_desc.get(), input_tensor_tmp.data(), out_desc.get(), out_tensor_tmp.data())); - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_tmp)); + DIOPI_CALLCNNL(cnnlTri(handle, static_cast<int>(diagonal), false, inputDesc.get(), inputTensorTmp.data(), outDesc.get(), outTensorTmp.data())); + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTmp)); return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/functions/where.cpp b/DIOPI-IMPL/camb/functions/where.cpp index 415a47e70..16351d718 100644 --- a/DIOPI-IMPL/camb/functions/where.cpp +++ b/DIOPI-IMPL/camb/functions/where.cpp @@ -9,48 +9,48 @@ extern "C" { diopiError_t diopiWhere(diopiContextHandle_t ctx, diopiTensorHandle_t out, diopiConstTensorHandle_t condition, diopiConstTensorHandle_t input, diopiConstTensorHandle_t other) { cnnlHandle_t handle = cnnlHandlePool.get(ctx); - DiopiTensor input_tensor(input); - DiopiTensor other_tensor(other); - DiopiTensor cond_tensor(condition); - DiopiTensor out_tensor(out); + DiopiTensor inputTensor(input); + DiopiTensor otherTensor(other); + DiopiTensor condTensor(condition); + DiopiTensor outTensor(out); - std::vector<DiopiTensor*> inputs{&input_tensor, &other_tensor, &cond_tensor}; - std::set<diopiDtype_t> inputs_support_dtype{ + std::vector<DiopiTensor*> inputs{&inputTensor, &otherTensor, &condTensor}; + std::set<diopiDtype_t> inputsSupportDtype{ diopi_dtype_int8, diopi_dtype_int16, diopi_dtype_int32, diopi_dtype_int64, diopi_dtype_float16, diopi_dtype_float32}; - DIOPI_CALL(autoCastTensorType(ctx, inputs, inputs_support_dtype)); - std::vector<DiopiTensor*> cond{&cond_tensor}; - std::set<diopiDtype_t> cond_support_dtype{diopi_dtype_uint8, diopi_dtype_bool}; - DIOPI_CALL(autoCastTensorType(ctx, cond, cond_support_dtype)); - - DiopiTensor out_tensor_temp = out_tensor; - if (out_tensor_temp.dtype() != input_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor_temp, input_tensor.dtype())); + DIOPI_CALL(autoCastTensorType(ctx, inputs, inputsSupportDtype)); + std::vector<DiopiTensor*> cond{&condTensor}; + std::set<diopiDtype_t> condSupportDtype{diopi_dtype_uint8, diopi_dtype_bool}; + DIOPI_CALL(autoCastTensorType(ctx, cond, condSupportDtype)); + + DiopiTensor outTensorTemp = outTensor; + if (outTensorTemp.dtype() != inputTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensorTemp, inputTensor.dtype())); } - CnnlTensorDesc input_desc(input_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc other_desc(other_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc cond_desc(cond_tensor, CNNL_LAYOUT_ARRAY); - CnnlTensorDesc out_desc(out_tensor_temp, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc inputDesc(inputTensor, CNNL_LAYOUT_ARRAY); +
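The workspace handling that follows is the CNNL idiom used throughout this backend (topk.cpp, sort.cpp, stack.cpp, transpose.cpp): query the required byte count, allocate it through requiresBuffer, and hand both the pointer and the size to the kernel. A minimal hedged sketch of the idiom; cnnlGetXxxWorkspaceSize and cnnlXxx are placeholders for a real query/kernel pair such as cnnlGetSelectV2WorkspaceSize and cnnlSelectV2 used below:

    // Sketch of the query-allocate-call workspace idiom (placeholder CNNL names).
    size_t workspaceSize = 0;
    DIOPI_CALLCNNL(cnnlGetXxxWorkspaceSize(handle, inputDesc.get(), &workspaceSize));
    void* workspace = nullptr;
    if (workspaceSize != 0) {
        workspace = requiresBuffer(ctx, workspaceSize).data();  // scratch buffer owned by the DIOPI context
    }
    DIOPI_CALLCNNL(cnnlXxx(handle, inputDesc.get(), inputTensor.data(), workspace, workspaceSize, outDesc.get(), outTensor.data()));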
CnnlTensorDesc otherDesc(otherTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc condDesc(condTensor, CNNL_LAYOUT_ARRAY); + CnnlTensorDesc outDesc(outTensorTemp, CNNL_LAYOUT_ARRAY); - size_t workspace_size = 0; - DIOPI_CALLCNNL(cnnlGetSelectV2WorkspaceSize(handle, cond_desc.get(), input_desc.get(), other_desc.get(), &workspace_size)); + size_t workspaceSize = 0; + DIOPI_CALLCNNL(cnnlGetSelectV2WorkspaceSize(handle, condDesc.get(), inputDesc.get(), otherDesc.get(), &workspaceSize)); void* workspace = nullptr; - workspace = requiresBuffer(ctx, workspace_size).data(); + workspace = requiresBuffer(ctx, workspaceSize).data(); DIOPI_CALLCNNL(cnnlSelectV2(handle, - cond_desc.get(), - cond_tensor.data(), - input_desc.get(), - input_tensor.data(), - other_desc.get(), - other_tensor.data(), + condDesc.get(), + condTensor.data(), + inputDesc.get(), + inputTensor.data(), + otherDesc.get(), + otherTensor.data(), workspace, - workspace_size, - out_desc.get(), - out_tensor_temp.data())); + workspaceSize, + outDesc.get(), + outTensorTemp.data())); - if (out_tensor_temp.dtype() != out_tensor.dtype()) { - DIOPI_CALL(dataTypeCast(ctx, out_tensor, out_tensor_temp)); + if (outTensorTemp.dtype() != outTensor.dtype()) { + DIOPI_CALL(dataTypeCast(ctx, outTensor, outTensorTemp)); } return diopiSuccess; } diff --git a/DIOPI-IMPL/camb/test/conform_test.cpp b/DIOPI-IMPL/camb/test/conform_test.cpp index 37ddd0919..eed0a8693 100644 --- a/DIOPI-IMPL/camb/test/conform_test.cpp +++ b/DIOPI-IMPL/camb/test/conform_test.cpp @@ -32,39 +32,39 @@ void* device_malloc(uint64_t bytes) { void device_free(void* ptr) { CALL_CNRT(cnrtFree(ptr)); } -int32_t device_make_stream(diopiStreamHandle_t* stream_handle_ptr) { +int32_t device_make_stream(diopiStreamHandle_t* streamHandlePtr) { cnrtQueue_t phStream; CALL_CNRT(cnrtCreateQueue(&phStream)); - *stream_handle_ptr = (diopiStreamHandle_t)phStream; + *streamHandlePtr = (diopiStreamHandle_t)phStream; return diopiSuccess; } -int32_t device_destroy_stream(diopiStreamHandle_t stream_handle) { - cnrtQueue_t phStream = (cnrtQueue_t)stream_handle; +int32_t device_destroy_stream(diopiStreamHandle_t streamHandle) { + cnrtQueue_t phStream = (cnrtQueue_t)streamHandle; CALL_CNRT(cnrtDestroyQueue(phStream)); return diopiSuccess; } -int32_t device_synchronize_stream(diopiStreamHandle_t stream_handle) { - cnrtQueue_t phStream = (cnrtQueue_t)stream_handle; +int32_t device_synchronize_stream(diopiStreamHandle_t streamHandle) { + cnrtQueue_t phStream = (cnrtQueue_t)streamHandle; CALL_CNRT(cnrtSyncQueue(phStream)); return diopiSuccess; } -int32_t device_memcpy_h2d_async(diopiStreamHandle_t stream_handle, void* dst, const void* src, uint64_t bytes) { - cnrtQueue_t phStream = (cnrtQueue_t)stream_handle; +int32_t device_memcpy_h2d_async(diopiStreamHandle_t streamHandle, void* dst, const void* src, uint64_t bytes) { + cnrtQueue_t phStream = (cnrtQueue_t)streamHandle; CALL_CNRT(cnrtMemcpyAsync(dst, const_cast(src), bytes, phStream, CNRT_MEM_TRANS_DIR_HOST2DEV)); return diopiSuccess; } -int32_t device_memcpy_d2h_async(diopiStreamHandle_t stream_handle, void* dst, const void* src, uint64_t bytes) { - cnrtQueue_t phStream = (cnrtQueue_t)stream_handle; +int32_t device_memcpy_d2h_async(diopiStreamHandle_t streamHandle, void* dst, const void* src, uint64_t bytes) { + cnrtQueue_t phStream = (cnrtQueue_t)streamHandle; CALL_CNRT(cnrtMemcpyAsync(dst, const_cast(src), bytes, phStream, CNRT_MEM_TRANS_DIR_DEV2HOST)); return diopiSuccess; } -int32_t device_memcpy_d2d_async(diopiStreamHandle_t stream_handle, void* dst, const void* 
src, uint64_t bytes) { - cnrtQueue_t phStream = (cnrtQueue_t)stream_handle; +int32_t device_memcpy_d2d_async(diopiStreamHandle_t streamHandle, void* dst, const void* src, uint64_t bytes) { + cnrtQueue_t phStream = (cnrtQueue_t)streamHandle; CALL_CNRT(cnrtMemcpyAsync(dst, const_cast(src), bytes, phStream, CNRT_MEM_TRANS_DIR_DEV2DEV)); return diopiSuccess; } diff --git a/DIOPI-IMPL/scripts/build_impl.sh b/DIOPI-IMPL/scripts/build_impl.sh index 38c08ae71..cc59dec1f 100644 --- a/DIOPI-IMPL/scripts/build_impl.sh +++ b/DIOPI-IMPL/scripts/build_impl.sh @@ -10,25 +10,25 @@ case $1 in clean) rm -rf build;; cuda) - mkdir -p build && cd build && cmake .. -DIMPL_OPT=cuda -DTEST=${DIOPI_BUILD_TESTRT} && make;; + mkdir -p build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=cuda -DTEST=${DIOPI_BUILD_TESTRT} && make -j8;; torch) - mkdir -p build && cd build && cmake .. -DIMPL_OPT=torch -DDEBUG=ON -DTEST=${DIOPI_BUILD_TESTRT} \ + mkdir -p build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=torch -DDEBUG=ON -DTEST=${DIOPI_BUILD_TESTRT} \ -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` \ - && make;; + && make -j8;; torch_dyload) - mkdir -p build && cd build && cmake .. -DIMPL_OPT=torch -DDEBUG=ON -DDYLOAD=ON -DTEST=${DIOPI_BUILD_TESTRT} \ + mkdir -p build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=torch -DDEBUG=ON -DDYLOAD=ON -DTEST=${DIOPI_BUILD_TESTRT} \ -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` \ - && make && mkdir -p ${DIOPI_TEST_PATH}/lib && ln -sf ${CURRENT_DIR}/../lib/libdiopi_real_impl.so ${DIOPI_TEST_PATH}/lib;; + && make -j8 && mkdir -p ${DIOPI_TEST_PATH}/lib && ln -sf ${CURRENT_DIR}/../lib/libdiopi_real_impl.so ${DIOPI_TEST_PATH}/lib;; camb_pytorch) - mkdir -p build && cd build && cmake .. -DIMPL_OPT=camb_pytorch -DTEST=${DIOPI_BUILD_TESTRT} \ + mkdir -p build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=camb_pytorch -DTEST=${DIOPI_BUILD_TESTRT} \ -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` \ - && make;; + && make -j8;; camb) - mkdir -p build && cd build && cmake .. -DIMPL_OPT=camb -DTEST=${DIOPI_BUILD_TESTRT} && make;; + mkdir -p build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=camb -DTEST=${DIOPI_BUILD_TESTRT} && make -j8;; ascend) - mkdir -p build && cd build && cmake .. -DIMPL_OPT=ascend -DTEST=${DIOPI_BUILD_TESTRT} && make;; + mkdir -p build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=ascend -DTEST=${DIOPI_BUILD_TESTRT} && make -j8;; hip_pytorch) - mkdir build && cd build && cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DIMPL_OPT=TORCH -DHIP=ON && make -j4 \ + mkdir build && cd build && cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DIMPL_OPT=TORCH -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DHIP=ON && make -j8 \ || exit -1;; mmcv_ext) (cd third_party/mmcv_diopi && rm -rf build && mkdir build \ diff --git a/DIOPI-IMPL/scripts/ci_script.sh b/DIOPI-IMPL/scripts/ci_script.sh index d8766bc2b..5d13acdc1 100644 --- a/DIOPI-IMPL/scripts/ci_script.sh +++ b/DIOPI-IMPL/scripts/ci_script.sh @@ -1,19 +1,26 @@ # !/bin/bash set -e -current_path=$(cd "$(dirname "$0")"; pwd) - +CURRENT_PATH=$(cd "$(dirname "$0")"; pwd) +CMAKE_EXPORT_COMPILE_COMMANDS_FILE=${CURRENT_PATH}/../build/compile_commands.json case $1 in cpp-lint) # for other cpplint version, maybe -whitespace/indent is needed to check impl # --repository=.. 
will be deleted when repository changed. (echo "cpp-lint" && python scripts/cpplint.py --linelength=160 --repository=.. \ --filter=-build/c++11,-legal/copyright,-build/include_subdir,-runtime/references,-runtime/printf,-runtime/int,-build/namespace \ - --exclude=${current_path}/../third_party --exclude=${current_path}/../build \ + --exclude=${CURRENT_PATH}/../third_party --exclude=${CURRENT_PATH}/../build \ --recursive ./ ) \ - || exit -1;; - *) - echo -e "[ERROR] Incorrect option:" $1; - + || exit 1;; + clang-tidy) + ( + if [ -e ${CMAKE_EXPORT_COMPILE_COMMANDS_FILE} ]; then + python3 ${CURRENT_PATH}/../../run-clang-tidy.py -p `dirname "${CMAKE_EXPORT_COMPILE_COMMANDS_FILE}"` + else + echo "error: compile_commands.json not found." + exit 1 + fi);; + *) + echo -e "[ERROR] Incorrect option:" $1 && exit 1; esac exit 0 \ No newline at end of file diff --git a/DIOPI-TEST/csrc/litert.cpp b/DIOPI-TEST/csrc/litert.cpp index e3dd95eeb..390cc62cb 100644 --- a/DIOPI-TEST/csrc/litert.cpp +++ b/DIOPI-TEST/csrc/litert.cpp @@ -7,15 +7,16 @@ #include #include -#include #include +#include #include #include #include #include #include #include +#include extern "C" { @@ -47,9 +48,9 @@ DIOPI_RT_API const char* diopiGetVersion() { return szVersion; } -static void* host_malloc(uint64_t bytes) { return malloc(bytes); } +static void* hostMalloc(uint64_t bytes) { return malloc(bytes); } -static void host_free(void* ptr) { free(ptr); } +static void hostFree(void* ptr) { free(ptr); } int32_t itemsize(const diopiDtype_t dtype) { switch (dtype) { @@ -77,7 +78,7 @@ int32_t itemsize(const diopiDtype_t dtype) { return 0; } -const char* diopi_dtype_to_str(const diopiDtype_t dtype) { +const char* diopiDtypeToStr(const diopiDtype_t dtype) { #define _dtype2str(type) \ if (type == dtype) return #type; _dtype2str(diopi_dtype_float16); @@ -99,7 +100,7 @@ const char* diopi_dtype_to_str(const diopiDtype_t dtype) { #undef _dtype2str } -const char* device_to_str(const diopiDevice_t device) { +const char* deviceToStr(const diopiDevice_t device) { #define _device2str(type) \ if (type == device) return #type; _device2str(diopi_host); @@ -111,20 +112,20 @@ const char* device_to_str(const diopiDevice_t device) { class Storage final { private: - malloc_func_t malloc_fn_; - free_func_t free_fn_; + malloc_func_t mallocFn_; + free_func_t freeFn_; int64_t nbytes_; void* ptr_; public: - Storage(malloc_func_t malloc_fn, free_func_t free_fn, int64_t nbytes) : malloc_fn_(malloc_fn), free_fn_(free_fn), nbytes_(nbytes) { - assert(free_fn_); - assert(malloc_fn_); - ptr_ = malloc_fn_(nbytes_); + Storage(malloc_func_t mallocFn, free_func_t freeFn, int64_t nbytes) : mallocFn_(mallocFn), freeFn_(freeFn), nbytes_(nbytes) { + assert(freeFn_); + assert(mallocFn_); + ptr_ = mallocFn_(nbytes); } ~Storage() { - free_fn_(ptr_); + freeFn_(ptr_); ptr_ = nullptr; nbytes_ = 0; } @@ -158,7 +159,7 @@ struct diopiTensor { return stride; } - bool reset_shape(const diopiSize_t* size); + bool resetShape(const diopiSize_t* size); diopiDtype_t dtype() const { return dtype_; } diopiDevice_t device() const { return device_; } @@ -168,7 +169,7 @@ struct diopiTensor { const void* data() const { return storage_->data(); } int64_t nbytes() const { return storage_->nbytes(); } - diopiContextHandle_t get_ctx() const { return context_; } + diopiContextHandle_t getCtx() const { return context_; } }; diopiTensor::diopiTensor(const diopiSize_t* shape, const diopiSize_t* stride, diopiDtype_t dtype, diopiDevice_t device, diopiContextHandle_t context) { @@ -178,7 +179,7 @@ 
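The constructor in this hunk fills in default strides whenever the caller passes stride == nullptr: the innermost dimension gets stride 1 and every outer stride is the product of the sizes to its right (row-major layout). A small self-contained illustration of that rule:

    #include <cstdint>
    #include <vector>

    // Row-major (contiguous) strides for a shape, matching the default-stride
    // loop in the diopiTensor constructor below.
    std::vector<int64_t> contiguousStrides(const std::vector<int64_t>& shape) {
        std::vector<int64_t> strides(shape.size());
        int64_t strideTemp = 1;
        for (int64_t i = static_cast<int64_t>(shape.size()) - 1; i >= 0; --i) {
            strides[i] = strideTemp;
            strideTemp *= shape[i];
        }
        return strides;
    }
    // e.g. contiguousStrides({2, 3, 4}) == {12, 4, 1}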
diopiTensor::diopiTensor(const diopiSize_t* shape, const diopiSize_t* stride, di shape_.resize(shape->len); stride_.resize(shape->len); - int64_t stride_temp = 1; + int64_t strideTemp = 1; numel_ = 1; for (int64_t i = shape->len - 1; i >= 0; --i) { shape_[i] = shape->data[i]; @@ -186,21 +187,21 @@ diopiTensor::diopiTensor(const diopiSize_t* shape, const diopiSize_t* stride, di if (stride != nullptr) { stride_[i] = stride->data[i]; } else { - stride_[i] = stride_temp; - stride_temp *= shape->data[i]; + stride_[i] = strideTemp; + strideTemp *= shape->data[i]; } } - const int64_t nbytes = numel_ * itemsize(dtype_); + const int64_t nbytes = numel_ * itemsize(dtype); if (device_ == diopi_host) { - storage_ = std::make_shared(host_malloc, host_free, nbytes); + storage_ = std::make_shared(hostMalloc, hostFree, nbytes); } else { storage_ = std::make_shared(device_malloc, device_free, nbytes); } context_ = context; } -bool diopiTensor::reset_shape(const diopiSize_t* size) { +bool diopiTensor::resetShape(const diopiSize_t* size) { int64_t numel = 1; for (int64_t i = 0; i < size->len; ++i) { numel *= size->data[i]; @@ -209,16 +210,16 @@ bool diopiTensor::reset_shape(const diopiSize_t* size) { shape_.resize(size->len); stride_.resize(size->len); - int64_t stride_temp = 1; + int64_t strideTemp = 1; for (int64_t i = size->len - 1; i >= 0; --i) { shape_[i] = size->data[i]; - stride_[i] = stride_temp; - stride_temp *= size->data[i]; + stride_[i] = strideTemp; + strideTemp *= size->data[i]; } return true; } -diopiTensor::~diopiTensor() {} +diopiTensor::~diopiTensor() = default; DIOPI_RT_API diopiError_t diopiGetTensorData(diopiTensorHandle_t th, void** pptr) { *pptr = th->data(); @@ -255,20 +256,20 @@ DIOPI_RT_API diopiError_t diopiGetTensorNumel(diopiConstTensorHandle_t th, int64 return diopiSuccess; } -DIOPI_RT_API diopiError_t diopiGetTensorElemSize(diopiConstTensorHandle_t th, int64_t* elem_size) { - *elem_size = itemsize(th->dtype()); +DIOPI_RT_API diopiError_t diopiGetTensorElemSize(diopiConstTensorHandle_t th, int64_t* elemSize) { + *elemSize = itemsize(th->dtype()); return diopiSuccess; } -DIOPI_RT_API diopiError_t _diopiTensorResetShape(diopiTensorHandle_t th, const diopiSize_t* size) { - if (!th->reset_shape(size)) { +DIOPI_RT_API diopiError_t diopiTensorResetShape(diopiTensorHandle_t th, const diopiSize_t* size) { + if (!th->resetShape(size)) { return diopiErrorOccurred; } return diopiSuccess; } -DIOPI_RT_API diopiError_t _diopiTensorGetCtxHandle(diopiConstTensorHandle_t th, diopiContextHandle_t* ctx) { - *ctx = th->get_ctx(); +DIOPI_RT_API diopiError_t diopiTensorGetCtxHandle(diopiConstTensorHandle_t th, diopiContextHandle_t* ctx) { + *ctx = th->getCtx(); return diopiSuccess; } @@ -278,7 +279,7 @@ struct diopiContext { std::set setTensors_; public: - diopiContext() {} + diopiContext() = default; ~diopiContext() { if (nullptr != stream_) { @@ -321,13 +322,13 @@ struct diopiContext { } }; -DIOPI_RT_API diopiError_t _diopiCreateContext(diopiContextHandle_t* ctx) { +DIOPI_RT_API diopiError_t diopiCreateContext(diopiContextHandle_t* ctx) { *ctx = new diopiContext(); diopi_log("create a Context instance: %16p", *ctx); return diopiSuccess; } -DIOPI_RT_API diopiError_t _diopiDestroyContext(diopiContextHandle_t ctx) { +DIOPI_RT_API diopiError_t diopiDestroyContext(diopiContextHandle_t ctx) { diopi_log("destroy a Context instance: %16p", ctx); delete ctx; return diopiSuccess; @@ -345,21 +346,21 @@ DIOPI_RT_API diopiError_t diopiRequireTensor(diopiContextHandle_t ctx, diopiTens size->len, stride, dtype, - 
diopi_dtype_to_str(dtype), + diopiDtypeToStr(dtype), dev, - device_to_str(dev)); + deviceToStr(dev)); *tensor = ctx->createTensor(size, stride, dtype, dev); return diopiSuccess; } DIOPI_RT_API diopiError_t diopiRequireBuffer(diopiContextHandle_t ctx, diopiTensorHandle_t* tensor, int64_t bytes, diopiDevice_t dev) { - diopi_log("requires a buffer, bytes: %" PRId64 ", device: %s", bytes, device_to_str(dev)); + diopi_log("requires a buffer, bytes: %" PRId64 ", device: %s", bytes, deviceToStr(dev)); diopiSize_t size(&bytes, 1); return diopiRequireTensor(ctx, tensor, &size, nullptr, diopi_dtype_int8, dev); } -DIOPI_RT_API diopiError_t _diopiDestoryTensor(diopiContextHandle_t ctx, diopiTensorHandle_t tensor) { +DIOPI_RT_API diopiError_t diopiDestoryTensor(diopiContextHandle_t ctx, diopiTensorHandle_t tensor) { ctx->destroyTensor(tensor); return diopiSuccess; } @@ -370,9 +371,9 @@ DIOPI_RT_API diopiError_t diopiInit() { return diopiSuccess; } inited = 1; - const char* log_level_env = getenv("DIOPIRT_LOG_LEVEL"); - if (log_level_env != nullptr) { - DIOPIRT_LOG_LEVEL = atoi(log_level_env); + const char* logLevelEnv = getenv("DIOPIRT_LOG_LEVEL"); + if (logLevelEnv != nullptr) { + DIOPIRT_LOG_LEVEL = atoi(logLevelEnv); } else { DIOPIRT_LOG_LEVEL = 0; } @@ -390,7 +391,7 @@ DIOPI_RT_API diopiError_t diopiFinalize() { return diopiSuccess; } -DIOPI_RT_API diopiError_t _diopiTensorCopyFromBuffer(diopiContextHandle_t ctx, const void* src, diopiTensorHandle_t tensor) { +DIOPI_RT_API diopiError_t diopiTensorCopyFromBuffer(diopiContextHandle_t ctx, const void* src, diopiTensorHandle_t tensor) { if (tensor->device() == diopi_device) { diopiStreamHandle_t stream; diopiGetStream(ctx, &stream); @@ -402,7 +403,7 @@ DIOPI_RT_API diopiError_t _diopiTensorCopyFromBuffer(diopiContextHandle_t ctx, c return diopiSuccess; } -DIOPI_RT_API diopiError_t _diopiTensorCopyToBuffer(diopiContextHandle_t ctx, diopiConstTensorHandle_t tensor, void* dst) { +DIOPI_RT_API diopiError_t diopiTensorCopyToBuffer(diopiContextHandle_t ctx, diopiConstTensorHandle_t tensor, void* dst) { if (tensor->device() == diopi_device) { diopiStreamHandle_t stream; diopiGetStream(ctx, &stream); @@ -414,7 +415,7 @@ DIOPI_RT_API diopiError_t _diopiTensorCopyToBuffer(diopiContextHandle_t ctx, dio return diopiSuccess; } -DIOPI_RT_API diopiError_t _diopiClearTensors(diopiContextHandle_t ctx) { +DIOPI_RT_API diopiError_t diopiClearTensors(diopiContextHandle_t ctx) { ctx->clearTensors(); return diopiSuccess; } diff --git a/DIOPI-TEST/python/conformance/diopi_runtime.py b/DIOPI-TEST/python/conformance/diopi_runtime.py index 1827b894b..49092c357 100644 --- a/DIOPI-TEST/python/conformance/diopi_runtime.py +++ b/DIOPI-TEST/python/conformance/diopi_runtime.py @@ -157,16 +157,16 @@ class Context: def __init__(self): self.context_handle = ContextHandle() - self.__class__._c_lib._diopiCreateContext(byref(self.context_handle)) + self.__class__._c_lib.diopiCreateContext(byref(self.context_handle)) def __del__(self): - self.__class__._c_lib._diopiDestroyContext(self.context_handle) + self.__class__._c_lib.diopiDestroyContext(self.context_handle) def get_handle(self): return self.context_handle def clear_tensors(self): - return self.__class__._c_lib._diopiClearTensors(self.context_handle) + return self.__class__._c_lib.diopiClearTensors(self.context_handle) default_context = Context() @@ -229,7 +229,7 @@ def __init__( @classmethod def from_handle(cls, tensor_handle): ctx_handle = ContextHandle() - diopirt_lib._diopiTensorGetCtxHandle(tensor_handle, byref(ctx_handle)) + 
diopirt_lib.diopiTensorGetCtxHandle(tensor_handle, byref(ctx_handle)) return cls(size=None, dtype=None, context_handle=ctx_handle, tensor_handle=tensor_handle) def __str__(self): @@ -291,7 +291,7 @@ def get_dtype(self): def reset_shape(self, shape): assert isinstance(shape, (tuple, list)) - diopirt_lib._diopiTensorResetShape(self.tensor_handle, byref(Sizes(tuple(shape)))) + diopirt_lib.diopiTensorResetShape(self.tensor_handle, byref(Sizes(tuple(shape)))) @classmethod def from_numpy(cls, darray): @@ -302,9 +302,9 @@ def from_numpy(cls, darray): stride = [int(darray.strides[i] / darray.itemsize) for i in range(len(darray.strides))] tr = cls(size=darray.shape, dtype=dtype, stride=stride) - diopirt_lib._diopiTensorCopyFromBuffer(tr.context_handle, - c_void_p(darray.ctypes.data), - tr.tensor_handle) + diopirt_lib.diopiTensorCopyFromBuffer(tr.context_handle, + c_void_p(darray.ctypes.data), + tr.tensor_handle) return tr def numpy(self) -> np.ndarray: @@ -313,9 +313,9 @@ def numpy(self) -> np.ndarray: stride = self.get_stride() strides = [int(stride[i] * itemsize) for i in range(len(stride))] darray = np.ndarray(shape=self.size(), dtype=dtype, strides=strides) - diopirt_lib._diopiTensorCopyToBuffer(self.context_handle, - self.tensor_handle, - c_void_p(darray.ctypes.data)) + diopirt_lib.diopiTensorCopyToBuffer(self.context_handle, + self.tensor_handle, + c_void_p(darray.ctypes.data)) return darray diff --git a/run-clang-tidy.py b/run-clang-tidy.py new file mode 100755 index 000000000..e3da6fb9b --- /dev/null +++ b/run-clang-tidy.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python3 +# +#===- run-clang-tidy.py - Parallel clang-tidy runner --------*- python -*--===# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===-----------------------------------------------------------------------===# +# FIXME: Integrate with clang-tidy-diff.py + + +""" +Parallel clang-tidy runner +========================== + +Runs clang-tidy over all files in a compilation database. Requires clang-tidy +and clang-apply-replacements in $PATH. + +Example invocations. +- Run clang-tidy on all files in the current working directory with a default + set of checks and show warnings in the cpp files and all project headers. + run-clang-tidy.py $PWD + +- Fix all header guards. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard + +- Fix all header guards included from clang-tidy and header guards + for clang-tidy headers. + run-clang-tidy.py -fix -checks=-*,llvm-header-guard extra/clang-tidy \ + -header-filter=extra/clang-tidy + +Compilation database setup: +http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html +""" + +from __future__ import print_function + +import argparse +import glob +import json +import multiprocessing +import os +import queue +import re +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback + +try: + import yaml +except ImportError: + yaml = None + + +def strtobool(val): + """Convert a string representation of truth to a bool following LLVM's CLI argument parsing.""" + + val = val.lower() + if val in ['', 'true', '1']: + return True + elif val in ['false', '0']: + return False + + # Return ArgumentTypeError so that argparse does not substitute its own error message + raise argparse.ArgumentTypeError( + "'{}' is invalid value for boolean argument! 
Try 0 or 1.".format(val) + ) + + +def find_compilation_database(path): + """Adjusts the directory until a compilation database is found.""" + result = os.path.realpath('./') + while not os.path.isfile(os.path.join(result, path)): + parent = os.path.dirname(result) + if result == parent: + print('Error: could not find compilation database.') + sys.exit(1) + result = parent + return result + + +def make_absolute(f, directory): + if os.path.isabs(f): + return f + return os.path.normpath(os.path.join(directory, f)) + + +def get_tidy_invocation(f, clang_tidy_binary, checks, tmpdir, build_path, + header_filter, allow_enabling_alpha_checkers, + extra_arg, extra_arg_before, quiet, config_file_path, + config, line_filter, use_color, plugins): + """Gets a command line for clang-tidy.""" + start = [clang_tidy_binary] + if allow_enabling_alpha_checkers: + start.append('-allow-enabling-analyzer-alpha-checkers') + if header_filter is not None: + start.append('-header-filter=' + header_filter) + if line_filter is not None: + start.append('-line-filter=' + line_filter) + if use_color is not None: + if use_color: + start.append('--use-color') + else: + start.append('--use-color=false') + if checks: + start.append('-checks=' + checks) + if tmpdir is not None: + start.append('-export-fixes') + # Get a temporary file. We immediately close the handle so clang-tidy can + # overwrite it. + (handle, name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) + os.close(handle) + start.append(name) + for arg in extra_arg: + start.append('-extra-arg=%s' % arg) + for arg in extra_arg_before: + start.append('-extra-arg-before=%s' % arg) + start.append('-p=' + build_path) + if quiet: + start.append('-quiet') + if config_file_path: + start.append('--config-file=' + config_file_path) + elif config: + start.append('-config=' + config) + for plugin in plugins: + start.append('-load=' + plugin) + start.append(f) + return start + + +def merge_replacement_files(tmpdir, mergefile): + """Merge all replacement files in a directory into a single file""" + # The fixes suggested by clang-tidy >= 4.0.0 are given under + # the top level key 'Diagnostics' in the output yaml files + mergekey = "Diagnostics" + merged=[] + for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): + content = yaml.safe_load(open(replacefile, 'r')) + if not content: + continue # Skip empty files. + merged.extend(content.get(mergekey, [])) + + if merged: + # MainSourceFile: The key is required by the definition inside + # include/clang/Tooling/ReplacementsYaml.h, but the value + # is actually never used inside clang-apply-replacements, + # so we set it to '' here. 
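        # The merged document keeps the layout clang-tidy exports per translation unit:
        # a 'MainSourceFile' string plus a single flat 'Diagnostics' list, which
        # clang-apply-replacements can then apply.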
+ output = {'MainSourceFile': '', mergekey: merged} + with open(mergefile, 'w') as out: + yaml.safe_dump(output, out) + else: + # Empty the file: + open(mergefile, 'w').close() + + +def find_binary(arg, name, build_path): + """Get the path for a binary or exit""" + if arg: + if shutil.which(arg): + return arg + else: + raise SystemExit( + "error: passed binary '{}' was not found or is not executable" + .format(arg)) + + built_path = os.path.join(build_path, "bin", name) + binary = shutil.which(name) or shutil.which(built_path) + if binary: + return binary + else: + raise SystemExit( + "error: failed to find {} in $PATH or at {}" + .format(name, built_path)) + + +def apply_fixes(args, clang_apply_replacements_binary, tmpdir): + """Calls clang-apply-fixes on a given directory.""" + invocation = [clang_apply_replacements_binary] + invocation.append('-ignore-insert-conflict') + if args.format: + invocation.append('-format') + if args.style: + invocation.append('-style=' + args.style) + invocation.append(tmpdir) + subprocess.call(invocation) + + +def run_tidy(args, clang_tidy_binary, tmpdir, build_path, queue, lock, + failed_files): + """Takes filenames out of queue and runs clang-tidy on them.""" + while True: + name = queue.get() + invocation = get_tidy_invocation(name, clang_tidy_binary, args.checks, + tmpdir, build_path, args.header_filter, + args.allow_enabling_alpha_checkers, + args.extra_arg, args.extra_arg_before, + args.quiet, args.config_file, args.config, + args.line_filter, args.use_color, + args.plugins) + + proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = proc.communicate() + if proc.returncode != 0: + if proc.returncode < 0: + msg = "%s: terminated by signal %d\n" % (name, -proc.returncode) + err += msg.encode('utf-8') + failed_files.append(name) + with lock: + sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) + if len(err) > 0: + sys.stdout.flush() + sys.stderr.write(err.decode('utf-8')) + queue.task_done() + + +def main(): + parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' + 'in a compilation database. Requires ' + 'clang-tidy and clang-apply-replacements in ' + '$PATH or in your build directory.') + parser.add_argument('-allow-enabling-alpha-checkers', + action='store_true', help='allow alpha checkers from ' + 'clang-analyzer.') + parser.add_argument('-clang-tidy-binary', metavar='PATH', + help='path to clang-tidy binary') + parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', + help='path to clang-apply-replacements binary') + parser.add_argument('-checks', default=None, + help='checks filter, when not specified, use clang-tidy ' + 'default') + config_group = parser.add_mutually_exclusive_group() + config_group.add_argument('-config', default=None, + help='Specifies a configuration in YAML/JSON format: ' + ' -config="{Checks: \'*\', ' + ' CheckOptions: {x: y}}" ' + 'When the value is empty, clang-tidy will ' + 'attempt to find a file named .clang-tidy for ' + 'each source file in its parent directories.') + config_group.add_argument('-config-file', default=None, + help='Specify the path of .clang-tidy or custom config ' + 'file: e.g. -config-file=/some/path/myTidyConfigFile. ' + 'This option internally works exactly the same way as ' + '-config option after reading specified config file. 
' + 'Use either -config-file or -config, not both.') + parser.add_argument('-header-filter', default=None, + help='regular expression matching the names of the ' + 'headers to output diagnostics from. Diagnostics from ' + 'the main file of each translation unit are always ' + 'displayed.') + parser.add_argument('-line-filter', default=None, + help='List of files with line ranges to filter the' + 'warnings.') + if yaml: + parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') + parser.add_argument('-j', type=int, default=0, + help='number of tidy instances to be run in parallel.') + parser.add_argument('files', nargs='*', default=['.*'], + help='files to be processed (regex on path)') + parser.add_argument('-fix', action='store_true', help='apply fix-its') + parser.add_argument('-format', action='store_true', help='Reformat code ' + 'after applying fixes') + parser.add_argument('-style', default='file', help='The style of reformat ' + 'code after applying fixes') + parser.add_argument('-use-color', type=strtobool, nargs='?', const=True, + help='Use colors in diagnostics, overriding clang-tidy\'s' + ' default behavior. This option overrides the \'UseColor' + '\' option in .clang-tidy file, if any.') + parser.add_argument('-p', dest='build_path', + help='Path used to read a compile command database.') + parser.add_argument('-extra-arg', dest='extra_arg', + action='append', default=[], + help='Additional argument to append to the compiler ' + 'command line.') + parser.add_argument('-extra-arg-before', dest='extra_arg_before', + action='append', default=[], + help='Additional argument to prepend to the compiler ' + 'command line.') + parser.add_argument('-quiet', action='store_true', + help='Run clang-tidy in quiet mode') + parser.add_argument('-load', dest='plugins', + action='append', default=[], + help='Load the specified plugin in clang-tidy.') + args = parser.parse_args() + + db_path = 'compile_commands.json' + + if args.build_path is not None: + build_path = args.build_path + else: + # Find our database + build_path = find_compilation_database(db_path) + + clang_tidy_binary = find_binary(args.clang_tidy_binary, "clang-tidy", + build_path) + + tmpdir = None + if args.fix or (yaml and args.export_fixes): + clang_apply_replacements_binary = find_binary( + args.clang_apply_replacements_binary, "clang-apply-replacements", + build_path) + tmpdir = tempfile.mkdtemp() + + try: + invocation = get_tidy_invocation("", clang_tidy_binary, args.checks, + None, build_path, args.header_filter, + args.allow_enabling_alpha_checkers, + args.extra_arg, args.extra_arg_before, + args.quiet, args.config_file, args.config, + args.line_filter, args.use_color, + args.plugins) + invocation.append('-list-checks') + invocation.append('-') + if args.quiet: + # Even with -quiet we still want to check if we can call clang-tidy. + with open(os.devnull, 'w') as dev_null: + subprocess.check_call(invocation, stdout=dev_null) + else: + subprocess.check_call(invocation) + except: + print("Unable to run clang-tidy.", file=sys.stderr) + sys.exit(1) + + # Load the database and extract all files. 
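    # compile_commands.json is a JSON array of {directory, command, file} entries;
    # 'file' may be relative to 'directory', which is why make_absolute() is applied below.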
+ database = json.load(open(os.path.join(build_path, db_path))) + files = set([make_absolute(entry['file'], entry['directory']) + for entry in database]) + + max_task = args.j + if max_task == 0: + max_task = multiprocessing.cpu_count() + + # Build up a big regexy filter from all command line arguments. + file_name_re = re.compile('|'.join(args.files)) + + return_code = 0 + try: + # Spin up a bunch of tidy-launching threads. + task_queue = queue.Queue(max_task) + # List of files with a non-zero return code. + failed_files = [] + lock = threading.Lock() + for _ in range(max_task): + t = threading.Thread(target=run_tidy, + args=(args, clang_tidy_binary, tmpdir, build_path, + task_queue, lock, failed_files)) + t.daemon = True + t.start() + + # Fill the queue with files. + for name in files: + if file_name_re.search(name): + task_queue.put(name) + + # Wait for all threads to be done. + task_queue.join() + if len(failed_files): + return_code = 1 + + except KeyboardInterrupt: + # This is a sad hack. Unfortunately subprocess goes + # bonkers with ctrl-c and we start forking merrily. + print('\nCtrl-C detected, goodbye.') + if tmpdir: + shutil.rmtree(tmpdir) + os.kill(0, 9) + + if yaml and args.export_fixes: + print('Writing fixes to ' + args.export_fixes + ' ...') + try: + merge_replacement_files(tmpdir, args.export_fixes) + except: + print('Error exporting fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code=1 + + if args.fix: + print('Applying fixes ...') + try: + apply_fixes(args, clang_apply_replacements_binary, tmpdir) + except: + print('Error applying fixes.\n', file=sys.stderr) + traceback.print_exc() + return_code = 1 + + if tmpdir: + shutil.rmtree(tmpdir) + sys.exit(return_code) + + +if __name__ == '__main__': + main()
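A minimal sketch of what run-clang-tidy.py does per file, assuming compile_commands.json lives in ./build and clang-tidy is on $PATH (both assumptions; neither path comes from this patch): load the compilation database, make each entry's file path absolute, filter the paths with a regex, and launch one clang-tidy process per translation unit. The real script does the same work in parallel worker threads and can also export and apply fixes.

# sketch_run_tidy.py -- minimal, sequential version of the loop in run-clang-tidy.py
# (illustrative; build directory and file filter are assumptions)
import json
import os
import re
import subprocess

build_path = "build"  # assumed location of compile_commands.json
with open(os.path.join(build_path, "compile_commands.json")) as f:
    database = json.load(f)

# Each database entry is {directory, command, file}; normalize 'file'
# the same way make_absolute() does.
files = set()
for entry in database:
    path = entry["file"]
    if not os.path.isabs(path):
        path = os.path.normpath(os.path.join(entry["directory"], path))
    files.add(path)

file_filter = re.compile(r"\.cpp$")  # assumed filter; the real script takes regexes on the command line

for path in sorted(files):
    if file_filter.search(path):
        # One clang-tidy invocation per file, pointed at the database via -p,
        # mirroring get_tidy_invocation() without checks, fixes or plugins.
        subprocess.run(["clang-tidy", "-p=" + build_path, path], check=False)

With -fix, the real script additionally exports per-file YAML replacements into a temporary directory and feeds them to clang-apply-replacements, as apply_fixes() above shows.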