Skip to content

Commit badbe46

Browse files
authored
fix(dipu): avoid infinite recursion in dumpArg() (#752)
* fix(dipu): avoid infinite recursion in `dumpArg()`

  This happens when DIPU_DUMP_OP_ARGS is set to 3, because the tensor is copied from device to CPU before it is dumped. This patch uses a function previously used in auto_compare (renamed to `toCpuTensorWithoutDiopiCopy()`) to avoid the infinite recursion.

* style(cpp): clang-format the code
1 parent 115eb8d commit badbe46

File tree

2 files changed

+27
-18
lines changed

2 files changed

+27
-18
lines changed

dipu/torch_dipu/csrc_dipu/aten/ops/AutoCompareUtils.hpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,25 @@
11
#pragma once
22

3+
#include <cstddef>
34
#include <iomanip>
5+
#include <ostream>
46
#include <sstream>
57
#include <string>
8+
#include <type_traits>
69

710
#include <ATen/core/TensorBody.h>
811
#include <ATen/ops/abs.h>
912
#include <ATen/ops/allclose.h>
13+
#include <ATen/ops/empty_strided.h>
14+
#include <c10/core/Device.h>
1015
#include <c10/util/ArrayRef.h>
1116
#include <c10/util/Exception.h>
1217

13-
#include "csrc_dipu/aten/ops/DIPUCopy.hpp"
14-
#include "csrc_dipu/runtime/device/deviceapis.h"
18+
#include "csrc_dipu/aten/ops/OpUtils.hpp"
1519

1620
namespace dipu {
1721
namespace native {
1822

19-
inline at::Tensor to_cpu_without_diopi(const at::Tensor& in) {
20-
if (in.is_cpu()) {
21-
return in;
22-
}
23-
24-
at::Tensor out = at::empty_strided(in.sizes(), in.strides(),
25-
in.options().device(c10::Device("cpu")));
26-
if (in.nbytes() > 0) {
27-
dipu::devapis::memCopyD2H(out.storage().nbytes(), out.data_ptr(),
28-
in.data_ptr());
29-
}
30-
return out;
31-
}
32-
3323
inline std::string cpu_tensor_to_one_line_string(const at::Tensor& tensor) {
3424
/*
3525
* This function retrieves the built-in string representation of the input
@@ -91,7 +81,7 @@ inline std::string allclose_autocompare(const at::Tensor& tensor_cpu,
9181
constexpr double tolerance_absolute = 1e-4;
9282
constexpr double tolerance_relative = 1e-5;
9383
const at::Tensor& tensor_cpu_from_device =
94-
to_cpu_without_diopi(tensor_device);
84+
toCpuTensorWithoutDiopiCopy(tensor_device);
9585
bool passed = at::allclose(tensor_cpu, tensor_cpu_from_device,
9686
tolerance_absolute, tolerance_relative, true);
9787
if (passed) {

dipu/torch_dipu/csrc_dipu/aten/ops/OpUtils.hpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <cstddef>
55
#include <cstdint>
66
#include <cstdlib>
7+
#include <cstring>
78
#include <sstream>
89
#include <string>
910
#include <utility>
@@ -16,19 +17,37 @@
1617
#include <ATen/native/cpu/mixed_data_type.h>
1718
#include <ATen/ops/abs.h>
1819
#include <ATen/ops/allclose.h>
20+
#include <ATen/ops/empty_strided.h>
21+
#include <c10/core/Device.h>
1922
#include <c10/core/ScalarType.h>
2023
#include <c10/util/ArrayRef.h>
2124
#include <c10/util/Optional.h>
2225
#include <c10/util/OptionalArrayRef.h>
2326
#include <c10/util/string_view.h>
2427

2528
#include "csrc_dipu/runtime/core/DIPUStream.h"
29+
#include "csrc_dipu/runtime/device/deviceapis.h"
2630
#include "csrc_dipu/runtime/rthelper.h"
2731
#include "csrc_dipu/utils/Log.h"
2832

2933
namespace dipu {
3034
namespace native {
3135

36+
// Copies a tensor to CPU via memCopyD2H instead of diopiCopy(), to avoid infinite recursion when dumpArg() is called before diopiCopy().
37+
inline at::Tensor toCpuTensorWithoutDiopiCopy(const at::Tensor& in) {
38+
if (in.is_cpu()) {
39+
return in;
40+
}
41+
42+
at::Tensor out = at::empty_strided(in.sizes(), in.strides(),
43+
in.options().device(c10::Device("cpu")));
44+
if (in.nbytes() > 0) {
45+
dipu::devapis::memCopyD2H(out.storage().nbytes(), out.data_ptr(),
46+
in.data_ptr());
47+
}
48+
return out;
49+
}
50+
3251
inline bool checkTensorDevice() {
3352
static bool enable = []() {
3453
const char* env_ptr = std::getenv("DIPU_CHECK_TENSOR_DEVICE");
@@ -114,7 +133,7 @@ inline std::string dumpArg(const at::Tensor& tensor) {
114133
<< ", storage_data_ptr: " << tensor.storage().data_ptr().get()
115134
<< ", storage_offset: " << tensor.storage_offset();
116135
if (dumpOpArgLevel() > 2) {
117-
stream << '\n' << tensor;
136+
stream << '\n' << toCpuTensorWithoutDiopiCopy(tensor);
118137
}
119138
} else {
120139
stream << "undefined";

0 commit comments

Comments
 (0)