Skip to content

Commit f3d26d6

Browse files
authored
dev-0.6.0 (#78)
* INSTALL * make ttl::range less generic * ttl::copy * --with-cuda * add .size() and .dims() to tensor types (#79) * add size method to tensor * dims() * deprecate from_host, to_host (#80) * support customize install prefix
1 parent c0c6ffb commit f3d26d6

13 files changed

+150
-52
lines changed

CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ OPTION(BUILD_EXAMPLES "Build examples." OFF)
2727
OPTION(HAVE_CUDA "Have cuda_runtime.h." OFF)
2828

2929
IF(HAVE_CUDA)
30-
# noop
30+
INCLUDE_DIRECTORIES(${CUDA_HOME}/include)
31+
LINK_DIRECTORIES(${CUDA_HOME}/lib64)
3132
ELSE()
3233
ADD_DEFINITIONS(-DUSE_FAKE_CUDA_RUNTIME)
3334
ENDIF()

INSTALL

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/sh
# Install ttl into $PREFIX (defaults to $HOME/local when unset or empty).
set -e

# Allow the caller to override the install prefix via the environment.
# Quote "$PREFIX": the unquoted form breaks under `test -z` and word-splits
# when the prefix contains whitespace.
if [ -z "$PREFIX" ]; then
    PREFIX=$HOME/local
fi

./configure --prefix="$PREFIX"

make install

configure

+14-5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
set -e
33

44
PREFIX=$(pwd)/local
5+
CUDA_HOME=/usr/local/cuda
56
USE_OPENCV=0
67
BUILD_TESTS=0
78
BUILD_BENCHMARKS=0
@@ -11,10 +12,6 @@ BUILD_GBENCH=0
1112
HAVE_CUDA=0
1213
VERBOSE=0
1314

14-
if [ $(find /usr/include/cuda_runtime.h | wc -l) -gt 0 ]; then
15-
HAVE_CUDA=1
16-
fi
17-
1815
parse_args() {
1916
for i in "$@"; do
2017
case $i in
@@ -48,6 +45,10 @@ parse_args() {
4845
--build-gbench)
4946
BUILD_GBENCH=1
5047
;;
48+
--with-cuda=*)
49+
CUDA_HOME="${i#*=}"
50+
echo "configure --with-cuda=$CUDA_HOME"
51+
;;
5152
--verbose)
5253
VERBOSE=1
5354
;;
@@ -57,6 +58,10 @@ parse_args() {
5758
;;
5859
esac
5960
done
61+
62+
if [ -f $CUDA_HOME/include/cuda_runtime.h ]; then
63+
HAVE_CUDA=1
64+
fi
6065
}
6166

6267
CMAKE_FLAGS=
@@ -96,7 +101,11 @@ add_cmake_flags() {
96101
add_cmake_flag BUILD_TESTS ${BUILD_TESTS}
97102
add_cmake_flag BUILD_BENCHMARKS ${BUILD_BENCHMARKS}
98103
add_cmake_flag BUILD_EXAMPLES ${BUILD_EXAMPLES}
99-
add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
104+
105+
if [ ${HAVE_CUDA} -eq 1 ]; then
106+
add_cmake_flag HAVE_CUDA ${HAVE_CUDA}
107+
add_cmake_flag CUDA_HOME $CUDA_HOME
108+
fi
100109

101110
if [ ${BUILD_EXAMPLES} -eq 1 ]; then
102111
add_cmake_flag USE_OPENCV ${USE_OPENCV}

include/ttl/bits/std_copy.hpp

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once
#include <ttl/bits/std_cuda_allocator.hpp>
#include <ttl/bits/std_tensor.hpp>

namespace ttl
{
namespace internal
{
namespace experimental
{
// Copies the contents of a host tensor view into a CUDA tensor ref
// (host -> device transfer).
template <typename R, typename S>
void copy(const basic_tensor<R, S, cuda_memory, readwrite> &dst,
          const basic_tensor<R, S, host_memory, readonly> &src)
{
    using C = internal::cuda_copier;
    C::copy<C::h2d>(dst.data(), src.data(), src.data_size());
}

// Copies the contents of a CUDA tensor view into a host tensor ref
// (device -> host transfer).
template <typename R, typename S>
void copy(const basic_tensor<R, S, host_memory, readwrite> &dst,
          const basic_tensor<R, S, cuda_memory, readonly> &src)
{
    using C = internal::cuda_copier;
    C::copy<C::d2h>(dst.data(), src.data(), src.data_size());
}
}  // namespace experimental
}  // namespace internal
}  // namespace ttl

include/ttl/bits/std_range.hpp

-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,5 @@ basic_integer_range<N> range(N m, N n)
4545
{
4646
return basic_integer_range<N>(m, n);
4747
}
48-
4948
} // namespace internal
5049
} // namespace ttl

include/ttl/bits/std_tensor_mixin.hpp

+10-20
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ class basic_scalar_mixin
1515
using data_ref = typename trait::ref_type;
1616
using data_t = typename trait::Data;
1717

18+
using Dim = typename S::dimension_type;
19+
1820
data_t data_;
1921

2022
protected:
@@ -33,23 +35,17 @@ class basic_scalar_mixin
3335

3436
basic_scalar_mixin(data_ptr data, const S &) : data_(data) {}
3537

38+
constexpr Dim size() const { return 1; }
39+
40+
constexpr auto dims() const { return S().dims(); }
41+
3642
constexpr size_t data_size() const { return sizeof(R); }
3743

3844
data_ptr data() const { return data_.get(); }
3945

4046
data_ptr data_end() const { return data_.get() + 1; }
4147

4248
S shape() const { return S(); }
43-
44-
void from_host(const void *data) const
45-
{
46-
basic_copier<D, host_memory>()(data_.get(), data, data_size());
47-
}
48-
49-
void to_host(void *data) const
50-
{
51-
basic_copier<host_memory, D>()(data, data_.get(), data_size());
52-
}
5349
};
5450

5551
template <typename R, typename S, typename D, typename A>
@@ -121,6 +117,10 @@ class basic_tensor_mixin
121117

122118
static constexpr auto rank = S::rank;
123119

120+
Dim size() const { return shape_.size(); }
121+
122+
const auto &dims() const { return shape_.dims(); }
123+
124124
size_t data_size() const { return shape_.size() * sizeof(R); }
125125

126126
const S &shape() const { return shape_; }
@@ -158,16 +158,6 @@ class basic_tensor_mixin
158158
return slice_type(data_.get() + i * sub_shape.size(),
159159
batch(j - i, sub_shape));
160160
}
161-
162-
void from_host(const void *data) const
163-
{
164-
basic_copier<D, host_memory>()(data_.get(), data, data_size());
165-
}
166-
167-
void to_host(void *data) const
168-
{
169-
basic_copier<host_memory, D>()(data, data_.get(), data_size());
170-
}
171161
};
172162
} // namespace internal
173163
} // namespace ttl

include/ttl/experimental/copy

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// # -*- mode: c++ -*-
#pragma once
// Public (experimental) entry point exposing ttl::copy for transfers
// between host and CUDA tensors; the implementation lives in the
// internal bits header below.
#include <ttl/bits/std_copy.hpp>

namespace ttl
{
using internal::experimental::copy;
} // namespace ttl

include/ttl/range

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@
44
#include <cstdint>
55

66
#include <ttl/bits/std_range.hpp>
7+
#include <ttl/bits/std_tensor_fwd.hpp>
78

89
namespace ttl
910
{
1011
using internal::range;
1112

1213
using rank_t = uint8_t;
1314

14-
// FIXME: make T less generic
15-
template <rank_t r, typename T> auto range(const T &t)
15+
template <rank_t r, typename R, typename S, typename D, typename A>
16+
auto range(const internal::basic_tensor<R, S, D, A> &t)
1617
{
1718
return range(std::get<r>(t.shape().dims()));
1819
}
19-
2020
} // namespace ttl

tests/bench_cuda_tensor.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
#include "benchmark.hpp"
22

33
#include <ttl/cuda_tensor>
4+
#include <ttl/experimental/copy>
45

5-
template <typename R, int n> struct bench_cuda_tensor {
6+
template <typename R, int n>
7+
struct bench_cuda_tensor {
68
static void run(benchmark::State &state)
79
{
810
ttl::cuda_tensor<R, 1> m1(n);
911
ttl::tensor<R, 1> m2(n);
1012

1113
for (auto _ : state) {
12-
m1.from_host(m2.data());
13-
m1.to_host(m2.data());
14+
ttl::copy(ttl::ref(m1), ttl::view(m2));
15+
ttl::copy(ttl::ref(m2), ttl::view(m1));
1416
}
1517
}
1618
};

tests/test_copy.cpp

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#include "testing.hpp"
2+
3+
#include <ttl/algorithm>
4+
#include <ttl/cuda_tensor>
5+
#include <ttl/device>
6+
#include <ttl/experimental/copy>
7+
#include <ttl/range>
8+
#include <ttl/tensor>
9+
10+
void test_copy(int n)
11+
{
12+
ttl::tensor<int, 1> x_host(n);
13+
ttl::cuda_tensor<int, 1> x_cuda(n);
14+
15+
ttl::fill(ttl::ref(x_host), 1);
16+
ttl::copy(ttl::ref(x_cuda), ttl::view(x_host));
17+
18+
ttl::fill(ttl::ref(x_host), 2);
19+
for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 2); }
20+
21+
ttl::copy(ttl::ref(x_host), ttl::view(x_cuda));
22+
for (auto i : ttl::range<0>(x_host)) { ASSERT_EQ(x_host.data()[i], 1); }
23+
}
24+
25+
TEST(copy_test, test_copy)
{
    // Exercise a spread of sizes; the largest (1 << 20) is repeated to
    // cover back-to-back transfers of identical size.
    for (const int n : {1, 2, 10, 100, 1000, 1 << 20, 1 << 20, 1 << 20}) {
        test_copy(n);
    }
}
}

tests/test_cuda_tensor.cpp

+17-13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "testing.hpp"
22

33
#include <ttl/cuda_tensor>
4+
#include <ttl/experimental/copy>
45
#include <ttl/range>
56
#include <ttl/tensor>
67

@@ -23,11 +24,10 @@ TEST(cuda_tensor_test, test0)
2324
{
2425
using R = float;
2526
cuda_tensor<R, 0> m0;
26-
2727
tensor<R, 0> x;
2828

29-
m0.from_host(x.data());
30-
m0.to_host(x.data());
29+
ttl::copy(ttl::ref(m0), ttl::view(x));
30+
ttl::copy(ttl::ref(x), ttl::view(m0));
3131
}
3232

3333
TEST(cuda_tensor_test, test1)
@@ -42,8 +42,8 @@ TEST(cuda_tensor_test, test2)
4242
cuda_tensor<R, 2> m1(10, 100);
4343
tensor<R, 2> m2(10, 100);
4444

45-
m1.from_host(m2.data());
46-
m1.to_host(m2.data());
45+
ttl::copy(ttl::ref(m1), ttl::view(m2));
46+
ttl::copy(ttl::ref(m2), ttl::view(m1));
4747

4848
m1.slice(1, 2);
4949
auto r = ref(m1);
@@ -58,14 +58,16 @@ TEST(cuda_tensor_test, test_3)
5858
cuda_tensor<R, 2> m1(ttl::make_shape(10, 100));
5959
}
6060

61-
template <typename R, uint8_t r> void test_auto_ref()
61+
template <typename R, uint8_t r>
62+
void test_auto_ref()
6263
{
6364
static_assert(
6465
std::is_convertible<cuda_tensor<R, r>, cuda_tensor_ref<R, r>>::value,
6566
"can't convert to ref");
6667
}
6768

68-
template <typename R, uint8_t r> void test_auto_view()
69+
template <typename R, uint8_t r>
70+
void test_auto_view()
6971
{
7072
static_assert(
7173
std::is_convertible<cuda_tensor<R, r>, cuda_tensor_view<R, r>>::value,
@@ -87,28 +89,30 @@ TEST(cuda_tensor_test, test_convert)
8789
test_auto_view<int, 2>();
8890
}
8991

90-
template <typename R, uint8_t r> void test_copy(const ttl::shape<r> &shape)
92+
template <typename R, uint8_t r>
93+
void test_copy(const ttl::shape<r> &shape)
9194
{
9295
tensor<R, r> x(shape);
9396
cuda_tensor<R, r> y(shape);
9497
tensor<R, r> z(shape);
9598

9699
std::iota(x.data(), x.data_end(), 1);
97-
y.from_host(x.data());
98-
y.to_host(z.data());
100+
101+
ttl::copy(ttl::ref(y), ttl::view(x));
102+
ttl::copy(ttl::ref(z), ttl::view(y));
99103

100104
for (auto i : ttl::range(shape.size())) {
101105
ASSERT_EQ(x.data()[i], z.data()[i]);
102106
}
103107

104108
{
105109
cuda_tensor_ref<R, r> ry = ref(y);
106-
ry.from_host(x.data());
107-
ry.to_host(x.data());
110+
ttl::copy(ry, ttl::view(x));
111+
ttl::copy(ttl::ref(z), ttl::view(ry));
108112
}
109113
{
110114
cuda_tensor_view<R, r> vy = view(y);
111-
vy.to_host(x.data());
115+
ttl::copy(ttl::ref(x), vy);
112116
}
113117
}
114118

tests/test_public_types.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,16 @@ ttl::shape<r> unit_shape()
4545
return ttl::shape<r>(dims);
4646
}
4747

48+
// Checks the public size()/dims() accessors on a unit-shaped tensor-like
// value: size() must equal 1 and dims() must carry rank-many entries.
template <typename T>
void test_public_apis(const T &t)
{
    const auto n = t.size();
    ASSERT_EQ(n, static_cast<decltype(n)>(1));

    const auto ds = t.dims();
    static_assert(ds.size() == T::rank, "");
}
57+
4858
template <ttl::rank_t r>
4959
struct test_ranked_type {
5060
template <typename R>
@@ -65,6 +75,10 @@ struct test_ranked_type {
6575
Tensor t(unit_shape<r>());
6676
TensorRef tr(t);
6777
TensorView tv(t);
78+
79+
test_public_apis(t);
80+
test_public_apis(tr);
81+
test_public_apis(tv);
6882
}
6983
};
7084

0 commit comments

Comments
 (0)