Skip to content

Commit

Permalink
Add new Algorithms using explicit batch type
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelbacci committed Jun 23, 2021
1 parent e845404 commit a2cc837
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 12 deletions.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,46 @@ void mean(const vector_type& a, const vector_type& b, vector_type& res)
}
```
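Algorithms such as `xsimd::reduce` and `xsimd::transform` also come in an explicit-batch form (`xsimd::reduce_batch`, `xsimd::transform_batch`) that takes two callables: one applied to scalar elements and one applied to whole batches: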
```cpp
// Sums the elements of `v`, counting every NaN as 0.
// Two callables are handed to xsimd::reduce_batch: the first combines scalar
// values, the second combines whole batches with the same rule applied lane-wise.
template <class C, class T = typename std::decay<decltype(*C().begin())>::type>
T nansum(const C& v)
{
    return xsimd::reduce_batch(v.begin(), v.end(), 0.0,
        // Scalar combiner: replace a NaN operand by 0 before adding.
        [](auto x, auto y) {
            return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
        },
        // Batch combiner: same rule, one lane at a time.
        [](auto x, auto y) {
            // A plain local rather than a function-local static: a static
            // needs a guarded initialisation check on every call, which is
            // wasted work inside the reduction loop.
            const decltype(x) zero(0.0);
            auto xnan = xsimd::isnan(x);
            auto ynan = xsimd::isnan(y);
            auto xs = xsimd::select(xnan, zero, x);
            auto ys = xsimd::select(ynan, zero, y);
            return xs + ys;
        });
}
```

To switch from `std::count_if` to `xsimd::count_if`:

```cpp
// v is an aligned vector of int type
auto count_expected = std::count_if(v.begin(), v.end(),
    [](auto x) {
        return x >= 50 && x <= 70 ? 1 : 0;
    });
// xsimd::count_if takes the scalar predicate plus a batch predicate that
// returns how many lanes of the batch satisfy the condition.
auto count = xsimd::count_if(v.begin(), v.end(),
    [](auto x) {
        return x >= 50 && x <= 70 ? 1 : 0;
    },
    [](auto b) {
        // Plain locals: function-local statics would add a guarded
        // initialisation check to every batch evaluation.
        const decltype(b) zero(0);
        const decltype(b) one(1);
        // Put 1 in matching lanes, 0 elsewhere, then add the lanes together.
        return xsimd::hadd(xsimd::select(b >= 50 && b <= 70, one, zero));
    });
// std::count_if returns a signed difference_type while xsimd::count_if
// returns std::size_t; convert explicitly to avoid a signed/unsigned compare.
assert(static_cast<std::size_t>(count_expected) == count);
```
## Building and Running the Tests
Expand Down
119 changes: 108 additions & 11 deletions include/xsimd/stl/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@

namespace xsimd
{
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
template <class I1, class I2, class O1, class UF, class UFB>
void transform_batch(I1 first, I2 last, O1 out_first, UF&& f, UFB&& fb)
{
using value_type = typename std::decay<decltype(*first)>::type;
using traits = simd_traits<value_type>;
Expand All @@ -43,7 +43,7 @@ namespace xsimd
for (std::size_t i = align_begin; i < align_end; i += simd_size)
{
xsimd::load_aligned(&first[i], batch);
xsimd::store_aligned(&out_first[i], f(batch));
xsimd::store_aligned(&out_first[i], fb(batch));
}

for (std::size_t i = align_end; i < size; ++i)
Expand All @@ -62,7 +62,7 @@ namespace xsimd
for (std::size_t i = align_begin; i < align_end; i += simd_size)
{
xsimd::load_aligned(&first[i], batch);
xsimd::store_unaligned(&out_first[i], f(batch));
xsimd::store_unaligned(&out_first[i], fb(batch));
}

for (std::size_t i = align_end; i < size; ++i)
Expand All @@ -72,8 +72,14 @@ namespace xsimd
}
}

template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
// Unary transform over [first, last) writing to out_first.
// Delegates to transform_batch, supplying the same callable `f` as both the
// scalar functor and the batch functor.
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
{
    // The callable is handed over twice, so it is passed as an lvalue both
    // times and never moved from.
    auto& op = f;
    transform_batch(first, last, out_first, op, op);
}

template <class I1, class I2, class I3, class O1, class UF, class UFB>
void transform_batch(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f, UFB&& fb)
{
using value_type = typename std::decay<decltype(*first_1)>::type;
using traits = simd_traits<value_type>;
Expand Down Expand Up @@ -102,7 +108,7 @@ namespace xsimd
{ \
xsimd::A1(&first_1[i], batch_1); \
xsimd::A2(&first_2[i], batch_2); \
xsimd::A3(&out_first[i], f(batch_1, batch_2)); \
xsimd::A3(&out_first[i], fb(batch_1, batch_2)); \
} \
\
for (std::size_t i = align_end; i < size; ++i) \
Expand Down Expand Up @@ -130,6 +136,11 @@ namespace xsimd
#undef XSIMD_LOOP_MACRO
}

// Binary transform over [first_1, last_1) and the range starting at first_2,
// writing to out_first.
// Delegates to the binary transform_batch, supplying the same callable `f`
// as both the scalar functor and the batch functor.
template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
{
    // The callable is handed over twice, so it is passed as an lvalue both
    // times and never moved from.
    auto& op = f;
    transform_batch(first_1, last_1, first_2, out_first, op, op);
}

// TODO: Remove this once we drop C++11 support
namespace detail
Expand All @@ -141,9 +152,8 @@ namespace xsimd
};
}


template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
template <class Iterator1, class Iterator2, class Init, class BinaryFunction, class BinaryFunctionBatch>
Init reduce_batch(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun, BinaryFunctionBatch&& binfun_batch)
{
using value_type = typename std::decay<decltype(*first)>::type;
using traits = simd_traits<value_type>;
Expand Down Expand Up @@ -180,7 +190,7 @@ namespace xsimd
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
{
xsimd::load_aligned(ptr, batch);
batch_init = binfun(batch_init, batch);
batch_init = binfun_batch(batch_init, batch);
}

// reduce across batch
Expand All @@ -197,6 +207,93 @@ namespace xsimd
return init;
}

// Reduces [first, last) starting from `init` with `binfun`
// (detail::plus when no function is given).
// Delegates to reduce_batch, supplying the same callable as both the scalar
// and the batch binary function.
template <class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus>
Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
{
    // The callable is handed over twice, so it is passed as an lvalue both
    // times and never moved from.
    auto& op = binfun;
    return reduce_batch(first, last, init, op, op);
}

namespace detail
{
    // Batch predicate used by xsimd::count: given a batch, returns how many
    // of its lanes compare equal to the stored value.
    template <class T>
    struct count_batch
    {
        // Stores the value every lane will be compared against.
        count_batch(T value)
            : value(value)
        {}

        count_batch(const count_batch<T>&) = default;
        count_batch(count_batch<T>&&) = default;

        // Returns the number of lanes of `b` equal to the stored value.
        // const-qualified because the call does not modify the functor, so it
        // can also be invoked through a const reference.
        template <class B>
        std::size_t operator()(const B& b) const
        {
            // Plain locals rather than function-local statics: a static
            // needs a guarded initialisation check on every call, i.e. once
            // per batch in the counting loop.
            const B zero = B(T(0));
            const B one = B(T(1));
            // 1 in matching lanes, 0 elsewhere; the horizontal add is the count.
            return static_cast<std::size_t>(xsimd::hadd(xsimd::select(b == value, one, zero)));
        }

    private:
        T value;
    };
}

template <class Iterator1, class Iterator2, class UnaryPredicate, class UnaryPredicateBatch>
std::size_t count_if(Iterator1 first, Iterator2 last, UnaryPredicate&& predicate, UnaryPredicateBatch&& predicate_batch)
{
using value_type = typename std::decay<decltype(*first)>::type;
using traits = simd_traits<value_type>;
using batch_type = typename traits::type;

std::size_t size = static_cast<std::size_t>(std::distance(first, last));
constexpr std::size_t simd_size = traits::size;

std::size_t counter(0);
if(size < simd_size)
{
while(first != last)
{
counter += predicate(*first++);
}
return counter;
}

const auto* const ptr_begin = &(*first);

std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size);
std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));

// reduce initial unaligned part
for (std::size_t i = 0; i < align_begin; ++i)
{
counter += predicate(first[i]);
}

// reduce aligned part
batch_type batch;
auto ptr = ptr_begin + align_begin;
for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
{
xsimd::load_aligned(ptr, batch);
counter += predicate_batch(batch);
}

// reduce final unaligned part
for (std::size_t i = align_end; i < size; ++i)
{
counter += predicate(first[i]);
}

return counter;
}

template <class Iterator1, class Iterator2, class T>
std::size_t count(Iterator1 first, Iterator2 last, const T& value)
{
return count_if(first, last,
[&value](const T& x) { return value == x; }, detail::count_batch<T>{value});
}

}

#endif
127 changes: 126 additions & 1 deletion test/test_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,62 @@ template <class T>
using test_allocator_type = std::allocator<T>;
#endif

// Test helper: for a container type C, exposes its element type together
// with the matching xsimd traits and batch type.
// The element type is obtained from `*C().begin()`, so C has to be
// default-constructible and provide begin().
template <class C>
struct types {
// Element type of the container, with references and cv-qualifiers removed.
using value_type = typename std::decay<decltype(*C().begin())>::type;
// xsimd traits associated with the element type.
using traits = xsimd::simd_traits<value_type>;
// Batch type xsimd uses for the element type.
using batch_type = typename traits::type;
};

// Checks the unary xsimd::transform_batch against std::transform on the
// sequence 1..42: even values are kept, odd values are squared.
TEST(algorithms, unary_transform_batch)
{
    using vector_type = std::vector<int, test_allocator_type<int>>;
    using batch_type = types<vector_type>::batch_type;

    // Input: 1, 2, ..., 42 (transformed in place by xsimd below).
    vector_type actual(42, 0);
    std::iota(actual.begin(), actual.end(), 1);

    // Reference result computed with the standard algorithm.
    auto keep_even_square_odd = [](int x) {
        return (x % 2) ? x * x : x;
    };
    vector_type expected = actual;
    std::transform(expected.begin(), expected.end(), expected.begin(), keep_even_square_odd);

    xsimd::transform_batch(actual.begin(), actual.end(), actual.begin(),
        // NOTE: since C++14 a simple `[](auto x)` reduces code complexity
        [](int x) {
            return (x % 2) ? x * x : x;
        },
        // NOTE: since C++14 a simple `[](auto b)` reduces code complexity
        [](batch_type b) {
            return xsimd::select(!(b % 2), b, b*b);
        });

    EXPECT_TRUE(expected.size() == actual.size()
                && std::equal(expected.begin(), expected.end(), actual.begin()));
}

// Checks the binary xsimd::transform_batch against std::transform:
// an even sum x + y is kept, otherwise x*x + y*y is produced.
TEST(algorithms, binary_transform_batch)
{
    using vector_type = std::vector<int, test_allocator_type<int>>;
    using batch_type = types<vector_type>::batch_type;
    auto flip_flop_a = vector_type(42, 0);
    auto flip_flop_b = vector_type(42, 0);
    // NOTE(review): with a = 1..42 and b = 3..44 the sum x + y is always even,
    // so the squared branch is never selected by this data set.
    std::iota(flip_flop_a.begin(), flip_flop_a.end(), 1);
    std::iota(flip_flop_b.begin(), flip_flop_b.end(), 3);
    auto square_pair = [](int x, int y) {
        return !((x + y) % 2) ? x + y : x*x + y*y;
    };
    auto flip_flop_expected = flip_flop_a;
    std::transform(flip_flop_a.begin(), flip_flop_a.end(), flip_flop_b.begin(), flip_flop_expected.begin(), square_pair);

    auto flip_flop_result = vector_type(flip_flop_expected.size());
    xsimd::transform_batch(flip_flop_a.begin(), flip_flop_a.end(), flip_flop_b.begin(), flip_flop_result.begin(),
        [](int x, int y) {
            return !((x + y) % 2) ? x + y : x*x + y*y;
        },
        [](batch_type bx, batch_type by) {
            // Must mirror the scalar lambda exactly: bx*bx + by*by
            // (the previous `by+by` did not match the scalar computation).
            return xsimd::select(!((bx + by) % 2), bx + by, bx*bx + by*by);
        });
    EXPECT_TRUE(std::equal(flip_flop_expected.begin(), flip_flop_expected.end(), flip_flop_result.begin()) && flip_flop_expected.size() == flip_flop_result.size());
}

TEST(algorithms, binary_transform)
{
std::vector<double> expected(93);
Expand Down Expand Up @@ -83,7 +139,6 @@ TEST(algorithms, binary_transform)
std::fill(ca.begin(), ca.end(), -1); // erase
}


TEST(algorithms, unary_transform)
{
std::vector<double> expected(93);
Expand Down Expand Up @@ -216,6 +271,76 @@ TEST_F(xsimd_reduce, using_custom_binary_function)
}
}

// Checks xsimd::reduce_batch with a NaN-ignoring sum against std::accumulate
// on a vector in which every odd index holds NaN.
TEST(algorithms, reduce_batch)
{
    const double nan = std::numeric_limits<double>::quiet_NaN();
    using vector_type = std::vector<double, test_allocator_type<double>>;
    using batch_type = types<vector_type>::batch_type;
    auto vector_with_nan = vector_type(100, 0);
    std::iota(vector_with_nan.begin(), vector_with_nan.end(), 3.14);
    // Overwrite every odd index with NaN. The previous version tested a
    // counter that was never incremented, so no NaN was ever inserted and
    // the NaN handling was not exercised at all.
    for (std::size_t i = 1; i < vector_with_nan.size(); i += 2)
    {
        vector_with_nan[i] = nan;
    }

    auto nansum_expected = std::accumulate(vector_with_nan.begin(), vector_with_nan.end(), 0.0,
        [](double x, double y) {
            return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
        });

    auto nansum = xsimd::reduce_batch(vector_with_nan.begin(), vector_with_nan.end(), 0.0,
        // Scalar combiner: a NaN operand counts as 0.
        [](double x, double y) {
            return (std::isnan(x) ? 0.0 : x) + (std::isnan(y) ? 0.0 : y);
        },
        // Batch combiner: same rule applied lane-wise.
        [](batch_type x, batch_type y) {
            const batch_type zero(0.0);
            auto xnan = xsimd::isnan(x);
            auto ynan = xsimd::isnan(y);
            auto xs = xsimd::select(xnan, zero, x);
            auto ys = xsimd::select(ynan, zero, y);
            return xs + ys;
        });

    EXPECT_NEAR(nansum_expected, nansum, 1e-6);
}

// Checks xsimd::count on a vector where exactly three elements were
// overwritten with the searched value.
TEST(algorithms, count)
{
    using vector_type = std::vector<double, test_allocator_type<double>>;
    auto v = vector_type(100, 0);
    std::iota(v.begin(), v.end(), 3.14);
    v[12] = 123.321;
    v[42] = 123.321;
    v[93] = 123.321;

    // xsimd::count returns std::size_t; use an expected value of the same
    // type so EXPECT_EQ does not perform a signed/unsigned comparison.
    const std::size_t expected = 3;
    EXPECT_EQ(expected, xsimd::count(v.begin(), v.end(), 123.321));
}

// Checks xsimd::count_if against std::count_if for the values of 1..100
// lying in the closed range [50, 70].
TEST(algorithms, count_if)
{
    using vector_type = std::vector<int, test_allocator_type<int>>;
    using batch_type = types<vector_type>::batch_type;
    auto v = vector_type(100, 0);
    std::iota(v.begin(), v.end(), 1);

    // std::count_if returns a signed difference_type while xsimd::count_if
    // returns std::size_t; convert once so EXPECT_EQ compares equal types.
    const auto count_expected = static_cast<std::size_t>(std::count_if(v.begin(), v.end(),
        [](int x) {
            return x >= 50 && x <= 70 ? 1 : 0;
        }));

    auto count = xsimd::count_if(v.begin(), v.end(),
        [](int x) {
            return x >= 50 && x <= 70 ? 1 : 0;
        },
        [](batch_type b) {
            // Plain locals: function-local statics would add a guarded
            // initialisation check to every batch evaluation.
            const batch_type zero(0);
            const batch_type one(1);
            // 1 in matching lanes, 0 elsewhere; the horizontal add is the count.
            return xsimd::hadd(xsimd::select(b >= 50 && b <= 70, one, zero));
        });
    EXPECT_EQ(count_expected, count);
}

#if XSIMD_X86_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE || XSIMD_ARM_INSTR_SET > XSIMD_VERSION_NUMBER_NOT_AVAILABLE
TEST(algorithms, iterator)
{
Expand Down

0 comments on commit a2cc837

Please sign in to comment.