Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ add_benchmark(find_and_count src/find_and_count.cpp)
add_benchmark(find_first_of src/find_first_of.cpp)
add_benchmark(has_single_bit src/has_single_bit.cpp)
add_benchmark(includes src/includes.cpp)
add_benchmark(integer_to_string src/integer_to_string.cpp)
add_benchmark(iota src/iota.cpp)
add_benchmark(is_sorted_until src/is_sorted_until.cpp)
add_benchmark(locale_classic src/locale_classic.cpp)
Expand Down
84 changes: 84 additions & 0 deletions benchmarks/src/integer_to_string.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <array>
#include <benchmark/benchmark.h>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <random>
#include <string>
#include <type_traits>

using namespace std;

// Builds an array of 2000 benchmark inputs of integer type T.
//
// Magnitudes are drawn from lognormal_distribution<double>(M, S) driven by a default-seeded mt19937, so
// repeated calls with the same T, M, S yield the same data on a given standard library implementation.
// Samples that do not fit in T are rejected and redrawn: converting an out-of-range floating-point value
// to an integer type is undefined behavior, and with the parameters used by the benchmarks in this file
// a noticeable fraction of raw samples would be out of range.
// For signed T, each element is afterwards negated with probability 0.5.
template <class T, double M, double S>
auto generate_array() {
    array<T, 2000> a;

    // 2^digits == numeric_limits<T>::max() + 1. Unlike max() itself, this power of two is exactly
    // representable as a double even for 64-bit T, so the comparison below is exact.
    const double limit = ldexp(1.0, numeric_limits<T>::digits);

    mt19937 gen;
    lognormal_distribution<double> dis(M, S);
    ranges::generate(a, [&] {
        for (;;) {
            const double sample = dis(gen);
            // Truncation toward zero of a non-negative value below `limit` always lands in [0, max()].
            // NaN or infinity (should they ever occur) fail this comparison and are redrawn as well.
            if (sample < limit) {
                return static_cast<T>(sample);
            }
        }
    });

    if constexpr (is_signed_v<T>) {
        // Magnitudes are at most max(), so negating them cannot overflow T.
        bernoulli_distribution b(0.5);
        ranges::for_each(a, [&](T& v) { v *= (b(gen) ? -1 : 1); });
    }

    return a;
}

// Benchmarks the internal helper std::_UIntegral_to_buff on values of type T produced by
// generate_array<T, M, S>(), cycling through the array for as many iterations as the benchmark runs.
template <class T, double M, double S>
void internal_integer_to_buff(benchmark::State& state) {
    auto a = generate_array<T, M, S>();

    // _UIntegral_to_buff emits digits back to front (`*--_RNext = ...`), so it must be given a pointer
    // to the END of the buffer. Passing `buff` itself would make it write before the start of the array.
    // 24 chars is enough for the 20 decimal digits of the largest 64-bit value.
    char buff[24];
    char* const buff_end = std::end(buff);

    auto it = a.begin();
    for (auto _ : state) {
        auto i = *it;
        benchmark::DoNotOptimize(i); // keep the input opaque to the optimizer
        auto s = std::_UIntegral_to_buff(buff_end, i);
        benchmark::DoNotOptimize(s); // keep the conversion from being discarded as dead code

        // Advance to the next input, wrapping around at the end of the array.
        ++it;
        if (it == a.end()) {
            it = a.begin();
        }
    }
}

// Benchmarks to_string on values of type T taken from generate_array<T, M, S>().
// The inputs are visited in order and the walk restarts from the first element whenever the end of the
// array is reached, so the benchmark can run for any number of iterations.
template <class T, double M, double S>
void integer_to_string(benchmark::State& state) {
    auto values = generate_array<T, M, S>();
    auto cursor = values.begin();

    for (auto _ : state) {
        auto value = *cursor;
        benchmark::DoNotOptimize(value); // hide the input from the optimizer

        auto text = to_string(value);
        benchmark::DoNotOptimize(text); // force the resulting string to be materialized

        // Step to the following input; wrap to the front once the array is exhausted.
        if (++cursor == values.end()) {
            cursor = values.begin();
        }
    }
}

// Benchmark registrations. Every instantiation is <T, M, S>: T is the integer type being converted, and
// M and S are forwarded by generate_array to lognormal_distribution<double>(M, S) to shape the input
// values. Both parameters are doubled each time the width of T doubles.

// Internal digit-writing helper; registered for unsigned types only.
BENCHMARK(internal_integer_to_buff<uint8_t, 2.5, 1.5>);
BENCHMARK(internal_integer_to_buff<uint16_t, 5.0, 3.0>);
BENCHMARK(internal_integer_to_buff<uint32_t, 10.0, 6.0>);
BENCHMARK(internal_integer_to_buff<uint64_t, 20.0, 12.0>);

// to_string on unsigned inputs.
BENCHMARK(integer_to_string<uint8_t, 2.5, 1.5>);
BENCHMARK(integer_to_string<uint16_t, 5.0, 3.0>);
BENCHMARK(integer_to_string<uint32_t, 10.0, 6.0>);
BENCHMARK(integer_to_string<uint64_t, 20.0, 12.0>);

// to_string on signed inputs (generate_array negates each value with probability 0.5 for signed T).
BENCHMARK(integer_to_string<int8_t, 2.5, 1.5>);
BENCHMARK(integer_to_string<int16_t, 5.0, 3.0>);
BENCHMARK(integer_to_string<int32_t, 10.0, 6.0>);
BENCHMARK(integer_to_string<int64_t, 20.0, 12.0>);

// Google Benchmark macro that supplies the program's main().
BENCHMARK_MAIN();
28 changes: 16 additions & 12 deletions stl/inc/xcharconv_ryu_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,6 @@
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR

#if !_HAS_CXX17
#error The contents of <charconv> are only available with C++17. (Also, you should not include this internal header.)
#endif // !_HAS_CXX17

#include <cstdint>

#pragma pack(push, _CRT_PACKING)
Expand All @@ -60,11 +56,15 @@ _STD_BEGIN

// vvvvvvvvvv DERIVED FROM digit_table.h vvvvvvvvvv

#if !_HAS_CXX17
namespace { // work around Clang link issues
#endif // !_HAS_CXX17

// A table of all two-digit numbers. This is used to speed up decimal digit
// generation by copying pairs of digits into the final output.
template <class _CharT> constexpr _CharT __DIGIT_TABLE[] = {_CharT{}};

template <> inline constexpr char __DIGIT_TABLE<char>[200] = {
template <> _INLINE_VAR constexpr char __DIGIT_TABLE<char>[200] = {
'0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
'1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
'2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
Expand All @@ -77,7 +77,7 @@ template <> inline constexpr char __DIGIT_TABLE<char>[200] = {
'9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
};

template <> inline constexpr wchar_t __DIGIT_TABLE<wchar_t>[200] = {
template <> _INLINE_VAR constexpr wchar_t __DIGIT_TABLE<wchar_t>[200] = {
L'0',L'0',L'0',L'1',L'0',L'2',L'0',L'3',L'0',L'4',L'0',L'5',L'0',L'6',L'0',L'7',L'0',L'8',L'0',L'9',
L'1',L'0',L'1',L'1',L'1',L'2',L'1',L'3',L'1',L'4',L'1',L'5',L'1',L'6',L'1',L'7',L'1',L'8',L'1',L'9',
L'2',L'0',L'2',L'1',L'2',L'2',L'2',L'3',L'2',L'4',L'2',L'5',L'2',L'6',L'2',L'7',L'2',L'8',L'2',L'9',
Expand All @@ -90,6 +90,10 @@ template <> inline constexpr wchar_t __DIGIT_TABLE<wchar_t>[200] = {
L'9',L'0',L'9',L'1',L'9',L'2',L'9',L'3',L'9',L'4',L'9',L'5',L'9',L'6',L'9',L'7',L'9',L'8',L'9',L'9'
};

#if !_HAS_CXX17
} // unnamed namespace
#endif // !_HAS_CXX17

// ^^^^^^^^^^ DERIVED FROM digit_table.h ^^^^^^^^^^

// vvvvvvvvvv DERIVED FROM d2s_full_table.h vvvvvvvvvv
Expand All @@ -103,9 +107,9 @@ extern const uint64_t __DOUBLE_POW5_SPLIT[326][2];

// vvvvvvvvvv DERIVED FROM d2fixed_full_table.h vvvvvvvvvv

inline constexpr int __TABLE_SIZE = 64;
_INLINE_VAR constexpr int __TABLE_SIZE = 64;

inline constexpr uint16_t __POW10_OFFSET[__TABLE_SIZE] = {
_INLINE_VAR constexpr uint16_t __POW10_OFFSET[__TABLE_SIZE] = {
0, 2, 5, 8, 12, 16, 21, 26, 32, 39,
46, 54, 62, 71, 80, 90, 100, 111, 122, 134,
146, 159, 173, 187, 202, 217, 233, 249, 266, 283,
Expand All @@ -117,10 +121,10 @@ inline constexpr uint16_t __POW10_OFFSET[__TABLE_SIZE] = {

extern const uint64_t __POW10_SPLIT[1224][3];

inline constexpr int __TABLE_SIZE_2 = 69;
inline constexpr int __ADDITIONAL_BITS_2 = 120;
_INLINE_VAR constexpr int __TABLE_SIZE_2 = 69;
_INLINE_VAR constexpr int __ADDITIONAL_BITS_2 = 120;

inline constexpr uint16_t __POW10_OFFSET_2[__TABLE_SIZE_2] = {
_INLINE_VAR constexpr uint16_t __POW10_OFFSET_2[__TABLE_SIZE_2] = {
0, 2, 6, 12, 20, 29, 40, 52, 66, 80,
95, 112, 130, 150, 170, 192, 215, 240, 265, 292,
320, 350, 381, 413, 446, 480, 516, 552, 590, 629,
Expand All @@ -130,7 +134,7 @@ inline constexpr uint16_t __POW10_OFFSET_2[__TABLE_SIZE_2] = {
2465, 2544, 2625, 2706, 2789, 2873, 2959, 3046, 3133
};

inline constexpr uint8_t __MIN_BLOCK_2[__TABLE_SIZE_2] = {
_INLINE_VAR constexpr uint8_t __MIN_BLOCK_2[__TABLE_SIZE_2] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 3,
3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 11, 11, 12, 12, 13, 13,
Expand Down
10 changes: 10 additions & 0 deletions stl/inc/xmemory
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <limits> // TRANSITION, see GH-4634: Lots of user code assumes that <xmemory> drags in <limits>
#include <new>
#include <xatomic.h>
#include <xcharconv_ryu_tables.h>
#include <xutility>

#if _HAS_CXX20
Expand Down Expand Up @@ -2782,6 +2783,15 @@ _NODISCARD _Elem* _UIntegral_to_buff(_Elem* _RNext, _UTy _UVal) { // used by bot
auto _UVal_trunc = static_cast<unsigned long>(_UVal);
#endif // ^^^ !defined(_WIN64) ^^^

if constexpr (_Is_any_of_v<_Elem, char, wchar_t>) {
while (_UVal_trunc >= 100) {
const unsigned long _UVal_trunc_part = _UVal_trunc % 100;
_UVal_trunc /= 100;
*--_RNext = static_cast<_Elem>(__DIGIT_TABLE<_Elem>[_UVal_trunc_part * 2 + 1]);
*--_RNext = static_cast<_Elem>(__DIGIT_TABLE<_Elem>[_UVal_trunc_part * 2]);
Comment on lines +2790 to +2791
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be further optimized to

            _RNext -= 2;
            _CSTD memcpy(_RNext, __DIGIT_TABLE<_Elem> + _UVal_trunc_part * 2, 2 * sizeof(_Elem))

but this still does not give noticeably better results in the benchmark.

}
}

do {
*--_RNext = static_cast<_Elem>('0' + _UVal_trunc % 10);
_UVal_trunc /= 10;
Expand Down