diff --git a/Makefile b/Makefile
index 51f7af7..9a1b756 100644
--- a/Makefile
+++ b/Makefile
@@ -11,10 +11,15 @@ all : bin/itoa_release_x64_gmake
 	cd bin && ./itoa_release_x64_gmake $(ARGS)
 	cd result && make -f makefile
 
+debug: CONFIG = debug_x64
+debug: bin/itoa_debug_x64_gmake
+	cd bin && ./itoa_debug_x64_gmake $(ARGS)
+	cd result && make -f makefile
+
 bin/itoa_%_gmake : build/gmake/itoa.make
 	cd build/gmake && make -f itoa.make config=$(CONFIG) verbose=$(VERBOSE)
 
-clean : 
+clean :
 	rm -rf build/gmake
 	rm -rf build/vs2005
 	rm -rf build/vs2008
@@ -27,8 +32,7 @@ clean :
 setup :
 	cd build && ./premake.sh && ./machine.sh
 
-
-build/gmake/itoa.make : setup 
+build/gmake/itoa.make : setup
 
 clean_status :
 	@echo "Filesystem status according to GIT"
diff --git a/build/premake5.lua b/build/premake5.lua
index d6a4832..3f003bf 100644
--- a/build/premake5.lua
+++ b/build/premake5.lua
@@ -5,7 +5,7 @@ function setTargetObjDir(outDir)
 end
 
 solution "benchmark"
-	configurations { "release" }
+	configurations { "debug", "release" }
 	platforms { "x32", "x64" }
 	location ("./" .. (_ACTION or ""))
 
@@ -22,6 +22,9 @@ solution "benchmark"
 		defines { "NDEBUG" }
 		flags { "Optimize" }
 
+	configuration "debug"
+		flags { "Symbols" }
+
 	configuration "vs*"
 		defines { "_CRT_SECURE_NO_WARNINGS" }
 
diff --git a/src/Kabuki_Toolkit.cpp b/src/Kabuki_Toolkit.cpp
new file mode 100644
index 0000000..fd093fd
--- /dev/null
+++ b/src/Kabuki_Toolkit.cpp
@@ -0,0 +1,388 @@
+// source https://stackoverflow.com/a/48779770/21617688
+
+/** Kabuki Toolkit
+    @version 0.x
+    @file    ~/source/crabs/print_itos.cc
+    @author  Cale McCollough
+    @license Copyright (C) 2017-2018 Cale McCollough;
+             All right reserved (R). Licensed under the Apache License, Version
+             2.0 (the "License"); you may not use this file except in
+             compliance with the License. You may obtain a copy of the License
+             [here](http://www.apache.org/licenses/LICENSE-2.0). Unless
+             required by applicable law or agreed to in writing, software
+             distributed under the License is distributed on an "AS IS" BASIS,
+             WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+             implied. See the License for the specific language governing
+             permissions and limitations under the License.
+*/
+
+#define MAJOR_SEAM 1
+#define MINOR_SEAM 2
+
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+// #include "print_itos.h"
+
+#if MAJOR_SEAM >= 1 && MINOR_SEAM >= 1
+
+#if MAJOR_SEAM == 1 && MINOR_SEAM == 1
+#define DEBUG 1
+
+// PRINTF takes only "..." because several call sites pass a bare format string.
+#define PRINTF(...) printf(__VA_ARGS__);
+#define PUTCHAR(c) putchar(c);
+#define PRINT_PRINTED\
+    snprintf (buffer, 24, "%u", value); *text_end = 0;\
+    printf ("\n Printed \"%s\" leaving value:\"%s\":%u",\
+        begin, buffer, (unsigned)strlen (buffer));
+#define PRINT_BINARY PrintBinary (value);
+#define PRINT_BINARY_TABLE PrintBinaryTable (value);
+#else
+#define PRINTF(...)
+#define PUTCHAR(c)
+#define PRINT_PRINTED
+#define PRINT_BINARY
+#define PRINT_BINARY_TABLE
+#endif
+
+namespace kabuki_toolkit {
+
+/** Prints a newline followed by a row of 80 copies of c to stdout. */
+void PrintLine (char c) {
+    std::cout << '\n';
+    for (int i = 80; i > 0; --i)
+        std::cout << c;
+}
+
+/** Stores a two-character digit pair at dst.
+    memcpy replaces the former uint16_t* store: dst is often at an odd
+    offset, and the copy is well-defined for any alignment. */
+static inline void WritePair (char* dst, uint16_t pair) {
+    std::memcpy (dst, &pair, sizeof (pair));
+}
+
+/** Writes value to text as unsigned decimal ASCII digits.
+    The digit count is picked by testing the high bits of value; digits are
+    then emitted two at a time from a lookup table. No NUL terminator is
+    written, but every size check keeps one byte free before text_end so the
+    caller can store one at the returned address.
+    @param  value    The number to print.
+    @param  text     Start of the output buffer.
+    @param  text_end End of the output buffer (exclusive bound).
+    @return Pointer to one past the last digit written, or nullptr if text
+            is null or the digits plus one spare byte do not fit. */
+char* Print (uint32_t value, char* text, char* text_end) {
+
+    // Lookup table for powers of 10.
+    static const uint32_t k10ToThe[]{
+        1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+        1000000000, ~(uint32_t)0 };
+
+    /** Lookup table of ASCII char pairs for 00, 01, ..., 99.
+        To convert this algorithm to big-endian, flip the digit pair bytes.
+    */
+    static const uint16_t kDigits00To99[100] = {
+        0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830,
+        0x3930, 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731,
+        0x3831, 0x3931, 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632,
+        0x3732, 0x3832, 0x3932, 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533,
+        0x3633, 0x3733, 0x3833, 0x3933, 0x3034, 0x3134, 0x3234, 0x3334, 0x3434,
+        0x3534, 0x3634, 0x3734, 0x3834, 0x3934, 0x3035, 0x3135, 0x3235, 0x3335,
+        0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, 0x3036, 0x3136, 0x3236,
+        0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, 0x3037, 0x3137,
+        0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, 0x3038,
+        0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
+        0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839,
+        0x3939, };
+
+    if (!text) {
+        return nullptr;
+    }
+    if (text >= text_end) {
+        return nullptr;
+    }
+
+    char digit;
+    uint32_t scalar;
+    uint16_t digits1and2,
+             digits3and4,
+             digits5and6,
+             digits7and8;
+    uint32_t comparator;
+
+    #if MAJOR_SEAM == 1 && MINOR_SEAM == 1
+    // Fill the buffer with 'x' so stale bytes are visible while debugging.
+    for (int i = 0; i <= 21; ++i) {
+        *(text + i) = 'x';
+    }
+    *(text + 21) = 0;
+    char* begin = text;
+    char buffer[256];
+    #endif
+
+    if (value < 10) {
+        PRINTF ("\n Range:[0, 9] length:1 ")
+        if (text + 1 >= text_end) {
+            return nullptr;
+        }
+        *text++ = '0' + (char)value;
+        PRINT_PRINTED
+        return text;
+    }
+    if (value < 100) {
+        PRINTF ("\n Range:[10, 99] length:2 ")
+        if (text + 2 >= text_end) {
+            return nullptr;
+        }
+        WritePair (text, kDigits00To99[value]);
+        PRINT_PRINTED
+        return text + 2;
+    }
+    if (value >> 14) {
+        if (value >> 27) {
+            if (value >> 30) {
+                PRINTF ("\n Range:[1073741824, 4294967295] length:10")
+                Print10:
+                if (text + 10 >= text_end) {
+                    return nullptr;
+                }
+                comparator = 100000000;
+                digits1and2 = (uint16_t)(value / comparator);
+                PRINTF ("\n digits1and2:%u", digits1and2)
+                value -= digits1and2 * comparator;
+                WritePair (text, kDigits00To99[digits1and2]);
+                PRINT_PRINTED
+                text += 2;
+                goto Print8;
+            }
+            else {
+                comparator = 1000000000;
+                if (value >= comparator) {
+                    PRINTF ("\n Range:[100000000, 1073741823] length:10")
+                    goto Print10;
+                }
+                PRINTF ("\n Range:[134217727, 999999999] length:9")
+                if (text + 9 >= text_end) {
+                    return nullptr;
+                }
+                comparator = 100000000;
+                digit = (char)(value / comparator);
+                *text++ = digit + '0';
+                PRINT_PRINTED
+                value -= comparator * digit;
+                goto Print8;
+            }
+        }
+        else if (value >> 24) {
+            comparator = k10ToThe[8];
+            if (value >= comparator) {
+                PRINTF ("\n Range:[100000000, 134217728] length:9")
+                if (text + 9 >= text_end) {
+                    return nullptr;
+                }
+                *text++ = '1';
+                PRINT_PRINTED
+                value -= comparator;
+            }
+            PRINTF ("\n Range:[16777216, 9999999] length:8")
+            if (text + 8 >= text_end) {
+                return nullptr;
+            }
+            Print8:
+            PRINTF ("\n Print8:")
+            scalar = 10000;
+            digits5and6 = (uint16_t)(value / scalar);
+            digits1and2 = (uint16_t)(value - scalar * digits5and6);
+            digits7and8 = digits5and6 / 100;
+            digits3and4 = digits1and2 / 100;
+            digits5and6 -= 100 * digits7and8;
+            digits1and2 -= 100 * digits3and4;
+            WritePair (text + 6, kDigits00To99[digits1and2]);
+            PRINT_PRINTED
+            WritePair (text + 4, kDigits00To99[digits3and4]);
+            PRINT_PRINTED
+            WritePair (text + 2, kDigits00To99[digits5and6]);
+            PRINT_PRINTED
+            WritePair (text, kDigits00To99[digits7and8]);
+            PRINT_PRINTED
+            return text + 8;
+        }
+        else if (value >> 20) {
+            comparator = 10000000;
+            if (value >= comparator) {
+                PRINTF ("\n Range:[10000000, 16777215] length:8")
+                if (text + 8 >= text_end) {
+                    return nullptr;
+                }
+                *text++ = '1';
+                PRINT_PRINTED
+                value -= comparator;
+            }
+            else {
+                PRINTF ("\n Range:[1048576, 9999999] length:7")
+                if (text + 7 >= text_end) {
+                    return nullptr;
+                }
+            }
+            scalar = 10000;
+            digits5and6 = (uint16_t)(value / scalar);
+            digits1and2 = (uint16_t)(value - scalar * digits5and6);
+            digits7and8 = digits5and6 / 100;
+            digits3and4 = digits1and2 / 100;
+            digits5and6 -= 100 * digits7and8;
+            digits1and2 -= 100 * digits3and4;
+            WritePair (text + 5, kDigits00To99[digits1and2]);
+            PRINT_PRINTED
+            WritePair (text + 3, kDigits00To99[digits3and4]);
+            PRINT_PRINTED
+            WritePair (text + 1, kDigits00To99[digits5and6]);
+            PRINT_PRINTED
+            *text = (char)digits7and8 + '0';
+            return text + 7;
+        }
+        else if (value >> 17) {
+            comparator = 1000000;
+            if (value >= comparator) {
+                PRINTF ("\n Range:[100000, 1048575] length:7")
+                if (text + 7 >= text_end) {
+                    return nullptr;
+                }
+                *text++ = '1';
+                PRINT_PRINTED
+                value -= comparator;
+            }
+            else {
+                PRINTF ("\n Range:[131072, 999999] length:6")
+                if (text + 6 >= text_end) {
+                    return nullptr;
+                }
+            }
+            Print6:
+            // value < 1000000 here, so it splits into exactly three pairs.
+            scalar = 10000;
+            digits5and6 = (uint16_t)(value / scalar);
+            digits1and2 = (uint16_t)(value - scalar * digits5and6);
+            digits3and4 = digits1and2 / 100;
+            digits1and2 -= 100 * digits3and4;
+            WritePair (text + 4, kDigits00To99[digits1and2]);
+            PRINT_PRINTED
+            WritePair (text + 2, kDigits00To99[digits3and4]);
+            PRINT_PRINTED
+            WritePair (text, kDigits00To99[digits5and6]);
+            PRINT_PRINTED
+            return text + 6;
+        }
+        else { // (value >> 14)
+            if (value >= 100000) {
+                PRINTF ("\n Range:[65536, 131071] length:6")
+                // Print6 has no size check of its own, so test here first.
+                if (text + 6 >= text_end) {
+                    return nullptr;
+                }
+                goto Print6;
+            }
+            PRINTF ("\n Range:[10000, 65535] length:5")
+            if (text + 5 >= text_end) {
+                return nullptr;
+            }
+            digits5and6 = 10000;
+            digit = (char)(value / digits5and6);
+            value -= digits5and6 * digit;
+            *text = digit + '0';
+            PRINT_PRINTED
+            digits1and2 = (uint16_t)value;
+            digits5and6 = 100;
+            digits3and4 = digits1and2 / digits5and6;
+            digits1and2 -= digits3and4 * digits5and6;
+            WritePair (text + 1, kDigits00To99[digits3and4]);
+            PRINT_PRINTED
+            PRINTF ("\n digits1and2:%u", digits1and2)
+            WritePair (text + 3, kDigits00To99[digits1and2]);
+            PRINT_PRINTED
+            return text + 5;
+        }
+    }
+    digits1and2 = (uint16_t)value;
+    if (value >> 10) {
+        digits5and6 = 10000;
+        if (digits1and2 >= digits5and6) {
+            if (text + 5 >= text_end) {
+                return nullptr;
+            }
+            PRINTF ("\n Range:[10000, 16383] length:5")
+            *text++ = '1';
+            PRINT_PRINTED
+            digits1and2 -= digits5and6;
+        }
+        else {
+            PRINTF ("\n Range:[1024, 9999] length:4")
+            if (text + 4 >= text_end) {
+                return nullptr;
+            }
+        }
+        digits5and6 = 100;
+        digits3and4 = digits1and2 / digits5and6;
+        digits1and2 -= digits3and4 * digits5and6;
+        WritePair (text, kDigits00To99[digits3and4]);
+        PRINT_PRINTED
+        WritePair (text + 2, kDigits00To99[digits1and2]);
+        PRINT_PRINTED
+        return text + 4;
+    }
+    else {
+        digits3and4 = 1000;
+        if (digits1and2 >= digits3and4) {
+            PRINTF ("\n Range:[1000, 1023] length:4")
+            if (text + 4 >= text_end) {
+                return nullptr;
+            }
+            digits1and2 -= digits3and4;
+            WritePair (text + 2, kDigits00To99[digits1and2]);
+            PRINT_PRINTED
+            // "10" as a little-endian pair: '1' in the low byte.
+            WritePair (text, (uint16_t)(((uint16_t)'1') | (((uint16_t)'0') << 8)));
+            PRINT_PRINTED
+            return text + 4;
+        }
+        PRINTF ("\n Range:[100, 999] length:3")
+        // Only three digits follow, so only three (plus one spare) must fit.
+        if (text + 3 >= text_end) {
+            return nullptr;
+        }
+        digits1and2 = (uint16_t)value;
+        digits3and4 = 100;
+        digit = (char)(digits1and2 / digits3and4);
+        digits1and2 -= digit * digits3and4;
+        *text = digit + '0';
+        PRINT_PRINTED
+        WritePair (text + 1, kDigits00To99[digits1and2]);
+        PRINT_PRINTED
+        return text + 3;
+    }
+}
+
+} //< namespace kabuki_toolkit
+#undef PRINTF
+#undef PUTCHAR
+#undef PRINT_PRINTED
+#undef PRINT_BINARY
+#undef PRINT_BINARY_TABLE
+#endif //< MAJOR_SEAM >= 1 && MINOR_SEAM >= 1
+
+
+/** u32toa adapter for the benchmark harness.
+    Print() does not terminate its output, so the NUL is stored here. The
+    bound of 12 bytes holds the 10 digits of UINT32_MAX, the terminator and
+    one byte of slack; b is assumed to be at least that large. */
+void Kabuki_Toolkitu_u32toa(uint32_t i, char* b)
+{
+    char* end = kabuki_toolkit::Print(i, b, b + 12);
+    if (end)
+        *end = '\0';
+}
+
+#include "test.h"
+
+static Test Kabuki_Toolkit("Kabuki Toolkit", Kabuki_Toolkitu_u32toa, NULL, NULL, NULL);
diff --git a/src/main.cpp b/src/main.cpp
index 4867e2d..9aef6f1 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -71,6 +71,8 @@ static void VerifyValue(T value, void(*f)(T, char*), void(*g)(T, char*), const c
 
 template <typename T>
 static void Verify(void(*f)(T, char*), void(*g)(T, char*), const char* fname, const char* gname) {
+    if (!f || !g)
+        return;
     printf("Verifying %s = %s ... ", fname, gname);
 
     // Boundary cases
@@ -229,11 +231,12 @@ void BenchRandom(void(*f)(T, char*), const char* type, const char* fname, FILE*
 
 template <typename T>
 void Bench(void(*f)(T, char*), const char* type, const char* fname, FILE* fp) {
+    if (!f)
+        return;
     BenchSequential(f, type, fname, fp);
     BenchRandom(f, type, fname, fp);
 }
 
-
 void BenchAll() {
     // Try to write to /result path, where template.php exists
     FILE *fp;
diff --git a/src/ramanawithu.cpp b/src/ramanawithu.cpp
new file mode 100644
index 0000000..db31127
--- /dev/null
+++ b/src/ramanawithu.cpp
@@ -0,0 +1,167 @@
+
+// source: https://github.com/ramanawithu/fast_int_to_string/blob/master/fast_int_to_string.hpp
+
+#ifndef _FAST_INT_TO_STRING_
+#define _FAST_INT_TO_STRING_
+
+#include <stdint.h>
+#include <string.h>
+#include <string>
+
+// An argument of second signature is pointer type
+#define FILE_SCOPE static
+
+// Matrix of precomputed ASCII form of digits
+static
+const char digits_in_ascii[] = {
+    "00010203040506070809"
+    "10111213141516171819"
+    "20212223242526272829"
+    "30313233343536373839"
+    "40414243444546474849"
+    "50515253545556575859"
+    "60616263646566676869"
+    "70717273747576777879"
+    "80818283848586878889"
+    "90919293949596979899"
+};
+
+// Wrapper over the compiler intrinsic to avoid its forbidden case
+// (x = 0 is not expected by __builtin_clzll).
+// Returns the number of leading zeros in the binary representation of x,
+// or 64 when x is 0. Compilers without the builtin use a shift loop, so the
+// file also builds for the x32 and Visual Studio configurations.
+FILE_SCOPE
+inline
+uint32_t count_leading_zeros(uint64_t x) {
+    if (0 == x)
+        return 64;
+#if defined(__GNUC__) || defined(__clang__)
+    return (uint32_t)__builtin_clzll(x);
+#else
+    uint32_t zeros = 0;
+    while (0 == (x & 0x8000000000000000ULL)) {
+        x <<= 1;
+        ++zeros;
+    }
+    return zeros;
+#endif
+}
+
+// Returns floor(log10(x)), i.e. the number of decimal digits of 'x' minus one.
+// Returns 0 for x == 0, which has no leading-zero table entry.
+// Based on number of leading zeros, we index into two different tables and one comparison operation.
+// Reference: Hacker's Delight
+FILE_SCOPE
+inline
+uint32_t log10_characterstic(uint64_t x) {
+    static const unsigned char clz_to_no_of_decimal_digits[] = {
+        //  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15 <----- Index into this array
+           19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15,
+
+        // 16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
+           15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10,
+
+        // 32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47
+           10, 10,  9,  9,  9,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  5,
+
+        // 48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63
+            5,  5,  4,  4,  4,  4,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1
+    };
+
+    static const uint64_t power_of_10[] = {
+        // 0    1     2      3       4        5         6          7           8
+        1ULL, 10ULL, 100ULL, 1000ULL, 10000ULL, 100000ULL, 1000000ULL, 10000000ULL, 100000000ULL,
+        // 9             10              11               12
+        1000000000ULL, 10000000000ULL, 100000000000ULL, 1000000000000ULL,
+        // 13               14                  15                   16
+        10000000000000ULL, 100000000000000ULL, 1000000000000000ULL, 10000000000000000ULL,
+        // 17                   18                      19
+        100000000000000000ULL, 1000000000000000000ULL, 10000000000000000000ULL
+    };
+
+    if (0 == x)
+        return 0;
+
+    uint32_t y = clz_to_no_of_decimal_digits[count_leading_zeros(x)];
+    if (x < power_of_10[y]) // Can be avoidable?
+        y -= 1;
+    return y;
+}
+
+/* Few useful links, and blog posts behind the inspiration.
+ *
+ * Andrei FB note, next link is his presentation
+ * https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
+ * http://www.slideshare.net/andreialexandrescu1/three-optimization-tips-for-c-15708507
+ *
+ * Little deviation from the technique explained in the above blog post,
+ * Munch two digits at a time by copying a two-byte pair from the lookup table.
+ * Use bit hack along with lookup to find number of digits in decimal representation of input.
+*/
+
+// Writes the decimal form of 'val' to 'p' followed by a terminating '\0' and
+// returns the number of digits written (the terminator is not counted).
+// 'p' is assumed to have room for the digits plus the terminator.
+// Useful if string data is packed into a 'Slice'
+FILE_SCOPE
+uint32_t fast_uint64_to_string(uint64_t val, char *p) {
+    if (0 == val) {
+        p[0] = '0';
+        p[1] = '\0';
+
+        return 1;
+    }
+
+    const uint32_t size = log10_characterstic(val);
+    p[size+1] = '\0';
+
+    p += size;
+    while (val >= 10) {
+        // memcpy instead of a uint16_t* cast: 'p - 1' may be unaligned.
+        memcpy(p - 1, digits_in_ascii + 2 * (val % 100), 2);
+        val /= 100;
+        p -= 2;
+    }
+
+    // Residue: the leading digit of an odd-length number
+    if (val)
+        *p = (char)('0' + val);
+
+    // 'size' is the digit count minus one; report the length, as the zero case does.
+    return size + 1;
+}
+
+// Returns the decimal form of 'val' as a std::string.
+FILE_SCOPE
+std::string fast_uint64_to_string(uint64_t val) {
+    if (0 == val)
+        return std::string("0");
+
+    const uint32_t size = log10_characterstic(val);
+    std::string s(size+1, '\0');
+    char *p = &s[0];
+
+    p += size;
+    while (val >= 10) {
+        memcpy(p - 1, digits_in_ascii + 2 * (val % 100), 2);
+        val /= 100;
+        p -= 2;
+    }
+
+    // Residue
+    if (val)
+        *p = (char)('0' + val);
+
+    // Returned by name so the compiler can elide the copy.
+    return s;
+}
+
+#endif // _FAST_INT_TO_STRING_
+
+#include "test.h"
+void u64toa_ramanawithu(uint64_t i, char* b) {
+    fast_uint64_to_string(i, b);
+}
+
+static Test ramanawithu_test("ramanawithu", NULL, NULL, u64toa_ramanawithu, NULL);