diff --git a/Makefile b/Makefile
index 51f7af7..9a1b756 100644
--- a/Makefile
+++ b/Makefile
@@ -11,10 +11,15 @@ all : bin/itoa_release_x64_gmake
 	cd bin && ./itoa_release_x64_gmake $(ARGS)
 	cd result && make -f makefile
 
+debug: CONFIG = debug_x64
+debug: bin/itoa_debug_x64_gmake # debug twin of 'all'; the CONFIG above also applies while this prerequisite is built
+	cd bin && ./itoa_debug_x64_gmake $(ARGS)
+	cd result && make -f makefile
+
 bin/itoa_%_gmake : build/gmake/itoa.make
 	cd build/gmake && make -f itoa.make config=$(CONFIG) verbose=$(VERBOSE)
 
-clean : 
+clean :
 	rm -rf build/gmake
 	rm -rf build/vs2005
 	rm -rf build/vs2008
@@ -27,8 +32,7 @@ clean :
 setup :
 	cd build && ./premake.sh && ./machine.sh
 	
-	
-build/gmake/itoa.make : setup	
+build/gmake/itoa.make : setup
 	
 clean_status :
 	@echo "Filesystem status according to GIT"
diff --git a/build/premake5.lua b/build/premake5.lua
index d6a4832..3f003bf 100644
--- a/build/premake5.lua
+++ b/build/premake5.lua
@@ -5,7 +5,7 @@ function setTargetObjDir(outDir)
 end
 
 solution "benchmark"
-	configurations { "release" }
+	configurations { "debug", "release" }
 	platforms { "x32", "x64" }
 
 	location ("./" .. (_ACTION or ""))
@@ -22,6 +22,9 @@ solution "benchmark"
 		defines { "NDEBUG" }
 		flags { "Optimize" }
 
+	configuration "debug"
+		flags { "Symbols" }
+
 	configuration "vs*"
 		defines { "_CRT_SECURE_NO_WARNINGS" }
 		
diff --git a/src/Kabuki_Toolkit.cpp b/src/Kabuki_Toolkit.cpp
new file mode 100644
index 0000000..fd093fd
--- /dev/null
+++ b/src/Kabuki_Toolkit.cpp
@@ -0,0 +1,367 @@
+// source https://stackoverflow.com/a/48779770/21617688
+
+/** Kabuki Toolkit
+    @version 0.x
+    @file    ~/source/crabs/print_itos.cc
+    @author  Cale McCollough <cale.mccollough@gmail.com>
+    @license Copyright (C) 2017-2018 Cale McCollough <calemccollough@gmail.com>;
+             All right reserved (R). Licensed under the Apache License, Version 
+             2.0 (the "License"); you may not use this file except in 
+             compliance with the License. You may obtain a copy of the License 
+             [here](http://www.apache.org/licenses/LICENSE-2.0). Unless 
+             required by applicable law or agreed to in writing, software 
+             distributed under the License is distributed on an "AS IS" BASIS, 
+             WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
+             implied. See the License for the specific language governing 
+             permissions and limitations under the License.
+*/
+
+#define MAJOR_SEAM 1
+#define MINOR_SEAM 2
+
+#include <cstring>
+#include <cstdint>
+#include <iostream>
+// #include <stdafx.h>
+// #include <stdafx.h>
+// #include "print_itos.h"
+
+#if MAJOR_SEAM >= 1 && MINOR_SEAM >= 1
+
+#if MAJOR_SEAM == 1 && MINOR_SEAM == 1
+#define DEBUG 1
+// Trace macros for seam 1.1; PRINTF forwards every argument so format-only calls compile.
+#define PRINTF(...) printf(__VA_ARGS__);
+#define PUTCHAR(c) putchar(c);
+#define PRINT_PRINTED\
+    snprintf (buffer, 24, "%u", value); *text_end = 0;\
+    printf ("\n    Printed \"%s\" leaving value:\"%s\":%u",\
+            begin, buffer, (unsigned)strlen (buffer));
+#define PRINT_BINARY PrintBinary (value);
+#define PRINT_BINARY_TABLE PrintBinaryTable (value);
+#else
+#define PRINTF(...)
+#define PUTCHAR(c)
+#define PRINT_PRINTED
+#define PRINT_BINARY
+#define PRINT_BINARY_TABLE
+#endif
+
+namespace kabuki_toolkit {
+
+void PrintLine (char c) {  //< Emits '\n' and then 80 copies of c to std::cout.
+    std::cout << '\n';
+    int remaining = 80;
+    while (remaining-- > 0) std::cout << c;
+}
+
+/** Prints value as decimal ASCII digits starting at text.
+    A value of N digits is refused unless text + N < text_end, so one char
+    stays free after the digits; no terminator is written after them.
+    Digit pairs are stored as 16-bit words that may be unaligned.
+    @return Pointer to the char after the last digit, or nullptr on failure. */
+char* Print (uint32_t value, char* text, char* text_end) {
+    // Lookup table for powers of 10.
+    static const uint32_t k10ToThe[]{
+        1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+        1000000000, ~(uint32_t)0 };
+
+    /** Lookup table of ASCII char pairs for 00, 01, ..., 99.
+        To convert this algorithm to big-endian, flip the digit pair bytes. */
+    static const uint16_t kDigits00To99[100] = {
+        0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830,
+        0x3930, 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731,
+        0x3831, 0x3931, 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632,
+        0x3732, 0x3832, 0x3932, 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533,
+        0x3633, 0x3733, 0x3833, 0x3933, 0x3034, 0x3134, 0x3234, 0x3334, 0x3434,
+        0x3534, 0x3634, 0x3734, 0x3834, 0x3934, 0x3035, 0x3135, 0x3235, 0x3335,
+        0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, 0x3036, 0x3136, 0x3236,
+        0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, 0x3037, 0x3137,
+        0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, 0x3038,
+        0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
+        0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839,
+        0x3939, };
+
+    if (!text || text >= text_end) {
+        return nullptr;
+    }
+
+    uint16_t* text16;
+    char      digit;
+    uint32_t  scalar;
+    uint16_t  digits1and2, digits3and4,
+              digits5and6, digits7and8;
+    uint32_t  comparator;
+
+    #if MAJOR_SEAM == 1 && MINOR_SEAM == 1
+    // Fill the buffer with 'x' characters for debug purposes.
+    for (int i = 0; i <= 21; ++i) {
+        *(text + i) = 'x';
+    }
+    *(text + 21) = 0;
+    char* begin = text;
+    char buffer[256];
+    #endif
+
+    if (value < 10) {
+        PRINTF ("\n    Range:[0, 9] length:1 ")
+        if (text + 1 >= text_end) {
+            return nullptr;
+        }
+        *text++ = '0' + (char)value;
+        PRINT_PRINTED
+        return text;
+    }
+    if (value < 100) {
+        PRINTF ("\n    Range:[10, 99] length:2 ")
+        if (text + 2 >= text_end) {
+            return nullptr;
+        }
+        *reinterpret_cast<uint16_t*> (text) = kDigits00To99[value];
+        PRINT_PRINTED
+        return text + 2;
+    }
+    if (value >> 14) {
+        if (value >> 27) {
+            if (value >> 30) {
+                PRINTF ("\n    Range:[1073741824, 4294967295] length:10")
+                Print10:
+                if (text + 10 >= text_end) {
+                    return nullptr;
+                }
+                comparator = 100000000;
+                digits1and2 = (uint16_t)(value / comparator);
+                PRINTF ("\n    digits1and2:%u", digits1and2)
+                value -= digits1and2 * comparator;
+                *reinterpret_cast<uint16_t*> (text) = kDigits00To99[digits1and2];
+                PRINT_PRINTED
+                text += 2;
+                goto Print8;
+            }
+            else {
+                comparator = 1000000000;
+                if (value >= comparator) {
+                    PRINTF ("\n    Range:[100000000, 1073741823] length:10")
+                    goto Print10;
+                }
+                PRINTF ("\n    Range:[134217727, 999999999] length:9")
+                if (text + 9 >= text_end) {
+                    return nullptr;
+                }
+                comparator = 100000000;
+                digit = (char)(value / comparator);
+                *text++ = digit + '0';
+                PRINT_PRINTED
+                value -= comparator * digit;
+                goto Print8;
+            }
+        }
+        else if (value >> 24) {
+            comparator = k10ToThe[8];
+            if (value >= comparator) {
+                PRINTF ("\n    Range:[100000000, 134217728] length:9")
+                if (text + 9 >= text_end) {
+                    return nullptr;
+                }
+                *text++ = '1';
+                PRINT_PRINTED
+                value -= comparator;
+            }
+            PRINTF ("\n    Range:[16777216, 9999999] length:8")
+            if (text + 8 >= text_end) {
+                return nullptr;
+            }
+            Print8:
+            PRINTF ("\n    Print8:")
+            scalar = 10000;
+            digits5and6 = (uint16_t)(value / scalar);
+            digits1and2 = value - scalar * digits5and6;
+            digits7and8 = digits5and6 / 100;
+            digits3and4 = digits1and2 / 100;
+            digits5and6 -= 100 * digits7and8;
+            digits1and2 -= 100 * digits3and4;
+            *reinterpret_cast<uint16_t*> (text + 6) = 
+                kDigits00To99[digits1and2];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text + 4) = 
+                kDigits00To99[digits3and4];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text + 2) = 
+                kDigits00To99[digits5and6];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text) = 
+                kDigits00To99[digits7and8];
+            PRINT_PRINTED
+            return text + 8;
+        }
+        else if (value >> 20) {
+            comparator = 10000000;
+            if (value >= comparator) {
+                PRINTF ("\n    Range:[10000000, 16777215] length:8")
+                if (text + 8 >= text_end) {
+                    return nullptr;
+                }
+                *text++ = '1';
+                PRINT_PRINTED
+                value -= comparator;
+            }
+            else {
+                PRINTF ("\n    Range:[1048576, 9999999] length:7")
+                if (text + 7 >= text_end) {
+                    return nullptr;
+                }
+            }
+            scalar = 10000;
+            digits5and6 = (uint16_t)(value / scalar);
+            digits1and2 = value - scalar * digits5and6;
+            digits7and8 = digits5and6 / 100;
+            digits3and4 = digits1and2 / 100;
+            digits5and6 -= 100 * digits7and8;
+            digits1and2 -= 100 * digits3and4;
+            *reinterpret_cast<uint16_t*> (text + 5) = 
+                kDigits00To99[digits1and2];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text + 3) = 
+                kDigits00To99[digits3and4];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text + 1) = 
+                kDigits00To99[digits5and6];
+            PRINT_PRINTED
+            *text = (char)digits7and8 + '0';
+            return text + 7;
+        }
+        else if (value >> 17) {
+            comparator = 1000000;
+            if (value >= comparator) {
+                PRINTF ("\n    Range:[100000, 1048575] length:7")
+                if (text + 7 >= text_end) {
+                    return nullptr;
+                }
+                *text++ = '1';
+                PRINT_PRINTED
+                value -= comparator;
+            }
+            else {
+                PRINTF ("\n    Range:[131072, 999999] length:6")
+                if (text + 6 >= text_end) {
+                    return nullptr;
+                }
+            }
+            Print6:
+            // Checked at the label so the jump from the branch below is bounded too.
+            if (text + 6 >= text_end) {
+                return nullptr;
+            }
+            scalar = 10000;
+            digits5and6 = (uint16_t)(value / scalar);
+            digits1and2 = value - scalar * digits5and6;
+            digits7and8 = digits5and6 / 100;
+            digits3and4 = digits1and2 / 100;
+            digits5and6 -= 100 * digits7and8;
+            digits1and2 -= 100 * digits3and4;
+            *reinterpret_cast<uint16_t*> (text + 4) = kDigits00To99[digits1and2];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text + 2) = kDigits00To99[digits3and4];
+            PRINT_PRINTED
+            *reinterpret_cast<uint16_t*> (text    ) = kDigits00To99[digits5and6];
+            PRINT_PRINTED
+            return text + 6;
+        }
+        else { // (value >> 14)
+            if (value >= 100000) {
+                PRINTF ("\n    Range:[65536, 131071] length:6")
+                goto Print6;
+            }
+            PRINTF ("\n    Range:[10000, 65535] length:5")
+            if (text + 5 >= text_end) {
+                return nullptr;
+            }
+            digits5and6 = 10000;
+            digit = (uint8_t)(value / digits5and6);
+            value -= digits5and6 * digit;
+            *text = digit + '0';
+            PRINT_PRINTED
+            digits1and2 = (uint16_t)value;
+            digits5and6 = 100;
+            digits3and4 = digits1and2 / digits5and6;
+            digits1and2 -= digits3and4 * digits5and6;
+            *reinterpret_cast<uint16_t*> (text + 1) = 
+                kDigits00To99[digits3and4];
+            PRINT_PRINTED
+                PRINTF ("\n    digits1and2:%u", digits1and2)
+            *reinterpret_cast<uint16_t*> (text + 3) = 
+                kDigits00To99[digits1and2];
+            PRINT_PRINTED
+            return text + 5;
+        }
+    }
+    digits1and2 = (uint16_t)value;
+    if (value >> 10) {
+        digits5and6 = 10000;
+        if (digits1and2 >= digits5and6) {
+            if (text + 5 >= text_end) {
+                return nullptr;
+            }
+            PRINTF ("\n    Range:[10000, 16383] length:5")
+            *text++ = '1';
+            PRINT_PRINTED
+            digits1and2 -= digits5and6;
+        }
+        else {
+            PRINTF ("\n    Range:[1024, 9999] length:4")
+            if (text + 4 >= text_end) {
+                return nullptr;
+            }
+        }
+        digits5and6 = 100;
+        digits3and4 = digits1and2 / digits5and6;
+        digits1and2 -= digits3and4 * digits5and6;
+        *reinterpret_cast<uint16_t*> (text    ) = kDigits00To99[digits3and4];
+        PRINT_PRINTED
+        *reinterpret_cast<uint16_t*> (text + 2) = kDigits00To99[digits1and2];
+        PRINT_PRINTED
+        return text + 4;
+    }
+    else {
+        digits3and4 = 1000;
+        // Demand room only for the digits printed: 4 for [1000, 1023], else 3.
+        if (text + (digits1and2 >= digits3and4 ? 4 : 3) >= text_end) {
+            return nullptr;
+        }
+        if (digits1and2 >= digits3and4) {
+            PRINTF ("\n    Range:[1000, 1023] length:4")
+            digits1and2 -= digits3and4;
+            text16 = reinterpret_cast<uint16_t*> (text + 2);
+            *text16-- = kDigits00To99[digits1and2];
+            PRINT_PRINTED
+            *text16 = (((uint16_t)'1') | (((uint16_t)'0') << 8));
+            PRINT_PRINTED
+            return text + 4;
+        }
+        PRINTF ("\n    Range:[100, 999] length:3")
+        digits1and2 = (uint16_t)value;
+        digits3and4 = 100;
+        digit = (char)(digits1and2 / digits3and4);
+        digits1and2 -= digit * digits3and4;
+        *text = digit + '0';
+        PRINT_PRINTED
+        *reinterpret_cast<uint16_t*> (text + 1) = kDigits00To99[digits1and2];
+        PRINT_PRINTED
+        return text + 3;
+    }
+}
+
+};       //< namespace kabuki_toolkit
+#undef  PRINTF
+#undef  PRINT_PRINTED
+#endif  //< MAJOR_SEAM >= 1 && MINOR_SEAM >= 1
+
+
+// Benchmark adapter: Print leaves the digits unterminated, so the NUL is added here.
+void Kabuki_Toolkitu_u32toa(uint32_t i, char* b) {
+    if (char* end = kabuki_toolkit::Print(i, b, b + 12)) *end = '\0';
+}
+
+#include "test.h"
+
+static Test Kabuki_Toolkit("Kabuki Toolkit", Kabuki_Toolkitu_u32toa, NULL, NULL, NULL);
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index 4867e2d..9aef6f1 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -71,6 +71,8 @@ static void VerifyValue(T value, void(*f)(T, char*), void(*g)(T, char*), const c
 
 template <typename T>
 static void Verify(void(*f)(T, char*), void(*g)(T, char*), const char* fname, const char* gname) {
+    if (!f || !g)
+        return;
     printf("Verifying %s = %s ... ", fname, gname);
 
     // Boundary cases
@@ -229,11 +231,12 @@ void BenchRandom(void(*f)(T, char*), const char* type, const char* fname, FILE*
 
 template <typename T>
 void Bench(void(*f)(T, char*), const char* type, const char* fname, FILE* fp) {
+    if (!f)  // no function supplied for this type: run neither benchmark
+        return;
     BenchSequential(f, type, fname, fp);
     BenchRandom(f, type, fname, fp);
 }
 
-
 void BenchAll() {
     // Try to write to /result path, where template.php exists
     FILE *fp;
diff --git a/src/ramanawithu.cpp b/src/ramanawithu.cpp
new file mode 100644
index 0000000..db31127
--- /dev/null
+++ b/src/ramanawithu.cpp
@@ -0,0 +1,149 @@
+
+// source: https://github.com/ramanawithu/fast_int_to_string/blob/master/fast_int_to_string.hpp
+
+#ifndef _FAST_INT_TO_STRING_
+#define _FAST_INT_TO_STRING_
+
+#include <string>
+#include <cstdint>
+
+// An argument of second signature is pointer type
+#define FILE_SCOPE static
+
+// Matrix of precomputed ASCII form of digits
+static
+const char digits_in_ascii[] = {
+    "00010203040506070809"
+    "10111213141516171819"
+    "20212223242526272829"
+    "30313233343536373839"
+    "40414243444546474849"
+    "50515253545556575859"
+    "60616263646566676869"
+    "70717273747576777879"
+    "80818283848586878889"
+    "90919293949596979899"
+};
+
+// Returns the number of leading zero bits in the 64-bit value x (64 for x == 0).
+// GCC documents __builtin_clzll(0) as undefined, so zero is answered directly.
+// Compilers without the builtin use a portable bit scan instead of failing to build.
+FILE_SCOPE inline
+uint32_t count_leading_zeros(uint64_t x) {
+    if (0 == x)
+        return 64;
+#if defined(__GNUC__) || defined(__clang__)
+    return static_cast<uint32_t>(__builtin_clzll(x));
+#else
+    uint32_t zeros = 0; // x != 0 here, so the scan below always reaches a set bit
+    for (uint64_t bit = 1ULL << 63; 0 == (x & bit); bit >>= 1) ++zeros;
+    return zeros;
+#endif
+}
+
+// Returns floor(log10(x)), i.e. one less than the number of decimal digits of 'x'.
+// The count of leading zeros picks a candidate from the first table and a single
+// comparison against the second table (powers of ten) corrects it if needed.
+// x == 0 yields 0, because its leading-zero count (64) is past the table's end.
+// Reference: Hacker's Delight
+FILE_SCOPE
+inline
+uint32_t log10_characterstic(uint64_t x) {
+    static const unsigned char clz_to_no_of_decimal_digits[] = {
+    //   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15     <----- Index into this array
+        19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15,
+    //  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31
+        15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10,
+    //  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+        10, 10,  9,  9,  9,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  5,
+    //  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63
+         5,  5,  4,  4,  4,  4,  3,  3,  3,  2,  2,  2,  1,  1,  1,  1
+    };
+
+    static const uint64_t power_of_10[] = {
+    //   0     1      2       3        4         5          6           7            8
+        1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
+    //            9             10              11               12
+        1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL,
+    //               13                 14                  15                   16
+        10000000000000UL, 100000000000000UL, 1000000000000000UL, 10000000000000000UL,
+    //                   17                     18                      19
+        100000000000000000UL, 1000000000000000000UL, 10000000000000000000UL
+    };
+
+    if (0 == x) // count_leading_zeros(0) is 64, one past the 64-entry table
+        return 0;
+    uint32_t y = clz_to_no_of_decimal_digits[count_leading_zeros(x)];
+    if (x < power_of_10[y]) // the candidate was one too high for this value
+        y -= 1;
+    return y;
+}
+
+/* Few useful links, and blog posts behind the inspiration.
+ *
+ * Andrei FB note, next link is his presentation
+ * https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
+ * http://www.slideshare.net/andreialexandrescu1/three-optimization-tips-for-c-15708507
+ *
+ * Small deviations from the technique explained in the above blog post:
+ *      Munch two digits at a time using natural 'short int' access.
+ *      Use a bit hack along with a lookup to find the number of digits in the decimal representation of the input.
+*/
+
+FILE_SCOPE
+// Writes the decimal text of 'val' plus a terminating '\0' into 'p' and returns
+// the number of digits written. Useful if string data is packed into a 'Slice'.
+// The caller must supply room for every digit and the terminator.
+uint32_t fast_uint64_to_string(uint64_t val, char *p) {
+    if (0 == val) {
+        p[0] = '0';
+        p[1] = '\0';
+        return 1;
+    }
+
+    const uint32_t size = log10_characterstic(val); // index of the last digit
+    p[size+1] = '\0';
+
+    p += size;
+    while (val >= 10) { // emit two digits per step, right to left
+        // Two-char copy via char_traits: no unaligned or type-punned 16-bit access.
+        std::char_traits<char>::copy(p - 1, digits_in_ascii + 2 * (val % 100), 2);
+        val /= 100;
+        p -= 2;
+    }
+    if (val) // an odd digit count leaves one leading digit
+        *p = static_cast<char>('0' + val);
+    return size + 1; // digit count, consistent with the 1 returned for zero
+}
+
+FILE_SCOPE
+// Returns the decimal text of 'val' as a std::string holding exactly its digits.
+std::string fast_uint64_to_string(uint64_t val) {
+    if (0 == val)
+        return std::string("0");
+
+    const uint32_t size = log10_characterstic(val); // index of the last digit
+    std::string s(size+1, '\0');
+    char *p = &s[0] + size; // mutable access to the string's own storage, from the end
+    while (val >= 10) { // emit two digits per step, right to left
+        // Two-char copy via char_traits: no unaligned or type-punned 16-bit access.
+        std::char_traits<char>::copy(p - 1, digits_in_ascii + 2 * (val % 100), 2);
+        val /= 100;
+        p -= 2;
+    }
+
+    // Residue: an odd digit count leaves one leading digit.
+    if (val)
+        *p = static_cast<char>('0' + val);
+    // Returning the local by name lets the compiler elide or move it.
+    return s;
+}
+
+#endif // _FAST_INT_TO_STRING_
+
+#include "test.h"
+void u64toa_ramanawithu(uint64_t i, char* b) {  // adapter: the returned count is not needed here
+    static_cast<void>(fast_uint64_to_string(i, b));
+}
+
+static Test ramanawithu_test("ramanawithu", NULL, NULL, u64toa_ramanawithu, NULL);