From f1dd3263b8db6183f032a6e14865ab1f31455b5c Mon Sep 17 00:00:00 2001 From: Quentin Chateau Date: Mon, 7 Oct 2019 22:33:13 +0200 Subject: [PATCH] segfault fix, perf improvement Fixed segfault in byteswap with incorect usage Improved byteswap performance with string formats Compile with O3 when available Fixed NDEBUG define --- README.md | 38 ++++++++++---------- cbitstruct/_cbitstruct.c | 56 +++++++++++++++-------------- cbitstruct/tests/test_cornercase.py | 1 + setup.py | 7 ++-- 4 files changed, 55 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index c465887..c560c4b 100644 --- a/README.md +++ b/README.md @@ -41,25 +41,25 @@ The script available in `tests/test_perf.py` measures performance comparing to t Here are the result "on my machine" (Ubuntu in Virtualbox on a laptop): ``` -byteswap list of int | x 8.204 ( 9.208us -> 1.122us) -byteswap str | x 6.433 ( 9.689us -> 1.506us) -calcsize | x149.423 ( 61.967us -> 0.415us) -compiled pack | x 43.227 ( 34.758us -> 0.804us) -compiled pack_dict | x 26.490 ( 34.951us -> 1.319us) -compiled pack_into | x 32.017 ( 39.522us -> 1.234us) -compiled pack_into_dict | x 26.817 ( 38.984us -> 1.454us) -compiled unpack | x 34.454 ( 31.814us -> 0.923us) -compiled unpack_dict | x 23.534 ( 34.071us -> 1.448us) -compiled unpack_from | x 27.170 ( 31.884us -> 1.174us) -compiled unpack_from_dict | x 22.600 ( 33.927us -> 1.501us) -pack | x 78.314 ( 105.593us -> 1.348us) -pack_dict | x 52.916 ( 106.748us -> 2.017us) -pack_into | x 82.233 ( 119.950us -> 1.459us) -pack_into_dict | x 45.214 ( 111.338us -> 2.462us) -unpack | x 82.712 ( 93.686us -> 1.133us) -unpack_dict | x 41.064 ( 91.473us -> 2.228us) -unpack_from | x 81.678 ( 95.729us -> 1.172us) -unpack_from_dict | x 40.379 ( 90.430us -> 2.240us) +byteswap list of int | x 8.779 ( 8.638us -> 0.984us) +byteswap str | x 17.466 ( 9.158us -> 0.524us) +calcsize | x139.330 ( 61.060us -> 0.438us) +compiled pack | x 47.389 ( 35.968us -> 0.759us) +compiled pack_dict | x 27.184 ( 34.588us -> 1.272us) +compiled pack_into | x 32.037 ( 38.650us -> 1.206us) +compiled pack_into_dict | x 27.343 ( 37.718us -> 1.379us) +compiled unpack | x 33.928 ( 31.278us -> 0.922us) +compiled unpack_dict | x 21.627 ( 31.597us -> 1.461us) +compiled unpack_from | x 30.622 ( 29.977us -> 0.979us) +compiled unpack_from_dict | x 20.479 ( 30.936us -> 1.511us) +pack | x 77.003 ( 103.030us -> 1.338us) +pack_dict | x 53.254 ( 103.255us -> 1.939us) +pack_into | x 82.829 ( 119.373us -> 1.441us) +pack_into_dict | x 52.173 ( 108.135us -> 2.073us) +unpack | x 78.459 ( 91.896us -> 1.171us) +unpack_dict | x 40.287 ( 89.300us -> 2.217us) +unpack_from | x 77.027 ( 91.202us -> 1.184us) +unpack_from_dict | x 39.467 ( 88.043us -> 2.231us) ``` *Disclaimer:* these results may and will vary largely depending on the number of elements and types you pack/unpack. This script is provided as-is, and I will gladly accept an improved script providing more reliable results. diff --git a/cbitstruct/_cbitstruct.c b/cbitstruct/_cbitstruct.c index c9230d8..78648ef 100644 --- a/cbitstruct/_cbitstruct.c +++ b/cbitstruct/_cbitstruct.c @@ -321,7 +321,7 @@ static void c_pack( int nbytes = (desc->bits + 7) / 8; int padding = nbytes * 8 - desc->bits; #if PY_LITTLE_ENDIAN - assert(nbytes <= sizeof(data)); + assert(nbytes <= (int)sizeof(data)); c_byteswitch((uint8_t*)&data, nbytes); #endif data >>= padding; @@ -410,7 +410,7 @@ static void c_unpack( int padding = nbytes * 8 - desc->bits; data <<= padding; #if PY_LITTLE_ENDIAN - assert(nbytes <= sizeof(data)); + assert(nbytes <= (int)sizeof(data)); c_byteswitch((uint8_t*)&data, nbytes); #endif } @@ -456,7 +456,7 @@ static bool python_to_parsed_elements( Py_ssize_t data_size, CompiledFormat fmt) { - assert(data_size >= fmt.ndescs); + assert(data_size >= fmt.ndescs - fmt.npadding); int n = 0; for (int i = 0; i < fmt.ndescs; ++i) { @@ -676,8 +676,6 @@ static PyObject* CompiledFormat_pack_raw( PyObject** data, Py_ssize_t n_data) { - assert(PyTuple_Check(args)); - ParsedElement elements_stack[SMALL_FORMAT_OPTIMIZATION]; ParsedElement* elements = elements_stack; bool use_stack = compiled_fmt.ndescs <= SMALL_FORMAT_OPTIMIZATION; @@ -1170,8 +1168,6 @@ CompiledFormatDict_pack_into_impl(PyCompiledFormatDictObject *self, /*[clinic end generated code: output=ee246de261e9c699 input=290a9a4a3e3ed942]*/ // clang-format on { - assert(PyTuple_Check(args)); - PyObject* return_value = NULL; Py_ssize_t nnames = PySequence_Fast_GET_SIZE(self->names); @@ -1461,8 +1457,6 @@ pack_into_dict_impl(PyObject *module, const char *fmt, PyObject *names, /*[clinic end generated code: output=619b415fc187011b input=e72dec46484ec66f]*/ // clang-format on { - assert(PyTuple_Check(args)); - PyObject* return_value = NULL; PyCompiledFormatDictObject self; memset(&self, 0, sizeof(self)); @@ -1692,28 +1686,38 @@ byteswap_impl(PyObject *module, PyObject *fmt, Py_buffer *data, goto exit; } - int sum = 0; - for (int i = 0; i < length; ++i) { - PyObject* item = PySequence_GetItem(fmt, i); - if (!item) { + long sum = 0; + if (PyUnicode_Check(fmt)) { + const char* cfmt = PyUnicode_AsUTF8(fmt); + if (!cfmt) { goto exit; } - long len = -1; - if (PyUnicode_Check(item)) { - PyObject* pylong = PyLong_FromUnicodeObject(item, 10); - len = PyLong_AsLong(pylong); - Py_DECREF(pylong); - } - else { - len = PyLong_AsLong(item); + for (int i = 0; i < length; ++i) { + int len = cfmt[i] - '0'; + if (len < 0 || len > 9) { + PyErr_SetString( + PyExc_ValueError, "bad value in byteswap format"); + goto exit; + } + sum += len; + count_iter[i] = len; } + } + else { + for (int i = 0; i < length; ++i) { + PyObject* item = PySequence_GetItem(fmt, i); + if (!item) { + goto exit; + } - sum += len; - count_iter[i] = len; - Py_DECREF(item); - if (len < 0 || PyErr_Occurred()) { - goto exit; + long len = PyLong_AsLong(item); + sum += len; + count_iter[i] = len; + Py_DECREF(item); + if (len == -1 && PyErr_Occurred()) { + goto exit; + } } } diff --git a/cbitstruct/tests/test_cornercase.py b/cbitstruct/tests/test_cornercase.py index 5588ef5..3219ada 100644 --- a/cbitstruct/tests/test_cornercase.py +++ b/cbitstruct/tests/test_cornercase.py @@ -117,6 +117,7 @@ def test_byteswap_bad_args(self): self.assertRaises(TypeError, cbitstruct.byteswap, None, b"\xff") self.assertRaises(TypeError, cbitstruct.byteswap, "23") self.assertRaises(TypeError, cbitstruct.byteswap) + self.assertRaises(ValueError, cbitstruct.byteswap, "\x02\x02", b"z") def test_calcsize_bad_args(self): self.assertRaises(TypeError, cbitstruct.calcsize, "g32") diff --git a/setup.py b/setup.py index a0736b4..a4eae69 100644 --- a/setup.py +++ b/setup.py @@ -6,17 +6,19 @@ extra_compile_args = [] extra_link_args = [] +undef_macros = [] if sys.platform == "win32": extra_compile_args += [] else: - extra_compile_args += ["-std=c11", "-Wall", "-Werror"] + extra_compile_args += ["-std=c11", "-Wall", "-Werror", "-O3"] if os.environ.get("COVERAGE"): extra_compile_args += ["-g", "-O0", "-fprofile-arcs", "-ftest-coverage"] extra_link_args += ["-fprofile-arcs"] + undef_macros += ["NDEBUG"] with open("README.md", "r") as fh: @@ -25,7 +27,7 @@ setup( name="cbitstruct", - version="1.0.2", + version="1.0.3", author="Quentin CHATEAU", author_email="quentin.chateau@gmail.com", license="GPLv3", @@ -57,6 +59,7 @@ extra_link_args=extra_link_args, sources=["cbitstruct/_cbitstruct.c"], include_dirs=["cbitstruct/"], + undef_macros=undef_macros, ) ], packages=["cbitstruct", "cbitstruct.tests"],