-
-
Notifications
You must be signed in to change notification settings - Fork 30.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-128213: fast path for bytes creation from list and tuple #128214
base: main
Are you sure you want to change the base?
Changes from all commits
6f699b5
18c8e4a
406fbdb
4912a05
56f802e
4e1e3e6
bf96d06
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` of integers. Benchmarks show that from a list with 1000000 random numbers the time to create a bytes object is reduced by around 31%, or 30% with 10000 numbers, or 27% with 100 numbers. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC, NEWS should not contain an empty line. |
||
Patch by Ben Hsing |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -6,6 +6,7 @@ | |||||||||
#include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat() | ||||||||||
#include "pycore_call.h" // _PyObject_CallNoArgs() | ||||||||||
#include "pycore_ceval.h" // _PyEval_GetBuiltin() | ||||||||||
#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST() | ||||||||||
#include "pycore_format.h" // F_LJUST | ||||||||||
#include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() | ||||||||||
#include "pycore_initconfig.h" // _PyStatus_OK() | ||||||||||
|
@@ -2810,82 +2811,46 @@ _PyBytes_FromBuffer(PyObject *x) | |||||||||
} | ||||||||||
|
||||||||||
static PyObject* | ||||||||||
_PyBytes_FromList(PyObject *x) | ||||||||||
_PyBytes_FromSequence(PyObject *x) | ||||||||||
{ | ||||||||||
Py_ssize_t i, size = PyList_GET_SIZE(x); | ||||||||||
Py_ssize_t value; | ||||||||||
char *str; | ||||||||||
PyObject *item; | ||||||||||
_PyBytesWriter writer; | ||||||||||
|
||||||||||
_PyBytesWriter_Init(&writer); | ||||||||||
str = _PyBytesWriter_Alloc(&writer, size); | ||||||||||
if (str == NULL) | ||||||||||
Py_ssize_t size = PySequence_Fast_GET_SIZE(x); | ||||||||||
PyObject *bytes = _PyBytes_FromSize(size, 0); | ||||||||||
if (bytes == NULL) { | ||||||||||
return NULL; | ||||||||||
writer.overallocate = 1; | ||||||||||
size = writer.allocated; | ||||||||||
|
||||||||||
for (i = 0; i < PyList_GET_SIZE(x); i++) { | ||||||||||
item = PyList_GET_ITEM(x, i); | ||||||||||
Py_INCREF(item); | ||||||||||
value = PyNumber_AsSsize_t(item, NULL); | ||||||||||
Py_DECREF(item); | ||||||||||
if (value == -1 && PyErr_Occurred()) | ||||||||||
goto error; | ||||||||||
|
||||||||||
if (value < 0 || value >= 256) { | ||||||||||
PyErr_SetString(PyExc_ValueError, | ||||||||||
"bytes must be in range(0, 256)"); | ||||||||||
goto error; | ||||||||||
} | ||||||||||
|
||||||||||
if (i >= size) { | ||||||||||
str = _PyBytesWriter_Resize(&writer, str, size+1); | ||||||||||
if (str == NULL) | ||||||||||
return NULL; | ||||||||||
size = writer.allocated; | ||||||||||
} | ||||||||||
*str++ = (char) value; | ||||||||||
} | ||||||||||
return _PyBytesWriter_Finish(&writer, str); | ||||||||||
|
||||||||||
error: | ||||||||||
_PyBytesWriter_Dealloc(&writer); | ||||||||||
return NULL; | ||||||||||
} | ||||||||||
|
||||||||||
static PyObject* | ||||||||||
_PyBytes_FromTuple(PyObject *x) | ||||||||||
{ | ||||||||||
PyObject *bytes; | ||||||||||
Py_ssize_t i, size = PyTuple_GET_SIZE(x); | ||||||||||
Py_ssize_t value; | ||||||||||
char *str; | ||||||||||
PyObject *item; | ||||||||||
|
||||||||||
bytes = PyBytes_FromStringAndSize(NULL, size); | ||||||||||
if (bytes == NULL) | ||||||||||
return NULL; | ||||||||||
str = ((PyBytesObject *)bytes)->ob_sval; | ||||||||||
|
||||||||||
for (i = 0; i < size; i++) { | ||||||||||
item = PyTuple_GET_ITEM(x, i); | ||||||||||
value = PyNumber_AsSsize_t(item, NULL); | ||||||||||
if (value == -1 && PyErr_Occurred()) | ||||||||||
char *str = PyBytes_AS_STRING(bytes); | ||||||||||
PyObject *const *items = PySequence_Fast_ITEMS(x); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we're going for performance, then we can do even better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But all we know is that it is either a list or a tuple, but we still don't know which of the two it is, so a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternatively, you can just duplicate the functions as we did previously. This is not really an issue IMO, and we couold use the fact that tuples are immutables for instance to avoid INCREF/DECREF values. |
||||||||||
Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(x); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to acquire the critical section before calling
Comment on lines
+2822
to
+2823
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
for (Py_ssize_t i = 0; i < size; i++) { | ||||||||||
if (!PyLong_Check(items[i])) { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (The previous code wasn't doing an exact check so we shouldn't change it) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, it wouldn't be a breaking change, just a performance loss for the (very niche!) set of cases that use special ints. I'm also slightly worried that non-exact ints might have some nasty side effects that we aren't anticipating here (e.g. can they mess with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They can mess with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is casting to bytes a common thing to do with numpy integers, or is that speculation? (I see your point, I'm just sort of gauging what the cost-benefit would be here.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd say yes, if we're considering serialization or introspection. I imagine that we can have such things when people are working with images because their arrays won't necessarily be pure Python lists but Nonetheless, if we want to fallback to a slow path for non-exact ints, benchmarks should show how performances are impacted (a simple |
||||||||||
Py_DECREF(bytes); | ||||||||||
/* Py_None as a fallback sentinel to the slow path */ | ||||||||||
bytes = Py_None; | ||||||||||
goto done; | ||||||||||
} | ||||||||||
Py_ssize_t value = PyNumber_AsSsize_t(items[i], NULL); | ||||||||||
if (value == -1 && PyErr_Occurred()) { | ||||||||||
goto error; | ||||||||||
|
||||||||||
} | ||||||||||
if (value < 0 || value >= 256) { | ||||||||||
PyErr_SetString(PyExc_ValueError, | ||||||||||
"bytes must be in range(0, 256)"); | ||||||||||
goto error; | ||||||||||
} | ||||||||||
*str++ = (char) value; | ||||||||||
} | ||||||||||
return bytes; | ||||||||||
|
||||||||||
goto done; | ||||||||||
error: | ||||||||||
Py_DECREF(bytes); | ||||||||||
return NULL; | ||||||||||
bytes = NULL; | ||||||||||
done: | ||||||||||
/* some C parsers require a label not to be at the end of a compound | ||||||||||
statement, which the ending macro of a critical section introduces, so | ||||||||||
we need an empty statement here to satisfy that syntax rule */ | ||||||||||
; | ||||||||||
/* both success and failure need to end the critical section */ | ||||||||||
Comment on lines
+2846
to
+2851
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of these goto tricks, consider instead to pull out the critical section code into a helper function; it should result in cleaner and more readable code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To avoid gotos in I defined a helper There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah yes, refactoring into a helper would definitely be cleaner and we wouldn't the comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Btw, I think we could have those macro helpers because they would help us in those situations. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm not sure about introducing such an API. IMO, it is better to keep the critical section APIs dead simple. If you need to avoid gotos, refactor. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I agree. With critical sections the safest way to go is to create a |
||||||||||
Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); | ||||||||||
blhsing marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
return bytes; | ||||||||||
} | ||||||||||
|
||||||||||
static PyObject * | ||||||||||
|
@@ -2968,11 +2933,13 @@ PyBytes_FromObject(PyObject *x) | |||||||||
if (PyObject_CheckBuffer(x)) | ||||||||||
return _PyBytes_FromBuffer(x); | ||||||||||
|
||||||||||
if (PyList_CheckExact(x)) | ||||||||||
return _PyBytes_FromList(x); | ||||||||||
|
||||||||||
if (PyTuple_CheckExact(x)) | ||||||||||
return _PyBytes_FromTuple(x); | ||||||||||
if (PyList_CheckExact(x) || PyTuple_CheckExact(x)) { | ||||||||||
PyObject *bytes = _PyBytes_FromSequence(x); | ||||||||||
/* Py_None as a fallback sentinel to the slow path */ | ||||||||||
if (bytes != Py_None) { | ||||||||||
blhsing marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
return bytes; | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
if (!PyUnicode_Check(x)) { | ||||||||||
it = PyObject_GetIter(x); | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we have the pyperf benchmarks on the PR as well? (namely, the nice table with two columns and the diffs as well as the benchmark script? thanks)