Skip to content

Commit dca72b7

Browse files
authored
PYTHON-3222 Fix memory leak in cbson decode_all (#927)
Add decode_all keyword arg for codec_options. Make decode_all show up in docs.
1 parent 5ccbb4d commit dca72b7

File tree

4 files changed

+76
-69
lines changed

4 files changed

+76
-69
lines changed

bson/__init__.py

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,40 @@ def decode(
982982
return _bson_to_dict(data, opts)
983983

984984

985+
def _decode_all(data: _ReadableBuffer, opts: "CodecOptions[_DocumentType]") -> List[_DocumentType]:
986+
"""Decode a BSON data to multiple documents."""
987+
data, view = get_data_and_view(data)
988+
data_len = len(data)
989+
docs: List[_DocumentType] = []
990+
position = 0
991+
end = data_len - 1
992+
use_raw = _raw_document_class(opts.document_class)
993+
try:
994+
while position < end:
995+
obj_size = _UNPACK_INT_FROM(data, position)[0]
996+
if data_len - position < obj_size:
997+
raise InvalidBSON("invalid object size")
998+
obj_end = position + obj_size - 1
999+
if data[obj_end] != 0:
1000+
raise InvalidBSON("bad eoo")
1001+
if use_raw:
1002+
docs.append(opts.document_class(data[position : obj_end + 1], opts)) # type: ignore
1003+
else:
1004+
docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts))
1005+
position += obj_size
1006+
return docs
1007+
except InvalidBSON:
1008+
raise
1009+
except Exception:
1010+
# Change exception type to InvalidBSON but preserve traceback.
1011+
_, exc_value, exc_tb = sys.exc_info()
1012+
raise InvalidBSON(str(exc_value)).with_traceback(exc_tb)
1013+
1014+
1015+
if _USE_C:
1016+
_decode_all = _cbson._decode_all # noqa: F811
1017+
1018+
9851019
def decode_all(
9861020
data: _ReadableBuffer, codec_options: "Optional[CodecOptions[_DocumentType]]" = None
9871021
) -> List[_DocumentType]:
@@ -1008,41 +1042,10 @@ def decode_all(
10081042
`codec_options`.
10091043
"""
10101044
opts = codec_options or DEFAULT_CODEC_OPTIONS
1011-
data, view = get_data_and_view(data)
10121045
if not isinstance(opts, CodecOptions):
10131046
raise _CODEC_OPTIONS_TYPE_ERROR
10141047

1015-
data_len = len(data)
1016-
docs: List[_DocumentType] = []
1017-
position = 0
1018-
end = data_len - 1
1019-
use_raw = _raw_document_class(opts.document_class)
1020-
try:
1021-
while position < end:
1022-
obj_size = _UNPACK_INT_FROM(data, position)[0]
1023-
if data_len - position < obj_size:
1024-
raise InvalidBSON("invalid object size")
1025-
obj_end = position + obj_size - 1
1026-
if data[obj_end] != 0:
1027-
raise InvalidBSON("bad eoo")
1028-
if use_raw:
1029-
docs.append(
1030-
opts.document_class(data[position : obj_end + 1], codec_options) # type: ignore
1031-
)
1032-
else:
1033-
docs.append(_elements_to_dict(data, view, position + 4, obj_end, opts))
1034-
position += obj_size
1035-
return docs
1036-
except InvalidBSON:
1037-
raise
1038-
except Exception:
1039-
# Change exception type to InvalidBSON but preserve traceback.
1040-
_, exc_value, exc_tb = sys.exc_info()
1041-
raise InvalidBSON(str(exc_value)).with_traceback(exc_tb)
1042-
1043-
1044-
if _USE_C:
1045-
decode_all = _cbson.decode_all # noqa: F811
1048+
return _decode_all(data, opts) # type: ignore[arg-type]
10461049

10471050

10481051
def _decode_selective(rawdoc: Any, fields: Any, codec_options: Any) -> Mapping[Any, Any]:

bson/_cbsonmodule.c

Lines changed: 6 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ struct module_state {
5353
PyObject* BSONInt64;
5454
PyObject* Decimal128;
5555
PyObject* Mapping;
56-
PyObject* CodecOptions;
5756
};
5857

5958
#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
@@ -344,8 +343,7 @@ static int _load_python_objects(PyObject* module) {
344343
_load_object(&state->BSONInt64, "bson.int64", "Int64") ||
345344
_load_object(&state->Decimal128, "bson.decimal128", "Decimal128") ||
346345
_load_object(&state->UUID, "uuid", "UUID") ||
347-
_load_object(&state->Mapping, "collections.abc", "Mapping") ||
348-
_load_object(&state->CodecOptions, "bson.codec_options", "CodecOptions")) {
346+
_load_object(&state->Mapping, "collections.abc", "Mapping")) {
349347
return 1;
350348
}
351349
/* Reload our REType hack too. */
@@ -498,26 +496,6 @@ int convert_codec_options(PyObject* options_obj, void* p) {
498496
return 1;
499497
}
500498

501-
/* Fill out a codec_options_t* with default options.
502-
*
503-
* Return 1 on success.
504-
* Return 0 on failure.
505-
*/
506-
int default_codec_options(struct module_state* state, codec_options_t* options) {
507-
PyObject* options_obj = NULL;
508-
PyObject* codec_options_func = _get_object(
509-
state->CodecOptions, "bson.codec_options", "CodecOptions");
510-
if (codec_options_func == NULL) {
511-
return 0;
512-
}
513-
options_obj = PyObject_CallFunctionObjArgs(codec_options_func, NULL);
514-
Py_DECREF(codec_options_func);
515-
if (options_obj == NULL) {
516-
return 0;
517-
}
518-
return convert_codec_options(options_obj, options);
519-
}
520-
521499
void destroy_codec_options(codec_options_t* options) {
522500
Py_CLEAR(options->document_class);
523501
Py_CLEAR(options->tzinfo);
@@ -2411,15 +2389,10 @@ static PyObject* _cbson_element_to_dict(PyObject* self, PyObject* args) {
24112389
PyObject* value;
24122390
PyObject* result_tuple;
24132391

2414-
if (!PyArg_ParseTuple(args, "OII|O&", &bson, &position, &max,
2392+
if (!PyArg_ParseTuple(args, "OIIO&", &bson, &position, &max,
24152393
convert_codec_options, &options)) {
24162394
return NULL;
24172395
}
2418-
if (PyTuple_GET_SIZE(args) < 4) {
2419-
if (!default_codec_options(GETSTATE(self), &options)) {
2420-
return NULL;
2421-
}
2422-
}
24232396

24242397
if (!PyBytes_Check(bson)) {
24252398
PyErr_SetString(PyExc_TypeError, "argument to _element_to_dict must be a bytes object");
@@ -2594,17 +2567,13 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
25942567
PyObject* dict;
25952568
PyObject* result = NULL;
25962569
codec_options_t options;
2597-
PyObject* options_obj;
2570+
PyObject* options_obj = NULL;
25982571
Py_buffer view = {0};
25992572

2600-
if (!PyArg_ParseTuple(args, "O|O", &bson, &options_obj)) {
2573+
if (!PyArg_ParseTuple(args, "OO", &bson, &options_obj)) {
26012574
return NULL;
26022575
}
2603-
if ((PyTuple_GET_SIZE(args) < 2) || (options_obj == Py_None)) {
2604-
if (!default_codec_options(GETSTATE(self), &options)) {
2605-
return NULL;
2606-
}
2607-
} else if (!convert_codec_options(options_obj, &options)) {
2576+
if (!convert_codec_options(options_obj, &options)) {
26082577
return NULL;
26092578
}
26102579

@@ -2698,7 +2667,7 @@ static PyMethodDef _CBSONMethods[] = {
26982667
"convert a dictionary to a string containing its BSON representation."},
26992668
{"_bson_to_dict", _cbson_bson_to_dict, METH_VARARGS,
27002669
"convert a BSON string to a SON object."},
2701-
{"decode_all", _cbson_decode_all, METH_VARARGS,
2670+
{"_decode_all", _cbson_decode_all, METH_VARARGS,
27022671
"convert binary data to a sequence of documents."},
27032672
{"_element_to_dict", _cbson_element_to_dict, METH_VARARGS,
27042673
"Decode a single key, value pair."},

doc/changelog.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,32 @@
11
Changelog
22
=========
33

4+
Changes in Version 4.1.1
5+
-------------------------
6+
7+
Issues Resolved
8+
...............
9+
10+
Version 4.1.1 fixes a number of bugs:
11+
12+
- Fixed a memory leak when calling :func:`~bson.decode_all` without a
13+
``codec_options`` argument (`PYTHON-3222`_).
14+
- Fixed a bug where :func:`~bson.decode_all` did not accept ``codec_options``
15+
as a keyword argument (`PYTHON-3222`_).
16+
- Fixed an oversight where type markers (py.typed files) were not included
17+
in our release distributions (`PYTHON-3214`_).
18+
- Fixed a bug where pymongo would raise a "NameError: name sys is not defined"
19+
exception when attempting to parse a "mongodb+srv://" URI when the dnspython
20+
dependency was not installed (`PYTHON-3198`_).
21+
22+
See the `PyMongo 4.1.1 release notes in JIRA`_ for the list of resolved issues
23+
in this release.
24+
25+
.. _PYTHON-3198: https://jira.mongodb.org/browse/PYTHON-3198
26+
.. _PYTHON-3214: https://jira.mongodb.org/browse/PYTHON-3214
27+
.. _PYTHON-3222: https://jira.mongodb.org/browse/PYTHON-3222
28+
.. _PyMongo 4.1.1 release notes in JIRA: https://jira.mongodb.org/secure/ReleaseNote.jspa?projectId=10004&version=33290
29+
430
Changes in Version 4.1
531
----------------------
632

test/test_bson.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,15 @@ def test_decode_all_no_options(self):
10061006
decoded = bson.decode_all(bson.encode(doc2), None)[0]
10071007
self.assertIsInstance(decoded["id"], Binary)
10081008

1009+
def test_decode_all_kwarg(self):
1010+
doc = {"a": uuid.uuid4()}
1011+
opts = CodecOptions(uuid_representation=UuidRepresentation.STANDARD)
1012+
encoded = encode(doc, codec_options=opts)
1013+
# Positional codec_options
1014+
self.assertEqual([doc], decode_all(encoded, opts))
1015+
# Keyword codec_options
1016+
self.assertEqual([doc], decode_all(encoded, codec_options=opts))
1017+
10091018
def test_unicode_decode_error_handler(self):
10101019
enc = encode({"keystr": "foobar"})
10111020

0 commit comments

Comments
 (0)