-
Notifications
You must be signed in to change notification settings - Fork 675
FOEPD-2119: Use SaveContext to write to DB in background thread #6389
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 9 commits
e263d1b
4ab57db
6b06585
e695129
55eaa13
1fd7aaf
960a858
5e824f6
9a11f94
67f65f3
65afd86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
import os | ||
import random | ||
import string | ||
import threading | ||
import timeit | ||
import warnings | ||
|
||
|
@@ -234,6 +235,156 @@ def _save_batch(self): | |
self._reload_parents.clear() | ||
|
||
|
||
class AsyncSaveContext(SaveContext): | ||
def __init__(self, *args, executor=None, **kwargs): | ||
if executor is None: | ||
raise ValueError("executor must be specified") | ||
super().__init__(*args, **kwargs) | ||
self.executor = executor | ||
self.futures = [] | ||
|
||
self.samples_lock = threading.Lock() | ||
self.frames_lock = threading.Lock() | ||
self.batch_ids_lock = threading.Lock() | ||
self.reloading_lock = threading.Lock() | ||
|
||
def __enter__(self): | ||
super().__enter__() | ||
self.executor.__enter__() | ||
return self | ||
|
||
def __exit__(self, *args): | ||
super().__exit__(*args) | ||
|
||
error = None | ||
try: | ||
# Loop-drain self.futures so any submissions triggered by | ||
# super().__exit__() are awaited. | ||
while self.futures: | ||
futures = self.futures | ||
self.futures = [] | ||
for future in futures: | ||
try: | ||
future.result() | ||
except Exception as e: | ||
if error is None: | ||
error = e | ||
self.futures.clear() | ||
finally: | ||
self.executor.__exit__(*args) | ||
|
||
if error: | ||
raise error | ||
|
||
def save(self, sample): | ||
"""Registers the sample for saving in the next batch. | ||
|
||
Args: | ||
sample: a :class:`fiftyone.core.sample.Sample` or | ||
:class:`fiftyone.core.sample.SampleView` | ||
""" | ||
if sample._in_db and sample._dataset is not self._dataset: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is all of this just copied from SaveContext but with the locks? Feels like a lot of shared logic/similar code There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. What are your thoughts on resolving that? Put the locks in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes that seems like like the best path. Both to reduce code duplication and also because per my comments here, I see this either as (1) the way that
^reminder about this as well. More reason not to duplicate implementation of |
||
raise ValueError( | ||
"Dataset context '%s' cannot save sample from dataset '%s'" | ||
% (self._dataset.name, sample._dataset.name) | ||
) | ||
|
||
sample_ops, frame_ops = sample._save(deferred=True) | ||
updated = sample_ops or frame_ops | ||
|
||
if sample_ops: | ||
with self.samples_lock: | ||
self._sample_ops.extend(sample_ops) | ||
|
||
if frame_ops: | ||
with self.frames_lock: | ||
self._frame_ops.extend(frame_ops) | ||
|
||
if updated and self._is_generated: | ||
with self.batch_ids_lock: | ||
self._batch_ids.append(sample.id) | ||
|
||
if updated and isinstance(sample, fosa.SampleView): | ||
with self.reloading_lock: | ||
self._reload_parents.append(sample) | ||
|
||
if self._batching_strategy == "static": | ||
self._curr_batch_size += 1 | ||
if self._curr_batch_size >= self.batch_size: | ||
self._save_batch() | ||
self._curr_batch_size = 0 | ||
elif self._batching_strategy == "size": | ||
if sample_ops: | ||
self._curr_batch_size_bytes += sum( | ||
len(str(op)) for op in sample_ops | ||
) | ||
|
||
if frame_ops: | ||
self._curr_batch_size_bytes += sum( | ||
len(str(op)) for op in frame_ops | ||
) | ||
|
||
if ( | ||
self._curr_batch_size_bytes | ||
>= self.batch_size * self._encoding_ratio | ||
): | ||
self._save_batch() | ||
self._curr_batch_size_bytes = 0 | ||
elif self._batching_strategy == "latency": | ||
if timeit.default_timer() - self._last_time >= self.batch_size: | ||
self._save_batch() | ||
self._last_time = timeit.default_timer() | ||
|
||
def _do_save_batch(self): | ||
encoded_size = -1 | ||
if self._sample_ops: | ||
with self.samples_lock: | ||
sample_ops = self._sample_ops.copy() | ||
self._sample_ops.clear() | ||
res = foo.bulk_write( | ||
sample_ops, | ||
self._sample_coll, | ||
ordered=False, | ||
batcher=False, | ||
)[0] | ||
encoded_size += res.bulk_api_result.get("nBytes", 0) | ||
|
||
if self._frame_ops: | ||
with self.frames_lock: | ||
frame_ops = self._frame_ops.copy() | ||
self._frame_ops.clear() | ||
res = foo.bulk_write( | ||
frame_ops, | ||
self._frame_coll, | ||
ordered=False, | ||
batcher=False, | ||
)[0] | ||
encoded_size += res.bulk_api_result.get("nBytes", 0) | ||
|
||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self._encoding_ratio = ( | ||
self._curr_batch_size_bytes / encoded_size | ||
if encoded_size > 0 and self._curr_batch_size_bytes | ||
else 1.0 | ||
) | ||
|
||
if self._batch_ids and self._is_generated: | ||
with self.batch_ids_lock: | ||
batch_ids = self._batch_ids.copy() | ||
self._batch_ids.clear() | ||
self.sample_collection._sync_source(ids=batch_ids) | ||
|
||
if self._reload_parents: | ||
with self.reloading_lock: | ||
reload_parents = self._reload_parents.copy() | ||
self._reload_parents.clear() | ||
for sample in reload_parents: | ||
sample._reload_parents() | ||
|
||
def _save_batch(self): | ||
future = self.executor.submit(self._do_save_batch) | ||
self.futures.append(future) | ||
|
||
|
||
class SampleCollection(object): | ||
"""Abstract class representing an ordered collection of | ||
:class:`fiftyone.core.sample.Sample` instances in a | ||
|
Uh oh!
There was an error while loading. Please reload this page.