From 19f2995d69d7c3c5d0708473916725586ac3acdf Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 13 Feb 2025 21:07:15 -0400 Subject: [PATCH 1/4] fix: don't return value from __init__ --- ietf/doc/storage_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/doc/storage_backends.py b/ietf/doc/storage_backends.py index bab07cacd7..02c719c51b 100644 --- a/ietf/doc/storage_backends.py +++ b/ietf/doc/storage_backends.py @@ -22,7 +22,7 @@ class CustomS3Storage(S3Storage): def __init__(self, **settings): self.in_flight_custom_metadata: Dict[str, Dict[str, str]] = {} - return super().__init__(**settings) + super().__init__(**settings) def store_file( self, From 892117f09d98b8f12f187ca46161d7cb47454f3f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 13 Feb 2025 21:43:09 -0400 Subject: [PATCH 2/4] feat: option to log timing of S3Storage calls --- ietf/doc/storage_backends.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ietf/doc/storage_backends.py b/ietf/doc/storage_backends.py index 02c719c51b..96dbe0911d 100644 --- a/ietf/doc/storage_backends.py +++ b/ietf/doc/storage_backends.py @@ -24,6 +24,36 @@ def __init__(self, **settings): self.in_flight_custom_metadata: Dict[str, Dict[str, str]] = {} super().__init__(**settings) + def get_default_settings(self): + # add a default for the ietf_log_blob_timing boolean + return super().get_default_settings() | {"ietf_log_blob_timing": False} + + def _save(self, name, content): + # Only overriding this to add the option to time the operation + before = timezone.now() + result = super()._save(name, content) + if self.ietf_log_blob_timing: + dt = timezone.now() - before + log(f"S3Storage timing: _save('{name}', ...) for {self.bucket_name} took {dt.total_seconds()}") + return result + + def _open(self, name, mode="rb"): + # Only overriding this to add the option to time the operation + before = timezone.now() + result = super()._open(name, mode) + if self.ietf_log_blob_timing: + dt = timezone.now() - before + log(f"S3Storage timing: _open('{name}', ...) for {self.bucket_name} took {dt.total_seconds()}") + return result + + def delete(self, name): + # Only overriding this to add the option to time the operation + before = timezone.now() + super().delete(name) + if self.ietf_log_blob_timing: + dt = timezone.now() - before + log(f"S3Storage timing: delete('{name}') for {self.bucket_name} took {dt.total_seconds()}") + def store_file( self, kind: str, From 4be1bdffdc99609bcbdb8ffff4245334f9612c2d Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 13 Feb 2025 21:45:47 -0400 Subject: [PATCH 3/4] chore: units --- ietf/doc/storage_backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ietf/doc/storage_backends.py b/ietf/doc/storage_backends.py index 96dbe0911d..63381a0201 100644 --- a/ietf/doc/storage_backends.py +++ b/ietf/doc/storage_backends.py @@ -34,7 +34,7 @@ def _save(self, name, content): result = super()._save(name, content) if self.ietf_log_blob_timing: dt = timezone.now() - before - log(f"S3Storage timing: _save('{name}', ...) for {self.bucket_name} took {dt.total_seconds()}") + log(f"S3Storage timing: _save('{name}', ...) for {self.bucket_name} took {dt.total_seconds()} s") return result def _open(self, name, mode="rb"): @@ -43,7 +43,7 @@ def _open(self, name, mode="rb"): result = super()._open(name, mode) if self.ietf_log_blob_timing: dt = timezone.now() - before - log(f"S3Storage timing: _open('{name}', ...) for {self.bucket_name} took {dt.total_seconds()}") + log(f"S3Storage timing: _open('{name}', ...) for {self.bucket_name} took {dt.total_seconds()} s") return result def delete(self, name): @@ -52,7 +52,7 @@ def delete(self, name): super().delete(name) if self.ietf_log_blob_timing: dt = timezone.now() - before - log(f"S3Storage timing: delete('{name}') for {self.bucket_name} took {dt.total_seconds()}") + log(f"S3Storage timing: delete('{name}') for {self.bucket_name} took {dt.total_seconds()} s") def store_file( self, From 900097840770c7ed8787bb58cb705d7f58e26db9 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 14 Feb 2025 12:17:56 -0400 Subject: [PATCH 4/4] feat: log as JSON; refactor to share code; handle exceptions --- ietf/doc/storage_backends.py | 82 ++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 28 deletions(-) diff --git a/ietf/doc/storage_backends.py b/ietf/doc/storage_backends.py index 63381a0201..6be35fa067 100644 --- a/ietf/doc/storage_backends.py +++ b/ietf/doc/storage_backends.py @@ -1,7 +1,9 @@ # Copyright The IETF Trust 2025, All Rights Reserved import debug # pyflakes:ignore +import json +from contextlib import contextmanager from hashlib import sha384 from io import BufferedReader from storages.backends.s3 import S3Storage @@ -14,6 +16,34 @@ from ietf.utils.timezone import timezone +@contextmanager +def maybe_log_timing(enabled, op, **kwargs): + """If enabled, log elapsed time and additional data from kwargs + + Emits log even if an exception occurs + """ + before = timezone.now() + exception = None + try: + yield + except Exception as err: + exception = err + raise + finally: + if enabled: + dt = timezone.now() - before + log( + json.dumps( + { + "log": "S3Storage_timing", + "seconds": dt.total_seconds(), + "op": op, + "exception": "" if exception is None else repr(exception), + **kwargs, + } + ) + ) + # TODO-BLOBSTORE # Consider overriding save directly so that @@ -29,30 +59,26 @@ def get_default_settings(self): return super().get_default_settings() | {"ietf_log_blob_timing": False} def _save(self, name, content): - # Only overriding this to add the option to time the operation - before = timezone.now() - result = super()._save(name, content) - if self.ietf_log_blob_timing: - dt = timezone.now() - before - log(f"S3Storage timing: _save('{name}', ...) for {self.bucket_name} took {dt.total_seconds()} s") - return result + with maybe_log_timing( + self.ietf_log_blob_timing, "_save", bucket_name=self.bucket_name, name=name + ): + return super()._save(name, content) def _open(self, name, mode="rb"): - # Only overriding this to add the option to time the operation - before = timezone.now() - result = super()._open(name, mode) - if self.ietf_log_blob_timing: - dt = timezone.now() - before - log(f"S3Storage timing: _open('{name}', ...) for {self.bucket_name} took {dt.total_seconds()} s") - return result + with maybe_log_timing( + self.ietf_log_blob_timing, + "_open", + bucket_name=self.bucket_name, + name=name, + mode=mode, + ): + return super()._open(name, mode) def delete(self, name): - # Only overriding this to add the option to time the operation - before = timezone.now() - super().delete(name) - if self.ietf_log_blob_timing: - dt = timezone.now() - before - log(f"S3Storage timing: delete('{name}') for {self.bucket_name} took {dt.total_seconds()} s") + with maybe_log_timing( + self.ietf_log_blob_timing, "delete", bucket_name=self.bucket_name, name=name + ): + super().delete(name) def store_file( self, @@ -74,7 +100,7 @@ def store_file( new_name = self.save(name, file) now = timezone.now() record, created = StoredObject.objects.get_or_create( - store=kind, + store=kind, name=name, defaults=dict( sha384=self.in_flight_custom_metadata[name]["sha384"], @@ -82,14 +108,14 @@ def store_file( store_created=now, created=now, modified=now, - doc_name=doc_name, # Note that these are assumed to be invariant - doc_rev=doc_rev, # for a given name - ) + doc_name=doc_name, # Note that these are assumed to be invariant + doc_rev=doc_rev, # for a given name + ), ) if not created: - record.sha384=self.in_flight_custom_metadata[name]["sha384"] - record.len=int(self.in_flight_custom_metadata[name]["len"]) - record.modified=now + record.sha384 = self.in_flight_custom_metadata[name]["sha384"] + record.len = int(self.in_flight_custom_metadata[name]["len"]) + record.modified = now record.save() if new_name != name: complaint = f"Error encountered saving '{name}' - results stored in '{new_name}' instead." @@ -147,7 +173,7 @@ def _get_write_parameters(self, name, content=None): params["Metadata"] = {} try: content.seek(0) - except AttributeError: # TODO-BLOBSTORE + except AttributeError: # TODO-BLOBSTORE debug.say("Encountered Non-Seekable content") raise NotImplementedError("cannot handle unseekable content") content_bytes = content.read()