feat: StoredObject metadata
rjsparks committed Feb 6, 2025
1 parent 2effb95 commit 78c94a6
Showing 30 changed files with 452 additions and 136 deletions.
2 changes: 1 addition & 1 deletion dev/deploy-to-container/settings_local.py
@@ -83,7 +83,7 @@

for storagename in MORE_STORAGE_NAMES:
STORAGES[storagename] = {
"BACKEND": "storages.backends.s3.S3Storage",
"BACKEND": "ietf.doc.storage_backends.CustomS3Storage",
"OPTIONS": dict(
endpoint_url="http://blobstore:9000",
access_key="minio_root",
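
For reference, a minimal sketch of what one resulting STORAGES entry looks like once this loop has run, assuming a hypothetical storage name of "active-draft"; the remaining OPTIONS (secret key, bucket name, and so on) are elided in the diff, so the values shown here are placeholders:

    # Hypothetical expanded view of a single entry produced by the loop above.
    # The storage name, secret_key, and bucket_name are illustrative only.
    STORAGES["active-draft"] = {
        "BACKEND": "ietf.doc.storage_backends.CustomS3Storage",  # was storages.backends.s3.S3Storage
        "OPTIONS": dict(
            endpoint_url="http://blobstore:9000",
            access_key="minio_root",
            secret_key="<placeholder>",
            bucket_name="active-draft",
        ),
    }
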
2 changes: 1 addition & 1 deletion dev/diff/settings_local.py
@@ -70,7 +70,7 @@

for storagename in MORE_STORAGE_NAMES:
STORAGES[storagename] = {
"BACKEND": "storages.backends.s3.S3Storage",
"BACKEND": "ietf.doc.storage_backends.CustomS3Storage",
"OPTIONS": dict(
endpoint_url="http://blobstore:9000",
access_key="minio_root",
2 changes: 1 addition & 1 deletion dev/tests/settings_local.py
@@ -69,7 +69,7 @@

for storagename in MORE_STORAGE_NAMES:
STORAGES[storagename] = {
"BACKEND": "storages.backends.s3.S3Storage",
"BACKEND": "ietf.doc.storage_backends.CustomS3Storage",
"OPTIONS": dict(
endpoint_url="http://blobstore:9000",
access_key="minio_root",
2 changes: 1 addition & 1 deletion docker/configs/settings_local.py
@@ -40,7 +40,7 @@
# ]
for storagename in MORE_STORAGE_NAMES:
STORAGES[storagename] = {
"BACKEND": "storages.backends.s3.S3Storage",
"BACKEND": "ietf.doc.storage_backends.CustomS3Storage",
"OPTIONS": dict(
endpoint_url="http://blobstore:9000",
access_key="minio_root",
8 changes: 7 additions & 1 deletion ietf/doc/admin.py
@@ -12,7 +12,7 @@
TelechatDocEvent, BallotPositionDocEvent, ReviewRequestDocEvent, InitialReviewDocEvent,
AddedMessageEvent, SubmissionDocEvent, DeletedEvent, EditedAuthorsDocEvent, DocumentURL,
ReviewAssignmentDocEvent, IanaExpertDocEvent, IRSGBallotDocEvent, DocExtResource, DocumentActionHolder,
- BofreqEditorDocEvent, BofreqResponsibleDocEvent )
+ BofreqEditorDocEvent, BofreqResponsibleDocEvent, StoredObject )

from ietf.utils.validators import validate_external_resource_value

@@ -218,3 +218,9 @@ class DocExtResourceAdmin(admin.ModelAdmin):
search_fields = ['doc__name', 'value', 'display_name', 'name__slug',]
raw_id_fields = ['doc', ]
admin.site.register(DocExtResource, DocExtResourceAdmin)

class StoredObjectAdmin(admin.ModelAdmin):
list_display = ['store', 'name', 'modified', 'deleted']
list_filter = ['deleted']
search_fields = ['store', 'name', 'doc_name', 'doc_rev', 'deleted']
admin.site.register(StoredObject, StoredObjectAdmin)
2 changes: 1 addition & 1 deletion ietf/doc/expire.py
@@ -160,7 +160,7 @@ def remove_ftp_copy(f):
def remove_from_active_draft_storage(file):
# Assumes the glob will never find a file with no suffix
ext = file.suffix[1:]
remove_from_storage("active-draft", f"{ext}/{file.name}")
remove_from_storage("active-draft", f"{ext}/{file.name}", warn_if_missing=False)

# Note that the object is already in the "draft" storage.
src_dir = Path(settings.INTERNET_DRAFT_PATH)
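
The remove_from_storage helper called here lives in ietf.doc.storage_utils, which is not part of this diff; a rough sketch of its assumed shape, where it simply resolves the named store and delegates to the custom backend (names and behavior are inferred, not taken from the commit):

    # Sketch only - the real helper is in ietf/doc/storage_utils.py (not shown here).
    from django.core.files.storage import storages

    def remove_from_storage(kind: str, name: str, warn_if_missing: bool = True) -> None:
        # Assumed behavior: look up the named store from STORAGES and let the
        # CustomS3Storage backend handle deletion plus StoredObject bookkeeping.
        store = storages[kind]
        store.remove_from_storage(kind, name, warn_if_missing=warn_if_missing)
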
@@ -0,0 +1,66 @@
# Generated by Django 4.2.18 on 2025-02-04 20:51

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("doc", "0024_remove_ad_is_watching_states"),
]

operations = [
migrations.CreateModel(
name="StoredObject",
fields=[
(
"id",
models.AutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("store", models.CharField(max_length=256)),
("name", models.CharField(max_length=1024)),
("sha384", models.CharField(max_length=96)),
("len", models.PositiveBigIntegerField()),
(
"store_created",
models.DateTimeField(
help_text="The instant the object ws first placed in the store"
),
),
(
"created",
models.DateTimeField(
help_text="Instant object became known. May not be the same as the storage's created value for the instance. It will hold ctime for objects imported from older disk storage"
),
),
(
"modified",
models.DateTimeField(
help_text="Last instant object was modified. May not be the same as the storage's modified value for the instance. It will hold mtime for objects imported from older disk storage unless they've actually been overwritten more recently"
),
),
("doc_name", models.CharField(blank=True, max_length=255, null=True)),
("doc_rev", models.CharField(blank=True, max_length=16, null=True)),
("deleted", models.DateTimeField(null=True)),
],
options={
"indexes": [
models.Index(
fields=["doc_name", "doc_rev"],
name="doc_storedo_doc_nam_d04465_idx",
)
],
},
),
migrations.AddConstraint(
model_name="storedobject",
constraint=models.UniqueConstraint(
fields=("store", "name"), name="unique_name_per_store"
),
),
]
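
Once applied, the unique_name_per_store constraint rejects a second row with the same (store, name) pair at the database level. A hedged illustration (field values are made up):

    # Illustration only - assumes the migration above has been applied.
    from django.db import IntegrityError, transaction
    from django.utils import timezone

    from ietf.doc.models import StoredObject

    now = timezone.now()
    common = dict(store="draft", name="txt/draft-example-00.txt", sha384="...",
                  len=1234, store_created=now, created=now, modified=now)
    StoredObject.objects.create(**common)
    try:
        with transaction.atomic():
            StoredObject.objects.create(**common)  # same (store, name) pair
    except IntegrityError:
        pass  # rejected by unique_name_per_store
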
52 changes: 51 additions & 1 deletion ietf/doc/models.py
@@ -9,14 +9,16 @@
import django.db
import rfc2html

from io import BufferedReader
from pathlib import Path
from lxml import etree
- from typing import Optional, TYPE_CHECKING
+ from typing import Optional, TYPE_CHECKING, Union
from weasyprint import HTML as wpHTML
from weasyprint.text.fonts import FontConfiguration

from django.db import models
from django.core import checks
from django.core.files.base import File
from django.core.cache import caches
from django.core.validators import URLValidator, RegexValidator
from django.urls import reverse as urlreverse
@@ -30,6 +32,11 @@
import debug # pyflakes:ignore

from ietf.group.models import Group
from ietf.doc.storage_utils import (
store_str as utils_store_str,
store_bytes as utils_store_bytes,
store_file as utils_store_file
)
from ietf.name.models import ( DocTypeName, DocTagName, StreamName, IntendedStdLevelName, StdLevelName,
DocRelationshipName, DocReminderTypeName, BallotPositionName, ReviewRequestStateName, ReviewAssignmentStateName, FormalLanguageName,
DocUrlTagName, ExtResourceName)
@@ -714,6 +721,21 @@ def referenced_by_rfcs_as_rfc_or_draft(self):
if self.type_id == "rfc" and self.came_from_draft():
refs_to |= self.came_from_draft().referenced_by_rfcs()
return refs_to

def store_str(
self, name: str, content: str, allow_overwrite: bool = False
) -> None:
return utils_store_str(self.type_id, name, content, allow_overwrite, self.name, self.rev)

def store_bytes(
self, name: str, content: bytes, allow_overwrite: bool = False, doc_name: Optional[str] = None, doc_rev: Optional[str] = None
) -> None:
return utils_store_bytes(self.type_id, name, content, allow_overwrite, self.name, self.rev)

def store_file(
self, name: str, file: Union[File,BufferedReader], allow_overwrite: bool = False, doc_name: Optional[str] = None, doc_rev: Optional[str] = None
) -> None:
return utils_store_file(self.type_id, name, file, allow_overwrite, self.name, self.rev)

class Meta:
abstract = True
@@ -1538,3 +1560,31 @@ class BofreqEditorDocEvent(DocEvent):
class BofreqResponsibleDocEvent(DocEvent):
""" Capture the responsible leadership (IAB and IESG members) for a BOF Request """
responsible = models.ManyToManyField('person.Person', blank=True)

class StoredObject(models.Model):
"""Hold metadata about objects placed in object storage"""

store = models.CharField(max_length=256)
name = models.CharField(max_length=1024, null=False, blank=False) # N.B. the 1024 limit on name comes from S3
sha384 = models.CharField(max_length=96)
len = models.PositiveBigIntegerField()
store_created = models.DateTimeField(help_text="The instant the object was first placed in the store")
created = models.DateTimeField(
null=False,
help_text="Instant object became known. May not be the same as the storage's created value for the instance. It will hold ctime for objects imported from older disk storage"
)
modified = models.DateTimeField(
null=False,
help_text="Last instant object was modified. May not be the same as the storage's modified value for the instance. It will hold mtime for objects imported from older disk storage unless they've actually been overwritten more recently"
)
doc_name = models.CharField(max_length=255, null=True, blank=True)
doc_rev = models.CharField(max_length=16, null=True, blank=True)
deleted = models.DateTimeField(null=True)

class Meta:
constraints = [
models.UniqueConstraint(fields=['store', 'name'], name='unique_name_per_store'),
]
indexes = [
models.Index(fields=["doc_name", "doc_rev"]),
]
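
The DocumentInfo.store_* helpers above simply forward to the ietf.doc.storage_utils functions with the document's type_id, name, and rev filled in; a hedged usage sketch (document name, object names, and content are illustrative):

    # Hypothetical caller - the document and object names are made up.
    from ietf.doc.models import Document, StoredObject

    doc = Document.objects.get(name="draft-example-something")
    doc.store_str("draft-example-something-00.txt", "plain text body")
    doc.store_bytes("draft-example-something-00.pdf", b"%PDF-1.7 ...")

    # Metadata about what landed in the blob store is then queryable:
    StoredObject.objects.filter(doc_name=doc.name, doc_rev=doc.rev, deleted__isnull=True)
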
25 changes: 24 additions & 1 deletion ietf/doc/resources.py
@@ -18,7 +18,7 @@
RelatedDocHistory, BallotPositionDocEvent, AddedMessageEvent, SubmissionDocEvent,
ReviewRequestDocEvent, ReviewAssignmentDocEvent, EditedAuthorsDocEvent, DocumentURL,
IanaExpertDocEvent, IRSGBallotDocEvent, DocExtResource, DocumentActionHolder,
- BofreqEditorDocEvent,BofreqResponsibleDocEvent)
+ BofreqEditorDocEvent, BofreqResponsibleDocEvent, StoredObject)

from ietf.name.resources import BallotPositionNameResource, DocTypeNameResource
class BallotTypeResource(ModelResource):
@@ -842,3 +842,26 @@ class Meta:
"responsible": ALL_WITH_RELATIONS,
}
api.doc.register(BofreqResponsibleDocEventResource())


class StoredObjectResource(ModelResource):
class Meta:
queryset = StoredObject.objects.all()
serializer = api.Serializer()
cache = SimpleCache()
#resource_name = 'storedobject'
ordering = ['id', ]
filtering = {
"id": ALL,
"store": ALL,
"name": ALL,
"sha384": ALL,
"len": ALL,
"store_created": ALL,
"created": ALL,
"modified": ALL,
"doc_name": ALL,
"doc_rev": ALL,
"deleted": ALL,
}
api.doc.register(StoredObjectResource())
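
With no explicit resource_name set, tastypie derives one from the class name, so these records should appear at /api/v1/doc/storedobject/ alongside the other doc resources; a hedged query example (URL and filter values are assumptions):

    # Hedged example - assumes the default resource name "storedobject" under /api/v1/doc/.
    import requests

    resp = requests.get(
        "https://datatracker.ietf.org/api/v1/doc/storedobject/",
        params={"doc_name": "draft-example-something", "format": "json"},
    )
    for obj in resp.json()["objects"]:
        print(obj["store"], obj["name"], obj["sha384"], obj["len"])
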
137 changes: 137 additions & 0 deletions ietf/doc/storage_backends.py
@@ -0,0 +1,137 @@
# Copyright The IETF Trust 2025, All Rights Reserved

import debug # pyflakes:ignore

from hashlib import sha384
from io import BufferedReader
from storages.backends.s3 import S3Storage
from storages.utils import is_seekable
from typing import Dict, Optional, Union

from django.core.files.base import File

from ietf.doc.models import StoredObject
from ietf.utils.log import log
from ietf.utils.timezone import timezone


class CustomS3Storage(S3Storage):

def __init__(self, **settings):
self.in_flight_custom_metadata: Dict[str, Dict[str, str]] = {}
return super().__init__(**settings)

def store_file(
self,
kind: str,
name: str,
file: Union[File, BufferedReader],
allow_overwrite: bool = False,
doc_name: Optional[str] = None,
doc_rev: Optional[str] = None,
):
is_new = not self.exists_in_storage(kind, name)
# debug.show('f"Asked to store {name} in {kind}: is_new={is_new}, allow_overwrite={allow_overwrite}"')
if not allow_overwrite and not is_new:
log(f"Failed to save {kind}:{name} - name already exists in store")
debug.show('f"Failed to save {kind}:{name} - name already exists in store"')
debug.traceback()
raise Exception("Not ignoring overwrite attempts while testing")
else:
try:
new_name = self.save(name, file)
now = timezone.now()
existing_record = StoredObject.objects.filter(store=kind, name=name)
if existing_record.exists():
# Note this is updating a queryset which is guaranteed by constraints to have one object
existing_record.update(
sha384=self.in_flight_custom_metadata[name]["sha384"],
len=int(self.in_flight_custom_metadata[name]["len"]),
modified=now,
)
else:
StoredObject.objects.create(
store=kind,
name=name,
sha384=self.in_flight_custom_metadata[name]["sha384"],
len=int(self.in_flight_custom_metadata[name]["len"]),
store_created=now,
created=now,
modified=now,
doc_name=doc_name,
doc_rev=doc_rev,
)
if new_name != name:
complaint = f"Error encountered saving '{name}' - results stored in '{new_name}' instead."
log(complaint)
debug.show("complaint")
# Note that we are otherwise ignoring this condition - it should become an error later.
except Exception as e:
# Log and re-raise the exception rather than swallowing it.
# Don't let failure pass so quietly when these are the authoritative bits.
log(f"Failed to save {kind}:{name}", e)
debug.show("type(e)")
debug.show("e")
debug.traceback()
raise e
finally:
del self.in_flight_custom_metadata[name]
return None

def exists_in_storage(self, kind: str, name: str) -> bool:
try:
# open is realized with a HEAD
# See https://github.com/jschneier/django-storages/blob/b79ea310201e7afd659fe47e2882fe59aae5b517/storages/backends/s3.py#L528
with self.open(name):
return True
except FileNotFoundError:
return False

def remove_from_storage(
self, kind: str, name: str, warn_if_missing: bool = True
) -> None:
now = timezone.now()
try:
with self.open(name):
pass
self.delete(name)
# debug.show('f"deleted {name} from {kind} storage"')
except FileNotFoundError:
if warn_if_missing:
complaint = (
f"WARNING: Asked to delete non-existant {name} from {kind} storage"
)
log(complaint)
debug.show("complaint")
existing_record = StoredObject.objects.filter(store=kind, name=name)
if not existing_record.exists() and warn_if_missing:
complaint = f"WARNING: Asked to delete {name} from {kind} storage, but there was no matching StorageObject"
log(complaint)
debug.show("complaint")
else:
# Note that existing_record is a queryset that will have one matching object
existing_record.update(deleted=now)

def _get_write_parameters(self, name, content=None):
# debug.show('f"getting write parameters for {name}"')
params = super()._get_write_parameters(name, content)
if "Metadata" not in params:
params["Metadata"] = {}
if not is_seekable(content):
# TODO-BLOBSTORE
debug.say("Encountered Non-Seekable content")
raise NotImplementedError("cannot handle unseekable content")
content.seek(0)
content_bytes = content.read()
if not isinstance(
content_bytes, bytes
): # TODO-BLOBSTORE: This is sketch-development only - remove before committing
raise Exception(f"Expected bytes - got {type(content_bytes)}")
content.seek(0)
metadata = {
"len": f"{len(content_bytes)}",
"sha384": f"{sha384(content_bytes).hexdigest()}",
}
params["Metadata"].update(metadata)
self.in_flight_custom_metadata[name] = metadata
return params
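
Tying the pieces together: _get_write_parameters hashes the content and stashes its sha384 and length in in_flight_custom_metadata keyed by object name, and store_file consumes that entry after save() to create or update the matching StoredObject row. A hedged end-to-end sketch (store name, object name, and content are illustrative, and assume a matching STORAGES entry from the settings above):

    # Illustration only - assumes "draft" is one of the configured MORE_STORAGE_NAMES.
    from io import BytesIO

    from django.core.files.base import File
    from django.core.files.storage import storages

    store = storages["draft"]  # a CustomS3Storage instance per the settings above
    content = File(BytesIO(b"hello, blobstore"), name="txt/draft-example-00.txt")

    # Saves the blob, records sha384/len as object metadata, and creates or
    # updates the StoredObject row for ("draft", "txt/draft-example-00.txt").
    store.store_file("draft", "txt/draft-example-00.txt", content,
                     allow_overwrite=True, doc_name="draft-example", doc_rev="00")

    assert store.exists_in_storage("draft", "txt/draft-example-00.txt")
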