Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.7] - unreleased

- Add gracful handling of image and document download errors

### Added

- Test against Wagtail 6.2 ([#35](https://github.com/torchbox/wagtail-bynder/pull/36)) @nickmoreton
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ requires-python = ">=3.11"
dependencies = [
"Django>=4.2",
"Wagtail>=5.2",
"bynder-sdk>=1.1.5,<2.0"
"bynder-sdk>=1.1.5,<2.1"
]

[project.optional-dependencies]
Expand Down
26 changes: 26 additions & 0 deletions src/wagtail_bynder/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,29 @@ class BynderAssetFileTooLarge(Exception):
Raised when an asset file being downloaded from Bynder is found to be
larger than specified in ``settings.BYNDER_MAX_DOWNLOAD_FILE_SIZE``
"""


class BynderAssetDownloadError(Exception):
"""Raised when an HTTP error or network issue occurs fetching an asset from Bynder."""

def __init__(self, url: str, status_code: int | None = None, message: str = ""):
parts = [f"Failed to download asset from '{url}'"]
if status_code is not None:
parts.append(f"status={status_code}")
if message:
parts.append(message)
super().__init__("; ".join(parts))
self.url = url
self.status_code = status_code
self.message = message


class BynderInvalidImageContentError(Exception):
"""Raised when the downloaded content for an image does not appear to be a valid image file."""

def __init__(self, url: str, reason: str):
super().__init__(
f"Downloaded content from '{url}' is not a valid image: {reason}"
)
self.url = url
self.reason = reason
64 changes: 39 additions & 25 deletions src/wagtail_bynder/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@

from wagtail_bynder import utils

from .exceptions import BynderAssetDataError
from .exceptions import (
BynderAssetDataError,
BynderAssetDownloadError,
BynderAssetFileTooLarge,
BynderInvalidImageContentError,
)


logger = logging.getLogger("wagtail.images")
Expand Down Expand Up @@ -174,17 +179,27 @@ def asset_file_has_changed(self, asset_data: dict[str, Any]) -> bool:

def update_file(self, asset_data: dict[str, Any]) -> None:
source_url = self.extract_file_source(asset_data)
file = self.download_file(source_url)
processed_file = self.process_downloaded_file(file, asset_data)

self.file = processed_file if processed_file is not None else file

# Used to trigger additional updates on save()
self._file_changed = True

# Update supplementary field values
self.source_filename = utils.filename_from_url(source_url)
self.original_filesize = int(asset_data["fileSize"])
try:
file = self.download_file(source_url)
processed_file = self.process_downloaded_file(file, asset_data)
except (
BynderAssetDownloadError,
BynderInvalidImageContentError,
BynderAssetFileTooLarge,
) as e:
logger.warning(
"Bynder asset download skipped for asset %s: %s",
asset_data.get("id"),
e,
)
if hasattr(self, "_file_changed"):
delattr(self, "_file_changed")
return None
else:
self.file = processed_file if processed_file is not None else file
self._file_changed = True
self.source_filename = utils.filename_from_url(source_url)
self.original_filesize = int(asset_data["fileSize"])

def download_file(self, source_url: str) -> UploadedFile:
raise NotImplementedError
Expand Down Expand Up @@ -296,19 +311,18 @@ def process_downloaded_file(
"""

# Write to filesystem to avoid using memory for the same image
tmp = NamedTemporaryFile(mode="w+b", dir=settings.FILE_UPLOAD_TEMP_DIR)
details = self.convert_downloaded_image(file, tmp)

# The original file is now redundant and can be deleted, making
# more memory available
del file.file

# Load the converted image into memory to speed up the additional
# reads and writes performed by Wagtail
new_file = io.BytesIO()
tmp.seek(0)
with open(tmp.name, "rb") as source:
for line in source:
with NamedTemporaryFile(mode="w+b", dir=settings.FILE_UPLOAD_TEMP_DIR) as tmp:
details = self.convert_downloaded_image(file, tmp)

# The original file is now redundant and can be deleted, making
# more memory available
del file.file

# Load the converted image into memory to speed up the additional
# reads and writes performed by Wagtail
new_file = io.BytesIO()
tmp.seek(0)
for line in tmp:
new_file.write(line)

name_minus_extension, _ = os.path.splitext(file.name)
Expand Down
68 changes: 61 additions & 7 deletions src/wagtail_bynder/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import mimetypes
import os
import re

from contextlib import suppress
from io import BytesIO

import requests
Expand All @@ -14,21 +16,46 @@
from wagtail.models import Collection
from willow import Image

from .exceptions import BynderAssetFileTooLarge
from .exceptions import (
BynderAssetDownloadError,
BynderAssetFileTooLarge,
BynderInvalidImageContentError,
)


_DEFAULT_COLLECTION = Local()


def download_file(
url: str, max_filesize: int, max_filesize_setting_name: str
url: str,
max_filesize: int,
max_filesize_setting_name: str,
*,
expect_image: bool = False,
) -> InMemoryUploadedFile:
name = os.path.basename(url)

# Stream file to memory
try:
response = requests.get(url, timeout=20, stream=True)
except Exception as e:
raise BynderAssetDownloadError(url, message=str(e)) from e

if response.status_code != 200:
# Consume (small) text body for context if available
message = ""
with suppress(Exception):
# Only read a small slice to avoid memory blow-up
message = response.text[:500]
raise BynderAssetDownloadError(
url, status_code=response.status_code, message=message
)

# Stream body to memory enforcing size limit
file = BytesIO()
for line in requests.get(url, timeout=20, stream=True):
file.write(line)
for chunk in response.iter_content(chunk_size=8192):
if not chunk:
continue
file.write(chunk)
if file.tell() > max_filesize:
file.truncate(0)
raise BynderAssetFileTooLarge(
Expand All @@ -38,7 +65,32 @@ def download_file(
size = file.tell()
file.seek(0)

content_type, charset = mimetypes.guess_type(name)
# If we expected an image but got probable HTML / JSON error page, detect early
if expect_image:
sample = file.read(4096)
file.seek(0)
# Heuristics: starts with <!DOCTYPE html or <html or JSON body or contains typical gateway phrases
lowered = sample.lower()
if (
lowered.startswith((b"<!doctype html", b"<html", b"<?xml"))
or b"<title>502" in lowered
or b"bad gateway" in lowered
or re.search(rb"<h1>.*(error|gateway).*</h1>", lowered)
):
raise BynderInvalidImageContentError(
url, "received HTML error page instead of image bytes"
)

content_type = response.headers.get("Content-Type")
if not content_type:
guessed, charset = mimetypes.guess_type(name)
content_type = guessed
else:
# Strip charset etc
if ";" in content_type:
content_type = content_type.split(";")[0].strip()
charset = None

return InMemoryUploadedFile(
file,
field_name="file",
Expand All @@ -58,7 +110,9 @@ def download_document(url: str) -> InMemoryUploadedFile:
def download_image(url: str) -> InMemoryUploadedFile:
max_filesize_setting_name = "BYNDER_MAX_IMAGE_FILE_SIZE"
max_filesize = getattr(settings, max_filesize_setting_name, 5242880)
return download_file(url, max_filesize, max_filesize_setting_name)
return download_file(
url, max_filesize, max_filesize_setting_name, expect_image=True
)


def get_image_info(file: File) -> tuple[int, int, str, bool]:
Expand Down
82 changes: 81 additions & 1 deletion tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
from wagtail.images import get_image_model

from wagtail_bynder import get_video_model
from wagtail_bynder.exceptions import BynderAssetDataError
from wagtail_bynder.exceptions import (
BynderAssetDataError,
BynderAssetDownloadError,
BynderAssetFileTooLarge,
BynderInvalidImageContentError,
)
from wagtail_bynder.utils import filename_from_url

from .utils import (
Expand Down Expand Up @@ -118,6 +123,29 @@ def test_update_from_asset_data(self):
self.assertEqual(self.obj.is_limited_use, self.asset_data["limited"] == 1)
self.assertEqual(self.obj.is_public, self.asset_data["isPublic"] == 1)

def test_update_file_download_error_graceful(self):
self.obj.source_filename = None
self.obj.original_filesize = None
# Simulate download error
with mock.patch.object(
self.obj,
"download_file",
side_effect=BynderAssetDownloadError("http://example.com/bad-doc", 502),
):
self.obj.update_file(self.asset_data)
self.assertFalse(hasattr(self.obj, "_file_changed"))
self.assertFalse(self.obj.file)

def test_update_file_too_large_graceful(self):
self.obj.source_filename = None
self.obj.original_filesize = None
with mock.patch.object(
self.obj, "download_file", side_effect=BynderAssetFileTooLarge("Too big")
):
self.obj.update_file(self.asset_data)
self.assertFalse(hasattr(self.obj, "_file_changed"))
self.assertFalse(self.obj.file)


class BynderSyncedImageTests(SimpleTestCase):
def setUp(self):
Expand Down Expand Up @@ -220,6 +248,58 @@ def test_update_file(self):
self.assertEqual(self.obj.original_height, self.asset_data["height"])
self.assertEqual(self.obj.original_width, self.asset_data["width"])

def test_update_file_download_error_graceful(self):
self.obj.source_filename = None
self.obj.original_filesize = None
self.obj.original_height = None
self.obj.original_width = None

# Simulate a download error
with mock.patch.object(
self.obj,
"download_file",
side_effect=BynderAssetDownloadError("http://example.com/bad", 502),
):
# Should not raise
self.obj.update_file(self.asset_data)

# _file_changed should NOT be set
self.assertFalse(hasattr(self.obj, "_file_changed"))
# file should remain unset
self.assertFalse(self.obj.file)

def test_update_file_invalid_content_graceful(self):
self.obj.source_filename = None
self.obj.original_filesize = None
self.obj.original_height = None
self.obj.original_width = None

with mock.patch.object(
self.obj,
"download_file",
side_effect=BynderInvalidImageContentError(
"http://example.com/err", "html error"
),
):
self.obj.update_file(self.asset_data)

self.assertFalse(hasattr(self.obj, "_file_changed"))
self.assertFalse(self.obj.file)

def test_update_file_too_large_graceful(self):
self.obj.source_filename = None
self.obj.original_filesize = None
self.obj.original_height = None
self.obj.original_width = None

with mock.patch.object(
self.obj, "download_file", side_effect=BynderAssetFileTooLarge("Too big")
):
self.obj.update_file(self.asset_data)

self.assertFalse(hasattr(self.obj, "_file_changed"))
self.assertFalse(self.obj.file)

def test_update_from_asset_data(self):
self.obj.title = None
self.obj.copyright = None
Expand Down