Skip to content

Commit b58b91a

Browse files
author
David Robertson
authored
Distinguish b/w nonexistent and unreachable media (#49)
Fixes #46. I am assuming that after media retention kicks in, the remote homeserver responds to media requests with 404 M_NOT_FOUND. If so, the content scanner should do the same. Mypy should pass once #53 lands.
1 parent 9915345 commit b58b91a

File tree

5 files changed

+81
-4
lines changed

5 files changed

+81
-4
lines changed

docs/api.md

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ status code of the response for each scenario:
2828
| 400 | `MCS_MALFORMED_JSON` | The request body contains malformed JSON. |
2929
| 400 | `MCS_MEDIA_FAILED_TO_DECRYPT` | The server failed to decrypt the encrypted media downloaded from the media repo. |
3030
| 404 | `M_NOT_FOUND` | No route could be found at the given path. |
31+
| 404 | `M_NOT_FOUND` | The requested media was not present in the media repo. |
3132
| 403 | `MCS_MEDIA_NOT_CLEAN` | The server scanned the downloaded media but the antivirus script returned a non-zero exit code. |
3233
| 403 | `MCS_BAD_DECRYPTION` | The provided `encrypted_body` could not be decrypted, or the encrypted file could not be decrypted. The client should request the public key of the server and then retry (once). |
3334
| 500 | `M_UNKNOWN` | The server experienced an unexpected error. |

src/matrix_content_scanner/scanner/file_downloader.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@ async def download_file(
8989
except _PathNotFoundException:
9090
# If that still failed, raise an error.
9191
raise ContentScannerRestError(
92-
http_status=HTTPStatus.BAD_GATEWAY,
93-
reason=ErrCode.REQUEST_FAILED,
92+
http_status=HTTPStatus.NOT_FOUND,
93+
reason=ErrCode.NOT_FOUND,
9494
info="File not found",
9595
)
9696

src/matrix_content_scanner/utils/constants.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@
1717
class ErrCode(str, Enum):
1818
# An unknown error happened.
1919
UNKNOWN = "M_UNKNOWN"
20-
# No route was found with the path and method provided in the request.
20+
# One of the following:
21+
# - No route was found with the path and method provided in the request.
22+
# - The homeserver does not have the requested piece of media.
2123
NOT_FOUND = "M_NOT_FOUND"
2224
# The file failed the scan.
2325
NOT_CLEAN = "MCS_MEDIA_NOT_CLEAN"
2426
# The file could not be retrieved from the homeserver.
27+
# Does NOT cover homeserver responses with M_NOT_FOUND.
2528
REQUEST_FAILED = "MCS_MEDIA_REQUEST_FAILED"
2629
# The encrypted file could not be decrypted with the provided metadata.
2730
FAILED_TO_DECRYPT = "MCS_MEDIA_FAILED_TO_DECRYPT"

tests/scanner/test_file_downloader.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ async def _test_retry(self) -> None:
124124
with self.assertRaises(ContentScannerRestError) as cm:
125125
await self.downloader.download_file(MEDIA_PATH)
126126

127-
self.assertEqual(cm.exception.http_status, 502)
127+
self.assertEqual(cm.exception.http_status, 404)
128128
self.assertEqual(cm.exception.info, "File not found")
129129

130130
# Check that we sent out two requests: one to the v3 path and one to the r0 path.

tests/servlets/test_scan.py

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Copyright 2023 New Vector Ltd
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
from http import HTTPStatus
15+
from unittest.mock import patch
16+
17+
from aiohttp.test_utils import AioHTTPTestCase
18+
from aiohttp.web_app import Application
19+
from multidict import CIMultiDict
20+
21+
from matrix_content_scanner.httpserver import HTTPServer
22+
from matrix_content_scanner.utils.constants import ErrCode
23+
from matrix_content_scanner.utils.errors import ContentScannerRestError
24+
from tests.testutils import get_content_scanner
25+
26+
SERVER_NAME = "test"
27+
28+
29+
class TestScanHandler(AioHTTPTestCase):
30+
def setUp(self) -> None:
31+
# Bypass well-known lookups.
32+
self.scanner = get_content_scanner(
33+
{"download": {"base_homeserver_url": "http://my-site.com"}}
34+
)
35+
36+
async def get_application(self) -> Application:
37+
return HTTPServer(self.scanner)._app
38+
39+
async def test_media_not_found_on_remote_homeserver(self) -> None:
40+
"""Missing media on the remote HS should be presented as a 404 to the client."""
41+
patch_downloader = patch.object(
42+
self.scanner.file_downloader,
43+
"_get",
44+
return_value=(HTTPStatus.NOT_FOUND, b"", CIMultiDict()),
45+
)
46+
47+
with patch_downloader:
48+
async with self.client.get(
49+
f"/_matrix/media_proxy/unstable/download/{SERVER_NAME}/media-does-not-exist"
50+
) as resp:
51+
self.assertEqual(resp.status, 404)
52+
body = await resp.json()
53+
self.assertEqual(body["reason"], "M_NOT_FOUND", body)
54+
55+
async def test_remote_homeserver_unreachable(self) -> None:
56+
"""An unreachable HS should be presented as a 502 to the client."""
57+
patch_downloader = patch.object(
58+
self.scanner.file_downloader,
59+
"_get",
60+
side_effect=ContentScannerRestError(
61+
HTTPStatus.BAD_GATEWAY,
62+
ErrCode.REQUEST_FAILED,
63+
"dodgy network timeout :(((",
64+
),
65+
)
66+
67+
with patch_downloader:
68+
async with self.client.get(
69+
f"/_matrix/media_proxy/unstable/download/{SERVER_NAME}/media-does-not-exist"
70+
) as resp:
71+
self.assertEqual(resp.status, 502)
72+
body = await resp.json()
73+
self.assertEqual(body["reason"], "MCS_MEDIA_REQUEST_FAILED", body)

0 commit comments

Comments
 (0)