Skip to content

Commit 5690feb

Browse files
authored
Return auth specific errors when downloading media (#73)
Fixes #72 Also updates the auth media error code docs to move the auth media specific errors up into the `Error codes` section with the rest of the possible content scanner errors.
1 parent 553517c commit 5690feb

File tree

4 files changed

+78
-5
lines changed

4 files changed

+78
-5
lines changed

docs/api.md

+3-5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ status code of the response for each scenario:
2727
|-------------|-------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
2828
| 400 | `MCS_MALFORMED_JSON` | The request body contains malformed JSON. |
2929
| 400 | `MCS_MEDIA_FAILED_TO_DECRYPT` | The server failed to decrypt the encrypted media downloaded from the media repo. |
30+
| 401 | `M_MISSING_TOKEN` | The request is missing a required access token for authentication. |
31+
| 401 | `M_UNKNOWN_TOKEN` | The access token provided for authentication is not valid. |
32+
| 404 | `M_NOT_FOUND` | The `Authorization` header was missing when requesting authenticated media. |
3033
| 404 | `M_NOT_FOUND` | No route could be found at the given path. |
3134
| 404 | `M_NOT_FOUND` | The requested media was not present in the media repo. |
3235
| 403 | `MCS_MEDIA_NOT_CLEAN` | The server scanned the downloaded media but the antivirus script returned a non-zero exit code. |
@@ -199,8 +202,3 @@ Example authorization header:
199202
```
200203
Authorization: Bearer <access_token>
201204
```
202-
203-
If a request is made for authenticated media and the access token is invalid, the content scanner
204-
will respond with HTTP status 502, errcode `MCS_MEDIA_REQUEST_FAILED`.
205-
If a request is made for authenticated media and the `Authorization` header is missing, the content
206-
scanner will respond with HTTP status 404, errcode `M_NOT_FOUND`.

src/matrix_content_scanner/scanner/file_downloader.py

+18
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,24 @@ async def _get_file_content(
221221
except (json.decoder.JSONDecodeError, KeyError):
222222
pass
223223

224+
if code == 401:
225+
try:
226+
err = json.loads(body)
227+
if err["errcode"] == ErrCode.MISSING_TOKEN:
228+
raise ContentScannerRestError(
229+
HTTPStatus.UNAUTHORIZED,
230+
ErrCode.MISSING_TOKEN,
231+
"Access token missing from request",
232+
)
233+
if err["errcode"] == ErrCode.UNKNOWN_TOKEN:
234+
raise ContentScannerRestError(
235+
HTTPStatus.UNAUTHORIZED,
236+
ErrCode.UNKNOWN_TOKEN,
237+
"Invalid access token passed",
238+
)
239+
except (json.decoder.JSONDecodeError, KeyError):
240+
pass
241+
224242
if code == 404:
225243
raise _PathNotFoundException
226244

src/matrix_content_scanner/utils/constants.py

+9
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@ class ErrCode(str, Enum):
1212
# - No route was found with the path and method provided in the request.
1313
# - The homeserver does not have the requested piece of media.
1414
NOT_FOUND = "M_NOT_FOUND"
15+
# The access token is missing from the request.
16+
MISSING_TOKEN = "M_MISSING_TOKEN"
17+
# The provided access token is invalid.
18+
# One of the following:
19+
# - the access token was never valid.
20+
# - the access token has been logged out.
21+
# - the access token has been soft logged out.
22+
# - [Added in v1.3] the access token needs to be refreshed.
23+
UNKNOWN_TOKEN = "M_UNKNOWN_TOKEN"
1524
# The file failed the scan.
1625
NOT_CLEAN = "MCS_MEDIA_NOT_CLEAN"
1726
# The file could not be retrieved from the homeserver.

tests/scanner/test_file_downloader.py

+48
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,54 @@ async def test_download_auth_media(self) -> None:
9595
self.assertTrue(args[0].startswith("http://my-site.com/"))
9696
self.assertIn("/_matrix/client/v1/media/download/" + MEDIA_PATH, args[0])
9797

98+
async def test_download_auth_media_invalid_token(self) -> None:
99+
"""Tests that downloading an authenticated media file with an invalid access
100+
token returns the correct error code.
101+
"""
102+
self.media_status = 401
103+
self.media_body = (
104+
b'{"errcode":"M_UNKNOWN_TOKEN","error":"Invalid access token"}'
105+
)
106+
self._set_headers({"content-type": ["application/json"]})
107+
108+
# Check that we fail at downloading the file.
109+
with self.assertRaises(ContentScannerRestError) as cm:
110+
await self.downloader.download_file(
111+
MEDIA_PATH, auth_header="Bearer access_token"
112+
)
113+
114+
self.assertEqual(cm.exception.http_status, 401)
115+
self.assertEqual(cm.exception.reason, "M_UNKNOWN_TOKEN")
116+
117+
# Check that we tried downloading from the set base URL.
118+
args = self.get_mock.call_args.args
119+
self.assertTrue(args[0].startswith("http://my-site.com/"))
120+
self.assertIn("/_matrix/client/v1/media/download/" + MEDIA_PATH, args[0])
121+
122+
async def test_download_auth_media_missing_token(self) -> None:
123+
"""Tests that downloading an authenticated media file with a missing access
124+
token returns the correct error code.
125+
"""
126+
self.media_status = 401
127+
self.media_body = (
128+
b'{"errcode":"M_MISSING_TOKEN","error":"Missing access token"}'
129+
)
130+
self._set_headers({"content-type": ["application/json"]})
131+
132+
# Check that we fail at downloading the file.
133+
with self.assertRaises(ContentScannerRestError) as cm:
134+
await self.downloader.download_file(
135+
MEDIA_PATH, auth_header="Bearer access_token"
136+
)
137+
138+
self.assertEqual(cm.exception.http_status, 401)
139+
self.assertEqual(cm.exception.reason, "M_MISSING_TOKEN")
140+
141+
# Check that we tried downloading from the set base URL.
142+
args = self.get_mock.call_args.args
143+
self.assertTrue(args[0].startswith("http://my-site.com/"))
144+
self.assertIn("/_matrix/client/v1/media/download/" + MEDIA_PATH, args[0])
145+
98146
async def test_no_base_url(self) -> None:
99147
"""Tests that configuring a base homeserver URL means files are downloaded from
100148
that homeserver (rather than the one the files were uploaded to) and .well-known

0 commit comments

Comments
 (0)