Skip to content

Commit 4248d43

Browse files
Fix aiohttp loader to follow Link rel=alternate for JSON-LD
Same fix as the requests loader: defer response.json() until after Link header processing, and actually fetch the alternate URL when one is found (previously the loader only updated doc['documentUrl'] without making a second request). Also corrects the 2-space indentation to 4 spaces.

Fixes #128

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent f6f6706 commit 4248d43

File tree

1 file changed

+11
-14
lines changed

1 file changed

+11
-14
lines changed

lib/pyld/documentloader/aiohttp.py

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -82,41 +82,38 @@ async def async_loader(url, headers):
8282
async with session.get(url,
8383
headers=headers,
8484
**kwargs) as response:
85-
# Allow any content_type in trying to parse json
86-
# similar to requests library
87-
json_body = await response.json(content_type=None)
8885
content_type = response.headers.get('content-type')
8986
if not content_type:
9087
content_type = 'application/octet-stream'
9188
doc = {
9289
'contentType': content_type,
9390
'contextUrl': None,
9491
'documentUrl': response.url.human_repr(),
95-
'document': json_body
9692
}
9793
link_header = response.headers.get('link')
9894
if link_header:
9995
linked_context = parse_link_header(link_header).get(
10096
LINK_HEADER_REL)
10197
# only 1 related link header permitted
10298
if linked_context and content_type != 'application/ld+json':
103-
if isinstance(linked_context, list):
104-
raise JsonLdError(
105-
'URL could not be dereferenced, '
106-
'it has more than one '
107-
'associated HTTP Link Header.',
108-
'jsonld.LoadDocumentError',
109-
{'url': url},
110-
code='multiple context link headers')
111-
doc['contextUrl'] = linked_context['target']
99+
if isinstance(linked_context, list):
100+
raise JsonLdError(
101+
'URL could not be dereferenced, '
102+
'it has more than one '
103+
'associated HTTP Link Header.',
104+
'jsonld.LoadDocumentError',
105+
{'url': url},
106+
code='multiple context link headers')
107+
doc['contextUrl'] = linked_context['target']
112108
linked_alternate = parse_link_header(link_header).get('alternate')
113109
# if not JSON-LD, alternate may point there
114110
if (linked_alternate and
115111
linked_alternate.get('type') == 'application/ld+json' and
116112
not re.match(r'^application\/(\w*\+)?json$', content_type)):
117113
doc['contentType'] = 'application/ld+json'
118114
doc['documentUrl'] = iri_resolver.resolve(linked_alternate['target'], url)
119-
115+
return await async_loader(doc['documentUrl'], headers)
116+
doc['document'] = await response.json(content_type=None)
120117
return doc
121118
except JsonLdError as e:
122119
raise e

0 commit comments

Comments
 (0)