Skip to content

Commit 2371a2a

Browse files
authored
Merge pull request scrapy#2789 from starrify/add-response-follow-tag-link
[MRG+1] Added: Now supporting <link> tags in Response.follow
2 parents d1e948c + b33e0d5 commit 2371a2a

2 files changed

Lines changed: 12 additions & 5 deletions

File tree

scrapy/http/response/text.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ def follow(self, url, callback=None, method='GET', headers=None, body=None,
135135
* an attribute Selector (not SelectorList) - e.g.
136136
``response.css('a::attr(href)')[0]`` or
137137
``response.xpath('//img/@src')[0]``.
138-
* a Selector for ``<a>`` element, e.g.
138+
* a Selector for ``<a>`` or ``<link>`` element, e.g.
139139
``response.css('a.my_link')[0]``.
140140
141141
See :ref:`response-follow-example` for usage examples.
@@ -165,10 +165,11 @@ def _url_from_selector(sel):
165165
return strip_html5_whitespace(sel.root)
166166
if not hasattr(sel.root, 'tag'):
167167
raise ValueError("Unsupported selector: %s" % sel)
168-
if sel.root.tag != 'a':
169-
raise ValueError("Only <a> elements are supported; got <%s>" %
168+
if sel.root.tag not in ('a', 'link'):
169+
raise ValueError("Only <a> and <link> elements are supported; got <%s>" %
170170
sel.root.tag)
171171
href = sel.root.get('href')
172172
if href is None:
173-
raise ValueError("<a> element has no href attribute: %s" % sel)
173+
raise ValueError("<%s> element has no href attribute: %s" %
174+
(sel.root.tag, sel))
174175
return strip_html5_whitespace(href)

tests/test_http_response.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -162,7 +162,6 @@ def test_follow_whitespace_url(self):
162162
def test_follow_whitespace_link(self):
163163
self._assert_followed_url(Link('http://example.com/foo '),
164164
'http://example.com/foo%20')
165-
166165
def _assert_followed_url(self, follow_obj, target_url, response=None):
167166
if response is None:
168167
response = self._links_response()
@@ -402,6 +401,13 @@ def test_follow_selector(self):
402401
for sel, url in zip(sellist, urls):
403402
self._assert_followed_url(sel, url, response=resp)
404403

404+
# select <link> elements
405+
self._assert_followed_url(
406+
Selector(text='<link href="foo"></link>').css('link')[0],
407+
'http://example.com/foo',
408+
response=resp
409+
)
410+
405411
# href attributes should work
406412
for sellist in [resp.css('a::attr(href)'), resp.xpath('//a/@href')]:
407413
for sel, url in zip(sellist, urls):

0 commit comments

Comments
 (0)