From 8114f070e7fe8a9b74a20549add467d27b19b663 Mon Sep 17 00:00:00 2001 From: Vostretsov Nikita Date: Wed, 11 Apr 2018 15:22:55 +0000 Subject: [PATCH 1/6] ignore test cache --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0cb3fbb..5af20da 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ scrapy_splash.egg-info htmlcov .hypothesis .ipynb_checkpoints +.pytest_cache From f4c19789a95cf2a13e808bf060a2210bb37ca553 Mon Sep 17 00:00:00 2001 From: Vostretsov Nikita Date: Wed, 11 Apr 2018 15:45:39 +0000 Subject: [PATCH 2/6] test what exception is not raised --- tests/test_middleware.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 66b79ce..7b7e5fc 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -286,6 +286,22 @@ def test_magic_response(): if c.name == 'spam': assert c.value == 'ham' + resp_data = { + 'url': "http://exmaple.com/#id42", + 'body': base64.b64encode(b'\xad').decode('ascii'), + 'headers': [ + {'name': 'Content-Type', 'value': "text/html; charset=cp1251"}, + ] + } + resp = TextResponse("http://mysplash.example.com/execute", + headers={b'Content-Type': b'application/json'}, + body=json.dumps(resp_data).encode('utf8')) + + try: + resp2 = mw.process_response(req, resp, None) + except: + assert 'process_response raised exception' is None + def test_cookies(): mw = _get_mw() From 65bd79374e9e6792bfc4bb0be2723d1401725b37 Mon Sep 17 00:00:00 2001 From: Vostretsov Nikita Date: Wed, 11 Apr 2018 15:49:53 +0000 Subject: [PATCH 3/6] do not try to decode body to text --- scrapy_splash/response.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scrapy_splash/response.py b/scrapy_splash/response.py index e5250c2..64066ef 100644 --- a/scrapy_splash/response.py +++ b/scrapy_splash/response.py @@ -176,7 +176,6 @@ def _load_from_json(self): # response.body if 'body' in self.data: self._body = base64.b64decode(self.data['body']) 
- self._cached_ubody = self._body.decode(self.encoding) elif 'html' in self.data: self._cached_ubody = self.data['html'] self._body = self._cached_ubody.encode(self.encoding) From fe140af81238d805c891cf679bb9d92615a17146 Mon Sep 17 00:00:00 2001 From: Vostretsov Nikita Date: Mon, 16 Apr 2018 10:06:14 +0000 Subject: [PATCH 4/6] more detailed description in Response section --- README.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 91b8855..e5155c9 100644 --- a/README.rst +++ b/README.rst @@ -328,7 +328,13 @@ SplashJsonResponse provide extra features: * response.url is set to the value of 'url' key; * response.body is set to the value of 'html' key, or to base64-decoded value of 'body' key; - * response.status is set from the value of 'http_status' key. + * response.status is set to the value of 'http_status' key. + When ``meta['splash']['http_status_from_error_code']`` is True + and ``assert(splash:go(..))`` fails with an HTTP error + response.status is also set to HTTP error code. + + Original URL, status and headers are available as ``response.real_url``, + ``response.splash_response_status`` and ``response.splash_response_headers``. When ``response.body`` is updated in SplashJsonResponse (either from 'html' or from 'body' keys) familiar ``response.css`` From ee767967d3181bb4895467c32a967e7627f8e0ae Mon Sep 17 00:00:00 2001 From: Vostretsov Nikita Date: Mon, 16 Apr 2018 10:07:16 +0000 Subject: [PATCH 5/6] single place to store details --- README.rst | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index e5155c9..058486d 100644 --- a/README.rst +++ b/README.rst @@ -260,19 +260,8 @@ to set ``meta['splash']['args']`` use ``SplashRequest(..., args=myargs)``. 
* ``meta['splash']['magic_response']`` - when set to True and a JSON response is received from Splash, several attributes of the response - (headers, body, url, status code) are filled using data returned in JSON: - - * response.headers are filled from 'headers' keys; - * response.url is set to the value of 'url' key; - * response.body is set to the value of 'html' key, - or to base64-decoded value of 'body' key; - * response.status is set to the value of 'http_status' key. - When ``meta['splash']['http_status_from_error_code']`` is True - and ``assert(splash:go(..))`` fails with an HTTP error - response.status is also set to HTTP error code. - - Original URL, status and headers are available as ``response.real_url``, - ``response.splash_response_status`` and ``response.splash_response_headers``. + (headers, body, url, status code) are filled using data returned in JSON; + for details see the Responses section. This option is set to True by default if you use SplashRequest. ``render.json`` and ``execute`` endpoints may not have all the necessary From b772360abb9072042ee6eb53636d2b79a5d18814 Mon Sep 17 00:00:00 2001 From: Vostretsov Nikita Date: Mon, 16 Apr 2018 10:08:47 +0000 Subject: [PATCH 6/6] more details for encoding handling --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 058486d..895ce44 100644 --- a/README.rst +++ b/README.rst @@ -315,8 +315,8 @@ SplashJsonResponse provide extra features: * response.headers are filled from 'headers' keys; * response.url is set to the value of 'url' key; - * response.body is set to the value of 'html' key, - or to base64-decoded value of 'body' key; + * response.body is set to the value of 'html' key (UTF-8 text is expected), + or to the base64-decoded binary value of 'body' key; * response.status is set to the value of 'http_status' key. When ``meta['splash']['http_status_from_error_code']`` is True and ``assert(splash:go(..))`` fails with an HTTP error