1717)
1818from scrapingant_client .headers import convert_headers
1919from scrapingant_client .proxy_type import ProxyType
20- from scrapingant_client .response import Response
20+ from scrapingant_client .response import Response , MarkdownResponse
2121from scrapingant_client .utils import base64_encode_string
2222
2323
@@ -43,7 +43,8 @@ def _form_payload(
4343 browser : bool = True ,
4444 return_page_source : Optional [bool ] = None ,
4545 ) -> Dict :
46- request_data = {'url' : url }
46+ request_data = {
47+ 'url' : url }
4748 if cookies is not None :
4849 request_data ['cookies' ] = cookies_list_to_string (cookies )
4950 if js_snippet is not None :
@@ -60,7 +61,7 @@ def _form_payload(
6061 request_data ['return_page_source' ] = return_page_source
6162 return request_data
6263
63- def _parse_response (self , response_status_code : int , response_data : Dict , url : str , endpoint : str ) -> Response :
64+ def _check_status_code (self , response_status_code : int , response_data : Dict , url : str ) -> None :
6465 if response_status_code == 403 :
6566 raise ScrapingantInvalidTokenException ()
6667 elif response_status_code == 404 :
@@ -71,25 +72,25 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
7172 raise ScrapingantDetectedException ()
7273 elif response_status_code == 500 :
7374 raise ScrapingantInternalException ()
74- if endpoint is None or endpoint == 'extended' :
75- content = response_data [ 'html' ]
76- cookies_string = response_data ['cookies ' ]
77- text = response_data ['text ' ]
78- status_code = response_data ['status_code ' ]
79- cookies_list = cookies_list_from_string ( cookies_string )
80- return Response (
81- content = content ,
82- cookies = cookies_list ,
83- text = text ,
84- status_code = status_code
85- )
86- elif endpoint == 'markdown' :
87- return Response (
88- content = '' ,
89- cookies = [],
90- text = response_data ['markdown ' ],
91- status_code = 0 ,
92- )
75+
76+ def _parse_extended_response ( self , response_data : Dict ) -> Response :
77+ content = response_data ['html ' ]
78+ cookies_string = response_data ['cookies ' ]
79+ text = response_data ['text ' ]
80+ status_code = response_data [ 'status_code' ]
81+ cookies_list = cookies_list_from_string ( cookies_string )
82+ return Response (
83+ content = content ,
84+ cookies = cookies_list ,
85+ text = text ,
86+ status_code = status_code ,
87+ )
88+
89+ def _parse_markdown_response ( self , response_data : Dict ) -> MarkdownResponse :
90+ return MarkdownResponse (
91+ url = response_data ['url ' ],
92+ markdown = response_data [ 'markdown' ] ,
93+ )
9394
9495 def _get_scrapingant_api_url (self , endpoint : Optional [str ] = None ) -> str :
9596 if endpoint is None or endpoint == 'extended' :
@@ -99,7 +100,7 @@ def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
99100 else :
100101 raise ValueError (f'Invalid endpoint: { endpoint } , must be either None or "markdown"' )
101102
102- def general_request (
103+ def _request (
103104 self ,
104105 url : str ,
105106 method : str = 'GET' ,
@@ -114,7 +115,7 @@ def general_request(
114115 data = None ,
115116 json = None ,
116117 endpoint : Optional [str ] = None ,
117- ) -> Response :
118+ ) -> Dict :
118119 request_data = self ._form_payload (
119120 url = url ,
120121 cookies = cookies ,
@@ -138,10 +139,10 @@ def general_request(
138139 raise ScrapingantTimeoutException ()
139140 response_status_code = response .status_code
140141 response_data = response .json ()
141- parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
142- return parsed_response
142+ self ._check_status_code (response_status_code , response_data , url )
143+ return response_data
143144
144- async def general_request_async (
145+ async def _request_async (
145146 self ,
146147 url : str ,
147148 method : str = 'GET' ,
@@ -156,7 +157,7 @@ async def general_request_async(
156157 data = None ,
157158 json = None ,
158159 endpoint : Optional [str ] = None ,
159- ) -> Response :
160+ ) -> Dict :
160161 import httpx
161162
162163 request_data = self ._form_payload (
@@ -189,5 +190,21 @@ async def general_request_async(
189190 raise ScrapingantTimeoutException ()
190191 response_status_code = response .status_code
191192 response_data = response .json ()
192- parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
193- return parsed_response
193+ self ._check_status_code (response_status_code , response_data , url )
194+ return response_data
195+
196+ def general_request (self , * args , ** kwargs ) -> Response :
197+ response_data = self ._request (* args , ** kwargs , endpoint = 'extended' )
198+ return self ._parse_extended_response (response_data )
199+
200+ async def general_request_async (self , * args , ** kwargs ) -> Response :
201+ response_data = await self ._request_async (* args , ** kwargs , endpoint = 'extended' )
202+ return self ._parse_extended_response (response_data )
203+
204+ def markdown_request (self , * args , ** kwargs ) -> MarkdownResponse :
205+ response_data = self ._request (* args , ** kwargs , endpoint = 'markdown' )
206+ return self ._parse_markdown_response (response_data )
207+
208+ async def markdown_request_async (self , * args , ** kwargs ) -> MarkdownResponse :
209+ response_data = await self ._request_async (* args , ** kwargs , endpoint = 'markdown' )
210+ return self ._parse_markdown_response (response_data )
0 commit comments