1
+ #!/usr/bin/env python3
2
+ """
3
+ Async Step-by-Step Cookies Example
4
+
5
+ This example demonstrates how to use cookies with SmartScraper API using async/await patterns.
6
+ It shows how to set up and execute requests with custom cookies for authentication and session management.
7
+ """
8
+
9
+ import asyncio
10
+ import json
11
+ import logging
12
+ import os
13
+ import time
14
+
15
+ import httpx
16
+ from dotenv import load_dotenv
17
+
18
# Console logging: INFO and above, each record prefixed with time and level.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Read settings (e.g. TEST_API_KEY) from a .env file into the environment.
load_dotenv()
28
+
29
+
30
async def step_1_environment_setup():
    """Step 1: Set up environment and API key.

    Reads ``TEST_API_KEY`` from the process environment. When it is missing
    or empty, prints setup instructions; otherwise prints a masked preview
    of the key.

    Returns:
        The API key string, or ``None`` when the variable is unset or empty.
    """
    print("STEP 1: Environment Setup")
    print("=" * 40)

    # Check if API key is available
    api_key = os.getenv("TEST_API_KEY")
    if not api_key:
        print("❌ Error: TEST_API_KEY environment variable not set")
        print("Please either:")
        print(" 1. Set environment variable: export TEST_API_KEY='your-api-key-here'")
        print(" 2. Create a .env file with: TEST_API_KEY=your-api-key-here")
        return None

    # Reveal the first 8 / last 4 characters only when at least as many
    # characters stay hidden. For short keys those slices overlap and would
    # print the entire secret, so mask them completely instead.
    head, tail = 8, 4
    if len(api_key) >= 2 * (head + tail):
        preview = f"{api_key[:head]}...{api_key[-tail:]}"
    else:
        preview = "*" * len(api_key)

    print("✅ API key found in environment")
    print(f"🔑 API Key: {preview}")
    return api_key
47
+
48
+
49
async def step_2_server_connectivity_check(api_key):
    """Step 2: Check server connectivity.

    Issues a GET against the ``/healthz`` endpoint of the local server and
    reports the outcome on stdout.

    Args:
        api_key: Accepted for call-site symmetry; not used by this check.

    Returns:
        ``True`` when the health endpoint answers with HTTP 200, ``False``
        for any other status or when the request raises.
    """
    print("\n STEP 2: Server Connectivity Check")
    print("=" * 40)

    endpoint = "http://localhost:8001/v1/smartscraper"
    # The health endpoint lives on the same host, next to the API route.
    health_url = endpoint.replace("/v1/smartscraper", "/healthz")

    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(health_url)

            status = response.status_code
            if status != 200:
                print(f"❌ Server health check failed with status {status} ")
                return False

            print("✅ Server is accessible")
            print(f"🔗 Health endpoint: {health_url} ")
            return True
    except Exception as e:
        # Best-effort probe: any failure simply means "server not reachable".
        print(f"❌ Server connectivity check failed: {e} ")
        print("Please ensure the server is running:")
        print(" poetry run uvicorn app.main:app --host 0.0.0.0 --port 8001 --reload")
        return False
74
+
75
+
76
def step_3_define_cookies():
    """Step 3: Define cookies for authentication.

    Builds the example cookie set, prints each entry (values of cookies
    whose name contains "token" are truncated to 20 characters), and
    prints the total count.

    Returns:
        The dict mapping cookie names to their string values.
    """
    print("\n STEP 3: Define Cookies")
    print("=" * 40)

    # Example cookies for a website that requires authentication
    cookies = dict(
        session_id="abc123def456ghi789",
        user_token="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
        remember_me="true",
        language="en",
        theme="dark",
    )

    print("🍪 Cookies configured:")
    for name, raw in cookies.items():
        shown = raw
        # Truncate token-like values so the full secret is not echoed.
        if "token" in name.lower() and len(raw) > 20:
            shown = raw[:20] + "..."
        print(f" {name} : {shown} ")

    print(f"\n 📊 Total cookies: {len(cookies)} ")
    return cookies
101
+
102
+
103
def step_4_define_request_parameters():
    """Step 4: Define the request parameters.

    Prints the target URL, the extraction prompt and the goal of the run.

    Returns:
        A dict with the keys ``website_url`` and ``user_prompt``.
    """
    print("\n STEP 4: Define Request Parameters")
    print("=" * 40)

    # Configuration parameters
    params = {
        "website_url": "https://example.com/dashboard",
        "user_prompt": "Extract user profile information and account details",
    }

    sections = (
        ("🌐 Website URL:", params["website_url"]),
        ("\n 📝 User Prompt:", params["user_prompt"]),
    )
    for heading, value in sections:
        print(heading)
        print(f" {value} ")
    print("\n 🎯 Goal: Access authenticated content using cookies")

    return params
122
+
123
+
124
def step_5_prepare_headers(api_key):
    """Step 5: Prepare request headers.

    Builds the header dict sent with the API request and prints it, with
    the ``SGAI-APIKEY`` value masked.

    Args:
        api_key: API key string placed in the ``SGAI-APIKEY`` header.

    Returns:
        The headers dict (containing the unmasked key).
    """
    print("\n STEP 5: Prepare Request Headers")
    print("=" * 40)

    headers = {
        "SGAI-APIKEY": api_key,
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
        "Accept": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
    }

    # Number of characters revealed at each end of the key when masking.
    edge = 10

    print("📋 Headers configured:")
    for key, value in headers.items():
        if key == "SGAI-APIKEY":
            # Show the edges only when at least as much stays hidden; for a
            # key of 20 characters or fewer the two slices would overlap
            # and print the whole secret.
            if len(value) >= 4 * edge:
                shown = f"{value[:edge]}...{value[-edge:]}"
            else:
                shown = "*" * len(value)
            print(f" {key} : {shown} ")
        else:
            print(f" {key} : {value} ")

    return headers
147
+
148
+
149
async def step_6_execute_cookies_request(headers, cookies, config):
    """Step 6: Execute the request with cookies.

    POSTs a JSON payload (website URL, prompt, empty output schema and the
    cookies) to the local SmartScraper endpoint and reports status and
    timing on stdout.

    Args:
        headers: Header dict passed to the HTTP client.
        cookies: Cookie mapping embedded in the payload under ``"cookies"``.
        config: Mapping providing ``"website_url"`` and ``"user_prompt"``.

    Returns:
        A ``(result, duration)`` tuple. ``result`` is the decoded JSON body
        on HTTP 200 and ``None`` on any other status or on error;
        ``duration`` is the elapsed time in seconds.
    """
    print("\n STEP 6: Execute Request with Cookies")
    print("=" * 40)

    url = "http://localhost:8001/v1/smartscraper"

    # Request payload with cookies
    payload = {
        "website_url": config["website_url"],
        "user_prompt": config["user_prompt"],
        "output_schema": {},
        "cookies": cookies,
    }

    print("🚀 Starting request with cookies...")
    print("🍪 Using authentication cookies for access...")

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted during the request.
    # Taken before the try so every handler below can rely on it.
    start_time = time.perf_counter()

    try:
        # Use timeout for cookies requests
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.post(url, headers=headers, json=payload)

        duration = time.perf_counter() - start_time

        print(f"✅ Request completed in {duration:.2f} seconds")
        print(f"📊 Response Status: {response.status_code} ")

        if response.status_code == 200:
            result = response.json()
            return result, duration

        print(f"❌ Request failed with status {response.status_code} ")
        print(f"Response: {response.text} ")
        return None, duration

    except httpx.TimeoutException:
        duration = time.perf_counter() - start_time
        print(f"❌ Request timed out after {duration:.2f} seconds (>120s timeout)")
        print("This may indicate authentication issues or slow response.")
        return None, duration

    except httpx.RequestError as e:
        duration = time.perf_counter() - start_time
        print(f"❌ Request error after {duration:.2f} seconds: {e} ")
        print("Common causes:")
        print(" - Server is not running")
        print(" - Invalid cookies")
        print(" - Network connectivity issues")
        return None, duration

    except Exception as e:
        # Last-resort boundary for the example (e.g. a non-JSON 200 body):
        # report the error and return no result instead of crashing.
        duration = time.perf_counter() - start_time
        print(f"❌ Unexpected error after {duration:.2f} seconds: {e} ")
        return None, duration
208
+
209
+
210
def step_7_process_results(result, duration):
    """Step 7: Process and display the results.

    Args:
        result: Decoded response (dict, list, or any other value), or
            ``None`` when the request produced nothing.
        duration: Request duration in seconds, printed with two decimals.

    Returns:
        None. When ``result`` is ``None`` the function returns right after
        reporting that there is nothing to process.
    """
    print("\n STEP 7: Process Results")
    print("=" * 40)

    if result is None:
        print("❌ No results to process")
        return

    print("📋 Processing authenticated results...")

    # Display results based on type
    if isinstance(result, dict):
        print("\n 🔍 Response Structure:")
        print(json.dumps(result, indent=2, ensure_ascii=False))

        # A "result" key is treated as the success indicator.
        if "result" in result:
            print("\n ✨ Authentication successful! Data extracted with cookies")

    elif isinstance(result, list):
        total = len(result)
        print(f"\n ✅ Authentication successful! Extracted {total} items")

        # Show at most the first three items.
        print("\n 📦 Sample Results:")
        for position, item in enumerate(result[:3], start=1):
            print(f" {position} . {item} ")

        remaining = total - 3
        if remaining > 0:
            print(f" ... and {remaining} more items")

    else:
        print(f"\n 📋 Result: {result} ")

    print(f"\n ⏱️ Total processing time: {duration:.2f} seconds")
245
+
246
+
247
def step_8_show_curl_equivalent(api_key, cookies, config):
    """Step 8: Show equivalent curl command.

    Prints a curl invocation for the same endpoint and payload. The JSON
    body is produced with ``json.dumps`` and every argument is shell-quoted,
    so prompts or cookie values containing quotes still yield valid JSON
    and a valid shell command.

    Args:
        api_key: Accepted for interface compatibility. It is deliberately
            not printed; the command references ``$TEST_API_KEY`` instead
            so the secret does not end up in terminal output or logs.
        cookies: Mapping of cookie names to values.
        config: Mapping providing ``"website_url"`` and ``"user_prompt"``.
    """
    import shlex  # local import: only this step needs shell quoting

    print("\n STEP 8: Equivalent curl Command")
    print("=" * 40)

    # Convert cookies dict to curl format
    cookies_str = "; ".join(f"{k}={v}" for k, v in cookies.items())

    # Serialize the whole body at once so embedded quotes are escaped.
    payload = json.dumps(
        {
            "website_url": config["website_url"],
            "user_prompt": config["user_prompt"],
            "output_schema": {},
            "cookies": cookies,
        },
        indent=2,
    )

    plain_headers = (
        "Content-Type: application/json",
        "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
        "Accept: application/json",
        "Accept-Language: en-US,en;q=0.9",
        "Accept-Encoding: gzip, deflate, br",
        "Connection: keep-alive",
    )

    parts = [
        "curl --location " + shlex.quote("http://localhost:8001/v1/smartscraper"),
        # Double quotes on purpose: the shell expands the variable at run time.
        '--header "SGAI-APIKEY: $TEST_API_KEY"',
    ]
    parts.extend(f"--header {shlex.quote(header)}" for header in plain_headers)
    parts.append(f"--cookie {shlex.quote(cookies_str)}")
    parts.append(f"--data {shlex.quote(payload)}")

    curl_command = " \\\n".join(parts)

    print("Equivalent curl command:")
    print(curl_command)
275
+
276
+
277
def step_9_cookie_management_tips():
    """Step 9: Provide cookie management tips.

    Prints a numbered checklist of cookie-handling best practices.
    """
    tips = (
        "1. 🔐 Store sensitive cookies securely (environment variables)",
        "2. ⏰ Set appropriate expiration times",
        "3. 🧹 Clean up expired cookies regularly",
        "4. 🔄 Refresh tokens before they expire",
        "5. 🛡️ Use HTTPS for cookie transmission",
        "6. 📝 Log cookie usage for debugging",
        "7. 🚫 Don't hardcode cookies in source code",
        "8. 🔍 Validate cookie format before sending",
    )

    print("\n STEP 9: Cookie Management Tips")
    print("=" * 40)

    print("🍪 Best Practices for Cookie Management:")
    for tip in tips:
        print(tip)
291
+
292
+
293
async def main():
    """Main function to run the async step-by-step cookies example.

    Runs steps 1-9 in order. Stops early (returning ``None``) when no API
    key is found or the server connectivity check fails. Total run time is
    logged and printed at the end.
    """
    # perf_counter() is monotonic, so the total cannot be distorted by
    # system clock adjustments while the example runs.
    total_start_time = time.perf_counter()
    logger.info("Starting Async Step-by-Step Cookies Example")

    print("ScrapeGraph SDK - Async Step-by-Step Cookies Example")
    print("=" * 60)
    print("This example shows the complete async process of setting up and")
    print("executing requests with cookies for authentication")
    print("=" * 60)

    # Step 1: Environment setup
    api_key = await step_1_environment_setup()
    if not api_key:
        return

    # Step 2: Server connectivity check
    server_ok = await step_2_server_connectivity_check(api_key)
    if not server_ok:
        return

    # Step 3: Define cookies
    cookies = step_3_define_cookies()

    # Step 4: Define request parameters
    config = step_4_define_request_parameters()

    # Step 5: Prepare headers
    headers = step_5_prepare_headers(api_key)

    # Step 6: Execute request
    result, duration = await step_6_execute_cookies_request(headers, cookies, config)

    # Step 7: Process results
    step_7_process_results(result, duration)

    # Step 8: Show curl equivalent
    step_8_show_curl_equivalent(api_key, cookies, config)

    # Step 9: Cookie management tips
    step_9_cookie_management_tips()

    total_duration = time.perf_counter() - total_start_time
    # Lazy %-style arguments: the message is formatted only if the record
    # is actually emitted.
    logger.info(
        "Example completed! Total execution time: %.2f seconds", total_duration
    )

    print("\n " + "=" * 60)
    print("Async step-by-step cookies example completed!")
    print(f"⏱️ Total execution time: {total_duration:.2f} seconds")
    print("\n Key takeaways:")
    print("1. Async/await provides better performance for I/O operations")
    print("2. Cookies enable access to authenticated content")
    print("3. Always validate API key and server connectivity first")
    print("4. Secure cookie storage is crucial for production use")
    print("5. Handle authentication errors gracefully")
    print("6. Use equivalent curl commands for testing")
    print("\n Next steps:")
    print("- Implement secure cookie storage")
    print("- Add cookie refresh logic")
    print("- Handle authentication failures")
    print("- Monitor cookie expiration")
    print("- Implement retry logic for failed requests")


if __name__ == "__main__":
    asyncio.run(main())
0 commit comments