name contexts differently

juniorguru · Apr 17, 2024 · 98e22fa
1 parent 2d23156
commit 98e22fa
Showing 2 changed files with 6 additions and 9 deletions.
diff --git a/jg/plucker/jobs_linkedin/spider.py b/jg/plucker/jobs_linkedin/spider.py
@@ -167,18 +167,12 @@ def verify_job(
     def _retry(self, url: str, request: Request | None = None) -> Request:
         if not request:
             raise ValueError(f"Request object is required to retry {url}")
-        meta = request.meta | dict(playwright=True)
         self.logger.warning(f"Retrying {url} using browser")
-
-        # TODO proxy support
-        # see https://docs.apify.com/sdk/python/docs/concepts/proxy-management#configuring-proxy-based-on-actor-input
-        # see https://github.com/scrapy-plugins/scrapy-playwright?tab=readme-ov-file#proxy-support
-        # see https://docs.scrapy.org/en/latest/topics/spiders.html#scrapy.Spider.update_settings
         return request.replace(
             url=url,
             dont_filter=True,
             headers=self.request_headers,
-            meta=meta,
+            meta=request.meta | dict(playwright=True),
         )
 
     def _request(

diff --git a/jg/plucker/scrapers.py b/jg/plucker/scrapers.py
@@ -1,3 +1,4 @@
+import hashlib
 from pathlib import Path
 from typing import Any, Generator, Self, Type
 from urllib.parse import urlparse
@@ -141,10 +142,12 @@ async def process_request(self, request: Request, spider: Spider):
                 )
 
             proxy = url.geturl()
-            Actor.log.info(f"Creating a new Playwright context with proxy {proxy}")
+            proxy_hash = hashlib.sha1(proxy.encode()).hexdigest()[0:8]
+            context_name = f"proxy_{proxy_hash}"
+            Actor.log.info(f"Using Playwright context {context_name}")
             request.meta.update(
                 {
-                    "playwright_context": f"proxy_{urlparse(request.url).hostname}",
+                    "playwright_context": f"proxy_{context_name}",
                     "playwright_context_kwargs": {
                         "proxy": {
                             "server": proxy,