Merge pull request #131 from pbtcameron/main

Asian Sex Diary Support
ThePornDatabase · Jan 9, 2025 · 9fb7315
2 parents 1db8292 + 4b1ef28
commit 9fb7315
Showing 1 changed file with 29 additions and 7 deletions.
diff --git a/scenes/siteEuroSexDiary.py b/scenes/networkGlobeTwatters.py
@@ -3,14 +3,18 @@
 from tpdb.BaseSceneScraper import BaseSceneScraper
 
 
-class SiteEuroSexDiarySpider(BaseSceneScraper):
-    name = 'EuroSexDiary'
-    network = 'Euro Sex Diary'
-    parent = 'Euro Sex Diary'
-    site = 'Euro Sex Diary'
+class networkGlobeTwatters(BaseSceneScraper):
+    name = 'GlobeTwatter'
+    network = 'Globe Twatters'
+    parent = 'Globe Twatters'
 
     start_urls = [
+        'https://asiansexdiary.com',
         'https://eurosexdiary.com',
+        'https://trikepatrol.com',
+        'https://tuktukpatrol.com',
+        'https://milftrip.com',
+        'https://helloladyboy.com',
     ]
 
     selector_map = {
@@ -24,16 +28,34 @@ class SiteEuroSexDiarySpider(BaseSceneScraper):
         'tags': '//div[@class="amp-category"]/span/a/text()',
         'external_id': r'.*/(.*?)/$',
         'trailer': '//div[contains(@class, "video-player")]/amp-video/@src',
-        'pagination': '/category/conquests/page/%s/'
+        'pagination': ['/category/conquests/page/%s/','/all-updates/page/%s/'],
+        'duration': '//i[contains(@class,"fa") and contains(@class,"fa-video-camera")]/ancestor::div[1]/text()',
     }
 
+    pattern = re.compile(r'\b[\w\s?]*\s?\b')
+
     def get_scenes(self, response):
         scenes = response.xpath('//article/a/@href').getall()
+
+        meta = response.meta
+        site = response.xpath('//div[contains(@class,"amp-logo-footer")]//a/@title').get()
+        site = re.match(r'\b[\w\s?]*\s?\b', site).group(0).strip()
+        meta['site'] = site
+
         for scene in scenes:
             if re.search(self.get_selector_map('external_id'), scene):
-                yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene)
+                yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta)
 
     def get_description(self, response):
         description = super().get_description(response)
         description = re.sub('<[^<]+?>', '', description).strip()
         return description
+
+    def get_site(self, response):
+        return response.meta['site']
+
+    def get_next_page_url(self, base, page):
+        if "diary" in base:
+            return self.format_url(base, self.get_selector_map('pagination')[0] % page)
+        else:
+            return self.format_url(base, self.get_selector_map('pagination')[1] % page)