Skip to content

Commit

Permalink
Merge pull request #131 from pbtcameron/main
Browse files Browse the repository at this point in the history
Asian Sex Diary Support
  • Loading branch information
Darklyter authored Jan 9, 2025
2 parents 1db8292 + 4b1ef28 commit 9fb7315
Showing 1 changed file with 29 additions and 7 deletions.
36 changes: 29 additions & 7 deletions scenes/siteEuroSexDiary.py → scenes/networkGlobeTwatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@
from tpdb.BaseSceneScraper import BaseSceneScraper


class SiteEuroSexDiarySpider(BaseSceneScraper):
name = 'EuroSexDiary'
network = 'Euro Sex Diary'
parent = 'Euro Sex Diary'
site = 'Euro Sex Diary'
class networkGlobeTwatters(BaseSceneScraper):
name = 'GlobeTwatter'
network = 'Globe Twatters'
parent = 'Globe Twatters'

start_urls = [
'https://asiansexdiary.com',
'https://eurosexdiary.com',
'https://trikepatrol.com',
'https://tuktukpatrol.com',
'https://milftrip.com',
'https://helloladyboy.com',
]

selector_map = {
Expand All @@ -24,16 +28,34 @@ class SiteEuroSexDiarySpider(BaseSceneScraper):
'tags': '//div[@class="amp-category"]/span/a/text()',
'external_id': r'.*/(.*?)/$',
'trailer': '//div[contains(@class, "video-player")]/amp-video/@src',
'pagination': '/category/conquests/page/%s/'
'pagination': ['/category/conquests/page/%s/','/all-updates/page/%s/'],
'duration': '//i[contains(@class,"fa") and contains(@class,"fa-video-camera")]/ancestor::div[1]/text()',
}

pattern = re.compile(r'\b[\w\s?]*\s?\b')

def get_scenes(self, response):
    """Yield one scene request per article link found on a listing page.

    The site name is read from the ``title`` attribute of the footer logo
    link and stored in ``meta['site']``; every scene request carries that
    meta so ``get_site`` can read it back when the scene page is parsed.

    Args:
        response: The listing-page response.

    Yields:
        scrapy.Request: One request per link that matches the
        ``external_id`` selector, with ``parse_scene`` as callback.
    """
    scenes = response.xpath('//article/a/@href').getall()

    meta = response.meta
    title = response.xpath('//div[contains(@class,"amp-logo-footer")]//a/@title').get() or ''
    # ``self.pattern`` is the same expression that used to be repeated inline
    # here. The match is None when the title is empty or does not start on a
    # word boundary, so fall back to the raw title instead of raising.
    matched = self.pattern.match(title)
    meta['site'] = matched.group(0).strip() if matched else title.strip()

    external_id = self.get_selector_map('external_id')
    for scene in scenes:
        if re.search(external_id, scene):
            # Exactly one request per scene, and it must carry ``meta``:
            # ``get_site`` reads ``meta['site']`` and would raise KeyError
            # for a request issued without it.
            yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta)

def get_description(self, response):
    """Return the inherited description with tag-shaped text removed.

    Anything matching ``<...>`` is deleted from the value produced by the
    parent implementation, then surrounding whitespace is trimmed.
    """
    raw_text = super().get_description(response)
    without_tags = re.sub('<[^<]+?>', '', raw_text)
    return without_tags.strip()

def get_site(self, response):
    """Return the site name stored under ``'site'`` in the response meta.

    Raises:
        KeyError: If the response meta has no ``'site'`` entry.
    """
    request_meta = response.meta
    return request_meta['site']

def get_next_page_url(self, base, page):
    """Build the URL of listing page ``page`` for the site at ``base``.

    The ``pagination`` selector holds two path templates: the first is used
    when ``base`` contains ``"diary"``, the second for every other base URL.
    """
    template_index = 0 if "diary" in base else 1
    template = self.get_selector_map('pagination')[template_index]
    return self.format_url(base, template % page)

0 comments on commit 9fb7315

Please sign in to comment.