Skip to content
This repository was archived by the owner on Apr 17, 2023. It is now read-only.

Commit db28a8d

Browse files
authored
Merge pull request #42 from code-yeongyu/feature/fix-only-videos
Fix `TwitterCrawler` to only extract video tweets
2 parents 0bb8481 + a1f5362 commit db28a8d

File tree

3 files changed

+15
-14
lines changed

3 files changed

+15
-14
lines changed

poetry.lock

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ yt-dlp = "^2022.11.11"
6565
monkeytype = "^22.2.0"
6666
mypy = "^0.982"
6767
yapf = "^0.32.0"
68-
pylint = "^2.15.5"
68+
pylint = "^2.15.7"
6969
isort = "^5.10.1"
7070
pytest = "^7.2.0"
7171
pytest-cov = "^4.0.0"
@@ -77,7 +77,7 @@ types-requests = "^2.28.11"
7777
types-beautifulsoup4 = "^4.11.6"
7878
types-invoke = "^1.7.3"
7979
types-toml = "^0.10.8"
80-
pyright = "^1.1.280"
80+
pyright = "^1.1.282"
8181
colorama = "^0.4.6"
8282

8383
[tool.pylint.MASTER]

twitter_video_tools/twitter_crawler.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def get_recent_liked_tweet(self, username: str) -> str:
7272
self._goto_liked_tweets(username)
7373
return self._get_article_links_in_current_screen()[0]
7474

75-
def get_video_of_tweet(self, link: str, timeout: Optional[float] = 5000) -> list[tuple[str, str]]:
75+
def get_video_of_tweet(self, link: str, timeout: Optional[float] = 10000) -> list[tuple[str, str]]:
7676
video_links: list[str] = []
7777

7878
def _request_m3u8_capture_handler(request: Request) -> None:
@@ -101,7 +101,7 @@ def _get_article_links_in_current_screen(self) -> list[str]:
101101
links: list[str] = []
102102

103103
while True:
104-
articles = self.page.locator('article')
104+
articles = self.page.locator('article:has(video)')
105105
article_length = articles.count()
106106
try:
107107
links = [
@@ -112,5 +112,6 @@ def _get_article_links_in_current_screen(self) -> list[str]:
112112
break
113113
except Error: # if articles in the page are not reachable
114114
self.page.mouse.wheel(0, 500) # scrolling down to refresh the articles
115+
self.page.mouse.wheel(0, -500) # scrolling back up so the articles are re-rendered
115116

116117
return links

0 commit comments

Comments
 (0)