Merge pull request #42 from code-yeongyu/feature/fix-only-videos

code-yeongyu · web-flow · commit db28a8d6b1c4 · 2022-12-04T16:53:24.000+09:00
Fix `TwitterCrawler` to only extract video tweets
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -65,7 +65,7 @@ yt-dlp = "^2022.11.11"
 monkeytype = "^22.2.0"
 mypy = "^0.982"
 yapf = "^0.32.0"
-pylint = "^2.15.5"
+pylint = "^2.15.7"
 isort = "^5.10.1"
 pytest = "^7.2.0"
 pytest-cov = "^4.0.0"
@@ -77,7 +77,7 @@ types-requests = "^2.28.11"
 types-beautifulsoup4 = "^4.11.6"
 types-invoke = "^1.7.3"
 types-toml = "^0.10.8"
-pyright = "^1.1.280"
+pyright = "^1.1.282"
 colorama = "^0.4.6"
 
 [tool.pylint.MASTER]
diff --git a/twitter_video_tools/twitter_crawler.py b/twitter_video_tools/twitter_crawler.py
@@ -72,7 +72,7 @@ def get_recent_liked_tweet(self, username: str) -> str:
         self._goto_liked_tweets(username)
         return self._get_article_links_in_current_screen()[0]
 
-    def get_video_of_tweet(self, link: str, timeout: Optional[float] = 5000) -> list[tuple[str, str]]:
+    def get_video_of_tweet(self, link: str, timeout: Optional[float] = 10000) -> list[tuple[str, str]]:
         video_links: list[str] = []
 
         def _request_m3u8_capture_handler(request: Request) -> None:
@@ -101,7 +101,7 @@ def _get_article_links_in_current_screen(self) -> list[str]:
         links: list[str] = []
 
         while True:
-            articles = self.page.locator('article')
+            articles = self.page.locator('article:has(video)')
             article_length = articles.count()
             try:
                 links = [
@@ -112,5 +112,6 @@ def _get_article_links_in_current_screen(self) -> list[str]:
                 break
             except Error:    # if articles in the page are not reachable
                 self.page.mouse.wheel(0, 500)    #  scrolling down to refresh the articles
+                self.page.mouse.wheel(0, -500)    #  scrolling back up to refresh the articles
 
         return links