diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..c7d51a9 Binary files /dev/null and b/.DS_Store differ diff --git a/tumblr-photo-video-ripper.py b/tumblr-photo-video-ripper.py index 6ad1b86..491c11c 100644 --- a/tumblr-photo-video-ripper.py +++ b/tumblr-photo-video-ripper.py @@ -68,8 +68,23 @@ def run(self): def download(self, medium_type, post, target_folder): try: medium_url = self._handle_medium_url(medium_type, post) - if medium_url is not None: - self._download(medium_type, medium_url, target_folder) + #print("medium url is %s", medium_url) + resp_raw = requests.get(medium_url, stream=True, proxies=self.proxies, timeout=TIMEOUT) + if medium_type == "video": + self._download(medium_type, medium_url, target_folder, resp_raw) + elif medium_type == "photo": + medium_url_bak = medium_url + medium_url_dot = medium_url.split('.') + medium_url_underline = medium_url_dot[-2].split('_') + medium_url_raw = "http://data.tumblr." + for index in range(len(medium_url_underline) - 1): + medium_url_raw = medium_url_raw + medium_url_underline[index] + "_" + medium_url_raw = medium_url_raw + "raw." + medium_url_dot[-1] + if medium_url is not None: + self._download(medium_type, medium_url_raw, target_folder, resp_raw) + elif medium_url_bak is not None and resp_raw.status_code == 403: + resp= requests.get(medium_url_bak, stream=True, proxies=self.proxies, timeout=TIMEOUT) + self._download(medium_type, medium_url_bak, target_folder, resp) except TypeError: pass @@ -99,7 +114,7 @@ def _handle_medium_url(self, medium_type, post): "issues/new attached with below information:\n\n" "%s" % post) - def _download(self, medium_type, medium_url, target_folder): + def _download(self, medium_type, medium_url, target_folder, resp): medium_name = medium_url.split("/")[-1].split("?")[0] if medium_type == "video": if not medium_name.startswith("tumblr"): @@ -115,10 +130,6 @@ def _download(self, medium_type, medium_url, target_folder): retry_times = 0 while retry_times < RETRY: try: - resp = requests.get(medium_url, - stream=True, - proxies=self.proxies, - timeout=TIMEOUT) if resp.status_code == 403: retry_times = RETRY print("Access Denied when retrieve %s.\n" % medium_url)