# proses.py
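"""Scrape video links from the pages listed in link.txt, resolve each one to a
direct download link, and hand the results to remote_upload_folder.py.

Pipeline (as wired up at module level below):
  1. main()   - scrape paginated listing pages and write hrefs to output_link.txt
  2. exmain() - fetch each link, pull the hashed download URL and Authorization
                token out of the page, and write direct links to zfinal_hasil.txt
  3. subprocess then runs remote_upload_folder.py on the results
"""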
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urlunparse
import re
from domain_ganti import domain_ganti
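# NOTE: domain_ganti is assumed to be a full base URL string defined in
# domain_ganti.py, e.g. domain_ganti = "https://example.com" (hypothetical
# value); the code below relies on urlparse(domain_ganti).netloc and
# domain_ganti.split('/')[2] both yielding the bare hostname.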
def scrape_website(url: str) -> list:
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
    }
    results = []
    page_number = 1

    # First request, used to discover the total number of pages
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to fetch initial page: {response.status_code}")
        return results  # Return an empty list instead of None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the total number of pages from the pagination links
    pagination = soup.find('ul', class_='pagination mt-4')
    if pagination:
        last_page_link = pagination.find_all('a', class_='page-link')[-1]  # The last link holds the page count
        max_pages = int(last_page_link.text)
    else:
        print("No pagination found. Exiting.")
        return results

    # Scrape each page in turn
    while page_number <= max_pages:
        paginated_url = f"{url}?p={page_number}"  # Adjust if the site paginates differently
        response = requests.get(paginated_url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to fetch page {page_number}: {response.status_code}")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        videos = soup.find_all('div', class_='col-sm col-md-6 col-lg-4')
        if not videos:
            print(f"No more videos found on page {page_number}. Ending scrape.")
            break  # Exit the loop if no videos are found

        # Extract the data for each video card
        for video in videos:
            img_src = video.find('img')['src']
            a_href = video.find('a')['href']
            # Prepend the domain of the input URL to the scraped relative href
            domain = url.split("/")[2]
            a_href = f"https://{domain}{a_href}"
            print(a_href)
            title = video.find('a', class_='title_video').strong.get_text()
            results.append({'img_src': img_src, 'a_href': a_href, 'title': title})

        print(f"Scraped page {page_number} successfully.")
        page_number += 1  # Move to the next page

    return results  # Return the list of results
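# Each item returned by scrape_website() has the shape
# {'img_src': ..., 'a_href': 'https://<domain>/<path>', 'title': ...};
# main() below only persists the 'a_href' field.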
def main():
    # Read the source URLs from link.txt
    with open('link.txt', 'r', encoding='utf-8') as file:
        urls = file.readlines()

    scraped_content = []
    new_netloc = urlparse(domain_ganti).netloc  # Take the netloc from domain_ganti

    for url in urls:
        url = url.strip()  # Remove any leading/trailing whitespace
        if url:
            # Swap the URL's domain for the one from domain_ganti
            parsed_url = urlparse(url)
            url = urlunparse(parsed_url._replace(netloc=new_netloc))
            scraped_result = scrape_website(url)
            scraped_content.extend(scraped_result)

    # Write only the links to the output file
    file_path = "output_link.txt"
    with open(file_path, "w", encoding="utf-8") as file:
        for item in scraped_content:
            file.write(f"{item['a_href']}\n")

    total_links = len(scraped_content)
    print(f"Total links scraped: {total_links}")
# Continue with the upload step =========================================
import subprocess

try:
    with open('output_link.txt', 'r') as file:
        url1 = file.readlines()

    # Drop blank lines and replace whatever domain each URL carries
    url_mentah = [url.strip().replace(url.split('/')[2], domain_ganti.split('/')[2]) for url in url1 if url.strip()]

    # Write the updated URLs back to output_link.txt
    with open('output_link.txt', 'w') as file:
        for url in url_mentah:
            file.write(url + '\n')
    print("Domain URLs updated successfully in output_link.txt.")
except FileNotFoundError:
    print("The file 'output_link.txt' was not found. Please ensure it exists in the same directory.")
    exit()
import json  # requests, BeautifulSoup, and re are already imported above
# Headers for the first request (to the initial URLs)
headers_initial = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'id-ID,id;q=0.9,en-US;q=0.8,en;q=0.7',
    'priority': 'u=0, i',
    'referer': 'https://metrolagu.cam/',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Linux"',
    'sec-fetch-dest': 'iframe',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'cross-site',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
}
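# These headers imitate a Chrome 127 browser loading the page in an iframe;
# the hard-coded referer points at metrolagu.cam and may need updating if the
# upstream site changes.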
def convert_url(url):
    match = re.search(r'/[de]/([^/]+)$', url)
    if match:
        video_id = match.group(1)
        base_url = url.split('/d/')[0] if '/d/' in url else url.split('/e/')[0]
        return f"{base_url}/p0?id={video_id}"
    return url
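# For illustration (hypothetical host): convert_url turns an embed/download URL
# such as "https://example.com/d/abc123" or "https://example.com/e/abc123"
# into "https://example.com/p0?id=abc123"; anything else is returned unchanged.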
def extract_download_link(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    script_tag = soup.find('script', string=lambda t: 'fetchDirectLink' in t if t else False)
    if script_tag:
        # Use a regex to find the URL inside the fetchDirectLink function
        match = re.search(r'https?://[^\s]+/\w+\.php\?key=[\w\d]+', script_tag.string)
        if match:
            print("Found download link:", match.group(0))
            return match.group(0)  # Return the URL that was found
    return None

# Earlier string-search implementation, kept for reference:
# if script_tag:
#     download_link_start = script_tag.string.find("https://mba.dog/download_hashed.php?key=")
#     if download_link_start != -1:
#         download_link_end = script_tag.string.find('"', download_link_start)
#         if download_link_end != -1:
#             download_link = script_tag.string[download_link_start:download_link_end]
#             return download_link
# return None
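# The regex above expects the page's script to embed a hashed download URL of
# the form "https://<host>/<name>.php?key=<hash>" (see the legacy block above
# for one observed host); the exact host and script name may vary per site.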
def extract_authorization_token(html_content):
    soup = BeautifulSoup(html_content, 'html.parser')
    script_tag = soup.find('script', string=lambda t: 'fetchDirectLink' in t if t else False)
    if script_tag:
        # Use a regex to find the authorization token
        # match = re.search(r"'Authorization':\s*'Bearer\s*([^']+)'", script_tag.string)
        match = re.search(r"'Authorization':\s*'([^']+)'", script_tag.string)
        if match:
            return match.group(1)  # Return the captured token
    return None
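# Note: the active regex captures the header value verbatim (including any
# "Bearer " prefix, which the commented-out variant stripped), and it is passed
# unchanged into the 'authorization' header below.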
def extract_final_direct_link(download_url, authorization_token):
    print(authorization_token)
    headers_download = {
        'accept': '*/*',
        'accept-language': 'id-ID,id;q=0.9,en-US;q=0.8,en;q=0.7',
        'authorization': f'{authorization_token}',
        'content-type': 'application/json',
        'origin': domain_ganti,
        'priority': 'u=1, i',
        'referer': f'{domain_ganti}/',
        'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Linux"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'cross-site',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
    }
    try:
        response = requests.get(download_url, headers=headers_download, timeout=10)
        response.raise_for_status()  # Raise an exception for 4xx/5xx status codes
        json_data = response.json()
        return json_data.get("direct_link")
    except requests.RequestException as e:
        print(f"Request error: {e}")
        return None
    except json.JSONDecodeError:
        print(f"Invalid JSON response from {download_url}")
        return None
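# The endpoint behind download_url is expected to answer with JSON carrying a
# "direct_link" field, e.g. {"direct_link": "https://..."} (shape inferred from
# the .get("direct_link") call above, not from an API spec).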
def exmain():
    try:
        with open('output_link.txt', 'r') as file:
            urls = [convert_url(line.strip()) for line in file if line.strip()]
    except FileNotFoundError:
        print("The file 'output_link.txt' was not found.")
        return
    except Exception as e:
        print(f"Error reading 'output_link.txt': {e}")
        return

    if not urls:
        print("No URLs found in 'output_link.txt'.")
        return

    total_direct_links = 0  # Counter for direct links found
    total_errors = 0  # Counter for errors or "not found" cases

    with open('zfinal_hasil.txt', 'w') as output_file:
        # Process each URL
        for url in urls:
            try:
                response = requests.get(url, headers=headers_initial, timeout=10)
                response.raise_for_status()
                download_url = extract_download_link(response.text)
                if not download_url:
                    print(f"No download.php link found in {url}")
                    total_errors += 1
                    continue
                authorization_token = extract_authorization_token(response.text)
                if not authorization_token:
                    print(f"No authorization token found in {url}")
                    total_errors += 1
                    continue
                final_direct_link = extract_final_direct_link(download_url, authorization_token)
                if final_direct_link:
                    print(f"{final_direct_link}\n")
                    output_file.write(f"{final_direct_link}\n")
                    total_direct_links += 1
                else:
                    print(f"No direct link found in {download_url}")
                    total_errors += 1
            except requests.RequestException as e:
                print(f"Error fetching {url}: {e}\n")
                total_errors += 1
            except Exception as e:
                print(f"An unexpected error occurred while processing {url}: {e}\n")
                total_errors += 1

    print("\n===========================================")
    print(f"Total direct links found: {total_direct_links}")
    print(f"Total not found or errors: {total_errors}")
# Run the scraping and extraction pipeline
try:
    main()
    exmain()
    print("proses.py executed successfully!")
except Exception as e:
    print(f"Failed to run proses.py: {e}")
    exit()

# Run remote_upload_folder.py
try:
    result = subprocess.run(['python3', 'remote_upload_folder.py'], check=True)
    print("remote_upload_folder.py ran successfully.")
except subprocess.CalledProcessError as e:
    print(f"An error occurred while running remote_upload_folder.py: {e}")