-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathUserScrape.py
57 lines (48 loc) · 2.04 KB
/
UserScrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import sys
import requests
from bs4 import BeautifulSoup
from tqdm.auto import trange
import CollectionDL
import WorkshopDL
def UGCClassFinder(tag):
    """Return whether *tag* carries a ``data-publishedfileid`` attribute.

    Intended to be passed to BeautifulSoup's ``find_all`` as a filter function.
    """
    wanted_attribute = "data-publishedfileid"
    return tag.has_attr(wanted_attribute)
def _collect_published_file_ids(SteamID64, endpoint, desc, max_pages=1668, timeout=30):
    """Walk the numbered listing pages of one profile section and gather ids.

    Pages are requested starting at page 1 until a page contains no tag with a
    ``data-publishedfileid`` attribute, or until ``max_pages`` pages were read.

    Args:
        SteamID64: Profile id inserted into the URL.
        endpoint: Path and query suffix the page number is appended to.
        desc: Label shown on the tqdm progress bar.
        max_pages: Upper bound on the number of pages requested.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        list: The ``data-publishedfileid`` values in the order they were found.

    Raises:
        requests.RequestException: On timeout, connection failure, or an HTTP
            error status.
    """
    base_url = "https://steamcommunity.com/profiles/"
    found_ids = []
    for page_index in trange(max_pages, desc=desc):
        # Listing pages are 1-based while trange counts from 0.
        page_url = f"{base_url}{SteamID64}{endpoint}{page_index + 1}"
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url=page_url, timeout=timeout)
        # An error page contains no matching tags and would otherwise be
        # mistaken for the end of the listing, silently truncating the result.
        response.raise_for_status()
        matches = BeautifulSoup(response.text, "html.parser").find_all(UGCClassFinder)
        if not matches:
            break
        found_ids.extend(match.get("data-publishedfileid") for match in matches)
    return found_ids


def scrape(SteamID64):
    """Batch download the Steam Workshop submissions of a user with the specific SteamID64.

    The user's workshop item pages are read first and every id found is passed
    to ``WorkshopDL.download``; then the collection pages are read and every id
    found is passed to ``CollectionDL.download``.

    Args:
        SteamID64: Profile id used to build the steamcommunity.com profile URL.

    Raises:
        requests.RequestException: If a listing page cannot be retrieved.
    """
    submission_ids = _collect_published_file_ids(
        SteamID64, "/myworkshopfiles/?p=", "Retrieving Item Pages"
    )
    for submission_id in submission_ids:
        WorkshopDL.download(submission_id)

    collection_ids = _collect_published_file_ids(
        SteamID64,
        "/myworkshopfiles/?section=collections&p=",
        "Retrieving Collection Pages",
    )
    for collection_id in collection_ids:
        CollectionDL.download(collection_id)
if __name__ == "__main__":
    # Exit with a usage message instead of an IndexError traceback when the
    # SteamID64 argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: UserScrape.py <SteamID64>")
    scrape(sys.argv[1])