-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_lots_of_decks.py
73 lines (63 loc) · 2.01 KB
/
get_lots_of_decks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#%%
from cards import *
from pickle_helper import *
from scrape import *
from time import sleep
import numpy as np
import pandas as pd
# Display setting: None lifts the column cap, so printed DataFrames show
# every column instead of being truncated.
pd.options.display.max_columns = None

# Card table handed to get_deck() for every scraped deck further down.
# make_all_cards_df is not defined in this file — presumably it comes from
# one of the star imports above.
cards = make_all_cards_df()
#%%
# Saved metagame-matrix pages, one per date range; each is read from html/.
# NOTE(review): '2019-11-18-202006-01.html' and '2020-10-12-20201-04-15.html'
# do not follow the YYYY-MM-DD-YYYY-MM-DD pattern of the other names —
# confirm they match the files actually on disk before "fixing" them.
filepaths = [
    '2019-10-04-2019-10-25.html',
    '2019-10-25-2019-11-18.html',
    '2019-11-18-202006-01.html',
    '2020-06-01-2020-08-03.html',
    '2020-08-03-2020-09-17.html',
    '2020-09-17-2020-09-28.html',
    '2020-09-28-2020-10-12.html',
    '2020-10-12-20201-04-15.html',
]

# Parse each page into a matrix and collect the 'link' of every entry.
# matrixs keeps one parsed matrix per file, in filepaths order.
matrixs = []
deck_urls = []
for filepath in filepaths:
    matrix = scrape_metagame_matrix_filepath(f'html/{filepath}')
    matrixs.append(matrix)
    deck_urls.extend(entry['link'] for entry in matrix.values())
#%%
# Rebuild the flat, parallel lists from the scraped matrices: the 'link' of
# every entry goes to deck_urls, the matching key goes to deck_names.
deck_urls = []
deck_names = []
for matrix in matrixs:
    deck_urls.extend(entry['link'] for entry in matrix.values())
    deck_names.extend(matrix.keys())

# Fetch every deck page and build a deck from it using the `cards` table.
# decks and prices stay index-aligned with deck_urls; `bad` holds the indices
# of decks for which missing_cards() returned something non-empty.
decks, prices, bad = [], [], []
for i, url in enumerate(deck_urls):
    card_names, card_counts, deck_price = scrape_deck_url(url)
    deck = get_deck(cards, card_names, card_counts)
    missing = missing_cards(deck, card_names)
    if len(missing) > 0:
        # Flagged decks are still appended below so indices keep lining up.
        bad.append(i)
        print(len(missing), url)
    decks.append(deck)
    prices.append(deck_price)
    # One-second pause per deck — presumably to go easy on the remote site.
    sleep(1)

# One float per matrix entry, in the same order as deck_urls: the text before
# the first space of 'confidence_interval', with any '%' removed.
meta_performance = [
    float(v['confidence_interval'].split(' ')[0].replace('%', ''))
    for matrix in matrixs
    for v in matrix.values()
]
#%%
# Quick sanity check, one number per line: decks not flagged in `bad`,
# then the total number of decks scraped.
print(len(decks) - len(bad), len(decks), sep='\n')
#%%
# Write every result of the scrape to its own file under datasets/.
# Underscore-prefixed loop names avoid clobbering anything star-imported.
for _artefact, _target in (
    (decks, 'datasets/all_decks.pkl'),
    (prices, 'datasets/all_prices.pkl'),
    (bad, 'datasets/all_bad.pkl'),
    (matrixs, 'datasets/all_matrixs.pkl'),
    (meta_performance, 'datasets/all_meta_performance.pkl'),
    (deck_urls, 'datasets/all_deck_urls.pkl'),
    (deck_names, 'datasets/all_deck_names.pkl'),
):
    save(_artefact, _target)
#%%
# Read the seven objects back from datasets/ into the same names the save
# cell wrote them from.
# NOTE(review): presumably this cell lets a fresh session skip the scrape —
# confirm intended usage.
# NOTE(review): `load` is not defined in this file (presumably it comes from
# pickle_helper); if it unpickles, only point it at files this project wrote.
decks = load('datasets/all_decks.pkl')
prices = load('datasets/all_prices.pkl')
bad = load('datasets/all_bad.pkl')
matrixs = load('datasets/all_matrixs.pkl')
meta_performance = load('datasets/all_meta_performance.pkl')
deck_urls = load('datasets/all_deck_urls.pkl')
deck_names = load('datasets/all_deck_names.pkl')