-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsite_parser.py
83 lines (74 loc) · 2.65 KB
/
site_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import json
import requests
from google.protobuf.json_format import MessageToJson
from proto_structs import offers_pb2
# City slugs; presumably valid values for the `locality` query parameter
# of the offers search URL -- TODO confirm against the API.
cities = ['moskva', 'sankt-peterburg']

# Retailer slugs; presumably valid values for the `retailer` query parameter
# of the offers search URL -- TODO confirm against the API.
shops = [
    'dixy', '5ka', 'vkusvill_offline',
    'magnit-univer', 'perekrestok', 'amwine',
]
def _fetch_offers_page(city, shop, page_num):
    """Download one search-results page and decode it into a plain dict.

    :param city: value for the ``locality`` query parameter
    :param shop: value for the ``retailer`` query parameter
    :param page_num: value for the ``page`` query parameter
    :return: ``None`` when the server answers with a non-200 status,
        otherwise the decoded ``Offers`` message as a dict
    """
    url = f"https://squark.edadeal.ru/web/search/offers?count=10000&locality={city}&page={page_num}&retailer={shop}&segment=food"
    # Without a timeout `requests` may block forever on a stalled connection.
    response = requests.get(url, allow_redirects=True, timeout=30)
    if response.status_code != 200:
        return None
    offers = offers_pb2.Offers()
    offers.ParseFromString(response.content)
    return json.loads(MessageToJson(offers))


def parse_page(city = 'sankt-peterburg', shop = 'amwine', page_num = 1):
    """
    Collect the offers of a shop, starting at ``page_num`` and following the
    subsequent pages until the server stops returning offers.

    :param city: location of the shop
    :param shop: shop name
    :param page_num: number of the first page to parse
    :return: {'offer': [{product1}, {product2}, ..., {productn}]} -- the
        result of ``filter_products`` applied to every collected offer
    """
    collected = []
    while True:
        page = _fetch_offers_page(city, shop, page_num)
        # A non-200 answer or a page that carries no offers marks the end of
        # the listing; `.get` avoids a KeyError on an empty decoded message.
        if not page or not page.get('offer'):
            break
        collected.extend(page['offer'])
        page_num += 1
    # filter_products already drops offers without an 'amount' key, so no
    # separate pre-filtering pass is needed here.
    return filter_products(products=collected, shop=shop)
def filter_products(products, shop):
    """
    Filter raw offers and tag the remaining ones with the shop name.

    Offers without an 'amount' key and offers whose name contains "пакетик"
    are dropped.  For every kept offer the 'amount' is divided by 1000 once
    for each of the substrings 'кг' and 'мл' found in the name, and the
    'shop' key is set.  The offer dicts are modified in place.

    :param products: iterable of offer dicts
    :param shop: shop name stored under the 'shop' key of every kept offer
    :return: {'offer': [kept offers, in their original order]}
    """
    new_offer = []
    for product in products:
        if "amount" not in product:
            continue
        # An offer may lack a name (or carry a null one); treat it as an
        # empty name instead of failing with KeyError/TypeError.
        name = product.get('name') or ''
        if "пакетик" in name:
            continue
        # NOTE(review): the unit markers are matched as plain substrings
        # anywhere in the name, and dividing by 1000 for 'кг' looks
        # questionable -- confirm the intended unit conversion.
        for unit in ('кг', 'мл'):
            if unit in name:
                product['amount'] = product['amount'] / 1000
        product["shop"] = shop
        new_offer.append(product)
    return {"offer": new_offer}
def get_several_pages(city = 'sankt-peterburg', shop = 'amwine', page_amount = 2):
    """
    Call ``parse_page`` for the start pages 1..page_amount and merge the results.

    :param city: location of the shop
    :param shop: shop name
    :param page_amount: how many start pages to request, counted from 1
    :return: {'offer': [all offers of the individual calls, in call order]}
    """
    # NOTE(review): parse_page itself keeps fetching the pages that follow
    # its start page, so the results of consecutive start pages probably
    # overlap -- confirm whether duplicates are intended.
    merged = [
        offer
        for page in range(1, page_amount + 1)
        for offer in parse_page(city, shop, page).get('offer')
    ]
    return {'offer': merged}