-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathcheck_proxy.py
97 lines (87 loc) · 3.26 KB
/
check_proxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
# encoding: utf-8
import time
from simplejson import JSONDecodeError
import requests
from bs4 import BeautifulSoup
from gevent import monkey
from requests.exceptions import ProxyError
from db import delete_proxy_from_db, save_proxy_to_db
from log import logger
from proxy import Proxy_IP
from tool import fetch
monkey.patch_all()
from gevent.pool import Pool
class Check_proxy:
    """Validate proxies concurrently and persist the outcome.

    Every proxy in ``self.proxies`` is checked inside a gevent ``Pool``.
    A proxy that passes its check is written with ``save_proxy_to_db``.
    A proxy that fails is removed with ``delete_proxy_from_db`` when
    ``self.recheck`` is true (i.e. the proxy is expected to already be
    stored).
    """

    # Labels searched for in the anonymity-check page, mapped to the value
    # that is stored on ``proxy.anonymity``.
    _ANONYMITY_LABELS = {
        "透明": "transparent",
        "普通匿名": "normal_anonymity",
        "高匿名": "high_anonymity",
    }

    def __init__(self):
        self.pool = Pool(20)   # gevent pool created with size 20
        self.recheck = False   # True when re-validating already stored proxies
        self.proxies = []      # proxies to be checked by run()

    def _discard_if_recheck(self, proxy):
        """Delete *proxy* from the database, but only during a recheck run."""
        if self.recheck:
            delete_proxy_from_db(proxy)

    @classmethod
    def _parse_anonymity(cls, text):
        """Return the anonymity value whose label occurs in *text*.

        Returns ``None`` when none of the known labels is present.
        """
        for label, value in cls._ANONYMITY_LABELS.items():
            if label in text:
                return value
        return None

    @staticmethod
    def _extract_country(response):
        """Return the ``country`` field of a JSON geo-lookup response.

        Raises ``ValueError`` when the body is not valid JSON (the decode
        errors of both ``json`` and ``simplejson`` subclass ``ValueError``)
        or when the decoded payload carries no ``country`` entry, e.g. an
        error payload or a non-object body.
        """
        payload = response.json()
        try:
            return payload['country']
        except (KeyError, TypeError):
            raise ValueError('geo lookup response has no "country" field')

    def _check_one_http_proxy(self, proxy):
        """Check an HTTP proxy: anonymity first, then country lookup.

        On success ``proxy.anonymity``, ``proxy.country`` and
        ``proxy.round_trip_time`` are filled in and the proxy is saved.
        """
        check_anonymity_url = "http://www.xxorg.com/tools/checkproxy/"
        fetch_result = fetch(check_anonymity_url, proxy)
        response = fetch_result['response']
        if response is None:
            self._discard_if_recheck(proxy)
            return

        response.encoding = 'utf-8'
        result = BeautifulSoup(response.text, "html5lib").find("div", id="result")
        anonymity = self._parse_anonymity(str(result))
        if anonymity is None:
            # The page did not report any known anonymity level.  Treat this
            # like the HTTPS content check: a stored proxy that no longer
            # yields a usable answer is dropped on recheck.
            self._discard_if_recheck(proxy)
            return
        proxy.anonymity = anonymity

        check_address_url = "http://ip-api.com/json/"
        fetch_result = fetch(check_address_url, proxy)
        response = fetch_result['response']
        if response is None:
            self._discard_if_recheck(proxy)
            return

        try:
            country = self._extract_country(response)
        except ValueError:
            # Unusable geo answer: as before, the proxy is deleted regardless
            # of the recheck flag.
            delete_proxy_from_db(proxy)
            return

        proxy.country = country
        proxy.round_trip_time = fetch_result['round_trip_time']
        save_proxy_to_db(proxy)

    def _check_one_https_proxy(self, proxy):
        """Check an HTTPS proxy by fetching a known page through it.

        The proxy is saved (with its round-trip time) when the fetched page
        contains the expected marker text; otherwise it is dropped on recheck.
        """
        testURL = "https://book.douban.com/"
        fetch_result = fetch(url=testURL, proxy=proxy, proxy_type='https')
        response = fetch_result['response']
        if response is None:
            logger.info('response is None , proxy:{}'.format(proxy))
            self._discard_if_recheck(proxy)
            return

        response.encoding = 'utf-8'
        html = response.text
        if "豆瓣读书,新书速递,畅销书,书评,书单" in html:
            proxy.round_trip_time = fetch_result['round_trip_time']
            save_proxy_to_db(proxy)
        else:
            self._discard_if_recheck(proxy)

    def _check_one_proxy(self, proxy):
        """Dispatch *proxy* to the HTTP check or, for any other type, the HTTPS check."""
        if proxy.type == 'http':
            self._check_one_http_proxy(proxy)
        else:
            self._check_one_https_proxy(proxy)

    def run(self):
        """Spawn one check per proxy in the pool and wait for all of them."""
        for proxy in self.proxies:
            self.pool.spawn(self._check_one_proxy, proxy)
        self.pool.join()
if __name__ == "__main__":
    # Script entry point: re-validate every proxy returned by
    # Proxy_IP.select() with the recheck flag switched on.
    logger.info("-------Recheck Start-------")
    checker = Check_proxy()
    checker.recheck = True
    checker.proxies.extend(Proxy_IP.select())
    checker.run()
    logger.info("-------Recheck Finish-------")