Skip to content

Commit

Permalink
make urlcache be clean when len(urlcache)>100
Browse files Browse the repository at this point in the history
Signed-off-by: wwqgtxx <[email protected]>
  • Loading branch information
wwqgtxx committed Jun 13, 2016
1 parent 4dc1acf commit 104faac
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
19 changes: 16 additions & 3 deletions wwqLyParse/common.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
#!/usr/bin/env python3.5
# -*- coding: utf-8 -*-
# author wwqgtxx <[email protected]>
import urllib.request,io,os,sys,json,re,gzip
import urllib.request,io,os,sys,json,re,gzip,time

urlcache = {}
def getUrl(oUrl, encoding = 'utf-8' , headers = {}, data = None, method = None) :
url_json = {"oUrl":oUrl,"encoding":encoding,"headers":headers,"data":data,"method":method}
url_json = json.dumps(url_json,sort_keys=True, ensure_ascii=False)
if url_json in urlcache:
html_text = urlcache[url_json]
item = urlcache[url_json]
html_text = item["html_text"]
item["lasttimestap"] = int(time.time())
print("cache get:"+url_json)
if (len(urlcache)>100):
cleanUrlcache()
return html_text
print("normal get:"+url_json)
# url 包含中文时 parse.quote_from_bytes(oUrl.encode('utf-8'), ':/&%?=+')
Expand All @@ -26,9 +30,18 @@ def getUrl(oUrl, encoding = 'utf-8' , headers = {}, data = None, method = None)
html_text = data.decode(encoding,'ignore')
else:
html_text = blob.decode(encoding,'ignore')
urlcache[url_json] = html_text
urlcache[url_json] = {"html_text":html_text,"lasttimestap":int(time.time())}
return html_text

def cleanUrlcache(max_size=100):
    """Trim the module-level ``urlcache`` to its most recently used entries.

    Every cache value is expected to be a dict carrying a ``"lasttimestap"``
    integer (seconds since the epoch of the last read or write). Entries are
    ranked newest-first and everything past the first ``max_size`` is evicted.

    The dict is mutated in place instead of being rebound. Assigning to
    ``urlcache`` inside this function would make the name local (raising
    ``UnboundLocalError`` on the read), and replacing it with the sorted
    list of pairs would break the dict lookups done by the callers.

    Args:
        max_size: Maximum number of entries to keep. Negative values are
            treated as 0.
    """
    keep = max(0, max_size)
    # Newest first; sorted() is stable, so ties keep their insertion order.
    ranked = sorted(
        urlcache.items(),
        key=lambda pair: pair[1]["lasttimestap"],
        reverse=True,
    )
    # Keep the records at indexes 0 .. keep-1 and drop everything after them.
    for stale_key, _ in ranked[keep:]:
        del urlcache[stale_key]
    print("urlcache has been cleaned")

def debug(input):
    """Print ``str(input)`` with every character GBK cannot encode removed.

    The text is round-tripped through the GBK codec with the ``'ignore'``
    error handler, so unencodable characters are silently dropped before
    the result is written with ``print``.
    """
    text = str(input)
    gbk_bytes = text.encode('gbk', 'ignore')
    printable = gbk_bytes.decode('gbk')
    print(printable)

class Parser(object):
filters = []
def Parse(self,url,types=None):
Expand Down
2 changes: 1 addition & 1 deletion wwqLyParse/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
version = {
'port_version' : "0.5.0",
'type' : 'parse',
'version' : '0.2.2',
'version' : '0.2.3',
'uuid' : '{C35B9DFC-559F-49E2-B80B-79B66EC77471}',
'filter' : [],
'name' : 'WWQ猎影解析插件',
Expand Down

0 comments on commit 104faac

Please sign in to comment.