Commit 201fcac: many bug fix

Signed-off-by: wwqgtxx <[email protected]>
wwqgtxx committed Jun 15, 2016
1 parent 104faac
Showing 6 changed files with 51 additions and 33 deletions.
16 changes: 12 additions & 4 deletions wwqLyParse/common.py
@@ -4,15 +4,19 @@
 import urllib.request,io,os,sys,json,re,gzip,time
 
 urlcache = {}
+URLCACHE_MAX = 1000
 def getUrl(oUrl, encoding = 'utf-8' , headers = {}, data = None, method = None) :
+    global urlcache
+    global URLCACHE_MAX
+    urlcache_temp = urlcache
     url_json = {"oUrl":oUrl,"encoding":encoding,"headers":headers,"data":data,"method":method}
     url_json = json.dumps(url_json,sort_keys=True, ensure_ascii=False)
-    if url_json in urlcache:
-        item = urlcache[url_json]
+    if url_json in urlcache_temp:
+        item = urlcache_temp[url_json]
         html_text = item["html_text"]
         item["lasttimestap"] = int(time.time())
         print("cache get:"+url_json)
-        if (len(urlcache)>100):
+        if (len(urlcache_temp)>URLCACHE_MAX):
             cleanUrlcache()
         return html_text
     print("normal get:"+url_json)
@@ -34,8 +38,12 @@ def getUrl(oUrl, encoding = 'utf-8' , headers = {}, data = None, method = None)
     return html_text
 
 def cleanUrlcache():
+    global urlcache
+    global URLCACHE_MAX
     sortedDict = sorted(urlcache.items(), key=lambda d: d[1]["lasttimestap"], reverse=True)
-    newDict = sortedDict[:100] # take entries from index start through end-1
+    newDict = {}
+    for (k, v) in sortedDict[:int(URLCACHE_MAX - URLCACHE_MAX/10)]: # take entries from index start through end-1
+        newDict[k] = v
     urlcache = newDict
     print("urlcache has been cleaned")
 
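The net effect in common.py: getUrl's cache now holds up to URLCACHE_MAX entries, each stamped with its last access time, and cleanUrlcache keeps only the most recently used 90%. It also fixes a latent bug: the old code assigned the sorted list of (key, value) pairs straight back to urlcache instead of rebuilding a dict. A minimal standalone sketch of the eviction policy, assuming the same 10% ratio (evict_oldest and cache_get are hypothetical names; the timestamp key keeps the diff's spelling):

import time

URLCACHE_MAX = 1000

def evict_oldest(cache, max_size=URLCACHE_MAX):
    """Keep the most recently used ~90% of entries, drop the rest."""
    by_recency = sorted(cache.items(),
                        key=lambda kv: kv[1]["lasttimestap"],  # spelling as in the diff
                        reverse=True)
    keep = int(max_size - max_size / 10)  # 900 entries when max_size is 1000
    return dict(by_recency[:keep])

def cache_get(cache, key):
    """Return a cached item and refresh its recency stamp, or None on a miss."""
    item = cache.get(key)
    if item is not None:
        item["lasttimestap"] = int(time.time())
    return item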
2 changes: 1 addition & 1 deletion wwqLyParse/main.py
@@ -37,7 +37,7 @@
 version = {
     'port_version' : "0.5.0",
     'type' : 'parse',
-    'version' : '0.2.3',
+    'version' : '0.2.4',
     'uuid' : '{C35B9DFC-559F-49E2-B80B-79B66EC77471}',
     'filter' : [],
     'name' : 'WWQ猎影解析插件',
12 changes: 10 additions & 2 deletions wwqLyParse/parsers/anypageparser.py
@@ -23,6 +23,7 @@ class AnyPageParser(common.Parser):
     TWICE_PARSE_TIMEOUT = 5
 
     def Parse(self,input_text,types=None):
+        global TWICE_PARSE_TIMEOUT
         if (types is not None) and ("collection" not in types):
             return
         html = PyQuery(common.getUrl(input_text))
@@ -52,7 +53,7 @@ def Parse(self,input_text,types=None):
                 url = 'direct:'+url
             if not re.match('(^(http|https)://.+\.(shtml|html|mp4|mkv|ts|avi))|(^(http|https)://.+/video/)',url):
                 continue
-            if re.search('(list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com|www.iqiyi.com/lib/s_|www.iqiyi.com/dv/|top.iqiyi.com)',url):
+            if re.search('[^\?](list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com|www.iqiyi.com/lib/s_|www.iqiyi.com/dv/|top.iqiyi.com)',url):
                 continue
             if re.search('(下载|播 放|播放|投诉|评论|(\d{1,2}:\d{1,2}))',no):
                 continue
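The added [^\?] requires some character other than '?' immediately before a blacklist keyword, apparently so that a keyword appearing only after a query '?' no longer disqualifies the URL. A quick illustration with an abbreviated pattern (the URLs are made up):

import re

BLACKLIST = r'[^\?](list|mall|vip)'  # shortened form of the pattern in the diff

print(re.search(BLACKLIST, 'http://example.com/list/1.html'))     # matches: '/' precedes 'list'
print(re.search(BLACKLIST, 'http://example.com/play?list=2018'))  # None: 'list' only follows '?'

Note that [^\?] consumes a character, so a keyword at position 0 could never match either; that is harmless here since every tested URL starts with a scheme.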
@@ -114,7 +115,7 @@ def runlist_parser(queue,parser,url):
         for parser_thread in parser_threads:
             parser_thread.start()
         for parser_thread in parser_threads:
-            parser_thread.join(TWICE_PARSE_TIMEOUT)
+            parser_thread.join(self.TWICE_PARSE_TIMEOUT)
         while not q_results.empty():
             t_results.append(q_results.get())
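TWICE_PARSE_TIMEOUT is a class attribute, so the global TWICE_PARSE_TIMEOUT declaration added in the first hunk cannot make it visible inside the method; reading it through self is what actually resolves the name. The surrounding fan-out pattern, sketched standalone (fan_out and parse are hypothetical names; the queue drain mirrors the diff):

import queue
import threading

def fan_out(urls, parse, timeout=5):
    """Run parse(url) in one thread per URL; keep only results ready in time."""
    results = queue.Queue()
    threads = [threading.Thread(target=lambda u=u: results.put(parse(u)), daemon=True)
               for u in urls]
    for t in threads:
        t.start()
    for t in threads:
        t.join(timeout)  # per-thread cap, like self.TWICE_PARSE_TIMEOUT
    collected = []
    while not results.empty():  # threads still running simply leave no result behind
        collected.append(results.get())
    return collected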

@@ -129,6 +130,13 @@ def runlist_parser(queue,parser,url):
             if ddata["url"] not in parse_urls:
                 #print(ddata["url"])
                 data["data"].append(ddata)
+        oldddata = data["data"]
+        data["data"] = []
+        parsed_urls = []
+        for ddata in oldddata:
+            if ddata["url"] not in parsed_urls:
+                data["data"].append(ddata)
+                parsed_urls.append(ddata["url"])
         data["total"] = len(data["data"])
         data["caption"] = "全页地址列表"
         return data
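The added block is a second de-duplication pass: it rebuilds data["data"] keeping only the first entry per URL, in input order. List membership is a linear scan, so the same pass with a set is O(n) instead of O(n^2); a sketch (dedupe_by_url is a hypothetical helper):

def dedupe_by_url(items):
    """Keep the first item seen for each URL, preserving input order."""
    seen = set()  # O(1) membership instead of scanning a list each time
    unique = []
    for item in items:
        if item["url"] not in seen:
            unique.append(item)
            seen.add(item["url"])
    return unique

# data["data"] = dedupe_by_url(data["data"])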
4 changes: 2 additions & 2 deletions wwqLyParse/parsers/listparser.py
@@ -241,7 +241,7 @@ def run(queue,get_list_info,html_text):
         except Exception as e:
             #import traceback
             #traceback.print_exc()
-            print(e)
+            print(str(get_list_info)+str(e))
     html_text = common.getUrl(input_text)
     html = PyQuery(html_text)
     title = html('h1.main_title').children('a').text()
@@ -276,7 +276,7 @@ def run(queue,get_list_info,html_text):
         except Exception as e:
             #import traceback
             #traceback.print_exc()
-            print(e)
+            print(str(get_list_info_html)+e)
 
     data["total"] = len(data["data"])

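Both hunks tag the failing parser in the message instead of printing the bare exception. The second hunk concatenates a str with the exception object itself, which raises a TypeError in Python 3 when the handler fires, so the first hunk's str(e) form is the safe one. A sketch of the same idea, re-enabling the traceback the diff keeps commented out (the stand-in parser is hypothetical):

import traceback

def get_list_info(html_text):  # stand-in for the real parser callable
    raise ValueError("unexpected page layout")

try:
    info = get_list_info("<html></html>")
except Exception as e:
    print(str(get_list_info) + str(e))  # name the failing parser, then the error
    traceback.print_exc()               # the full stack says far more than the message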
43 changes: 22 additions & 21 deletions wwqLyParse/parsers/lyppvparser.py
@@ -35,27 +35,28 @@ def Parse(self,url,types=None):
             return []
         print("call lyp_pv.run.Parse("+url+")")
         out = run.Parse(url)
-        for data in out['data']:
-            data['label'] = re.compile('\(\d\)\s*').sub('',str(data['label']))
-            parts = data['label'].split('_')
-            num = int(parts[0])
-            if num == -3:
-                parts[0] = "0"
-            elif num == -1:
-                parts[0] = "1"
-            elif num == 0:
-                parts[0] = "2"
-            elif num == 2:
-                parts[0] = "3"
-            elif num == 4:
-                parts[0] = "4"
-            elif num == 7:
-                parts[0] = "5"
-            data['label']=('_').join(parts)
-            data['label'] = data['label'] + ("@lyppv")
-        out["caption"]= "负锐解析"
-        out.pop("icon")
-        out.pop("warning")
+        if "data" in out:
+            for data in out['data']:
+                data['label'] = re.compile('\(\d\)\s*').sub('',str(data['label']))
+                parts = data['label'].split('_')
+                num = int(parts[0])
+                if num == -3:
+                    parts[0] = "0"
+                elif num == -1:
+                    parts[0] = "1"
+                elif num == 0:
+                    parts[0] = "2"
+                elif num == 2:
+                    parts[0] = "3"
+                elif num == 4:
+                    parts[0] = "4"
+                elif num == 7:
+                    parts[0] = "5"
+                data['label']=('_').join(parts)
+                data['label'] = data['label'] + ("@lyppv")
+            out["caption"]= "负锐解析"
+            out.pop("icon")
+            out.pop("warning")
         return out
 
     def ParseURL(self,url,label,min=None,max=None):
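The new guard handles run.Parse results that come back without a "data" key, which previously raised a KeyError. The elif ladder itself is a fixed lookup from the raw leading number to a display rank, so a table-driven version is equivalent; a sketch (relabel and RANK are hypothetical names):

import re

RANK = {-3: "0", -1: "1", 0: "2", 2: "3", 4: "4", 7: "5"}

def relabel(label):
    """Strip a '(d) ' prefix, remap the leading number, tag the source parser."""
    label = re.sub(r'\(\d\)\s*', '', str(label))
    parts = label.split('_')
    parts[0] = RANK.get(int(parts[0]), parts[0])  # unknown numbers pass through
    return '_'.join(parts) + '@lyppv'

# relabel('(1) 2_flv_high')  ->  '3_flv_high@lyppv'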
7 changes: 4 additions & 3 deletions wwqLyParse/run.py
@@ -120,7 +120,7 @@ def main():
     #debug(Parse('http://www.iqiyi.com/lib/m_209445514.html?src=search'))
     #debug(Parse('http://www.iqiyi.com/a_19rrhacdwt.html#vfrm=2-4-0-1'))
     #debug(Parse('http://www.iqiyi.com/a_19rrhaare5.html'))
-    debug(Parse('http://www.iqiyi.com/a_19rrhbhf6d.html#vfrm=2-3-0-1'))
+    #debug(Parse('http://www.iqiyi.com/a_19rrhbhf6d.html#vfrm=2-3-0-1'))
     #debug(Parse('http://www.le.com'))
     #debug(Parse('http://www.letv.com/comic/10010294.html'))
     #debug(Parse('http://www.mgtv.com/v/1/1/'))
@@ -132,6 +132,7 @@ def main():
     #debug(Parse('http://v.qq.com/tv/'))
     #debug(Parse('http://www.pptv.com/'))
     #debug(Parse('http://yyfm.xyz/video/album/1300046802.html'))
+    debug(Parse('http://www.iqiyi.com/playlist392712002.html',"collection"))
     #debug(Parse('http://list.iqiyi.com/www/2/----------------iqiyi--.html'))
     #debug(Parse('http://www.iqiyi.com/a_19rrhb8fjp.html',"list"))
     #debug(Parse('http://www.iqiyi.com/v_19rrl8pmn8.html#vfrm=2-3-0-1'))
@@ -143,8 +144,8 @@ def main():
     #debug(Parse('http://v.pptv.com/show/NWR29Yzj2hh7ibWE.html?rcc_src=S1'))
     #debug(Parse('http://www.bilibili.com/video/av2557971/')) #don't support
     #debug(Parse('http://v.baidu.com/link?url=dm_10tBNoD-LLAMb79CB_p0kxozuoJcW0SiN3eycdo6CdO3GZgQm26uOzZh9fqcNSWZmz9aU9YYCCfT0NmZoGfEMoznyHhz3st-QvlOeyArYdIbhzBbdIrmntA4h1HsSampAs4Z3c17r_exztVgUuHZqChPeZZQ4tlmM5&page=tvplaydetail&vfm=bdvtx&frp=v.baidu.com%2Ftv_intro%2F&bl=jp_video',"formats"))
-    debug(Parse('http://www.hunantv.com/v/1/291976/c/3137384.html'))
-    #debug(ParseURL('http://www.mgtv.com/v/1/291976/c/3137384.html',"1",parsers = [mvtvparser.MgTVParser()]))
+    #debug(Parse('http://www.hunantv.com/v/1/291976/c/3137384.html'))
+    #debug(ParseURL('http://www.mgtv.com/v/1/291976/c/3137384.html',"1"))
 
 
 if __name__ == '__main__':
