From bfff70eb938f909675e6611c41efde1dae0eae18 Mon Sep 17 00:00:00 2001 From: wwqgtxx Date: Fri, 24 Jun 2016 19:03:20 +0800 Subject: [PATCH] fix bugs Signed-off-by: wwqgtxx --- wwqLyParse/parsers/anypageparser.py | 2 ++ wwqLyParse/parsers/iqiyiparser.py | 2 ++ wwqLyParse/run.py | 4 ++-- wwqLyParse/test.py | 4 ++-- wwqLyParse/urlhandles/postfixurlhandle.py | 6 +++--- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/wwqLyParse/parsers/anypageparser.py b/wwqLyParse/parsers/anypageparser.py index 90d18e5..f846ee0 100644 --- a/wwqLyParse/parsers/anypageparser.py +++ b/wwqLyParse/parsers/anypageparser.py @@ -26,6 +26,8 @@ def Parse(self,input_text,types=None): global TWICE_PARSE_TIMEOUT if (types is not None) and ("collection" not in types): return + if (re.search('www.iqiyi.com/(lib/m|a_|v_)',input_text)): + return html = PyQuery(getUrl(input_text)) items = html('a') title = html('title').text() diff --git a/wwqLyParse/parsers/iqiyiparser.py b/wwqLyParse/parsers/iqiyiparser.py index c2ca90a..dd73602 100644 --- a/wwqLyParse/parsers/iqiyiparser.py +++ b/wwqLyParse/parsers/iqiyiparser.py @@ -143,6 +143,8 @@ def getInfo(self,url): def Parse(self,input_text,types=None): + if (re.search('www.iqiyi.com/(lib/m|a_)',input_text)): + return if (types is None) or ("formats" in types): self.init() data = { diff --git a/wwqLyParse/run.py b/wwqLyParse/run.py index 8b877aa..d5aa7be 100644 --- a/wwqLyParse/run.py +++ b/wwqLyParse/run.py @@ -117,7 +117,7 @@ def debug(input): def main(): debug(GetVersion(debug=True)) - #debug(Parse('http://www.iqiyi.com/lib/m_209445514.html?src=search')) + debug(Parse('http://www.iqiyi.com/lib/m_209445514.html?src=search')) #debug(Parse('http://www.iqiyi.com/a_19rrhacdwt.html#vfrm=2-4-0-1')) #debug(Parse('http://www.iqiyi.com/a_19rrhaare5.html')) #debug(Parse('http://www.iqiyi.com/a_19rrhbhf6d.html#vfrm=2-3-0-1')) @@ -136,7 +136,7 @@ def main(): #debug(Parse('http://list.iqiyi.com/www/2/----------------iqiyi--.html')) #debug(Parse('http://www.iqiyi.com/a_19rrhb8fjp.html',"list")) #debug(Parse('http://www.iqiyi.com/v_19rrl8pmn8.html#vfrm=2-3-0-1')) - debug(Parse('http://www.iqiyi.com/v_19rrl8pmn8.html',"formats")) + #debug(Parse('http://www.iqiyi.com/v_19rrl8pmn8.html',"formats")) #debug(Parse('http://www.iqiyi.com/v_19rrl8pmn8.html')) #debug(ParseURL("http://www.iqiyi.com/v_19rrl8pmn8.html","4_fullhd_全高清_895.21 MB@youget")) #debug(ParseURL("http://www.iqiyi.com/v_19rrl8pmn8.html","4_1080p_1920x1080_2746.0kbps_44:30.660_7_flv_@lyppv")) diff --git a/wwqLyParse/test.py b/wwqLyParse/test.py index 7533e0f..9e7e576 100644 --- a/wwqLyParse/test.py +++ b/wwqLyParse/test.py @@ -5,5 +5,5 @@ if __name__ == "__main__": import parsers.iqiyiparser - #print(parsers.iqiyiparser.IQiYiParser().Parse("http://www.iqiyi.com/v_19rrl8pmn8.html")) - print(parsers.iqiyiparser.IQiYiParser().ParseURL("http://www.iqiyi.com/v_19rrl8pmn8.html","fullhd")) \ No newline at end of file + print(parsers.iqiyiparser.IQiYiParser().Parse("http://www.iqiyi.com/a_19rrhacdwt.html")) + #print(parsers.iqiyiparser.IQiYiParser().ParseURL("http://www.iqiyi.com/v_19rrl8pmn8.html","fullhd")) \ No newline at end of file diff --git a/wwqLyParse/urlhandles/postfixurlhandle.py b/wwqLyParse/urlhandles/postfixurlhandle.py index 13f9516..5fa0f4b 100644 --- a/wwqLyParse/urlhandles/postfixurlhandle.py +++ b/wwqLyParse/urlhandles/postfixurlhandle.py @@ -18,13 +18,13 @@ class PostfixUrlHandle(UrlHandle): filters = ['^(http|https)://[^\s]+/[^\s]+\.[s]{0,1}html(\?|#)[^\s]+'] def urlHandle(self,url): - def getUrl(queue,url): + def _getUrl(queue,url): queue.put(getUrl(url)) result = re.match('^(http|https)://[^\s]+/[^\s]+\.[s]{0,1}html',url).group() q_results = queue.Queue() htmls = [] - t1 = threading.Thread(target=getUrl, args=(q_results, url)) - t2 = threading.Thread(target=getUrl, args=(q_results, result)) + t1 = threading.Thread(target=_getUrl, args=(q_results, url)) + t2 = threading.Thread(target=_getUrl, args=(q_results, result)) t1.start() t2.start() t1.join()