From 509fe358453d86b3d737f21ad5136fccb7eac4af Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Fri, 16 Sep 2022 18:05:16 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96pdfh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- base/rules.db | Bin 40960 -> 40960 bytes controllers/cms.py | 12 +++++++----- "js/\350\277\275\345\211\247\345\226\265.js" | 1 + utils/htmlParser.py | 8 +++++--- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/base/rules.db b/base/rules.db index 8a503d3657320f471a0335c39c461d2716c4b710..8579e1d50696c9b01a66654ccb3e03aef629c1f2 100644 GIT binary patch delta 90 zcmZoTz|?SnX@WH4>xnYXjITE) -1: cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api) # cookie = '' @@ -1063,14 +1063,15 @@ class CMS: html = r.text items = pdfa(html,p[0].replace('json:','',1)) - # print(items) + print(len(items),items) videos = [] for item in items: # print(item) try: # title = pdfh(item, p[1]) - title =''.join([pdfh(item, i) for i in p[1].split('||')]) - + print(p[1].split('||')) + title = ''.join([pdfh(item, i) for i in p[1].split('||')]) + print(title) try: img = pd(item, p[2]) except: @@ -1095,7 +1096,8 @@ class CMS: "vod_remarks": desc, "vod_content": content, # 无用参数 }) - except: + except Exception as e: + print(e) pass # print(videos) except Exception as e: diff --git "a/js/\350\277\275\345\211\247\345\226\265.js" "b/js/\350\277\275\345\211\247\345\226\265.js" index 18f314d..9a6f989 100644 --- "a/js/\350\277\275\345\211\247\345\226\265.js" +++ "b/js/\350\277\275\345\211\247\345\226\265.js" @@ -2,4 +2,5 @@ var rule = Object.assign(muban.海螺2,{ title:'追剧喵', host:'https://zjmiao.com', + 搜索:'.search-list;a&&Text;.lazy&&data-original;.deployment&&Text;a&&href', }); \ No newline at end of file diff --git a/utils/htmlParser.py b/utils/htmlParser.py index f2fadaf..f94dfad 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -22,6 +22,7 @@ class jsoup: def pdfh(self,html,parse:str,add_url=False): if not parse: return '' + doc = pq(html) option = None if parse.find('&&') > -1: @@ -32,8 +33,9 @@ class jsoup: else: parse = parse[0] if self.test(':eq|:lt|:gt|#',parse[0]) else f'{parse[0]}:eq(0)' # FIXME 暂时不支持jsonpath那样的|| 分割取或属性 + if option: - # print(f'parse:{parse}=>(option:{option})') + print(f'parse:{parse}=>(option:{option})') ret = doc(parse) # print(html) # FIXME 解析出来有多个的情况应该自动取第一个 @@ -42,8 +44,8 @@ class jsoup: elif option == 'Html': ret = ret.html() else: - ret = ret.attr(option) - if add_url and option in ['url','src','href','data-original','data-src']: + ret = ret.attr(option) or '' + if ret and add_url and option in ['url','src','href','data-original','data-src']: if 'http' in ret: ret = ret[ret.find('http'):] else: -- GitLab