提交 509fe358 编写于 作者: H hjdhnx

优化pdfh

上级 ab2d9145
无法预览此类型文件
@@ -1048,7 +1048,7 @@ class CMS:
 if is_json:
 html = self.dealJson(html)
 html = json.loads(html)
-# print(html)
+print(html)
 if not is_json and html.find('输入验证码') > -1:
 cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
 # cookie = ''
@@ -1063,14 +1063,15 @@ class CMS:
 html = r.text
 items = pdfa(html,p[0].replace('json:','',1))
-# print(items)
+print(len(items),items)
 videos = []
 for item in items:
 # print(item)
 try:
 # title = pdfh(item, p[1])
-title =''.join([pdfh(item, i) for i in p[1].split('||')])
+print(p[1].split('||'))
+title = ''.join([pdfh(item, i) for i in p[1].split('||')])
+print(title)
 try:
 img = pd(item, p[2])
 except:
@@ -1095,7 +1096,8 @@ class CMS:
 "vod_remarks": desc,
 "vod_content": content, # 无用参数
 })
-except:
+except Exception as e:
+print(e)
 pass
 # print(videos)
 except Exception as e:
......
@@ -2,4 +2,5 @@
 var rule = Object.assign(muban.海螺2,{
 title:'追剧喵',
 host:'https://zjmiao.com',
+搜索:'.search-list;a&&Text;.lazy&&data-original;.deployment&&Text;a&&href',
 });
\ No newline at end of file
@@ -22,6 +22,7 @@ class jsoup:
 def pdfh(self,html,parse:str,add_url=False):
 if not parse:
 return ''
 doc = pq(html)
 option = None
 if parse.find('&&') > -1:
@@ -32,8 +33,9 @@ class jsoup:
 else:
 parse = parse[0] if self.test(':eq|:lt|:gt|#',parse[0]) else f'{parse[0]}:eq(0)'
 # FIXME 暂时不支持jsonpath那样的|| 分割取或属性
 if option:
-# print(f'parse:{parse}=>(option:{option})')
+print(f'parse:{parse}=>(option:{option})')
 ret = doc(parse)
 # print(html)
 # FIXME 解析出来有多个的情况应该自动取第一个
@@ -42,8 +44,8 @@ class jsoup:
 elif option == 'Html':
 ret = ret.html()
 else:
-ret = ret.attr(option)
-if add_url and option in ['url','src','href','data-original','data-src']:
+ret = ret.attr(option) or ''
+if ret and add_url and option in ['url','src','href','data-original','data-src']:
 if 'http' in ret:
 ret = ret[ret.find('http'):]
 else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册