提交 509fe358 编写于 作者: H hjdhnx

优化pdfh

上级 ab2d9145
无法预览此类型文件
......@@ -1048,7 +1048,7 @@ class CMS:
if is_json:
html = self.dealJson(html)
html = json.loads(html)
# print(html)
print(html)
if not is_json and html.find('输入验证码') > -1:
cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
# cookie = ''
......@@ -1063,14 +1063,15 @@ class CMS:
html = r.text
items = pdfa(html,p[0].replace('json:','',1))
# print(items)
print(len(items),items)
videos = []
for item in items:
# print(item)
try:
# title = pdfh(item, p[1])
title =''.join([pdfh(item, i) for i in p[1].split('||')])
print(p[1].split('||'))
title = ''.join([pdfh(item, i) for i in p[1].split('||')])
print(title)
try:
img = pd(item, p[2])
except:
......@@ -1095,7 +1096,8 @@ class CMS:
"vod_remarks": desc,
"vod_content": content, # 无用参数
})
except:
except Exception as e:
print(e)
pass
# print(videos)
except Exception as e:
......
......@@ -2,4 +2,5 @@
// Site rule for '追剧喵' (host https://zjmiao.com), built on top of the
// `muban.海螺2` template object.
// NOTE: Object.assign copies the listed properties INTO its first argument
// and returns that same object, so `rule` is `muban.海螺2` itself, modified
// in place (not a copy). Confirm that is intended if the template is shared.
var rule = Object.assign(muban.海螺2,{
title:'追剧喵',
host:'https://zjmiao.com',
// Search rule: five ';'-separated selector expressions, each using the
// 'selector&&option' form.
// NOTE(review): presumably list; title; image; description; link, in that
// order -- verify against the code that consumes this field.
搜索:'.search-list;a&&Text;.lazy&&data-original;.deployment&&Text;a&&href',
});
\ No newline at end of file
......@@ -22,6 +22,7 @@ class jsoup:
def pdfh(self,html,parse:str,add_url=False):
if not parse:
return ''
doc = pq(html)
option = None
if parse.find('&&') > -1:
......@@ -32,8 +33,9 @@ class jsoup:
else:
parse = parse[0] if self.test(':eq|:lt|:gt|#',parse[0]) else f'{parse[0]}:eq(0)'
# FIXME 暂时不支持jsonpath那样的|| 分割取或属性
if option:
# print(f'parse:{parse}=>(option:{option})')
print(f'parse:{parse}=>(option:{option})')
ret = doc(parse)
# print(html)
# FIXME 解析出来有多个的情况应该自动取第一个
......@@ -42,8 +44,8 @@ class jsoup:
elif option == 'Html':
ret = ret.html()
else:
ret = ret.attr(option)
if add_url and option in ['url','src','href','data-original','data-src']:
ret = ret.attr(option) or ''
if ret and add_url and option in ['url','src','href','data-original','data-src']:
if 'http' in ret:
ret = ret[ret.find('http'):]
else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册