提交 a9b591aa 编写于 作者: H hjdhnx

修复js模式0二级title和desc与js模式1不一致的问题

修复pdfa函数定位的列表数据不对的问题
8号影院js模式0已完美
上级 2502945d
......@@ -30,7 +30,6 @@
{"key": "dr_剧迷", "name": "剧迷(道长)", "type": 1, "api": "{{host}}/vod?rule=剧迷&ext=txt/js/tg/剧迷.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
{"key": "dr_大师兄影视", "name": "大师兄影视(道长)", "type": 1, "api": "{{host}}/vod?rule=大师兄影视&ext=txt/js/tg/大师兄影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
{"key": "dr_天空影视", "name": "天空影视(道长)", "type": 1, "api": "{{host}}/vod?rule=天空影视&ext=txt/js/tg/天空影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
{"key": "dr_完美看看", "name": "完美看看(道长)", "type": 1, "api": "{{host}}/vod?rule=完美看看&ext=txt/js/tg/完美看看.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
{"key": "dr_快云影院", "name": "快云影院(道长)", "type": 1, "api": "{{host}}/vod?rule=快云影院&ext=txt/js/tg/快云影院.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
{"key": "dr_爱看影视", "name": "爱看影视(道长)", "type": 1, "api": "{{host}}/vod?rule=爱看影视&ext=txt/js/tg/爱看影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
{"key": "dr_爱看电影", "name": "爱看电影(道长)", "type": 1, "api": "{{host}}/vod?rule=爱看电影&ext=txt/js/tg/爱看电影.js", "searchable": 2, "quickSearch": 0, "filterable": 0},
......
......@@ -864,8 +864,7 @@ class CMS:
pdfa = jsp.pjfa if is_json else jsp.pdfa
pd = jsp.pj if is_json else jsp.pd
pq = jsp.pq
obj = {}
vod_name = ''
vod['vod_id'] = detailUrl
if not html: # 没传递html参数接下来智能获取
r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False)
html = self.checkHtml(r)
......@@ -874,16 +873,16 @@ class CMS:
html = json.loads(html)
if p.get('title'):
p1 = p['title'].split(';')
vod_name = pdfh(html, p1[0]).replace('\n', ' ')
# title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1])
title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1])
# print(title)
obj['title'] = title
vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip()
vod['type_name'] = pdfh(html, p1[1]).replace('\n',' ').strip() if len(p1)>1 else ''
if p.get('desc'):
try:
p1 = p['desc'].split(';')
desc = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1])
obj['desc'] = desc
vod['vod_remarks'] = pdfh(html, p1[0]).replace('\n', '').strip()
vod['vod_year'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else ''
vod['vod_area'] = pdfh(html, p1[2]).replace('\n', ' ').strip() if len(p1) > 2 else ''
vod['vod_actor'] = pdfh(html, p1[3]).replace('\n', ' ').strip() if len(p1) > 3 else ''
vod['vod_director'] = pdfh(html, p1[4]).replace('\n', ' ').strip() if len(p1) > 4 else ''
except:
pass
......@@ -891,7 +890,7 @@ class CMS:
p1 = p['content'].split(';')
try:
content = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1])
obj['content'] = content
vod['vod_content'] = content
except:
pass
......@@ -899,23 +898,10 @@ class CMS:
p1 = p['img']
try:
img = pd(html, p1)
obj['img'] = img
vod['vod_pic'] = img
except Exception as e:
logger.info(f'二级图片定位失败,但不影响使用{e}')
vod = {
"vod_id": detailUrl,
"vod_name": vod_name,
"vod_pic": obj.get('img', ''),
"type_name": obj.get('title', ''),
"vod_year": "",
"vod_area": "",
"vod_remarks": obj.get('desc', ''),
"vod_actor": "",
"vod_director": "",
"vod_content": obj.get('content', '')
}
vod_play_from = '$$$'
playFrom = []
init_flag = {'ctx':False}
......@@ -966,19 +952,28 @@ class CMS:
vHeader = vHeader.to_list()
vodHeader = vHeader
else:
# print(p['tabs'].split(';')[0])
vHeader = pdfa(html, p['tabs'].split(';')[0])
# print(f'线路列表数:{len((vodHeader))}')
# print(vodHeader)
tab_parse = p['tabs'].split(';')[0]
# print('tab_parse:',tab_parse)
vHeader = pdfa(html, tab_parse)
# print(vHeader)
print(f'二级线路定位列表数:{len((vHeader))}')
# print(vHeader[0].outerHtml())
# print(vHeader[0].toString())
# from lxml import etree
# print(str(etree.tostring(vHeader[0], pretty_print=True), 'utf-8'))
from lxml.html import tostring as html2str
# print(html2str(vHeader[0].root).decode('utf-8'))
if not is_json:
for v in vHeader:
# 过滤排除掉线路标题
v_title = pq(v).text()
# print(v_title)
if self.tab_exclude and jsp.test(self.tab_exclude, v_title):
continue
vodHeader.append(v_title)
else:
vodHeader = vHeader
print(f'过滤后真实线路列表数:{len((vodHeader))} {vodHeader}')
else:
vodHeader = ['道长在线']
......@@ -1010,9 +1005,11 @@ class CMS:
else:
for i in range(len(vodHeader)):
tab_name = str(vodHeader[i])
# print(tab_name)
tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else ''
p1 = p['lists'].replace('#idv', tab_name).replace('#id', str(i))
tab_ext = tab_ext.replace('#idv', tab_name).replace('#id', str(i))
# print(p1)
vodList = pdfa(html, p1) # 1条线路的选集列表
# print(vodList)
# vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接
......@@ -1028,7 +1025,7 @@ class CMS:
vod_tab_list.append(vlist)
vod_play_url = vod_play_url.join(vod_tab_list)
print(vod_play_url)
# print(vod_play_url)
vod['vod_play_from'] = vod_play_from
# print(vod_play_from)
vod['vod_play_url'] = vod_play_url
......
// Site rule for '8号影院': static categories plus selector rules for the
// recommended (推荐), first-level list (一级), second-level detail (二级) and
// search (搜索) pages.
var rule={
title:'8号影院',
host:'http://www.8hysw.com',
// homeUrl:'/',
// NOTE(review): fyclass / fypage look like category-id / page-number placeholders - confirm against the loader.
url:'/frim/fyclass-fypage.html',
// NOTE(review): '**' is presumably the search-keyword placeholder - confirm.
searchUrl:'/search.php?page=fypage&searchword=**&searchtype=',
searchable:2,// whether global search is enabled
quickSearch:0,// whether quick search is enabled
filterable:0,// whether category filtering is enabled
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'电影&电视剧&综艺&动漫&日韩剧&国产剧&欧美剧&港台剧',
class_url:'1&2&3&4&16&13&15&14',
play_parse:true,
lazy:'',
limit:6,
// Play-source tabs whose title matches this pattern are skipped by the detail parser.
tab_exclude:'本周热门|最近更新',
推荐:'.stui-pannel_bd;.stui-vodlist li;h4&&Text;.lazyload&&data-original;.text-right&&Text;a&&href',
double:true, // whether the recommended content uses two-level (nested) locating
一级:'.stui-vodlist.clearfix&&li;a&&title;.lazyload&&data-original;.text-right&&Text;a&&href',
// 二级 (detail page) selectors, as read by the CMS detail parser in this commit:
//   title   - ';'-separated: 1st -> vod_name, 2nd -> type_name
//   desc    - ';'-separated, by position: vod_remarks; vod_year; vod_area; vod_actor; vod_director
//   img     -> vod_pic, content -> vod_content
//   tabs    - locates the play-source tab headers
//   lists   - per-tab episode list; '#id' is replaced with the tab index
二级:{"title":"h1&&Text;.stui-content__detail&&p&&Text","img":".lazyload&&data-original","desc":".data:eq(0)&&Text;.data:eq(1)&&Text;.data:eq(2)&&Text;.data:eq(3)&&Text","content":".desc&&Text","tabs":".stui-pannel__head.bottom-line h3","lists":".stui-content__playlist:eq(#id) li"},
// Search rule is taken from the shared template object `muban` (defined outside this file).
搜索:muban.首图2.搜索2,
}
\ No newline at end of file
// Site rule for 'KUBO影视': static categories plus selector rules for the
// recommended (推荐), first-level list (一级), second-level detail (二级) and
// search (搜索) pages.
var rule = {
title:'KUBO影视',
host:'https://123kubo.tv',
// homeUrl:'/',
// NOTE(review): fyclass / fypage look like category-id / page-number placeholders - confirm against the loader.
url:'/show/fyclass/page/fypage.html',
// NOTE(review): '**' is presumably the search-keyword placeholder - confirm.
searchUrl:'/search/page/fypage/wd/**.html',
searchable:2,// whether global search is enabled
quickSearch:0,// whether quick search is enabled
filterable:0,// whether category filtering is enabled
headers:{// request headers for the site; all headers are supported, typically UA and cookies
'User-Agent':'MOBILE_UA',
// "Cookie": "searchneed=ok"
},
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'电影&电视剧&综艺&动漫',
class_url:'1&2&3&4',
//class_parse:'.myui-panel-box&&ul&&li;a&&Text;a&&href;/v/(.*)/',
play_parse:true,
lazy:'',
limit:6,
推荐:'ul.hl-vod-list;li;a&&title;.hl-item-thumb.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
double:true, // whether the recommended content uses two-level (nested) locating
一级:'.hl-list-item;a&&title;.hl-item-thumb.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href',
// 二级 (detail page) selectors, as read by the CMS detail parser in this commit:
//   title   - ';'-separated: 1st -> vod_name, 2nd -> type_name
//   desc    - ';'-separated, by position: vod_remarks; vod_year; vod_area (actor/director slots omitted here)
//   img     -> vod_pic, content -> vod_content
//   tabs    - locates the play-source tab headers
//   lists   - per-tab episode list; '#id' is replaced with the tab index
二级:{"title":".hl-item-thumb.hl-lazy&&title;.hl-full-box&&ul li:eq(6)&&Text","img":".hl-item-thumb.hl-lazy&&data-original","desc":".hl-full-box&&ul&&li:eq(1)&&Text;.hl-full-box&&ul&&li:eq(2)&&Text;.hl-full-box&&ul&&li:eq(3)&&Text","content":".hl-col-xs-12.blurb&&Text","tabs":".hl-plays-from:eq(0) a","lists":".hl-plays-list:eq(#id) li"},
搜索:'.hl-item-div;a&&title;.hl-item-thumb&&data-original;.hl-lc-1&&Text;a&&href;.text-muted:eq(-1)&&Text',
}
// Site rule for 'TV蜂': static categories plus selector rules for the
// recommended (推荐), first-level list (一级), second-level detail (二级) and
// search (搜索) pages.
var rule = {
title:'TV蜂',
host:'https://www.tvfeng.net',
// homeUrl:'/',
// NOTE(review): fyclass / fypage look like category-id / page-number placeholders - confirm against the loader.
url:'/tvfenshow/fyclass--------fypage---.html',
// NOTE(review): '**' is presumably the search-keyword placeholder - confirm.
searchUrl:'/tvfensearch/**----------fypage---.html',
searchable:2,// whether global search is enabled
quickSearch:0,// whether quick search is enabled
filterable:0,// whether category filtering is enabled
headers:{// request headers for the site; all headers are supported, typically UA and cookies
'User-Agent':'MOBILE_UA',
// "Cookie": "searchneed=ok"
},
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'电影&电视剧&综艺&动漫',
class_url:'1&2&3&4',
play_parse:true,
lazy:'',
limit:6,
推荐:'.module-list;.module-items&&.module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href',
double:true, // whether the recommended content uses two-level (nested) locating
一级:'.module-items .module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href',
// 二级 (detail page) selectors, as read by the CMS detail parser in this commit:
//   title   - ';'-separated: 1st -> vod_name, 2nd -> type_name
//   desc    - ';'-separated, by position: vod_remarks; vod_year; vod_area; vod_actor
//   img     -> vod_pic, content -> vod_content
//   tabs    - locates the play-source tab headers
//   lists   - per-tab episode list; '#id' is replaced with the tab index
二级:{"title":"h1&&Text;.tag-link&&Text","img":".module-item-pic&&img&&data-src","desc":".video-info-items:eq(0)&&Text;.video-info-items:eq(3)&&Text;.video-info-items:eq(2)&&Text;.video-info-items:eq(1)&&Text","content":".vod_content&&Text","tabs":".module-tab-item","lists":".module-player-list:eq(#id)&&.scroll-content&&a"},
搜索:'.module-items .module-search-item;h3&&Text;img&&data-src;.video-serial&&Text;a&&href',
}
......@@ -2,9 +2,8 @@ muban.首图2.二级.tabs = '.stui-pannel__head&&h3';
var rule = Object.assign(muban.首图2,{
title:'完美看看',
host:'https://www.wanmeikk.film',
class_parse:'.dropdown&&li;a&&Text;a&&href;.*/(.*?).html',
cate_exclude:'消息|专题',
url:'/category/fyclass-fypage.html',
searchUrl:'/vodsearch/**-------------.html',
class_name:'电影&美剧&韩剧&日剧&国产剧&动漫',//静态分类名称拼接
class_url:'1&2&3&4&5&6',//静态分类标识拼接
class_parse:'',
});
searchUrl:'/so/-------------.html?wd=**&submit=',
});
\ No newline at end of file
// Site rule for '尘落影视': static categories plus selector rules for the
// recommended (推荐), first-level list (一级), second-level detail (二级) and
// search (搜索) pages.
var rule={
title:'尘落影视',
host:'http://v.ftixkrv.cn',
// NOTE(review): fyclass / fypage look like category-id / page-number placeholders - confirm against the loader.
url:'/whole/fyclass_______0_addtime_fypage.html',
// NOTE(review): '**' is presumably the search-keyword placeholder - confirm.
searchUrl:'/?c=search&wd=**&sort=addtime&order=desc&page=fypage',
searchable:2,// whether global search is enabled
quickSearch:0,// whether quick search is enabled
filterable:0,// whether category filtering is enabled
headers:{// request headers for the site; all headers are supported, typically UA and cookies
'User-Agent':'PC_UA',
// "Cookie": "searchneed=ok"
},
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'电影&电视剧&综艺&动漫',
class_url:'1&2&4&3',
// NOTE(review): presumably a pattern of category names to drop - confirm against the loader.
cate_exclude:'全网资源',
play_parse:true,
lazy:'',
limit:6,
推荐:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href',
一级:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href',
// 二级 (detail page) selectors, as read by the CMS detail parser in this commit:
//   title   - ';'-separated: 1st -> vod_name, 2nd -> type_name
//   desc    - ';'-separated, by position: vod_remarks; vod_year; vod_area; vod_actor; vod_director.
//             The two leading empty slots leave vod_remarks and vod_year blank
//             (an empty selector yields an empty string).
//   img     -> vod_pic, content -> vod_content
//   tabs    - locates the play-source tab headers
//   lists   - per-tab episode list; '#id' is replaced with the tab index
二级:{"title":"h1&&Text;.table-striped tr:eq(2)&&Text","img":".img-thumbnail&&src","desc":";;.table-striped tr:eq(3)&&Text;.table-striped tr:eq(1)&&Text;.table-striped tr:eq(0)&&Text","content":".movie-introduce&&Text","tabs":".nav.nav-tabs li a","lists":".tab-pane.active:eq(#id) div a"},
搜索:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href',
}
\ No newline at end of file
// 道长 drpy仓库 https://gitcode.net/qq_32394351/dr_py
// drpy安卓本地搭建说明 https://gitcode.net/qq_32394351/dr_py/-/blob/master/%E5%AE%89%E5%8D%93%E6%9C%AC%E5%9C%B0%E6%90%AD%E5%BB%BA%E8%AF%B4%E6%98%8E.md
// Pluto Player官方TG https://t.me/PlutoPlayer
// Pluto Player官方TG https://t.me/PlutoPlayerChannel
// Site rule for '抓饭体育' (a live-sports site). The list and search rules use
// a 'json:' prefix instead of CSS selectors.
// NOTE(review): 'json:' presumably switches the parser to JSON-path mode - confirm against the loader.
var rule = {
title:'抓饭体育',
host:'https://www.zhuafan.tech',
// NOTE(review): fyclass looks like the category-id placeholder - confirm.
url:'/sports-home/category/fyclass',
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'全部&足球&篮球&羽乒&台球&棒球&户外&搏击&综合&棋盘&电竞&网球&排球&聊天&原声',
class_url:'all&Football&Basketball&Badminton&Billiards&Baseball&Outdoors&Wrestling&Others&Boardgame&Popular&Tennis&Volleyball&Chat&Acoustic',
homeUrl:'/sports-home/category/all',// home page link of the site, used to fetch categories and recommendations
detailUrl:'https://m.zhuafan.tech/fyid',// URL template for the second-level (detail) page (used for JSON format)
// NOTE(review): '**' / fypage are presumably keyword / page-number placeholders - confirm.
searchUrl:'/live-search/search/query/data?keyword=**&page=fypage&num=&searchType=all&uid=null&from=pc',
searchable:2,
quickSearch:0,
headers:{
'User-Agent':'PC_UA'
},
limit:6,
// NOTE(review): unit is presumably milliseconds - confirm.
timeout:5000,
play_parse:true,
lazy:'',
double:false,
// NOTE(review): the meaning of '*' (here and inside the rules below) is not visible in this file - confirm against the loader.
推荐:'*',
一级:'json:data;cname;imageUrl;uname;id',
二级:'*',
搜索:'json:cObj.cList;*;*;*;_id',
}
\ No newline at end of file
// 道长 drpy仓库 https://gitcode.net/qq_32394351/dr_py
// drpy安卓本地搭建说明 https://gitcode.net/qq_32394351/dr_py/-/blob/master/%E5%AE%89%E5%8D%93%E6%9C%AC%E5%9C%B0%E6%90%AD%E5%BB%BA%E8%AF%B4%E6%98%8E.md
// Pluto Player官方TG https://t.me/PlutoPlayer
// Pluto Player官方TG https://t.me/PlutoPlayerChannel
// Site rule for '斗鱼直播' (a live-streaming site). The recommended, list and
// search rules use a 'json:' prefix instead of CSS selectors.
// NOTE(review): 'json:' presumably switches the parser to JSON-path mode - confirm against the loader.
var rule = {
title:'斗鱼直播',
host:'https://www.douyu.com',
homeUrl:'/japi/weblist/apinc/recLabelList?',// home page link of the site, used to fetch categories and recommendations
// NOTE(review): fyclass / fypage look like category-id / page-number placeholders - confirm.
url:'/gapi/rkc/directory/mixList/fyclass/fypage',
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'一起看&网游竞技&单机热游&手游休闲&娱乐天地&科技文化&语音互动&语音直播&正能量&颜值&音乐&舞蹈&二次元&户外&美食&互动交友&趣生活&数码科技&文化&科普&社会人文&汽车&纪录片&斗鱼购物&交友&电台&一起玩&音乐之声&正能量&英雄联盟&热门游戏&DOTA2&穿越火线&CFHD&DNF&炉石传说&CS:GO&逆战&lol云顶之弈&魔兽争霸&魔兽怀旧服&网易游戏&守望先锋&DOTA&魔兽世界&天涯明月刀&三国杀&主机游戏&永劫无间&生死狙击2&迷失ARK&艾尔登法环&逃离塔科夫&V Rising&海上狼人杀&怀旧游戏&王者荣耀&和平精英&火影忍者&LOL手游&金铲铲之战&重返帝国&COD手游&哈利波特:魔法觉醒&CF手游&欢乐斗地主&原神&天刀手游&棋牌娱乐&欢乐麻将&新游中心&QQ飞车&阴阳师&热门手游',
class_url:'2_208&1_1&1_15&1_9&1_2&1_11&1_20&1_18&1_13&2_201&2_175&2_1008&2_174&2_124&2_194&2_1555&2_1097&2_134&2_195&2_204&2_1162&2_136&2_514&2_1203&2_1221&2_1556&2_1575&2_910&2_250&2_1&2_270&2_3&2_33&2_1997&2_40&2_2&2_6&2_46&2_917&2_55&2_1055&2_3567&2_148&2_217&2_5&2_59&2_14&2_19&2_1227&2_1781&2_3528&2_3406&2_1024&2_3684&2_3556&2_26&2_181&2_350&2_196&2_1920&2_2556&2_2915&2_767&2_1192&2_178&2_416&2_1223&2_911&2_113&2_451&2_229&2_331&2_240&2_30',
detailUrl:'/fyid',// URL template for the second-level (detail) page (used for JSON format)
// NOTE(review): '**' / fypage are presumably keyword / page-number placeholders - confirm.
searchUrl:'/japi/search/api/searchShow?kw=**&page=fypage&pageSize=20',
searchable:2,
quickSearch:0,
headers:{
'User-Agent':'PC_UA'
},
// NOTE(review): unit is presumably milliseconds - confirm.
timeout:5000,
limit:8,
play_parse:true,
lazy:'',
double:true,
// NOTE(review): the meaning of '*' inside the rules below is not visible in this file - confirm against the loader.
推荐:'json:data.list;room;*;cover;*;*',
一级:'json:data.rl;rn;rs16;nn;rid',
二级:'*',
搜索:'json:data.relateShow;roomName;roomSrc;nickName;*',
}
\ No newline at end of file
// Site rule for '爱迪影视': static categories plus selector rules for the
// recommended (推荐), first-level list (一级), second-level detail (二级) and
// search (搜索) pages.
var rule={
title:'爱迪影视',
host:'https://aidi.tv',
// NOTE(review): fyclass / fypage look like category-id / page-number placeholders - confirm against the loader.
url:'/show/fyclass--------fypage---.html',
// NOTE(review): '**' is presumably the search-keyword placeholder - confirm.
searchUrl:'/vsearch/-------------.html?wd=**&submit=',
searchable:2,
quickSearch:0,
filterable:0,
headers:{ 'User-Agent':'MOBILE_UA', },
// Static category names and category ids, each '&'-joined (presumably paired by position - verify).
class_name:'电影&电视剧&综艺&动漫',
class_url:'dianying&lianxuju&zongyi&dongman',
// Play-source tabs whose title matches this pattern are skipped by the detail parser.
tab_exclude:'app专用|VIP线路',
play_parse:true,
double:true,
推荐:'body .vodlist.vodlist_wi;li;a&&title;.vodlist_thumb.lazyload&&data-original;.pic-text&&Text;a&&href',
一级:'.vodlist.vodlist_wi&&li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href',
// 二级 (detail page) selectors, as read by the CMS detail parser in this commit:
//   title   - ';'-separated: 1st -> vod_name, 2nd -> type_name
//   desc    - ';'-separated, by position: vod_remarks; vod_year; vod_area; vod_actor; vod_director.
//             Empty slots (1st and 3rd here) leave that field blank.
//             NOTE(review): desc has six selectors, but the mode-0 parser visible in this
//             commit only reads positions 0-4, so the last one ('.data:eq(4)') looks unused there - confirm.
//   img     -> vod_pic, content -> vod_content
//   tabs    - locates the play-source tab headers
//   lists   - per-tab episode list; '#id' is replaced with the tab index
二级:{"title":"h2&&Text;.data:eq(1)&&Text","img":".lazyload&&data-original","desc":";.content_min li:eq(1)&&Text;;.content_min li:eq(2)&&Text;.content_min li:eq(3)&&Text;.data:eq(4)&&Text","content":".context.clearfix&&Text","tabs":".play_source_tab&&a","lists":".content_playlist:eq(#id) li"},
搜索:'.searchlist_img;a&&title;.vodlist_thumb.lazyload&&data-original;.pic-text&&Text;a&&href',
}
\ No newline at end of file
......@@ -3,7 +3,7 @@ var rule={
host:'https://www.dandanzan10.top',
// homeUrl:'/',
url:'/fyclass/index_fypage.html[/fyclass/index.html]',
//searchUrl:'/search/**/',
searchUrl:'/so/**-**--.html',
searchable:2,//是否启用全局搜索,
quickSearch:0,//是否启用快速搜索,
filterable:0,//是否启用分类筛选,
......@@ -20,5 +20,5 @@ var rule={
double:true, // 推荐内容是否双层定位
二级:{"title":"h1&&Text;.product-excerpt:eq(2)&&Text","img":".thumb&&src","desc":";;.product-excerpt:eq(3)&&Text;.product-excerpt:eq(1)&&Text;.product-excerpt:eq(0)&&Text","content":".product-excerpt:eq(5)&&Text","tabs":".playlists dl dt","lists":".play-div-oa:eq(#id) li"},
搜索:'ul.img-list.clearfix&&li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href',
搜索:'.lists-content&&ul&&li;*;*;*;*',
}
\ No newline at end of file
......@@ -6,6 +6,7 @@
import json
from pyquery import PyQuery as pq
from lxml import etree
from urllib.parse import urljoin
import re
from jsonpath import jsonpath
......@@ -22,7 +23,6 @@ class jsoup:
def pdfh(self,html,parse:str,add_url=False):
if not parse:
return ''
doc = pq(html)
option = None
if parse.find('&&') > -1:
......@@ -66,10 +66,14 @@ class jsoup:
# ret = doc(parse) # 下面注释的写法不对的
# ret = ret.find(':first')
# ret = ret.children(':first')
ret = str(ret)
# print(parse)
# ret = str(ret)
ret = ret.outerHtml()
return ret
def pdfa(self,html,parse:str):
# 看官方文档才能解决这个问题!!!
# https://pyquery.readthedocs.io/en/latest/api.html
if not parse:
return []
if parse.find('&&') > -1:
......@@ -78,8 +82,15 @@ class jsoup:
parse = ' '.join([parse[i] if self.test(':eq|:lt|:gt', parse[i]) or i>=len(parse)-1 else f'{parse[i]}:eq(0)' for i in range(len(parse))])
# print(f'pdfa:{parse}')
doc = pq(html)
res = [str(item) for item in doc(parse).items()]
result = doc(parse)
# 节点转字符串
# print(str(etree.tostring(result[0], pretty_print=True), 'utf-8'))
# res = [item for item in result.items()]
res = [item.outerHtml() for item in result.items()] # 这个才是对的!!str() item str(etree.tostring 统统错误
# res = [str(item) for item in result.items()]
# res = [str(etree.tostring(item, pretty_print=True), 'utf-8') for item in result]
# print(len(res),res)
# print('pdfa执行结果数:',len(res))
return res
def pd(self,html,parse:str):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册