diff --git a/base/custom.conf b/base/custom.conf index 275cedfe1817a68ba4ea24398720260834fdd73b..4c4b426b15bc2bb2488c98861e19e5f8d8a27bc7 100644 --- a/base/custom.conf +++ b/base/custom.conf @@ -30,7 +30,6 @@ {"key": "dr_剧迷", "name": "剧迷(道长)", "type": 1, "api": "{{host}}/vod?rule=剧迷&ext=txt/js/tg/剧迷.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, {"key": "dr_大师兄影视", "name": "大师兄影视(道长)", "type": 1, "api": "{{host}}/vod?rule=大师兄影视&ext=txt/js/tg/大师兄影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, {"key": "dr_天空影视", "name": "天空影视(道长)", "type": 1, "api": "{{host}}/vod?rule=天空影视&ext=txt/js/tg/天空影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, -{"key": "dr_完美看看", "name": "完美看看(道长)", "type": 1, "api": "{{host}}/vod?rule=完美看看&ext=txt/js/tg/完美看看.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, {"key": "dr_快云影院", "name": "快云影院(道长)", "type": 1, "api": "{{host}}/vod?rule=快云影院&ext=txt/js/tg/快云影院.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, {"key": "dr_爱看影视", "name": "爱看影视(道长)", "type": 1, "api": "{{host}}/vod?rule=爱看影视&ext=txt/js/tg/爱看影视.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, {"key": "dr_爱看电影", "name": "爱看电影(道长)", "type": 1, "api": "{{host}}/vod?rule=爱看电影&ext=txt/js/tg/爱看电影.js", "searchable": 2, "quickSearch": 0, "filterable": 0}, diff --git a/controllers/cms.py b/controllers/cms.py index b5b7d14a99921dd6ae922f923be41315832df29b..c4d0cefd8048b625e60584c1c31dfddae9c7e9d2 100644 --- a/controllers/cms.py +++ b/controllers/cms.py @@ -864,8 +864,7 @@ class CMS: pdfa = jsp.pjfa if is_json else jsp.pdfa pd = jsp.pj if is_json else jsp.pd pq = jsp.pq - obj = {} - vod_name = '' + vod['vod_id'] = detailUrl if not html: # 没传递html参数接下来智能获取 r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False) html = self.checkHtml(r) @@ -874,16 +873,16 @@ class CMS: html = json.loads(html) if p.get('title'): p1 = p['title'].split(';') - vod_name = pdfh(html, p1[0]).replace('\n', ' ') - # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) - title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1]) - # print(title) - obj['title'] = title + vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip() + vod['type_name'] = pdfh(html, p1[1]).replace('\n',' ').strip() if len(p1)>1 else '' if p.get('desc'): try: p1 = p['desc'].split(';') - desc = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1]) - obj['desc'] = desc + vod['vod_remarks'] = pdfh(html, p1[0]).replace('\n', '').strip() + vod['vod_year'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else '' + vod['vod_area'] = pdfh(html, p1[2]).replace('\n', ' ').strip() if len(p1) > 2 else '' + vod['vod_actor'] = pdfh(html, p1[3]).replace('\n', ' ').strip() if len(p1) > 3 else '' + vod['vod_director'] = pdfh(html, p1[4]).replace('\n', ' ').strip() if len(p1) > 4 else '' except: pass @@ -891,7 +890,7 @@ class CMS: p1 = p['content'].split(';') try: content = '\n'.join([pdfh(html, i).replace('\n', ' ') for i in p1]) - obj['content'] = content + vod['vod_content'] = content except: pass @@ -899,23 +898,10 @@ class CMS: p1 = p['img'] try: img = pd(html, p1) - obj['img'] = img + vod['vod_pic'] = img except Exception as e: logger.info(f'二级图片定位失败,但不影响使用{e}') - vod = { - "vod_id": detailUrl, - "vod_name": vod_name, - "vod_pic": obj.get('img', ''), - "type_name": obj.get('title', ''), - "vod_year": "", - "vod_area": "", - "vod_remarks": obj.get('desc', ''), - "vod_actor": "", - "vod_director": "", - "vod_content": obj.get('content', '') - } - vod_play_from = '$$$' playFrom = [] init_flag = {'ctx':False} @@ -966,19 +952,28 @@ class CMS: vHeader = vHeader.to_list() vodHeader = vHeader else: - # print(p['tabs'].split(';')[0]) - vHeader = pdfa(html, p['tabs'].split(';')[0]) - # print(f'线路列表数:{len((vodHeader))}') - # print(vodHeader) + tab_parse = p['tabs'].split(';')[0] + # print('tab_parse:',tab_parse) + vHeader = pdfa(html, tab_parse) + # print(vHeader) + print(f'二级线路定位列表数:{len((vHeader))}') + # print(vHeader[0].outerHtml()) + # print(vHeader[0].toString()) + # from lxml import etree + # print(str(etree.tostring(vHeader[0], pretty_print=True), 'utf-8')) + from lxml.html import tostring as html2str + # print(html2str(vHeader[0].root).decode('utf-8')) if not is_json: for v in vHeader: # 过滤排除掉线路标题 v_title = pq(v).text() + # print(v_title) if self.tab_exclude and jsp.test(self.tab_exclude, v_title): continue vodHeader.append(v_title) else: vodHeader = vHeader + print(f'过滤后真实线路列表数:{len((vodHeader))} {vodHeader}') else: vodHeader = ['道长在线'] @@ -1010,9 +1005,11 @@ class CMS: else: for i in range(len(vodHeader)): tab_name = str(vodHeader[i]) + # print(tab_name) tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else '' p1 = p['lists'].replace('#idv', tab_name).replace('#id', str(i)) tab_ext = tab_ext.replace('#idv', tab_name).replace('#id', str(i)) + # print(p1) vodList = pdfa(html, p1) # 1条线路的选集列表 # print(vodList) # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 @@ -1028,7 +1025,7 @@ class CMS: vod_tab_list.append(vlist) vod_play_url = vod_play_url.join(vod_tab_list) - print(vod_play_url) + # print(vod_play_url) vod['vod_play_from'] = vod_play_from # print(vod_play_from) vod['vod_play_url'] = vod_play_url diff --git "a/js/8\345\217\267\345\275\261\351\231\242.js" "b/js/8\345\217\267\345\275\261\351\231\242.js" new file mode 100644 index 0000000000000000000000000000000000000000..2378991a3f2f6321491c4e663351d1e4020d464b --- /dev/null +++ "b/js/8\345\217\267\345\275\261\351\231\242.js" @@ -0,0 +1,22 @@ +var rule={ +title:'8号影院', + host:'http://www.8hysw.com', + // homeUrl:'/', + url:'/frim/fyclass-fypage.html', + searchUrl:'/search.php?page=fypage&searchword=**&searchtype=', +searchable:2,//是否启用全局搜索, +quickSearch:0,//是否启用快速搜索, +filterable:0,//是否启用分类筛选, +class_name:'电影&电视剧&综艺&动漫&日韩剧&国产剧&欧美剧&港台剧', + class_url:'1&2&3&4&16&13&15&14', +play_parse:true, +lazy:'', +limit:6, +tab_exclude:'本周热门|最近更新', +推荐:'.stui-pannel_bd;.stui-vodlist li;h4&&Text;.lazyload&&data-original;.text-right&&Text;a&&href', +double:true, // 推荐内容是否双层定位 +一级:'.stui-vodlist.clearfix&&li;a&&title;.lazyload&&data-original;.text-right&&Text;a&&href', +二级:{"title":"h1&&Text;.stui-content__detail&&p&&Text","img":".lazyload&&data-original","desc":".data:eq(0)&&Text;.data:eq(1)&&Text;.data:eq(2)&&Text;.data:eq(3)&&Text","content":".desc&&Text","tabs":".stui-pannel__head.bottom-line h3","lists":".stui-content__playlist:eq(#id) li"}, + 搜索:muban.首图2.搜索2, + +} \ No newline at end of file diff --git "a/js/KUBO\345\275\261\350\247\206[\351\243\236].js" "b/js/KUBO\345\275\261\350\247\206[\351\243\236].js" new file mode 100644 index 0000000000000000000000000000000000000000..fa644a15c1c446293a19c5ccef72f8fa3207a272 --- /dev/null +++ "b/js/KUBO\345\275\261\350\247\206[\351\243\236].js" @@ -0,0 +1,25 @@ +var rule = { + title:'KUBO影视', + host:'https://123kubo.tv', + // homeUrl:'/', + url:'/show/fyclass/page/fypage.html', + searchUrl:'/search/page/fypage/wd/**.html', + searchable:2,//是否启用全局搜索, + quickSearch:0,//是否启用快速搜索, + filterable:0,//是否启用分类筛选, + headers:{//网站的请求头,完整支持所有的,常带ua和cookies + 'User-Agent':'MOBILE_UA', + // "Cookie": "searchneed=ok" + }, + class_name:'电影&电视剧&综艺&动漫', + class_url:'1&2&3&4', + //class_parse:'.myui-panel-box&&ul&&li;a&&Text;a&&href;/v/(.*)/', + play_parse:true, + lazy:'', + limit:6, + 推荐:'ul.hl-vod-list;li;a&&title;.hl-item-thumb.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href', + double:true, // 推荐内容是否双层定位 + 一级:'.hl-list-item;a&&title;.hl-item-thumb.hl-lazy&&data-original;.hl-pic-text&&Text;a&&href', + 二级:{"title":".hl-item-thumb.hl-lazy&&title;.hl-full-box&&ul li:eq(6)&&Text","img":".hl-item-thumb.hl-lazy&&data-original","desc":".hl-full-box&&ul&&li:eq(1)&&Text;.hl-full-box&&ul&&li:eq(2)&&Text;.hl-full-box&&ul&&li:eq(3)&&Text","content":".hl-col-xs-12.blurb&&Text","tabs":".hl-plays-from:eq(0) a","lists":".hl-plays-list:eq(#id) li"}, + 搜索:'.hl-item-div;a&&title;.hl-item-thumb&&data-original;.hl-lc-1&&Text;a&&href;.text-muted:eq(-1)&&Text', +} diff --git "a/js/TV\350\234\202.js" "b/js/TV\350\234\202.js" new file mode 100644 index 0000000000000000000000000000000000000000..21b14cf5ea47ba76c52b9edee2397fc40aa8eb90 --- /dev/null +++ "b/js/TV\350\234\202.js" @@ -0,0 +1,25 @@ + +var rule = { + title:'TV蜂', + host:'https://www.tvfeng.net', + // homeUrl:'/', + url:'/tvfenshow/fyclass--------fypage---.html', + searchUrl:'/tvfensearch/**----------fypage---.html', + searchable:2,//是否启用全局搜索, + quickSearch:0,//是否启用快速搜索, + filterable:0,//是否启用分类筛选, + headers:{//网站的请求头,完整支持所有的,常带ua和cookies + 'User-Agent':'MOBILE_UA', + // "Cookie": "searchneed=ok" + }, + class_name:'电影&电视剧&综艺&动漫', + class_url:'1&2&3&4', + play_parse:true, + lazy:'', + limit:6, + 推荐:'.module-list;.module-items&&.module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href', + double:true, // 推荐内容是否双层定位 + 一级:'.module-items .module-item;a&&title;img&&data-src;.module-item-text&&Text;a&&href', + 二级:{"title":"h1&&Text;.tag-link&&Text","img":".module-item-pic&&img&&data-src","desc":".video-info-items:eq(0)&&Text;.video-info-items:eq(3)&&Text;.video-info-items:eq(2)&&Text;.video-info-items:eq(1)&&Text","content":".vod_content&&Text","tabs":".module-tab-item","lists":".module-player-list:eq(#id)&&.scroll-content&&a"}, + 搜索:'.module-items .module-search-item;h3&&Text;img&&data-src;.video-serial&&Text;a&&href', +} diff --git "a/js/\345\256\214\347\276\216\347\234\213\347\234\213.js" "b/js/\345\256\214\347\276\216\347\234\213\347\234\213.js" new file mode 100644 index 0000000000000000000000000000000000000000..a2de524aad1cd6bc8a55354351dd983e1101a246 --- /dev/null +++ "b/js/\345\256\214\347\276\216\347\234\213\347\234\213.js" @@ -0,0 +1,9 @@ +muban.首图2.二级.tabs = '.stui-pannel__head&&h3'; +var rule = Object.assign(muban.首图2,{ +title:'完美看看', +host:'https://www.wanmeikk.film', +class_parse:'.dropdown&&li;a&&Text;a&&href;.*/(.*?).html', +cate_exclude:'消息|专题', +url:'/category/fyclass-fypage.html', +searchUrl:'/so/-------------.html?wd=**&submit=', +}); \ No newline at end of file diff --git "a/js/\345\260\230\350\220\275\345\275\261\350\247\206.js" "b/js/\345\260\230\350\220\275\345\275\261\350\247\206.js" new file mode 100644 index 0000000000000000000000000000000000000000..d8ec9102a7d9f1de561d467df18015b4a2ba12b1 --- /dev/null +++ "b/js/\345\260\230\350\220\275\345\275\261\350\247\206.js" @@ -0,0 +1,23 @@ +var rule={ + title:'尘落影视', + host:'http://v.ftixkrv.cn', + url:'/whole/fyclass_______0_addtime_fypage.html', + searchUrl:'/?c=search&wd=**&sort=addtime&order=desc&page=fypage', + searchable:2,//是否启用全局搜索, + quickSearch:0,//是否启用快速搜索, + filterable:0,//是否启用分类筛选, + headers:{//网站的请求头,完整支持所有的,常带ua和cookies + 'User-Agent':'PC_UA', + // "Cookie": "searchneed=ok" + }, + class_name:'电影&电视剧&综艺&动漫', + class_url:'1&2&4&3', + cate_exclude:'全网资源', + play_parse:true, + lazy:'', + limit:6, + 推荐:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href', + 一级:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href', + 二级:{"title":"h1&&Text;.table-striped tr:eq(2)&&Text","img":".img-thumbnail&&src","desc":";;.table-striped tr:eq(3)&&Text;.table-striped tr:eq(1)&&Text;.table-striped tr:eq(0)&&Text","content":".movie-introduce&&Text","tabs":".nav.nav-tabs li a","lists":".tab-pane.active:eq(#id) div a"}, + 搜索:'.movie-item-in;a&&title;img&&src;em&&Text;a&&href', +} \ No newline at end of file diff --git "a/js/\346\212\223\351\245\255\344\275\223\350\202\262.js" "b/js/\346\212\223\351\245\255\344\275\223\350\202\262.js" new file mode 100644 index 0000000000000000000000000000000000000000..ea8785b261d1a13cecf5f2976bc17b5082f1f5ae --- /dev/null +++ "b/js/\346\212\223\351\245\255\344\275\223\350\202\262.js" @@ -0,0 +1,29 @@ +// 道长 drpy仓库 https://gitcode.net/qq_32394351/dr_py +// drpy安卓本地搭建说明 https://gitcode.net/qq_32394351/dr_py/-/blob/master/%E5%AE%89%E5%8D%93%E6%9C%AC%E5%9C%B0%E6%90%AD%E5%BB%BA%E8%AF%B4%E6%98%8E.md +// Pluto Player官方TG https://t.me/PlutoPlayer +// Pluto Player官方TG https://t.me/PlutoPlayerChannel + +var rule = { + title:'抓饭体育', + host:'https://www.zhuafan.tech', + url:'/sports-home/category/fyclass', + class_name:'全部&足球&篮球&羽乒&台球&棒球&户外&搏击&综合&棋盘&电竞&网球&排球&聊天&原声', + class_url:'all&Football&Basketball&Badminton&Billiards&Baseball&Outdoors&Wrestling&Others&Boardgame&Popular&Tennis&Volleyball&Chat&Acoustic', + homeUrl:'/sports-home/category/all',//网站的首页链接,用于分类获取和推荐获取 + detailUrl:'https://m.zhuafan.tech/fyid',//二级详情拼接链接(json格式用) + searchUrl:'/live-search/search/query/data?keyword=**&page=fypage&num=&searchType=all&uid=null&from=pc', + searchable:2, + quickSearch:0, + headers:{ + 'User-Agent':'PC_UA' + }, + limit:6, + timeout:5000, + play_parse:true, + lazy:'', + double:false, + 推荐:'*', + 一级:'json:data;cname;imageUrl;uname;id', + 二级:'*', + 搜索:'json:cObj.cList;*;*;*;_id', +} \ No newline at end of file diff --git "a/js/\346\226\227\351\261\274\347\233\264\346\222\255.js" "b/js/\346\226\227\351\261\274\347\233\264\346\222\255.js" new file mode 100644 index 0000000000000000000000000000000000000000..745a312c286b411ad1399c20e1e87592e8470f38 --- /dev/null +++ "b/js/\346\226\227\351\261\274\347\233\264\346\222\255.js" @@ -0,0 +1,29 @@ +// 道长 drpy仓库 https://gitcode.net/qq_32394351/dr_py +// drpy安卓本地搭建说明 https://gitcode.net/qq_32394351/dr_py/-/blob/master/%E5%AE%89%E5%8D%93%E6%9C%AC%E5%9C%B0%E6%90%AD%E5%BB%BA%E8%AF%B4%E6%98%8E.md +// Pluto Player官方TG https://t.me/PlutoPlayer +// Pluto Player官方TG https://t.me/PlutoPlayerChannel + +var rule = { + title:'斗鱼直播', + host:'https://www.douyu.com', + homeUrl:'/japi/weblist/apinc/recLabelList?',//网站的首页链接,用于分类获取和推荐获取 + url:'/gapi/rkc/directory/mixList/fyclass/fypage', + class_name:'一起看&网游竞技&单机热游&手游休闲&娱乐天地&科技文化&语音互动&语音直播&正能量&颜值&音乐&舞蹈&二次元&户外&美食&互动交友&趣生活&数码科技&文化&科普&社会人文&汽车&纪录片&斗鱼购物&交友&电台&一起玩&音乐之声&正能量&英雄联盟&热门游戏&DOTA2&穿越火线&CFHD&DNF&炉石传说&CS:GO&逆战&lol云顶之弈&魔兽争霸&魔兽怀旧服&网易游戏&守望先锋&DOTA&魔兽世界&天涯明月刀&三国杀&主机游戏&永劫无间&生死狙击2&迷失ARK&艾尔登法环&逃离塔科夫&V Rising&海上狼人杀&怀旧游戏&王者荣耀&和平精英&火影忍者&LOL手游&金铲铲之战&重返帝国&COD手游&哈利波特:魔法觉醒&CF手游&欢乐斗地主&原神&天刀手游&棋牌娱乐&欢乐麻将&新游中心&QQ飞车&阴阳师&热门手游', + class_url:'2_208&1_1&1_15&1_9&1_2&1_11&1_20&1_18&1_13&2_201&2_175&2_1008&2_174&2_124&2_194&2_1555&2_1097&2_134&2_195&2_204&2_1162&2_136&2_514&2_1203&2_1221&2_1556&2_1575&2_910&2_250&2_1&2_270&2_3&2_33&2_1997&2_40&2_2&2_6&2_46&2_917&2_55&2_1055&2_3567&2_148&2_217&2_5&2_59&2_14&2_19&2_1227&2_1781&2_3528&2_3406&2_1024&2_3684&2_3556&2_26&2_181&2_350&2_196&2_1920&2_2556&2_2915&2_767&2_1192&2_178&2_416&2_1223&2_911&2_113&2_451&2_229&2_331&2_240&2_30', + detailUrl:'/fyid',//二级详情拼接链接(json格式用) + searchUrl:'/japi/search/api/searchShow?kw=**&page=fypage&pageSize=20', + searchable:2, + quickSearch:0, + headers:{ + 'User-Agent':'PC_UA' + }, + timeout:5000, + limit:8, + play_parse:true, + lazy:'', + double:true, + 推荐:'json:data.list;room;*;cover;*;*', + 一级:'json:data.rl;rn;rs16;nn;rid', + 二级:'*', + 搜索:'json:data.relateShow;roomName;roomSrc;nickName;*', +} \ No newline at end of file diff --git "a/js/\347\210\261\350\277\252\345\275\261\350\247\206.js" "b/js/\347\210\261\350\277\252\345\275\261\350\247\206.js" new file mode 100644 index 0000000000000000000000000000000000000000..cb300567b43c6015a9c6a85d94a59251918e1d7c --- /dev/null +++ "b/js/\347\210\261\350\277\252\345\275\261\350\247\206.js" @@ -0,0 +1,19 @@ +var rule={ + title:'爱迪影视', + host:'https://aidi.tv', + url:'/show/fyclass--------fypage---.html', + searchUrl:'/vsearch/-------------.html?wd=**&submit=', + searchable:2, + quickSearch:0, + filterable:0, + headers:{ 'User-Agent':'MOBILE_UA', }, + class_name:'电影&电视剧&综艺&动漫', + class_url:'dianying&lianxuju&zongyi&dongman', + tab_exclude:'app专用|VIP线路', + play_parse:true, + double:true, + 推荐:'body .vodlist.vodlist_wi;li;a&&title;.vodlist_thumb.lazyload&&data-original;.pic-text&&Text;a&&href', + 一级:'.vodlist.vodlist_wi&&li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href', + 二级:{"title":"h2&&Text;.data:eq(1)&&Text","img":".lazyload&&data-original","desc":";.content_min li:eq(1)&&Text;;.content_min li:eq(2)&&Text;.content_min li:eq(3)&&Text;.data:eq(4)&&Text","content":".context.clearfix&&Text","tabs":".play_source_tab&&a","lists":".content_playlist:eq(#id) li"}, + 搜索:'.searchlist_img;a&&title;.vodlist_thumb.lazyload&&data-original;.pic-text&&Text;a&&href', +} \ No newline at end of file diff --git "a/js/\350\233\213\350\233\213\350\265\236.js" "b/js/\350\233\213\350\233\213\350\265\236.js" index 026905293307e1b003b2a127c13fd72f7bae0cf4..e98ce3a290d48f1786568bfd2dd8dbab2e051ef9 100644 --- "a/js/\350\233\213\350\233\213\350\265\236.js" +++ "b/js/\350\233\213\350\233\213\350\265\236.js" @@ -3,7 +3,7 @@ var rule={ host:'https://www.dandanzan10.top', // homeUrl:'/', url:'/fyclass/index_fypage.html[/fyclass/index.html]', - //searchUrl:'/search/**/', + searchUrl:'/so/**-**--.html', searchable:2,//是否启用全局搜索, quickSearch:0,//是否启用快速搜索, filterable:0,//是否启用分类筛选, @@ -20,5 +20,5 @@ var rule={ double:true, // 推荐内容是否双层定位 二级:{"title":"h1&&Text;.product-excerpt:eq(2)&&Text","img":".thumb&&src","desc":";;.product-excerpt:eq(3)&&Text;.product-excerpt:eq(1)&&Text;.product-excerpt:eq(0)&&Text","content":".product-excerpt:eq(5)&&Text","tabs":".playlists dl dt","lists":".play-div-oa:eq(#id) li"}, - 搜索:'ul.img-list.clearfix&&li;a&&title;.lazyload&&data-original;.pic-text&&Text;a&&href', + 搜索:'.lists-content&&ul&&li;*;*;*;*', } \ No newline at end of file diff --git "a/txt/js/tg/\345\256\214\347\276\216\347\234\213\347\234\213.js" "b/txt/js/tg/\345\256\214\347\276\216\347\234\213\347\234\213.js" deleted file mode 100644 index c8211ceb5751edb102956bd25eed9dc1de5ab532..0000000000000000000000000000000000000000 --- "a/txt/js/tg/\345\256\214\347\276\216\347\234\213\347\234\213.js" +++ /dev/null @@ -1,10 +0,0 @@ -muban.首图2.二级.tabs = '.stui-pannel__head&&h3'; -var rule = Object.assign(muban.首图2,{ -title:'完美看看', -host:'https://www.wanmeikk.film', -url:'/category/fyclass-fypage.html', -searchUrl:'/vodsearch/**-------------.html', -class_name:'电影&美剧&韩剧&日剧&国产剧&动漫',//静态分类名称拼接 -class_url:'1&2&3&4&5&6',//静态分类标识拼接 -class_parse:'', -}); diff --git a/utils/htmlParser.py b/utils/htmlParser.py index ad822e6b8345f889b8d082d1b27695cd30e77647..a0827f9f78cfe3ae7f98eae6bc942c5c03458a54 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -6,6 +6,7 @@ import json from pyquery import PyQuery as pq +from lxml import etree from urllib.parse import urljoin import re from jsonpath import jsonpath @@ -22,7 +23,6 @@ class jsoup: def pdfh(self,html,parse:str,add_url=False): if not parse: return '' - doc = pq(html) option = None if parse.find('&&') > -1: @@ -66,10 +66,14 @@ class jsoup: # ret = doc(parse) # 下面注释的写法不对的 # ret = ret.find(':first') # ret = ret.children(':first') - ret = str(ret) + # print(parse) + # ret = str(ret) + ret = ret.outerHtml() return ret def pdfa(self,html,parse:str): + # 看官方文档才能解决这个问题!!! + # https://pyquery.readthedocs.io/en/latest/api.html if not parse: return [] if parse.find('&&') > -1: @@ -78,8 +82,15 @@ class jsoup: parse = ' '.join([parse[i] if self.test(':eq|:lt|:gt', parse[i]) or i>=len(parse)-1 else f'{parse[i]}:eq(0)' for i in range(len(parse))]) # print(f'pdfa:{parse}') doc = pq(html) - res = [str(item) for item in doc(parse).items()] + result = doc(parse) + # 节点转字符串 + # print(str(etree.tostring(result[0], pretty_print=True), 'utf-8')) + # res = [item for item in result.items()] + res = [item.outerHtml() for item in result.items()] # 这个才是对的!!str() item str(etree.tostring 统统错误 + # res = [str(item) for item in result.items()] + # res = [str(etree.tostring(item, pretty_print=True), 'utf-8') for item in result] # print(len(res),res) + # print('pdfa执行结果数:',len(res)) return res def pd(self,html,parse:str):