From 3adf261f2e1aac4588b5e7c007ba6db77e991c6f Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Sat, 3 Sep 2022 13:10:59 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=BA=86=E4=B8=80=E5=A0=86?= =?UTF-8?q?=E4=B8=9C=E8=A5=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 34 ++++++-- classes/cms.py | 121 ++++++++++++++++++++-------- "js/360\345\275\261\350\247\206.js" | 23 ++++++ "js/\350\247\243\346\236\220.conf" | 16 ++++ "js/\350\247\243\346\236\220.txt" | 13 --- requirements.txt | 3 +- templates/config.txt | 5 +- txt/issue.txt | 2 + utils/htmlParser.py | 60 ++++++++++++-- 9 files changed, 213 insertions(+), 64 deletions(-) create mode 100644 "js/360\345\275\261\350\247\206.js" create mode 100644 "js/\350\247\243\346\236\220.conf" delete mode 100644 "js/\350\247\243\346\236\220.txt" diff --git a/app.py b/app.py index 62061dc..d6730f4 100644 --- a/app.py +++ b/app.py @@ -89,10 +89,14 @@ def forbidden(): # put application's code here @app.route('/index') def index(): # put application's code here # logger.info("进入了首页") - sup_port = app.config.get('SUP_PORT', 9001) - manager0 = ':'.join(getHost(0).split(':')[0:2]) + f':{sup_port}' - manager1 = ':'.join(getHost(1).split(':')[0:2]) + f':{sup_port}' - manager2 = ':'.join(getHost(2).split(':')[0:2]) + f':{sup_port}' + sup_port = app.config.get('SUP_PORT', False) + manager0 = ':'.join(getHost(0).split(':')[0:2]) + manager1 = ':'.join(getHost(1).split(':')[0:2]) + manager2 = ':'.join(getHost(2).split(':')[0:2]).replace('https','http') + if sup_port: + manager0 += f':{sup_port}' + manager1 += f':{sup_port}' + manager2 += f':{sup_port}' # print(manager1) # print(manager2) return render_template('index.html',getHost=getHost,manager0=manager0,manager1=manager1,manager2=manager2,is_linux=is_linux()) @@ -243,7 +247,12 @@ def vod(): if play_url: # 播放 jxs = getJxs() play_url = cms.playContent(play_url,jxs) - return redirect(play_url) + if isinstance(play_url,str): + return redirect(play_url) + elif isinstance(play_url,dict): + return jsonify(play_url) + else: + return play_url if ac and t: # 一级 data = cms.categoryContent(t,pg) @@ -337,9 +346,20 @@ def getPics(path='images'): return pic_list def getJxs(path='js'): - with open(f'{path}/解析.txt',encoding='utf-8') as f: + with open(f'{path}/解析.conf',encoding='utf-8') as f: data = f.read().strip() - jxs = [{'name':dt.split(',')[0],'url':dt.split(',')[1]} for dt in data.split('\n')] + jxs = [] + for i in data.split('\n'): + i = i.strip() + dt = i.split(',') + if not i.startswith('#'): + jxs.append({ + 'name':dt[0], + 'url':dt[1], + 'type':dt[2] if len(dt) > 2 else 0, + }) + # jxs = [{'name':dt.split(',')[0],'url':dt.split(',')[1]} for dt in data.split('\n')] + # jxs = list(filter(lambda x:not str(x['name']).strip().startswith('#'),jxs)) # print(jxs) print(f'共计{len(jxs)}条解析') return jxs diff --git a/classes/cms.py b/classes/cms.py index d31319d..258dd34 100644 --- a/classes/cms.py +++ b/classes/cms.py @@ -186,6 +186,10 @@ class CMS: pdfh = jsp.pdfh pdfa = jsp.pdfa pd = jsp.pd + pjfh = jsp.pjfh + pjfa = jsp.pjfa + pj = jsp.pj + pq = jsp.pq return pdfh,pdfa,pd,pq @@ -409,9 +413,11 @@ class CMS: result = {} videos = [] jsp = jsoup(self.homeUrl) - pdfh = jsp.pdfh - pdfa = jsp.pdfa - pd = jsp.pd + is_json = str(p[0]).startswith('json:') + pdfh = jsp.pjfh if is_json else jsp.pdfh + pdfa = jsp.pjfa if is_json else jsp.pdfa + pd = jsp.pj if is_json else jsp.pd + print(html) try: if self.double: items = pdfa(html, p[0]) @@ -422,7 +428,8 @@ class CMS: title = pdfh(item2, p[2]) img = pd(item2, p[3]) desc = pdfh(item2, p[4]) - link = pd(item2, p[5]) + links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[5].split('+')] + link = '$'.join(links) content = '' if len(p) < 7 else pdfh(item2, p[6]) videos.append({ "vod_id": link, @@ -436,13 +443,16 @@ class CMS: except: pass else: - items = pdfa(html, p[0]) + items = pdfa(html, p[0].replace('json:','')) + # print(items) for item in items: try: title = pdfh(item, p[1]) img = pd(item, p[2]) desc = pdfh(item, p[3]) - link = pd(item, p[4]) + # link = pd(item, p[4]) + links = [pd(item, p5) if not self.detailUrl else pdfh(item, p5) for p5 in p[4].split('+')] + link = '$'.join(links) content = '' if len(p) < 6 else pdfh(item, p[5]) videos.append({ "vod_id": link, @@ -495,9 +505,10 @@ class CMS: return self.blank() jsp = jsoup(self.url) - pdfh = jsp.pdfh - pdfa = jsp.pdfa - pd = jsp.pd + is_json = str(p[0]).startswith('json:') + pdfh = jsp.pjfh if is_json else jsp.pdfh + pdfa = jsp.pjfa if is_json else jsp.pdfa + pd = jsp.pj if is_json else jsp.pd # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(1)&&Text')) # print(pdfh(r.text,'body a.module-poster-item.module-item:eq(0)')) # print(pdfh(r.text,'body a.module-poster-item.module-item:first')) @@ -508,28 +519,32 @@ class CMS: r = requests.get(url, headers=self.headers, timeout=self.timeout) r.encoding = self.encoding print(r.url) - html = r.text + # html = r.text + html = r.json() if is_json else r.text # print(html) - items = pdfa(html, p[0]) + items = pdfa(html,p[0].replace('json:','',1)) except: pass + # print(items) for item in items: # print(item) try: title = pdfh(item, p[1]) img = pd(item, p[2]) desc = pdfh(item, p[3]) - link = pd(item, p[4]) + links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')] + link = '$'.join(links) content = '' if len(p) < 6 else pdfh(item, p[5]) # sid = self.regStr(sid, "/video/(\\S+).html") videos.append({ - "vod_id": link, + "vod_id": f'{fyclass}${link}' if self.detailUrl else link,# 分类,播放链接 "vod_name": title, "vod_pic": img, "vod_remarks": desc, "vod_content": content, }) - except: + except Exception as e: + print(f'发生了错误:{e}') pass result['list'] = videos result['page'] = fypage @@ -540,11 +555,11 @@ class CMS: return result - def detailOneVod(self,id): + def detailOneVod(self,id,fyclass=''): detailUrl = str(id) vod = {} if not detailUrl.startswith('http'): - url = self.detailUrl.replace('fyid', detailUrl) + url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass) else: url = detailUrl # print(url) @@ -563,20 +578,24 @@ class CMS: return vod jsp = jsoup(self.url) - pdfh = jsp.pdfh - pdfa = jsp.pdfa - pd = jsp.pd + + is_json = p.get('is_json',False) # 二级里加is_json参数 + pdfh = jsp.pjfh if is_json else jsp.pdfh + pdfa = jsp.pjfa if is_json else jsp.pdfa + pd = jsp.pj if is_json else jsp.pd pq = jsp.pq obj = {} vod_name = '' r = requests.get(url, headers=self.headers, timeout=self.timeout) r.encoding = self.encoding - html = r.text + # html = r.text + html = r.json() if is_json else r.text # print(html) if p.get('title'): p1 = p['title'].split(';') vod_name = pdfh(html,p1[0]).replace('\n',' ') - title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) + # title = '\n'.join([pdfh(html,i).replace('\n',' ') for i in p1]) + title = '\n'.join([','.join([pdfh(html, pp1).strip() for pp1 in i.split('+')]) for i in p1]) # print(title) obj['title'] = title if p.get('desc'): @@ -610,10 +629,11 @@ class CMS: vod_play_from = '$$$' playFrom = [] if p.get('tabs'): - vodHeader = pdfa(html,p['tabs']) + vodHeader = pdfa(html,p['tabs'].split(';')[0]) # print(f'线路列表数:{len((vodHeader))}') # print(vodHeader) - vodHeader = [pq(v).text() for v in vodHeader] + if not is_json: + vodHeader = [pq(v).text() for v in vodHeader] else: vodHeader = ['道长在线'] @@ -625,10 +645,20 @@ class CMS: vod_tab_list = [] if p.get('lists'): for i in range(len(vodHeader)): - p1 = p['lists'].replace('#id',str(i)) + tab_name = str(vodHeader[i]) + tab_ext = p['tabs'].split(';')[1] if len(p['tabs'].split(';')) > 1 else '' + p1 = p['lists'].replace('#idv',tab_name).replace('#id',str(i)) + tab_ext = tab_ext.replace('#idv',tab_name).replace('#id',str(i)) vodList = pdfa(html,p1) # 1条线路的选集列表 + # print(vodList) # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 - vodList = [pq(i).text()+'$'+self.play_url+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 + if self.play_parse: # 自动base64编码 + vodList = [(pdfh(html,tab_ext) if tab_ext else tab_name)+'$'+self.play_url+base64Encode(i) for i in vodList] if is_json else\ + [pq(i).text()+'$'+self.play_url+base64Encode(pd(i,'a&&href')) for i in vodList] # 拼接成 名称$链接 + else: + vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + self.play_url + i for i in + vodList] if is_json else \ + [pq(i).text() + '$' + self.play_url + pd(i, 'a&&href') for i in vodList] # 拼接成 名称$链接 vlist = '#'.join(vodList) # 拼多个选集 vod_tab_list.append(vlist) vod_play_url = vod_play_url.join(vod_tab_list) @@ -652,7 +682,12 @@ class CMS: obj_list = [] try: for vod_url in array: - obj = thread_pool.submit(self.detailOneVod, vod_url) + vod_class = '' + if vod_url.find('$') > -1: + tmp = vod_url.split('$') + vod_class = tmp[0] + vod_url = tmp[1] + obj = thread_pool.submit(self.detailOneVod, vod_url,vod_class) obj_list.append(obj) thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕 vod_list = [obj.result() for obj in obj_list] @@ -680,17 +715,19 @@ class CMS: if len(p) < 5: return self.blank() jsp = jsoup(self.url) - pdfh = jsp.pdfh - pdfa = jsp.pdfa - pd = jsp.pd + is_json = str(p[0]).startswith('json:') + pdfh = jsp.pjfh if is_json else jsp.pdfh + pdfa = jsp.pjfa if is_json else jsp.pdfa + pd = jsp.pj if is_json else jsp.pd pq = jsp.pq videos = [] try: r = requests.get(url, headers=self.headers,timeout=self.timeout) r.encoding = self.encoding - html = r.text + # html = r.text + html = r.json() if is_json else r.text # print(html) - if html.find('输入验证码') > -1: + if not is_json and html.find('输入验证码') > -1: cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api) # cookie = '' if not cookie: @@ -703,7 +740,7 @@ class CMS: r.encoding = self.encoding html = r.text - items = pdfa(html, p[0]) + items = pdfa(html,p[0].replace('json:','',1)) # print(items) videos = [] for item in items: @@ -712,7 +749,9 @@ class CMS: title = pdfh(item, p[1]) img = pd(item, p[2]) desc = pdfh(item, p[3]) - link = pd(item, p[4]) + # link = '$'.join([pd(item, p4) for p4 in p[4].split('+')]) + links = [pd(item, p4) if not self.detailUrl else pdfh(item, p4) for p4 in p[4].split('+')] + link = '$'.join(links) content = '' if len(p) < 6 else pdfh(item, p[5]) # sid = self.regStr(sid, "/video/(\\S+).html") videos.append({ @@ -736,6 +775,10 @@ class CMS: # logger.info('播放免嗅地址: ' + self.play_url) if not jxs: jxs = [] + try: + play_url = baseDecode(play_url) # 自动base64解码 + except: + pass if self.lazy: print(f'{play_url}->开始执行免嗅代码{type(self.lazy)}->{self.lazy}') t1 = time() @@ -777,6 +820,10 @@ class CMS: loader,_ = runJScode(jscode,ctx=ctx) # print(loader.toString()) play_url = loader.eval('input') + if isinstance(play_url,JsObjectWrapper): + play_url = play_url.to_dict() + # print(type(play_url)) + # print(play_url) logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}') except Exception as e: logger.info(f'免嗅耗时:{get_interval(t1)}毫秒,并发生错误:{e}') @@ -786,12 +833,15 @@ class CMS: return play_url if __name__ == '__main__': + print(urljoin('https://api.web.360kan.com/v1/f', + '//0img.hitv.com/preview/sp_images/2022/01/28/202201281528074643023.jpg')) + # exit() from utils import parser # js_path = f'js/玩偶姐姐.js' # js_path = f'js/555影视.js' with open('../js/模板.js', encoding='utf-8') as f: before = f.read() - js_path = f'js/vip影院.js' + js_path = f'js/360影视.js' ctx, js_code = parser.runJs(js_path,before=before) ruleDict = ctx.rule.to_dict() # lazy = ctx.eval('lazy') @@ -807,4 +857,5 @@ if __name__ == '__main__': # print(cms.detailContent(1,['https://cokemv.me/voddetail/40573.html'])) # cms.categoryContent('dianying',1) # print(cms.detailContent(['67391'])) - print(cms.searchContent('斗罗大陆')) \ No newline at end of file + # print(cms.searchContent('斗罗大陆')) + print(cms.searchContent('独行月球')) \ No newline at end of file diff --git "a/js/360\345\275\261\350\247\206.js" "b/js/360\345\275\261\350\247\206.js" new file mode 100644 index 0000000..44427a8 --- /dev/null +++ "b/js/360\345\275\261\350\247\206.js" @@ -0,0 +1,23 @@ +var rule = { + title:'360影视', + host:'https://www.360kan.com', + homeUrl:'https://api.web.360kan.com/v1/rank?cat=2&size=9', + detailUrl:'https://api.web.360kan.com/v1/detail?cat=fyclass&id=fyid', + searchUrl:'https://api.so.360kan.com/index?force_v=1&kw=**&from=&pageno=fypage&v_ap=1&tab=all', + url:'https://api.web.360kan.com/v1/filter/list?catid=fyclass&rank=rankhot&cat=&year=&area=&act=&size=35&pageno=fypage&callback=', + headers:{ + 'User-Agent':'MOBILE_UA' + }, + timeout:5000, + class_name:'电视剧&电影&综艺&动漫', + class_url:'2&1&3&4', + limit:5, + play_parse:true, + // play_parse:true, + lazy:'js:input={parse: 1, playUrl: "", jx: 1, url: input}', + 推荐:'json:data;title;cover;comment;cat+ent_id;description', + 一级:'json:data.movies;title;cover;pubdate;id;description', + 二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites;data.playlinksdetail.#idv.quality","lists":"data.playlinksdetail.#idv.default_url"}, + // 二级:{is_json:1,"title":"data.title;data.moviecategory[0]+data.moviecategory[1]","img":"data.cdncover","desc":"data.area[0];data.director[0]","content":"data.description","tabs":"data.playlink_sites","lists":"data.playlinksdetail.#idv.default_url"}, + 搜索:'json:data.longData.rows;titleTxt;cover;score;cat_id+id;description', +} \ No newline at end of file diff --git "a/js/\350\247\243\346\236\220.conf" "b/js/\350\247\243\346\236\220.conf" new file mode 100644 index 0000000..539b79b --- /dev/null +++ "b/js/\350\247\243\346\236\220.conf" @@ -0,0 +1,16 @@ +# 0123,对应,普通解析,json解析,并发解析,聚合解析,参数3不填默认0 +BT5V,https://rx.bt5v.com/json/jsonindex.php/?url=,1 +爱酷,https://cache.json.icu/home/api?type=ys&uid=292796&key=fnoryABDEFJNPQV269&url=,1 +# m3u8tv,https://jx.m3u8.tv/jiexi/?url= +# 思古解析,https://jsap.attakids.com/?url= +# 云解析,https://jx.ppflv.com/?url= +# 云解析2,https://jx.aidouer.net/?url= +# BL解析,https://vip.bljiex.cc/?v= +# 虾米解析,https://jx.xmflv.com/?url= +# 飞飞智能,https://y.9dan.cc/?v= +# 左岸解析,https://jx.bozrc.com:4433/player/?url= +# ok解析,https://okjx.cc/?url= +# 8090解析,https://www.8090g.cn/?url= +# ckplayer无广,https://www.ckplayer.vip/jiexi/?url= +# 盘古无广,http://www.pangujiexi.cc/jiexi.php?url= +# 江湖,http://jx.vipmv.co/?url= \ No newline at end of file diff --git "a/js/\350\247\243\346\236\220.txt" "b/js/\350\247\243\346\236\220.txt" deleted file mode 100644 index 7ecbb2f..0000000 --- "a/js/\350\247\243\346\236\220.txt" +++ /dev/null @@ -1,13 +0,0 @@ -m3u8tv,https://jx.m3u8.tv/jiexi/?url= -思古解析,https://jsap.attakids.com/?url= -云解析,https://jx.ppflv.com/?url= -云解析2,https://jx.aidouer.net/?url= -BL解析,https://vip.bljiex.cc/?v= -虾米解析,https://jx.xmflv.com/?url= -飞飞智能,https://y.9dan.cc/?v= -左岸解析,https://jx.bozrc.com:4433/player/?url= -ok解析,https://okjx.cc/?url= -8090解析,https://www.8090g.cn/?url= -ckplayer无广,https://www.ckplayer.vip/jiexi/?url= -盘古无广,http://www.pangujiexi.cc/jiexi.php?url= -江湖,http://jx.vipmv.co/?url= \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index dbe593b..586aeb7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,4 +9,5 @@ gevent ; python_version < '3.9' gunicorn ; python_version >= '3.6' supervisor ; sys_platform != 'win32' func_timeout -easydict \ No newline at end of file +easydict +jsonpath \ No newline at end of file diff --git a/templates/config.txt b/templates/config.txt index 3a0f55a..1c18c9c 100644 --- a/templates/config.txt +++ b/templates/config.txt @@ -2,6 +2,7 @@ {% if config.WALL_PAPER_ENABLE %}"wallpaper":"{{ host }}/pics",{% endif %} "dr_count": {{rules.list|length}}, "mode": {{ mode }}, +"homepage":"https://gitcode.net/qq_32394351/dr_py", "sites": [{% for rule in rules.list %}{% if mode == 0 %} { "key":"dr_{{ rule.name }}", @@ -38,9 +39,9 @@ { "name": "{{ jx.name }}", "url": "{{ jx.url }}", - "type": 1, + "type": {{ jx.type }}, "ext": { - "flag": ["qiyi", "爱奇艺", "奇艺", "qq", "腾讯", "youku", "优酷", "pptv", "PPTV", "letv", "乐视", "bilibili", "哔哩哔哩", "哔哩", "mgtv", "芒果","sohu", "xigua"], + "flag": ["qiyi","imgo","爱奇艺", "奇艺", "qq", "腾讯", "youku", "优酷", "pptv", "PPTV", "letv", "乐视", "bilibili", "哔哩哔哩", "哔哩", "mgtv", "芒果","sohu", "xigua"], "header": { "User-Agent": "Dart/2.14 (dart:io)" } diff --git a/txt/issue.txt b/txt/issue.txt index 3396ce9..d50d8c9 100644 --- a/txt/issue.txt +++ b/txt/issue.txt @@ -30,3 +30,5 @@ https://cuiqingcai.com/202232.html "lives":[{"group":"redirect","channels":[{"name":"直播","urls":["proxy://do=live&type=txt&ext={% if config.LIVE_MODE==0 %}{{base64Encode(host+'/lives')}}{% else %}{{ base64Encode('https://gitcode.net/qq_26898231/TVBox/-/raw/main/live/0830zb.txt')}}{% endif %}"]}]}], +"homepage":"https://gitcode.net/qq_32394351/dr_py", +"imgo", \ No newline at end of file diff --git a/utils/htmlParser.py b/utils/htmlParser.py index 5088ea7..9d00e93 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -3,21 +3,23 @@ # File : htmlParser.py # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Date : 2022/8/25 +import json from pyquery import PyQuery as pq from urllib.parse import urljoin import re +from jsonpath import jsonpath class jsoup: def __init__(self,MY_URL=''): self.MY_URL = MY_URL - def test(self, text, string): + def test(self, text:str, string:str): searchObj = re.search(rf'{text}', string, re.M | re.I) test_ret = True if searchObj else False return test_ret - def pdfh(self,html,parse,pd=False): + def pdfh(self,html,parse:str,add_url=False): if not parse: return '' doc = pq(html) @@ -40,7 +42,7 @@ class jsoup: ret = ret.html() else: ret = ret.attr(option) - if pd and option in ['url','src','href','data-original','data-src']: + if add_url and option in ['url','src','href','data-original','data-src']: ret = urljoin(self.MY_URL,ret) else: # ret = doc(parse+':first') @@ -52,7 +54,7 @@ class jsoup: ret = str(ret) return ret - def pdfa(self,html,parse): + def pdfa(self,html,parse:str): if not parse: return [] if parse.find('&&') > -1: @@ -64,12 +66,58 @@ class jsoup: # return [item.html() for item in doc(parse).items()] return [str(item) for item in doc(parse).items()] - def pd(self,html,parse): + def pd(self,html,parse:str): return self.pdfh(html,parse,True) - def pq(self,html): + def pq(self,html:str): return pq(html) + def pjfh(self,html,parse:str,add_url=False): + if not parse: + return '' + if isinstance(html,str): + # print(html) + try: + html = json.loads(html) + # html = eval(html) + except: + print('字符串转json失败') + return '' + if not parse.startswith('$.'): + parse = f'$.{parse}' + ret = jsonpath(html,parse) + if isinstance(ret,list): + ret = str(ret[0]) if ret[0] else '' + else: + ret = str(ret) if ret else '' + if add_url: + ret = urljoin(self.MY_URL, ret) + return ret + + def pj(self, html, parse:str): + return self.pjfh(html, parse, True) + + def pjfa(self,html,parse:str): + if not parse: + return [] + if isinstance(html,str): + try: + html = json.loads(html) + except: + return '' + if not parse.startswith('$.'): + parse = f'$.{parse}' + # print(parse) + ret = jsonpath(html,parse) + # print(ret) + # print(type(ret)) + # print(type(ret[0])) + # print(len(ret)) + if isinstance(ret,list) and isinstance(ret[0],list) and len(ret) == 1: + # print('自动解包') + ret = ret[0] # 自动解包 + return ret or [] + if __name__ == '__main__': import requests from parsel import Selector -- GitLab