From fffbb681b72febfe15df73be595284fc97bf7c8a Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Sun, 28 Aug 2022 23:10:57 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=87=AA=E5=AE=9A=E4=B9=89?= =?UTF-8?q?=E5=85=8D=E5=97=85=EF=BC=8C=E5=A2=9E=E5=8A=A0=E8=A7=84=E5=88=99?= =?UTF-8?q?=E6=A8=A1=E6=9D=BF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 13 +++++++-- classes/cms.py | 18 ++++++++++--- js/rules.py | 2 +- "js/\345\244\234\347\251\272.js" | 25 ++++-------------- "js/\345\244\234\347\251\272.js.old" | 22 +++++++++++++++ ...62\351\245\255\345\275\261\350\247\206.js" | 2 +- "js/\346\250\241\346\235\277.js" | 24 +++++++++++++++++ models/rules.db | Bin 16384 -> 16384 bytes "py/\345\271\262\351\245\255.py" | 16 +++++------ "py/\346\265\213\350\257\225.py" | 6 ++++- ...32\347\224\250\345\205\215\345\227\205.py" | 8 +++--- readme.md | 1 + requirements.txt | 3 ++- utils/parser.py | 9 +++++-- 14 files changed, 106 insertions(+), 43 deletions(-) create mode 100644 "js/\345\244\234\347\251\272.js.old" create mode 100644 "js/\346\250\241\346\235\277.js" diff --git a/app.py b/app.py index ffba0c6..8cf63fe 100644 --- a/app.py +++ b/app.py @@ -3,6 +3,7 @@ # File : app.py # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Date : 2022/8/25 +import time from flask_sqlalchemy import SQLAlchemy import config @@ -77,12 +78,20 @@ def vod(): msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}' return jsonify(error.failed(msg)) + t1 = time.time() js_path = f'js/{rule}.js' if not ext.startswith('http') else ext - ctx,js_code = parser.runJs(js_path) + # before = '' + with open('js/模板.js', encoding='utf-8') as f: + before = f.read() + # print(before) + ctx,js_code = parser.runJs(js_path,before=before) if not js_code: return jsonify(error.failed('爬虫规则加载失败')) rule = ctx.eval('rule') + t2 = time.time() + logger.info(f'js装载耗时:{round((t2-t1)*1000,2)}毫秒') + # print(rule) cms = CMS(rule,db,RuleClass,PlayParse,app.config) wd = getParmas('wd') ac = getParmas('ac') @@ -137,7 +146,7 @@ def getRules(path='cache'): # print(base_path) os.makedirs(base_path,exist_ok=True) file_name = os.listdir(base_path) - file_name = list(filter(lambda x: str(x).endswith('.js'), file_name)) + file_name = list(filter(lambda x: str(x).endswith('.js') and str(x).find('模板') < 0, file_name)) # print(file_name) rule_list = [file.replace('.js', '') for file in file_name] rules = {'list': rule_list, 'count': len(rule_list)} diff --git a/classes/cms.py b/classes/cms.py index d65561a..9cbe480 100644 --- a/classes/cms.py +++ b/classes/cms.py @@ -18,6 +18,7 @@ from urllib.parse import urljoin from concurrent.futures import ThreadPoolExecutor # 引入线程池 from time import time from flask import url_for,redirect +from easydict import EasyDict as edict class CMS: def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None): @@ -96,10 +97,23 @@ class CMS: self.timeout = round(int(timeout)/1000,2) self.filter = rule.get('filter',[]) self.extend = rule.get('extend',[]) + self.d = self.getObject() def getName(self): return self.title + def getObject(self): + o = edict({ + 'jsp':jsoup(self.url), + 'getParse':self.getParse, + 'saveParse':self.saveParse, + 'headers':self.headers, + 'encoding':self.encoding, + 'name':self.title, + 'timeout':self.timeout, + }) + return o + def regexp(self,prule,text,pos=None): ret = re.search(prule,text).groups() if pos != None and isinstance(pos,int): @@ -612,9 +626,7 @@ class CMS: if pos < 0: return play_url pyenv = safePython(self.lazy,pycode[pos:]) - # print(pyenv) - jsp = jsoup(self.url) - lazy_url = pyenv.action_task_exec('lazyParse',[play_url,jsp,self.getParse,self.saveParse,self.headers,self.encoding]) + lazy_url = pyenv.action_task_exec('lazyParse',[play_url,self.d]) logger.info(f'播放免嗅结果:{lazy_url}') if isinstance(lazy_url,str) and lazy_url.startswith('http'): play_url = lazy_url diff --git a/js/rules.py b/js/rules.py index a4e4aa8..f365fa2 100644 --- a/js/rules.py +++ b/js/rules.py @@ -10,7 +10,7 @@ def getRules(): base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 # print(base_path) file_name = os.listdir(base_path) - file_name = list(filter(lambda x:str(x).endswith('.js'),file_name)) + file_name = list(filter(lambda x:str(x).endswith('.js') and str(x).find('模板') < 0,file_name)) # print(file_name) rule_list = [file.replace('.js','') for file in file_name] # print(rule_list) diff --git "a/js/\345\244\234\347\251\272.js" "b/js/\345\244\234\347\251\272.js" index 86f1864..d73aebd 100644 --- "a/js/\345\244\234\347\251\272.js" +++ "b/js/\345\244\234\347\251\272.js" @@ -1,22 +1,7 @@ -var rule = { - title:'夜空', - host:'https://www.yekong.cc', - // homeUrl:'/', - url:'/pianku-fyclass--------fypage---/', +var rule = Object.assign(muban.mxpro,{ +title:'夜空', +host:'https://www.yekong.cc', +url:'/pianku-fyclass--------fypage---/', searchUrl:'/search-**----------fypage---/', - headers:{//网站的请求头,完整支持所有的,常带ua和cookies - 'User-Agent':'MOBILE_UA', - "Cookie": "searchneed=ok" - }, - // class_name:'电影&连续剧&福利&动漫&综艺', - // class_url:'1&2&124&4&3', class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/', - play_parse:true, - lazy:'', - limit:10, - 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', - double:true, // 推荐内容是否双层定位 - 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', - 二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"}, - 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', -} \ No newline at end of file +}); \ No newline at end of file diff --git "a/js/\345\244\234\347\251\272.js.old" "b/js/\345\244\234\347\251\272.js.old" new file mode 100644 index 0000000..86f1864 --- /dev/null +++ "b/js/\345\244\234\347\251\272.js.old" @@ -0,0 +1,22 @@ +var rule = { + title:'夜空', + host:'https://www.yekong.cc', + // homeUrl:'/', + url:'/pianku-fyclass--------fypage---/', + searchUrl:'/search-**----------fypage---/', + headers:{//网站的请求头,完整支持所有的,常带ua和cookies + 'User-Agent':'MOBILE_UA', + "Cookie": "searchneed=ok" + }, + // class_name:'电影&连续剧&福利&动漫&综艺', + // class_url:'1&2&124&4&3', + class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/', + play_parse:true, + lazy:'', + limit:10, + 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', + double:true, // 推荐内容是否双层定位 + 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', + 二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"}, + 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', +} \ No newline at end of file diff --git "a/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" "b/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" index 4e82ee8..739f5b7 100644 --- "a/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" +++ "b/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" @@ -11,7 +11,7 @@ var rule = { // class_url:'20&1&2&3&4&23', class_parse:'.stui-header__menu li:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html', play_parse:true, - // lazy:'干饭', + lazy:'干饭', limit:5, 推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\346\250\241\346\235\277.js" "b/js/\346\250\241\346\235\277.js" new file mode 100644 index 0000000..02c7001 --- /dev/null +++ "b/js/\346\250\241\346\235\277.js" @@ -0,0 +1,24 @@ +var muban = { + mxpro:{ + title:'', + host:'', + // homeUrl:'/', + url:'/vodshow/fyclass--------fypage---.html', + searchUrl:'/vodsearch/**----------fypage---.html', + headers:{//网站的请求头,完整支持所有的,常带ua和cookies + 'User-Agent':'MOBILE_UA', + "Cookie": "searchneed=ok" + }, + class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html', + play_parse:true, + lazy:'', + limit:10, + 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', + double:true, // 推荐内容是否双层定位 + 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', + 二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"}, + 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', +} + + +}; \ No newline at end of file diff --git a/models/rules.db b/models/rules.db index 2158a3d5269ef3c111351a7db83455ab397369f7..c8ce8d3d2be7dc60e58988faf5b890d6a473ccae 100644 GIT binary patch delta 679 zcmZ{i&u-H|5Ql4(Kom;3Ac0f_LR?TP#Cq2@$t5TyX%gh!G_LK9_d@+Av17-fDz<}t zX~GkL@(R2_1<$}KcU}c?!lqIIwbf|nHuKH*(=Ha=V!?gd;i{h>zUHcDM_*ptD_7PV zcgNf7ZFtAq`o8txc2-%REx%RD%iVHm!(E=PX0M)daL7S+Y!3I2M>L2HSdV1H#+W)M%y$yAyZ64Y4q~&bMApCScW?f0 z1Fy4z@@$$oG~(ruB-UXa4iR~0Av?cZ8Pm3?u}mlCR2S4hnT<4l??45YagR#{&b_b^ z3Y>bKjg`maOwNNdCN)4>!JPP7f>k>Y(T1eezKrPH$Ppi3=7^^pT>WfRPp?=H&0#Y( z!|8O?nM!6?_pIE`3hndYAjllmO>oP|SU5;oXow`lBB6uSPy^9`y2o0IXJow$`}+Y= z6%Rx01M6Uc9EuS}ny|dTS(s8UidH`wk2WO% 10: real_url = 'https://player.buyaotou.xyz/?url='+url - saveParse(input,real_url) + d.saveParse(input,real_url) return real_url else: return input diff --git "a/py/\346\265\213\350\257\225.py" "b/py/\346\265\213\350\257\225.py" index 849d3b6..465a33a 100644 --- "a/py/\346\265\213\350\257\225.py" +++ "b/py/\346\265\213\350\257\225.py" @@ -7,4 +7,8 @@ import re txt = 'var player_aaaa={"flag":"play","encrypt":3,"trysee":0,"points":0,"link":"\/vodplay\/44640-1-1.html","link_next":"","link_pre":"","url":"Zd2fZg56c6y10828ZDRiNzZjNzk1Y2E3OWQzNmQzYWEyM2IwODM0ZjM3MgO0O0OO0O0O","url_next":"d","from":"vip","server":"no","note":"","id":"44640","sid":1,"nid":1}' ret = re.search('var player_(.*?)=(.*?)<',txt,re.M|re.I) -print(ret) \ No newline at end of file +print(ret) + + +def lazyParse(input,jsp,getParse,saveParse,headers,encoding): + pass \ No newline at end of file diff --git "a/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" "b/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" index 67b2f26..1ade259 100644 --- "a/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" +++ "b/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" @@ -9,10 +9,10 @@ import json from urllib.parse import urljoin,quote,unquote import base64 -def lazyParse(input,jsp,getParse,saveParse,headers,encoding): - print('lazyParse:',input) - r = requests.get(input,headers=headers) - r.encoding = encoding +def lazyParse(input,d): + print('通用免嗅:',input) + r = requests.get(input, headers=d.headers,timeout=d.timeout) + r.encoding = d.encoding html = r.text # print(html) # js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html') diff --git a/readme.md b/readme.md index bb92cff..e04b3fe 100644 --- a/readme.md +++ b/readme.md @@ -5,6 +5,7 @@ - [X] 2.转移文本文件到txt目录 - [X] 3.增加服务器解析播放(全局配置和js分别配置.后期可以针对性运行解析) - [X] 4.增加自定义免嗅(基于道长任务仓库核心逻辑实现云函数) +- [X] 5.增加模板继承,优化免嗅参数二 ###### 2022/08/27 - [X] 1.增加PC_UA变量 - [X] 2.首页增加更多功能按钮 diff --git a/requirements.txt b/requirements.txt index c7d56bd..6d0046c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,5 @@ flask-sqlalchemy netifaces gevent ; python_version < '3.9' supervisor ; sys_platform != 'win32' and python_version < '3.9' -func_timeout \ No newline at end of file +func_timeout +easydict \ No newline at end of file diff --git a/utils/parser.py b/utils/parser.py index f73eecc..0767121 100644 --- a/utils/parser.py +++ b/utils/parser.py @@ -17,7 +17,7 @@ import execjs # os.environ["EXECJS_RUNTIME"] = "JScript" # print(execjs.get().name) -def runJs(jsPath): +def runJs(jsPath,before='',after=''): # base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 # base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录 # base_path = os.path.dirname(os.path.abspath('.')) # 上级目录 @@ -43,7 +43,12 @@ def runJs(jsPath): with open(js_path, 'r', encoding='UTF-8') as fp: js_code = fp.read() # print(js_code) - loader = execjs.compile(js_code) + jscode_to_run = js_code + if before: + jscode_to_run = before + jscode_to_run + if after: + jscode_to_run += after + loader = execjs.compile(jscode_to_run) return loader,js_code def toJs(jsPath): -- GitLab