提交 fffbb681 编写于 作者: H hjdhnx

增加自定义免嗅,增加规则模板

上级 f8bc0b3a
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# File : app.py # File : app.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25 # Date : 2022/8/25
import time
from flask_sqlalchemy import SQLAlchemy from flask_sqlalchemy import SQLAlchemy
import config import config
...@@ -77,12 +78,20 @@ def vod(): ...@@ -77,12 +78,20 @@ def vod():
msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}' msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
return jsonify(error.failed(msg)) return jsonify(error.failed(msg))
t1 = time.time()
js_path = f'js/{rule}.js' if not ext.startswith('http') else ext js_path = f'js/{rule}.js' if not ext.startswith('http') else ext
ctx,js_code = parser.runJs(js_path) # before = ''
with open('js/模板.js', encoding='utf-8') as f:
before = f.read()
# print(before)
ctx,js_code = parser.runJs(js_path,before=before)
if not js_code: if not js_code:
return jsonify(error.failed('爬虫规则加载失败')) return jsonify(error.failed('爬虫规则加载失败'))
rule = ctx.eval('rule') rule = ctx.eval('rule')
t2 = time.time()
logger.info(f'js装载耗时:{round((t2-t1)*1000,2)}毫秒')
# print(rule)
cms = CMS(rule,db,RuleClass,PlayParse,app.config) cms = CMS(rule,db,RuleClass,PlayParse,app.config)
wd = getParmas('wd') wd = getParmas('wd')
ac = getParmas('ac') ac = getParmas('ac')
...@@ -137,7 +146,7 @@ def getRules(path='cache'): ...@@ -137,7 +146,7 @@ def getRules(path='cache'):
# print(base_path) # print(base_path)
os.makedirs(base_path,exist_ok=True) os.makedirs(base_path,exist_ok=True)
file_name = os.listdir(base_path) file_name = os.listdir(base_path)
file_name = list(filter(lambda x: str(x).endswith('.js'), file_name)) file_name = list(filter(lambda x: str(x).endswith('.js') and str(x).find('模板') < 0, file_name))
# print(file_name) # print(file_name)
rule_list = [file.replace('.js', '') for file in file_name] rule_list = [file.replace('.js', '') for file in file_name]
rules = {'list': rule_list, 'count': len(rule_list)} rules = {'list': rule_list, 'count': len(rule_list)}
......
...@@ -18,6 +18,7 @@ from urllib.parse import urljoin ...@@ -18,6 +18,7 @@ from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池 from concurrent.futures import ThreadPoolExecutor # 引入线程池
from time import time from time import time
from flask import url_for,redirect from flask import url_for,redirect
from easydict import EasyDict as edict
class CMS: class CMS:
def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None): def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None):
...@@ -96,10 +97,23 @@ class CMS: ...@@ -96,10 +97,23 @@ class CMS:
self.timeout = round(int(timeout)/1000,2) self.timeout = round(int(timeout)/1000,2)
self.filter = rule.get('filter',[]) self.filter = rule.get('filter',[])
self.extend = rule.get('extend',[]) self.extend = rule.get('extend',[])
self.d = self.getObject()
def getName(self): def getName(self):
return self.title return self.title
def getObject(self):
    """Build the context object handed to custom lazy-parse scripts.

    The returned EasyDict exposes its keys as attributes, so a script can
    write ``d.headers`` or ``d.getParse(url)`` instead of receiving each
    helper as a separate positional argument.

    Returns:
        EasyDict with the keys ``jsp`` (a ``jsoup`` helper built from
        ``self.url``), ``getParse`` / ``saveParse`` (bound methods of this
        instance), ``headers``, ``encoding``, ``name`` (the rule title)
        and ``timeout``.
    """
    # Keep the key order and evaluation order of the original literal:
    # the jsoup helper is constructed first, then the attributes are read.
    context = {
        'jsp': jsoup(self.url),
        'getParse': self.getParse,
        'saveParse': self.saveParse,
        'headers': self.headers,
        'encoding': self.encoding,
        'name': self.title,
        'timeout': self.timeout,
    }
    return edict(context)
def regexp(self,prule,text,pos=None): def regexp(self,prule,text,pos=None):
ret = re.search(prule,text).groups() ret = re.search(prule,text).groups()
if pos != None and isinstance(pos,int): if pos != None and isinstance(pos,int):
...@@ -612,9 +626,7 @@ class CMS: ...@@ -612,9 +626,7 @@ class CMS:
if pos < 0: if pos < 0:
return play_url return play_url
pyenv = safePython(self.lazy,pycode[pos:]) pyenv = safePython(self.lazy,pycode[pos:])
# print(pyenv) lazy_url = pyenv.action_task_exec('lazyParse',[play_url,self.d])
jsp = jsoup(self.url)
lazy_url = pyenv.action_task_exec('lazyParse',[play_url,jsp,self.getParse,self.saveParse,self.headers,self.encoding])
logger.info(f'播放免嗅结果:{lazy_url}') logger.info(f'播放免嗅结果:{lazy_url}')
if isinstance(lazy_url,str) and lazy_url.startswith('http'): if isinstance(lazy_url,str) and lazy_url.startswith('http'):
play_url = lazy_url play_url = lazy_url
......
...@@ -10,7 +10,7 @@ def getRules(): ...@@ -10,7 +10,7 @@ def getRules():
base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录
# print(base_path) # print(base_path)
file_name = os.listdir(base_path) file_name = os.listdir(base_path)
file_name = list(filter(lambda x:str(x).endswith('.js'),file_name)) file_name = list(filter(lambda x:str(x).endswith('.js') and str(x).find('模板') < 0,file_name))
# print(file_name) # print(file_name)
rule_list = [file.replace('.js','') for file in file_name] rule_list = [file.replace('.js','') for file in file_name]
# print(rule_list) # print(rule_list)
......
var rule = { var rule = Object.assign(muban.mxpro,{
title:'夜空', title:'夜空',
host:'https://www.yekong.cc', host:'https://www.yekong.cc',
// homeUrl:'/', url:'/pianku-fyclass--------fypage---/',
url:'/pianku-fyclass--------fypage---/',
searchUrl:'/search-**----------fypage---/', searchUrl:'/search-**----------fypage---/',
headers:{//网站的请求头,完整支持所有的,常带ua和cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/', class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
play_parse:true, });
lazy:'', \ No newline at end of file
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
// Scraping rule for one site, written as a single object literal bound to the
// global `rule`. The string values are selector/URL expressions interpreted by
// the rule engine, not by JavaScript itself.
var rule = {
title:'夜空',
host:'https://www.yekong.cc',
// homeUrl:'/',
// NOTE(review): `fyclass` and `fypage` look like placeholders for the category id and page number — confirm against the rule engine.
url:'/pianku-fyclass--------fypage---/',
// NOTE(review): `**` presumably stands for the search keyword — confirm.
searchUrl:'/search-**----------fypage---/',
headers:{// Request headers for the site; any header is supported, typically UA and cookies
// NOTE(review): 'MOBILE_UA' appears to be a variable name substituted by the server rather than a literal UA string — confirm.
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
// Category list is scraped with class_parse instead of the hard-coded class_name/class_url pair left commented out above.
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
play_parse:true,
// Empty string here; NOTE(review): presumably names a custom play-URL resolver script when set — confirm.
lazy:'',
limit:10,
// Key means "recommended" (home-page picks).
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // Whether the recommended content is located with a two-level selector
// Key means "first level" (listing page).
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
// Key means "second level" (detail page): title, image, description, content, tabs and play lists.
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
// Key means "search" (search-result page).
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
...@@ -11,7 +11,7 @@ var rule = { ...@@ -11,7 +11,7 @@ var rule = {
// class_url:'20&1&2&3&4&23', // class_url:'20&1&2&3&4&23',
class_parse:'.stui-header__menu li:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html', class_parse:'.stui-header__menu li:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html',
play_parse:true, play_parse:true,
// lazy:'干饭', lazy:'干饭',
limit:5, limit:5,
推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;;a&&href', 推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;;a&&href',
double:true, // 推荐内容是否双层定位 double:true, // 推荐内容是否双层定位
......
// Rule templates ("muban" = template). Each property is a complete default rule
// that a site rule can extend, e.g. `var rule = Object.assign(muban.mxpro, {...})`.
// NOTE(review): Object.assign writes into its first argument, so that pattern
// mutates muban.mxpro itself; harmless only if every rule is evaluated in a
// fresh JS context — confirm.
var muban = {
// Default rule for sites sharing the "mxpro" page layout; title and host are left empty for the site rule to fill in.
mxpro:{
title:'',
host:'',
// homeUrl:'/',
// NOTE(review): `fyclass` and `fypage` look like placeholders for the category id and page number — confirm against the rule engine.
url:'/vodshow/fyclass--------fypage---.html',
// NOTE(review): `**` presumably stands for the search keyword — confirm.
searchUrl:'/vodsearch/**----------fypage---.html',
headers:{// Request headers for the site; any header is supported, typically UA and cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
lazy:'',
limit:10,
// Key means "recommended" (home-page picks).
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // Whether the recommended content is located with a two-level selector
// Key means "first level" (listing page).
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
// Key means "second level" (detail page): title, image, description, content, tabs and play lists.
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
// Key means "search" (search-result page).
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
};
\ No newline at end of file
无法预览此类型文件
...@@ -10,24 +10,24 @@ import json ...@@ -10,24 +10,24 @@ import json
from urllib.parse import urljoin,quote,unquote from urllib.parse import urljoin,quote,unquote
import base64 import base64
def lazyParse(input,jsp,getParse,saveParse,headers,encoding): def lazyParse(input,d):
cacheUrl = getParse(input) cacheUrl = d.getParse(input)
print(f'cacheUrl:{cacheUrl}') print(f'干饭免嗅:cacheUrl:{cacheUrl}')
if cacheUrl: if cacheUrl:
return cacheUrl return cacheUrl
r = requests.get(input, headers=headers) r = requests.get(input, headers=d.headers,timeout=d.timeout)
r.encoding = encoding r.encoding = d.encoding
html = r.text html = r.text
# print(html) # print(html)
# js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html') js = d.jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html')
# print(js) print(js)
try: try:
ret = re.search('var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1] ret = re.search('var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1]
ret = json.loads(ret) ret = json.loads(ret)
url = ret.get('url','') url = ret.get('url','')
if len(url) > 10: if len(url) > 10:
real_url = 'https://player.buyaotou.xyz/?url='+url real_url = 'https://player.buyaotou.xyz/?url='+url
saveParse(input,real_url) d.saveParse(input,real_url)
return real_url return real_url
else: else:
return input return input
......
...@@ -7,4 +7,8 @@ ...@@ -7,4 +7,8 @@
import re import re
txt = 'var player_aaaa={"flag":"play","encrypt":3,"trysee":0,"points":0,"link":"\/vodplay\/44640-1-1.html","link_next":"","link_pre":"","url":"Zd2fZg56c6y10828ZDRiNzZjNzk1Y2E3OWQzNmQzYWEyM2IwODM0ZjM3MgO0O0OO0O0O","url_next":"d","from":"vip","server":"no","note":"","id":"44640","sid":1,"nid":1}' txt = 'var player_aaaa={"flag":"play","encrypt":3,"trysee":0,"points":0,"link":"\/vodplay\/44640-1-1.html","link_next":"","link_pre":"","url":"Zd2fZg56c6y10828ZDRiNzZjNzk1Y2E3OWQzNmQzYWEyM2IwODM0ZjM3MgO0O0OO0O0O","url_next":"d","from":"vip","server":"no","note":"","id":"44640","sid":1,"nid":1}'
ret = re.search('var player_(.*?)=(.*?)<',txt,re.M|re.I) ret = re.search('var player_(.*?)=(.*?)<',txt,re.M|re.I)
print(ret) print(ret)
\ No newline at end of file
def lazyParse(input, jsp=None, getParse=None, saveParse=None, headers=None, encoding=None):
    """Placeholder play-URL resolver that does nothing and returns ``None``.

    Only ``input`` is required. The other parameters keep their legacy names
    and positions but are now optional, so the stub accepts both the old
    six-argument call and the newer two-argument ``(input, d)`` call, where
    the bundled context object arrives in the ``jsp`` slot.

    Args:
        input: Play-page URL to resolve.
        jsp: Legacy parsing helper, or the bundled context object when the
            caller uses the two-argument form.
        getParse: Legacy cache-lookup callable (unused).
        saveParse: Legacy cache-store callable (unused).
        headers: Legacy request headers (unused).
        encoding: Legacy response encoding (unused).

    Returns:
        None. The stub performs no resolution.
    """
    return None
\ No newline at end of file
...@@ -9,10 +9,10 @@ import json ...@@ -9,10 +9,10 @@ import json
from urllib.parse import urljoin,quote,unquote from urllib.parse import urljoin,quote,unquote
import base64 import base64
def lazyParse(input,jsp,getParse,saveParse,headers,encoding): def lazyParse(input,d):
print('lazyParse:',input) print('通用免嗅:',input)
r = requests.get(input,headers=headers) r = requests.get(input, headers=d.headers,timeout=d.timeout)
r.encoding = encoding r.encoding = d.encoding
html = r.text html = r.text
# print(html) # print(html)
# js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html') # js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html')
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
- [X] 2.转移文本文件到txt目录 - [X] 2.转移文本文件到txt目录
- [X] 3.增加服务器解析播放(全局配置和js分别配置.后期可以针对性运行解析) - [X] 3.增加服务器解析播放(全局配置和js分别配置.后期可以针对性运行解析)
- [X] 4.增加自定义免嗅(基于道长任务仓库核心逻辑实现云函数) - [X] 4.增加自定义免嗅(基于道长任务仓库核心逻辑实现云函数)
- [X] 5.增加模板继承,优化免嗅函数的第二个参数(将 jsp、getParse、saveParse、headers、encoding 等合并为单个对象 d)
###### 2022/08/27 ###### 2022/08/27
- [X] 1.增加PC_UA变量 - [X] 1.增加PC_UA变量
- [X] 2.首页增加更多功能按钮 - [X] 2.首页增加更多功能按钮
......
...@@ -6,4 +6,5 @@ flask-sqlalchemy ...@@ -6,4 +6,5 @@ flask-sqlalchemy
netifaces netifaces
gevent ; python_version < '3.9' gevent ; python_version < '3.9'
supervisor ; sys_platform != 'win32' and python_version < '3.9' supervisor ; sys_platform != 'win32' and python_version < '3.9'
func_timeout func_timeout
\ No newline at end of file easydict
\ No newline at end of file
...@@ -17,7 +17,7 @@ import execjs ...@@ -17,7 +17,7 @@ import execjs
# os.environ["EXECJS_RUNTIME"] = "JScript" # os.environ["EXECJS_RUNTIME"] = "JScript"
# print(execjs.get().name) # print(execjs.get().name)
def runJs(jsPath): def runJs(jsPath,before='',after=''):
# base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 # base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录
# base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录 # base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录
# base_path = os.path.dirname(os.path.abspath('.')) # 上级目录 # base_path = os.path.dirname(os.path.abspath('.')) # 上级目录
...@@ -43,7 +43,12 @@ def runJs(jsPath): ...@@ -43,7 +43,12 @@ def runJs(jsPath):
with open(js_path, 'r', encoding='UTF-8') as fp: with open(js_path, 'r', encoding='UTF-8') as fp:
js_code = fp.read() js_code = fp.read()
# print(js_code) # print(js_code)
loader = execjs.compile(js_code) jscode_to_run = js_code
if before:
jscode_to_run = before + jscode_to_run
if after:
jscode_to_run += after
loader = execjs.compile(jscode_to_run)
return loader,js_code return loader,js_code
def toJs(jsPath): def toJs(jsPath):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册