提交 fffbb681 编写于 作者: H hjdhnx

增加自定义免嗅,增加规则模板

上级 f8bc0b3a
......@@ -3,6 +3,7 @@
# File : app.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25
import time
from flask_sqlalchemy import SQLAlchemy
import config
......@@ -77,12 +78,20 @@ def vod():
msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
return jsonify(error.failed(msg))
t1 = time.time()
js_path = f'js/{rule}.js' if not ext.startswith('http') else ext
ctx,js_code = parser.runJs(js_path)
# before = ''
with open('js/模板.js', encoding='utf-8') as f:
before = f.read()
# print(before)
ctx,js_code = parser.runJs(js_path,before=before)
if not js_code:
return jsonify(error.failed('爬虫规则加载失败'))
rule = ctx.eval('rule')
t2 = time.time()
logger.info(f'js装载耗时:{round((t2-t1)*1000,2)}毫秒')
# print(rule)
cms = CMS(rule,db,RuleClass,PlayParse,app.config)
wd = getParmas('wd')
ac = getParmas('ac')
......@@ -137,7 +146,7 @@ def getRules(path='cache'):
# print(base_path)
os.makedirs(base_path,exist_ok=True)
file_name = os.listdir(base_path)
file_name = list(filter(lambda x: str(x).endswith('.js'), file_name))
file_name = list(filter(lambda x: str(x).endswith('.js') and str(x).find('模板') < 0, file_name))
# print(file_name)
rule_list = [file.replace('.js', '') for file in file_name]
rules = {'list': rule_list, 'count': len(rule_list)}
......
......@@ -18,6 +18,7 @@ from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池
from time import time
from flask import url_for,redirect
from easydict import EasyDict as edict
class CMS:
def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None):
......@@ -96,10 +97,23 @@ class CMS:
self.timeout = round(int(timeout)/1000,2)
self.filter = rule.get('filter',[])
self.extend = rule.get('extend',[])
self.d = self.getObject()
def getName(self):
    """Return this instance's ``title`` attribute."""
    name = self.title
    return name
def getObject(self):
    """Bundle the helpers and request settings passed to ``lazyParse`` scripts.

    The result is stored on the instance as ``self.d`` and handed to the
    user-supplied ``lazyParse(input, d)`` function as its second argument.

    Returns:
        An ``edict`` with the attributes ``jsp``, ``getParse``,
        ``saveParse``, ``headers``, ``encoding``, ``name`` and ``timeout``.
    """
    fields = dict(
        jsp=jsoup(self.url),
        getParse=self.getParse,
        saveParse=self.saveParse,
        headers=self.headers,
        encoding=self.encoding,
        name=self.title,
        timeout=self.timeout,
    )
    return edict(fields)
def regexp(self,prule,text,pos=None):
ret = re.search(prule,text).groups()
if pos != None and isinstance(pos,int):
......@@ -612,9 +626,7 @@ class CMS:
if pos < 0:
return play_url
pyenv = safePython(self.lazy,pycode[pos:])
# print(pyenv)
jsp = jsoup(self.url)
lazy_url = pyenv.action_task_exec('lazyParse',[play_url,jsp,self.getParse,self.saveParse,self.headers,self.encoding])
lazy_url = pyenv.action_task_exec('lazyParse',[play_url,self.d])
logger.info(f'播放免嗅结果:{lazy_url}')
if isinstance(lazy_url,str) and lazy_url.startswith('http'):
play_url = lazy_url
......
......@@ -10,7 +10,7 @@ def getRules():
base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录
# print(base_path)
file_name = os.listdir(base_path)
file_name = list(filter(lambda x:str(x).endswith('.js'),file_name))
file_name = list(filter(lambda x:str(x).endswith('.js') and str(x).find('模板') < 0,file_name))
# print(file_name)
rule_list = [file.replace('.js','') for file in file_name]
# print(rule_list)
......
var rule = {
title:'夜空',
host:'https://www.yekong.cc',
// homeUrl:'/',
url:'/pianku-fyclass--------fypage---/',
var rule = Object.assign(muban.mxpro,{
title:'夜空',
host:'https://www.yekong.cc',
url:'/pianku-fyclass--------fypage---/',
searchUrl:'/search-**----------fypage---/',
headers:{//网站的请求头,完整支持所有的,常带ua和cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
play_parse:true,
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
});
\ No newline at end of file
// Standalone (non-template) rule definition for the '夜空' site.
// Holds the site's URL patterns, request headers and the selector strings
// for the category, recommendation (推荐), first-level list (一级),
// second-level detail (二级) and search (搜索) pages.
var rule = {
title:'夜空',
host:'https://www.yekong.cc',
// Disabled home-page path override:
// homeUrl:'/',
// NOTE(review): 'fyclass' / 'fypage' look like placeholders for the category id and page number — confirm against the loader.
url:'/pianku-fyclass--------fypage---/',
// NOTE(review): '**' presumably stands for the search keyword — confirm.
searchUrl:'/search-**----------fypage---/',
headers:{// Request headers sent to the site; any header is supported, typically UA and cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
// Disabled static category names / ids (presumably superseded by class_parse below):
// class_name:'电影&连续剧&福利&动漫&综艺',
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
play_parse:true,
// Empty string: no custom lazy-parse script is named for this rule.
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // Whether the recommended content uses two-level locating
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
......@@ -11,7 +11,7 @@ var rule = {
// class_url:'20&1&2&3&4&23',
class_parse:'.stui-header__menu li:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
// lazy:'干饭',
lazy:'干饭',
limit:5,
推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;;a&&href',
double:true, // 推荐内容是否双层定位
......
// Shared rule templates. Each property of `muban` is a complete default rule
// that a site-specific rule file can extend by overriding individual fields
// (e.g. title, host, url).
// NOTE(review): a rule in this change is built with
// Object.assign(muban.mxpro, {...}); Object.assign writes the overrides into
// its first argument, so the template itself is modified. Use
// Object.assign({}, muban.mxpro, {...}) if the template must stay pristine.
var muban = {
// Template `mxpro`: defaults for the URL patterns, headers and selectors.
mxpro:{
// title and host are left empty here; the extending rule supplies them.
title:'',
host:'',
// Disabled home-page path override:
// homeUrl:'/',
// NOTE(review): 'fyclass' / 'fypage' look like placeholders for the category id and page number — confirm against the loader.
url:'/vodshow/fyclass--------fypage---.html',
// NOTE(review): '**' presumably stands for the search keyword — confirm.
searchUrl:'/vodsearch/**----------fypage---.html',
headers:{// Request headers sent to the site; any header is supported, typically UA and cookies
'User-Agent':'MOBILE_UA',
"Cookie": "searchneed=ok"
},
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
// Empty string: the template names no custom lazy-parse script.
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // Whether the recommended content uses two-level locating
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
二级:{"title":"h1&&Text;.module-info-tag&&Text","img":".lazyload&&data-original","desc":".module-info-item:eq(1)&&Text;.module-info-item:eq(2)&&Text;.module-info-item:eq(3)&&Text","content":".module-info-introduction&&Text","tabs":".module-tab-item","lists":".module-play-list:eq(#id) a"},
搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
};
\ No newline at end of file
无法预览此类型文件
......@@ -10,24 +10,24 @@ import json
from urllib.parse import urljoin,quote,unquote
import base64
def lazyParse(input,jsp,getParse,saveParse,headers,encoding):
cacheUrl = getParse(input)
print(f'cacheUrl:{cacheUrl}')
def lazyParse(input,d):
cacheUrl = d.getParse(input)
print(f'干饭免嗅:cacheUrl:{cacheUrl}')
if cacheUrl:
return cacheUrl
r = requests.get(input, headers=headers)
r.encoding = encoding
r = requests.get(input, headers=d.headers,timeout=d.timeout)
r.encoding = d.encoding
html = r.text
# print(html)
# js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html')
# print(js)
js = d.jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html')
print(js)
try:
ret = re.search('var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1]
ret = json.loads(ret)
url = ret.get('url','')
if len(url) > 10:
real_url = 'https://player.buyaotou.xyz/?url='+url
saveParse(input,real_url)
d.saveParse(input,real_url)
return real_url
else:
return input
......
......@@ -7,4 +7,8 @@
import re
txt = 'var player_aaaa={"flag":"play","encrypt":3,"trysee":0,"points":0,"link":"\/vodplay\/44640-1-1.html","link_next":"","link_pre":"","url":"Zd2fZg56c6y10828ZDRiNzZjNzk1Y2E3OWQzNmQzYWEyM2IwODM0ZjM3MgO0O0OO0O0O","url_next":"d","from":"vip","server":"no","note":"","id":"44640","sid":1,"nid":1}'
ret = re.search('var player_(.*?)=(.*?)<',txt,re.M|re.I)
print(ret)
\ No newline at end of file
print(ret)
def lazyParse(input,jsp,getParse,saveParse,headers,encoding):
    """Placeholder with the six-argument ``lazyParse`` signature; does nothing.

    All arguments are accepted and ignored, and ``None`` is returned.
    """
    return None
\ No newline at end of file
......@@ -9,10 +9,10 @@ import json
from urllib.parse import urljoin,quote,unquote
import base64
def lazyParse(input,jsp,getParse,saveParse,headers,encoding):
print('lazyParse:',input)
r = requests.get(input,headers=headers)
r.encoding = encoding
def lazyParse(input,d):
print('通用免嗅:',input)
r = requests.get(input, headers=d.headers,timeout=d.timeout)
r.encoding = d.encoding
html = r.text
# print(html)
# js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html')
......
......@@ -5,6 +5,7 @@
- [X] 2.转移文本文件到txt目录
- [X] 3.增加服务器解析播放(全局配置和js分别配置.后期可以针对性运行解析)
- [X] 4.增加自定义免嗅(基于道长任务仓库核心逻辑实现云函数)
- [X] 5.增加模板继承,优化免嗅参数二
###### 2022/08/27
- [X] 1.增加PC_UA变量
- [X] 2.首页增加更多功能按钮
......
......@@ -6,4 +6,5 @@ flask-sqlalchemy
netifaces
gevent ; python_version < '3.9'
supervisor ; sys_platform != 'win32' and python_version < '3.9'
func_timeout
\ No newline at end of file
func_timeout
easydict
\ No newline at end of file
......@@ -17,7 +17,7 @@ import execjs
# os.environ["EXECJS_RUNTIME"] = "JScript"
# print(execjs.get().name)
def runJs(jsPath):
def runJs(jsPath,before='',after=''):
# base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录
# base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录
# base_path = os.path.dirname(os.path.abspath('.')) # 上级目录
......@@ -43,7 +43,12 @@ def runJs(jsPath):
with open(js_path, 'r', encoding='UTF-8') as fp:
js_code = fp.read()
# print(js_code)
loader = execjs.compile(js_code)
jscode_to_run = js_code
if before:
jscode_to_run = before + jscode_to_run
if after:
jscode_to_run += after
loader = execjs.compile(jscode_to_run)
return loader,js_code
def toJs(jsPath):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册