提交 f8bc0b3a 编写于 作者: H hjdhnx

增加自定义免嗅

上级 693c645f
......@@ -39,6 +39,7 @@ from gevent.pywsgi import WSGIServer
# from geventwebsocket.handler import WebSocketHandler
RuleClass = rule_classes.init(db)
PlayParse = play_parse.init(db)
def getParmas(key=None,value=''):
"""
......@@ -68,11 +69,6 @@ def index(): # put application's code here
@app.route('/vod')
def vod():
play_url = getParmas('play_url')
if play_url: # 播放
logger.info(f'播放重定向到:{play_url}')
return redirect(play_url)
rule = getParmas('rule')
ext = getParmas('ext')
if not ext.startswith('http') and not rule:
......@@ -85,8 +81,9 @@ def vod():
ctx,js_code = parser.runJs(js_path)
if not js_code:
return jsonify(error.failed('爬虫规则加载失败'))
rule = ctx.eval('rule')
cms = CMS(rule,db,RuleClass,app.config)
cms = CMS(rule,db,RuleClass,PlayParse,app.config)
wd = getParmas('wd')
ac = getParmas('ac')
quick = getParmas('quick')
......@@ -98,6 +95,11 @@ def vod():
pg = int(pg)
ids = getParmas('ids')
q = getParmas('q')
play_url = getParmas('play_url')
if play_url: # 播放
play_url = cms.playContent(play_url)
return redirect(play_url)
if ac and t: # 一级
data = cms.categoryContent(t,pg)
......@@ -204,7 +206,7 @@ def rules_raw():
@app.route('/config/<int:mode>')
def config_render(mode):
print(dict(app.config))
# print(dict(app.config))
html = render_template('config.txt',rules=getRules('js'),host=getHost(mode),mode=mode,jxs=getJxs(),config=dict(app.config))
response = make_response(html)
response.headers['Content-Type'] = 'application/json; charset=utf-8'
......
......@@ -3,6 +3,7 @@
# File : cms.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25
import execjs
import requests
import re
import math
......@@ -10,6 +11,8 @@ from utils.web import *
from models import *
from utils.config import config
from utils.log import logger
from utils.safePython import safePython
from utils.parser import runPy
from utils.htmlParser import jsoup
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor # 引入线程池
......@@ -17,19 +20,23 @@ from time import time
from flask import url_for,redirect
class CMS:
def __init__(self, rule, db=None, RuleClass=None, new_conf=None):
def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None):
if new_conf is None:
new_conf = {}
self.title = rule.get('title', '')
self.lazy = rule.get('lazy', False)
self.play_disable = new_conf.get('PLAY_DISABLE',False)
self.vod = redirect(url_for('vod')).headers['Location']
# if not self.play_disable and self.lazy:
if not self.play_disable:
self.play_parse = rule.get('play_parse', False)
play_url = new_conf.get('PLAY_URL',getHost(1))
if not play_url.startswith('http'):
play_url = 'http://'+play_url
if self.play_parse:
self.play_url = play_url + self.vod + '?play_url='
logger.info(f'cms重定向链接:{self.play_url}')
# self.play_url = play_url + self.vod + '?play_url='
self.play_url = f'{play_url}{self.vod}?rule={self.title}&play_url='
# logger.info(f'cms重定向链接:{self.play_url}')
else:
self.play_url = ''
else:
......@@ -38,6 +45,7 @@ class CMS:
self.db = db
self.RuleClass = RuleClass
self.PlayParse = PlayParse
host = rule.get('host','').rstrip('/')
timeout = rule.get('timeout',5000)
homeUrl = rule.get('homeUrl','/')
......@@ -84,7 +92,6 @@ class CMS:
self.二级 = rule.get('二级','')
self.搜索 = rule.get('搜索','')
self.推荐 = rule.get('推荐','')
self.title = rule.get('title','')
self.encoding = encoding
self.timeout = round(int(timeout)/1000,2)
self.filter = rule.get('filter',[])
......@@ -187,6 +194,50 @@ class CMS:
except Exception as e:
return f'发生了错误:{e}'
def getParse(self, play_url):
    """
    Look up a previously stored real playback address for a play-page URL.

    :param play_url: play-page URL used as the lookup key in the PlayParse table
    :return: the stored ``real_url``; an empty string when no database handle
        is configured or when no row matches ``play_url``
    """
    if not self.db:
        msg = '未提供数据库连接'
        print(msg)
        return ''
    name = self.getName()
    res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
    if res:
        real_url = res.real_url
        logger.info(f"{name}使用缓存播放地址:{real_url}")
        return real_url
    # Cache miss: return the same falsy str as the "no database" path so that
    # callers always receive a str (previously this path returned a list).
    return ''
def saveParse(self, play_url, real_url):
    """
    Insert or update the stored real playback address for a play-page URL.

    :param play_url: play-page URL used as the key in the PlayParse table
    :param real_url: resolved playback address to store for ``play_url``
    :return: a status message: the "no database" notice, the success message
        (also written to the log), or the error text when flushing or
        committing fails
    """
    if not self.db:
        msg = '未提供数据库连接'
        print(msg)
        return msg
    name = self.getName()
    session = self.db.session
    res = session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first()
    is_update = bool(res)
    if is_update:
        res.real_url = real_url
    else:
        res = self.PlayParse(play_url=play_url, real_url=real_url)
    session.add(res)
    try:
        # Flush first so a new row gets its primary key without issuing a
        # second query just to read the id back.
        session.flush()
        if is_update:
            msg = f'{name}服务端免嗅修改成功:{res.id}'
        else:
            msg = f'{name}服务端免嗅新增成功:{res.id}'
        session.commit()
    except Exception as e:
        # Leave the session usable for later requests after a failed write.
        session.rollback()
        return f'{name}发生了错误:{e}'
    logger.info(msg)
    return msg
def homeContent(self,fypage=1):
# yanaifei
......@@ -459,6 +510,8 @@ class CMS:
playFrom = []
if p.get('tabs'):
vodHeader = pdfa(html,p['tabs'])
# print(f'线路列表数:{len((vodHeader))}')
# print(vodHeader)
vodHeader = [pq(v).text() for v in vodHeader]
else:
vodHeader = ['道长在线']
......@@ -549,6 +602,27 @@ class CMS:
}
return result
def playContent(self, play_url):
    """
    Resolve the address that a play request should be redirected to.

    When the rule defines no ``lazy`` script the input is returned unchanged.
    Otherwise the script source is loaded with ``runPy``, everything before
    ``def lazyParse`` is dropped, and ``lazyParse`` is executed through
    ``safePython``. Its result replaces ``play_url`` only when it is a str
    starting with ``http``.

    :param play_url: play-page URL received from the client
    :return: the resolved address, or ``play_url`` itself as the fallback
    """
    if not self.lazy:
        logger.info(f'播放重定向到:{play_url}')
        return play_url

    print(f'{play_url}->开始执行免嗅代码->{self.lazy}')
    pycode = runPy(self.lazy)
    # Only a non-empty str is usable source code; guarding on the type keeps
    # the .find() call below from failing on any other return value.
    if not isinstance(pycode, str) or not pycode:
        return play_url

    pos = pycode.find('def lazyParse')
    if pos < 0:
        return play_url

    # Everything before the function definition (e.g. import lines) is cut off
    # before the code is handed to the restricted executor.
    pyenv = safePython(self.lazy, pycode[pos:])
    jsp = jsoup(self.url)
    lazy_url = pyenv.action_task_exec(
        'lazyParse',
        [play_url, jsp, self.getParse, self.saveParse, self.headers, self.encoding],
    )
    logger.info(f'播放免嗅结果:{lazy_url}')
    if isinstance(lazy_url, str) and lazy_url.startswith('http'):
        play_url = lazy_url
    return play_url
if __name__ == '__main__':
from utils import parser
# js_path = f'js/玩偶姐姐.js'
......
......@@ -12,6 +12,7 @@ var rule = {
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
......@@ -11,6 +11,7 @@ var rule = {
timeout:5000,
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
lazy:'',
limit:8,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
......@@ -10,6 +10,7 @@ var rule = {
timeout:5000,
class_parse:'.bm-item-list a:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
lazy:'',
limit:5,
推荐:'.movie-list-body;.movie-list-item;.movie-title&&Text;.movie-post-lazyload&&data-original;.movie-rating&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
......@@ -12,6 +12,7 @@ var rule = {
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/',
play_parse:true,
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
// Crawler rule object; the server evaluates this file and reads the `rule` variable.
var rule = {
title:'干饭影视',
host:'https://www.gfysys.com/',
// homeUrl:'/',
url:'/vodtype/fyclass-fypage.html',
headers:{
'User-Agent':'MOBILE_UA'
},
searchUrl:'/search/**-fypage.html',
// class_name:'电影&网剧&剧集&动漫&综艺&记录',
// class_url:'20&1&2&3&4&23',
class_parse:'.stui-header__menu li:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html',
// Server-side play parsing switch.
play_parse:true,
// Custom sniff-free script name; left commented out, so this rule defines no `lazy` value.
// lazy:'干饭',
// Number of items shown in the home-page recommendations.
limit:5,
推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;;a&&href',
double:true, // whether the recommendation content is located with a two-level (nested) list
一级:'body .stui-vodlist__box;a&&title;.lazyload&&data-original;;a&&href',
二级:{"title":"h1.title&&Text;.stui-content__detail p:eq(1)&&Text","img":".lazyload&&data-original","desc":".stui-content__detail p:eq(-3)&&Text;.stui-content__detail p:eq(-2)&&Text","content":".stui-content__detail p.detail&&Text","tabs":".stui-pannel__head ul li","lists":".stui-content__playlist:eq(#id) li"},
// 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text',
}
\ No newline at end of file
......@@ -9,6 +9,7 @@ var rule = {
timeout:5000,
class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/',
play_parse:true,
lazy:'',
一级:'.col-sm-6;h3&&Text;img&&data-src;.date&&Text;a&&href',
二级:'*',
}
\ No newline at end of file
......@@ -12,6 +12,7 @@ var rule = {
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;.*-(.*?).html',
play_parse:true,
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
......@@ -12,6 +12,7 @@ var rule = {
// class_url:'1&2&124&4&3',
class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;.*/(.*?).html',
play_parse:true,
lazy:'',
limit:10,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
......@@ -11,6 +11,7 @@ var rule = {
// class_url:'20&1&2&3&4&23',
class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;/(\\d+).html',
play_parse:true,
// lazy:'通用免嗅',
limit:30,
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
......
......@@ -9,6 +9,7 @@ var rule = {
// class_url:'dianying&lianxuju&zongyi&dongman',
class_parse:'.navbar-items li:gt(1):lt(6);a&&Text;a&&href;.*/(.*?).html',
play_parse:true,
lazy:'',
推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href',
double:true, // 推荐内容是否双层定位
一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href',
......
......@@ -4,4 +4,5 @@
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/25
from . import rule_classes
\ No newline at end of file
from . import rule_classes
from . import play_parse
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : play_parse.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/28
def init(db):
    """
    Define the ``PlayParse`` model on the given database handle and return it.

    The model maps the ``play_parse`` table, which pairs a play-page URL with
    the real playback address resolved for it.

    :param db: database handle exposing ``Model``, ``Column``, ``Integer``,
        ``String`` and ``create_all`` (presumably a Flask-SQLAlchemy instance
        — confirm against the caller)
    :return: the ``PlayParse`` model class
    """
    class PlayParse(db.Model):
        __tablename__ = 'play_parse'
        # Auto-incrementing surrogate primary key.
        id = db.Column(db.Integer, primary_key=True, autoincrement=True)
        # Play-page URL; used by callers as the lookup key.
        play_url = db.Column(db.String(255))
        # Real playback address stored for play_url.
        real_url = db.Column(db.String(255))
        def __repr__(self):
            return "<PlayParse(play_url='%s', real_url='%s')>" % (
                self.play_url, self.real_url)
    # Runs on every init() call, after the model class above has been declared.
    db.create_all()
    return PlayParse
\ No newline at end of file
无法预览此类型文件
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : 干饭.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/28
import requests
import re
import json
from urllib.parse import urljoin,quote,unquote
import base64
def lazyParse(input,jsp,getParse,saveParse,headers,encoding):
    """
    Resolve the real playback address for a play page, using the cache first.

    :param input: play-page URL to resolve
    :param jsp: html parser helper supplied by the caller (unused here)
    :param getParse: callable(play_url) returning a cached real URL or a falsy value
    :param saveParse: callable(play_url, real_url) storing a resolved URL
    :param headers: HTTP headers sent with the page request
    :param encoding: text encoding applied to the response
    :return: the cached or freshly resolved player URL; ``input`` itself when
        the page cannot be fetched or parsed
    """
    cacheUrl = getParse(input)
    print(f'cacheUrl:{cacheUrl}')
    if cacheUrl:
        return cacheUrl
    try:
        # The request lives inside the try and carries a timeout so that a
        # dead or slow site falls back to the input instead of hanging/raising.
        r = requests.get(input, headers=headers, timeout=5)
        r.encoding = encoding
        html = r.text
        # The page embeds the player config as `var player_xxx={...}<`.
        ret = re.search(r'var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1]
        ret = json.loads(ret)
        url = ret.get('url','')
        if len(url) > 10:
            real_url = 'https://player.buyaotou.xyz/?url='+url
            saveParse(input,real_url)
            return real_url
        return input
    except Exception as e:
        print(f'错误:{e}')
        return input
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : 测试.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/28
import re

# Sample player snippet used to try out the extraction regex. Written as a raw
# string so the `\/` sequences stay literal without being invalid escapes.
txt = r'var player_aaaa={"flag":"play","encrypt":3,"trysee":0,"points":0,"link":"\/vodplay\/44640-1-1.html","link_next":"","link_pre":"","url":"Zd2fZg56c6y10828ZDRiNzZjNzk1Y2E3OWQzNmQzYWEyM2IwODM0ZjM3MgO0O0OO0O0O","url_next":"d","from":"vip","server":"no","note":"","id":"44640","sid":1,"nid":1}'
# The pattern needs a literal '<' after the second group; the sample contains
# none, so this search yields None.
ret = re.search(r'var player_(.*?)=(.*?)<', txt, re.M | re.I)
print(ret)
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : 通用免嗅.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/28
import requests
import re
import json
from urllib.parse import urljoin,quote,unquote
import base64
def lazyParse(input,jsp,getParse,saveParse,headers,encoding):
    """
    Generic resolver: extract the media address from a play page's player config.

    :param input: play-page URL to resolve
    :param jsp: html parser helper supplied by the caller (unused here)
    :param getParse: cache lookup callable supplied by the caller (unused here)
    :param saveParse: cache store callable supplied by the caller (unused here)
    :param headers: HTTP headers sent with the page request
    :param encoding: text encoding applied to the response
    :return: the direct media URL, the base64-decoded value of the ``url``
        field, or ``input`` itself whenever nothing usable is found
    """
    print('lazyParse:',input)
    try:
        # The request lives inside the try and carries a timeout so that a
        # dead or slow site falls back to the input instead of hanging/raising.
        r = requests.get(input,headers=headers,timeout=5)
        r.encoding = encoding
        html = r.text
        # The page embeds the player config as `var player_xxx={...}<`.
        ret = re.search(r'var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1]
        ret = json.loads(ret)
        url = ret.get('url','')
        if len(url) > 10:
            if url.find('.m3u8') > -1 or url.find('.mp4') > -1:
                return url
            if url.find('http') < 0:
                # Not a plain link: try it as base64 of a url-quoted address.
                try:
                    l = unquote(base64.b64decode(url).decode("utf-8"))
                    print(l)
                    return l
                except Exception as e:
                    print(f'非url和base64编码:{e}')
                    return input
    except Exception as e:
        print(f'错误:{e}')
    # Every remaining path falls back to the original page URL, so the caller
    # never receives None.
    return input
\ No newline at end of file
......@@ -4,6 +4,7 @@
- [X] 1.增加linux进程启动,命令 supervisord -c manager.conf
- [X] 2.转移文本文件到txt目录
- [X] 3.增加服务器解析播放(全局配置和js分别配置.后期可以针对性运行解析)
- [X] 4.增加自定义免嗅(基于道长任务仓库核心逻辑实现云函数)
###### 2022/08/27
- [X] 1.增加PC_UA变量
- [X] 2.首页增加更多功能按钮
......@@ -76,6 +77,8 @@ var rule = {
class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/',
// 服务器解析播放
play_parse:true,
// 自定义免嗅
lazy:'',
// 首页推荐显示数量
limit:6,
double:true,//是否双层列表定位,默认false
......
......@@ -5,4 +5,5 @@ requests
flask-sqlalchemy
netifaces
gevent ; python_version < '3.9'
supervisor ; sys_platform != 'win32' and python_version < '3.9'
\ No newline at end of file
supervisor ; sys_platform != 'win32' and python_version < '3.9'
func_timeout
\ No newline at end of file
......@@ -12,6 +12,8 @@ class jsoup:
self.MY_URL = MY_URL
def pdfh(self,html,parse,pd=False):
if not parse:
return ''
doc = pq(html)
option = None
if parse.find('&&') > -1:
......@@ -39,6 +41,8 @@ class jsoup:
return ret
def pdfa(self, html, parse):
    """
    Select every node matching ``parse`` in ``html``.

    :param html: markup to search
    :param parse: selector expression; a falsy value selects nothing
    :return: list with the str() form of each matched node, in match order
    """
    if parse:
        document = pq(html)
        matches = document(parse).items()
        return list(map(str, matches))
    return []
......
......@@ -66,3 +66,37 @@ def toHtml(jsPath):
response = make_response(js)
response.headers['Content-Type'] = 'text/html; charset=utf-8'
return response
def runPy(pyPath):
    """
    Load the source text of a custom sniff-free python script.

    A name starting with ``http`` is treated as a remote script: it is
    downloaded once into the ``cache`` directory and read from there on later
    calls. Any other name is resolved inside the ``py`` directory. Both
    directories live in the parent of this file's directory. ``.py`` is
    appended when the name lacks it.

    :param pyPath: script name or URL
    :return: the script source as str; an empty string when the name is empty,
        the file does not exist, or the download fails
    """
    if not pyPath:
        return ''
    pyPath = str(pyPath)
    if not pyPath.endswith('.py'):
        pyPath += '.py'
    base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))  # parent directory

    if pyPath.startswith('http'):
        py_name = pyPath.split('/')[-1]
        cache_root = os.path.join(base_path, 'cache')
        # Make sure the cache directory exists before reading or writing it.
        os.makedirs(cache_root, exist_ok=True)
        cache_path = os.path.join(cache_root, py_name)
        print('远程免嗅:',py_name)
        if os.path.exists(cache_path):
            with open(cache_path, 'r', encoding='UTF-8') as fp:
                return fp.read()
        try:
            resp = requests.get(pyPath,timeout=2)
            # Do not cache an HTTP error page as if it were script source.
            resp.raise_for_status()
            py_code = resp.text
            with open(cache_path,mode='w+',encoding='utf-8') as f:
                f.write(py_code)
        except Exception as e:
            print('发生了错误:',e)
            # Always a str, so callers can rely on truthiness and str methods.
            return ''
        return py_code

    py_root = os.path.join(base_path, 'py/')
    os.makedirs(py_root,exist_ok=True)
    py_path = os.path.join(py_root, pyPath)
    if not os.path.isfile(py_path):
        return ''
    with open(py_path, 'r', encoding='UTF-8') as fp:
        return fp.read()
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : safePython.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date : 2022/8/28
import io
import tokenize
from func_timeout import func_set_timeout
from func_timeout.exceptions import FunctionTimedOut
from urllib.parse import urljoin,quote,unquote
import requests
import time
import json
import re
from lxml import etree
import datetime
import base64
from utils.log import logger
time_out_sec = 8
class my_exception(Exception):
    """Error reported when executed code exceeds its time limit."""

    def __init__(self, message):
        # Keep the raw detail text; the prefix is added only when rendered.
        self.message = message

    def __str__(self):
        return '函数执行超时: "{}"'.format(self.message)
@func_set_timeout(time_out_sec)
def excute(*args):
    """
    Run ``exec`` with the given positional arguments under a time limit.

    The limit is ``time_out_sec`` seconds, applied by the ``func_set_timeout``
    decorator. The caller in this file passes (code, globals, locals).
    NOTE(review): the caller handles ``FunctionTimedOut``, so presumably the
    decorator raises it when the limit is exceeded — confirm against func_timeout.

    :param args: arguments forwarded unchanged to ``exec``
    """
    exec(*args)
def check_unsafe_attributes(string):
    """
    Reject python source that accesses an underscore-prefixed attribute.

    The source is tokenized and every name that directly follows a ``.``
    operator is inspected. Only the token immediately before the name counts
    (comments and non-logical newlines in between are ignored), so a
    module-level name such as ``_tmp`` that merely appears somewhere after an
    earlier attribute access is no longer reported.

    :param string: python source code that is about to be executed
    :return: None when no unsafe attribute access is found
    :raises AttributeError: when ``.<name>`` is found with a name starting with ``_``
    """
    tokens = tokenize.tokenize(io.BytesIO(string.encode('utf-8')).readline)
    pre_op = ''
    for toktype, tokval, _, _, _ in tokens:
        if toktype in (tokenize.COMMENT, tokenize.NL):
            # These may sit between '.' and the attribute name inside
            # brackets; they must not break the adjacency.
            continue
        if toktype == tokenize.NAME and pre_op == '.' and tokval.startswith('_'):
            attr = tokval
            msg = "access to attribute '{0}' is unsafe.".format(attr)
            raise AttributeError(msg)
        # Remember operators only; any other token clears the marker so a '.'
        # seen earlier cannot be paired with an unrelated later name.
        pre_op = tokval if toktype == tokenize.OP else ''
# Sample task-code template. Nothing in the visible part of this file references it.
# NOTE(review): the `env[...]` calls inside look like they target a different
# host framework — confirm whether this constant is still needed.
DEFAULT_PYTHON_CODE = """# 可用内置环境变量:
# - log: log(message): 打印日志功能
# - error: 弹出用户错误的弹窗
# 返回变量值: result = {...}\n\n
zyw_lists = env['hikerule.zyw.list'].with_context(active_test=True).sudo().search(
[('option', '=', 'zy'), ('cate_id.name', '!=', '18+'),('cate_id.is_bad', '!=', True)])
result = env['hikerule.zyw.list2data.wizard'].sudo().get_publish_value(zyw_lists)
"""
class safePython:
    """
    Executes a snippet of python source with a reduced set of builtins.

    NOTE(review): this is a deny-list, not a real sandbox. For example
    ``getattr`` stays available, so the token-level check on ``._name``
    access can be side-stepped. Do not treat executed code as isolated.
    """

    def __init__(self,name, code):
        """
        :param name: label used in the timeout message; '未定义' when falsy
        :param code: python source to execute
        """
        self.name = name or '未定义'
        self.code = code

    def action_task_exec(self,call=None,params=None):
        """
        Execute the stored code and optionally call one function defined by it.

        :param call: name of a function defined by the code; when it exists it
            is invoked with ``params`` and its return value becomes the result
        :param params: positional arguments for ``call``; defaults to none
        :return: the value stored under ``result`` after execution, or the str
            ``'执行报错:<error>'`` when the check, the execution or the call fails
        """
        # Imported locally: `__builtins__` may be a module or a dict depending
        # on how this file is loaded, while the builtins module is always a module.
        import builtins as builtins_module

        if not params:
            params = []
        safe_builtins = vars(builtins_module).copy()
        for key in ['__import__','eval','exec','globals','dir','copyright','open','quit']:
            # pop() instead of del: 'copyright' and 'quit' are not guaranteed
            # to be present in every interpreter start-up mode.
            safe_builtins.pop(key, None)  # drop unsafe names
        global_dict = {'__builtins__': safe_builtins,
                       'requests': requests, 'urljoin':urljoin,'quote':quote,'unquote': unquote,
                       'log': logger.info, 'json': json,'print':print,
                       're':re,'etree':etree,'time':time,'datetime':datetime,'base64':base64
                       }  # restricted builtins, importing is not possible
        try:
            check_unsafe_attributes(self.code)
            localdict = {'result': None}
            # TODO: timeout handling on Windows is still unresolved
            base_code = self.code.strip()
            if call:
                logger.info(f'开始执行:{call}')
            try:
                excute(base_code, global_dict, localdict)
                # NOTE(review): only the exec above runs through the
                # time-limited wrapper; the call below is made directly.
                run = localdict.get(call)
                if run:
                    localdict['result'] = run(*params)
            except FunctionTimedOut:
                raise my_exception(f'函数[{self.name}]运行时间超过{time_out_sec}秒,疑似死循环,已被系统切断')
        except Exception as e:
            ret = f'执行报错:{e}'
            logger.info(ret)
            return ret
        else:
            ret = localdict['result']
            return ret
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册