From f8bc0b3adac30ebc054c113c308c3c4aa59eb935 Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Sun, 28 Aug 2022 20:54:59 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=87=AA=E5=AE=9A=E4=B9=89?= =?UTF-8?q?=E5=85=8D=E5=97=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 16 +-- classes/cms.py | 82 +++++++++++++- "js/555\345\275\261\350\247\206.js" | 1 + js/cokemv.js | 1 + "js/\345\226\265\345\226\265.js" | 1 + "js/\345\244\234\347\251\272.js" | 1 + ...62\351\245\255\345\275\261\350\247\206.js" | 21 ++++ ...51\345\201\266\345\247\220\345\247\220.js" | 1 + "js/\347\223\234\347\232\256TV.js" | 1 + "js/\350\216\253\346\211\216\345\205\224.js" | 1 + ...35\350\216\223\345\275\261\350\247\206.js" | 1 + "js/\351\270\255\345\245\210\351\243\236.js" | 1 + models/__init__.py | 3 +- models/play_parse.py | 21 ++++ models/rules.db | Bin 12288 -> 16384 bytes "py/\345\271\262\351\245\255.py" | 36 ++++++ "py/\346\265\213\350\257\225.py" | 10 ++ ...32\347\224\250\345\205\215\345\227\205.py" | 39 +++++++ readme.md | 3 + requirements.txt | 3 +- utils/htmlParser.py | 4 + utils/parser.py | 34 ++++++ utils/safePython.py | 105 ++++++++++++++++++ 23 files changed, 373 insertions(+), 13 deletions(-) create mode 100644 "js/\345\271\262\351\245\255\345\275\261\350\247\206.js" create mode 100644 models/play_parse.py create mode 100644 "py/\345\271\262\351\245\255.py" create mode 100644 "py/\346\265\213\350\257\225.py" create mode 100644 "py/\351\200\232\347\224\250\345\205\215\345\227\205.py" create mode 100644 utils/safePython.py diff --git a/app.py b/app.py index e8189ca..ffba0c6 100644 --- a/app.py +++ b/app.py @@ -39,6 +39,7 @@ from gevent.pywsgi import WSGIServer # from geventwebsocket.handler import WebSocketHandler RuleClass = rule_classes.init(db) +PlayParse = play_parse.init(db) def getParmas(key=None,value=''): """ @@ -68,11 +69,6 @@ def index(): # put application's code here @app.route('/vod') def vod(): - play_url = getParmas('play_url') - if play_url: # 播放 - logger.info(f'播放重定向到:{play_url}') - return redirect(play_url) - rule = getParmas('rule') ext = getParmas('ext') if not ext.startswith('http') and not rule: @@ -85,8 +81,9 @@ def vod(): ctx,js_code = parser.runJs(js_path) if not js_code: return jsonify(error.failed('爬虫规则加载失败')) + rule = ctx.eval('rule') - cms = CMS(rule,db,RuleClass,app.config) + cms = CMS(rule,db,RuleClass,PlayParse,app.config) wd = getParmas('wd') ac = getParmas('ac') quick = getParmas('quick') @@ -98,6 +95,11 @@ def vod(): pg = int(pg) ids = getParmas('ids') q = getParmas('q') + play_url = getParmas('play_url') + + if play_url: # 播放 + play_url = cms.playContent(play_url) + return redirect(play_url) if ac and t: # 一级 data = cms.categoryContent(t,pg) @@ -204,7 +206,7 @@ def rules_raw(): @app.route('/config/') def config_render(mode): - print(dict(app.config)) + # print(dict(app.config)) html = render_template('config.txt',rules=getRules('js'),host=getHost(mode),mode=mode,jxs=getJxs(),config=dict(app.config)) response = make_response(html) response.headers['Content-Type'] = 'application/json; charset=utf-8' diff --git a/classes/cms.py b/classes/cms.py index a0f500d..d65561a 100644 --- a/classes/cms.py +++ b/classes/cms.py @@ -3,6 +3,7 @@ # File : cms.py # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Date : 2022/8/25 +import execjs import requests import re import math @@ -10,6 +11,8 @@ from utils.web import * from models import * from utils.config import config from utils.log import logger +from utils.safePython import safePython +from utils.parser import runPy from utils.htmlParser import jsoup from urllib.parse import urljoin from concurrent.futures import ThreadPoolExecutor # 引入线程池 @@ -17,19 +20,23 @@ from time import time from flask import url_for,redirect class CMS: - def __init__(self, rule, db=None, RuleClass=None, new_conf=None): + def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None): if new_conf is None: new_conf = {} + self.title = rule.get('title', '') + self.lazy = rule.get('lazy', False) self.play_disable = new_conf.get('PLAY_DISABLE',False) self.vod = redirect(url_for('vod')).headers['Location'] + # if not self.play_disable and self.lazy: if not self.play_disable: self.play_parse = rule.get('play_parse', False) play_url = new_conf.get('PLAY_URL',getHost(1)) if not play_url.startswith('http'): play_url = 'http://'+play_url if self.play_parse: - self.play_url = play_url + self.vod + '?play_url=' - logger.info(f'cms重定向链接:{self.play_url}') + # self.play_url = play_url + self.vod + '?play_url=' + self.play_url = f'{play_url}{self.vod}?rule={self.title}&play_url=' + # logger.info(f'cms重定向链接:{self.play_url}') else: self.play_url = '' else: @@ -38,6 +45,7 @@ class CMS: self.db = db self.RuleClass = RuleClass + self.PlayParse = PlayParse host = rule.get('host','').rstrip('/') timeout = rule.get('timeout',5000) homeUrl = rule.get('homeUrl','/') @@ -84,7 +92,6 @@ class CMS: self.二级 = rule.get('二级','') self.搜索 = rule.get('搜索','') self.推荐 = rule.get('推荐','') - self.title = rule.get('title','') self.encoding = encoding self.timeout = round(int(timeout)/1000,2) self.filter = rule.get('filter',[]) @@ -187,6 +194,50 @@ class CMS: except Exception as e: return f'发生了错误:{e}' + def getParse(self,play_url): + if not self.db: + msg = '未提供数据库连接' + print(msg) + return '' + name = self.getName() + # self.db.metadata.clear() + # RuleClass = rule_classes.init(self.db) + res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first() + # _logger.info('xxxxxx') + if res: + real_url = res.real_url + logger.info(f"{name}使用缓存播放地址:{real_url}") + return real_url + else: + return [] + + def saveParse(self, play_url,real_url): + if not self.db: + msg = '未提供数据库连接' + print(msg) + return msg + name = self.getName() + # data = RuleClass.query.filter(RuleClass.name == '555影视').all() + # self.db.metadata.clear() + # RuleClass = rule_classes.init(self.db) + res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first() + # print(res) + if res: + res.real_url = real_url + self.db.session.add(res) + msg = f'{name}服务端免嗅修改成功:{res.id}' + else: + res = self.PlayParse(play_url=play_url, real_url=real_url) + self.db.session.add(res) + res = self.db.session.query(self.PlayParse).filter(self.PlayParse.play_url == play_url).first() + msg = f'{name}服务端免嗅新增成功:{res.id}' + + try: + self.db.session.commit() + logger.info(msg) + except Exception as e: + return f'{name}发生了错误:{e}' + def homeContent(self,fypage=1): # yanaifei @@ -459,6 +510,8 @@ class CMS: playFrom = [] if p.get('tabs'): vodHeader = pdfa(html,p['tabs']) + # print(f'线路列表数:{len((vodHeader))}') + # print(vodHeader) vodHeader = [pq(v).text() for v in vodHeader] else: vodHeader = ['道长在线'] @@ -549,6 +602,27 @@ class CMS: } return result + def playContent(self, play_url): + if self.lazy: + print(f'{play_url}->开始执行免嗅代码->{self.lazy}') + pycode = runPy(self.lazy) + if pycode: + # print(pycode) + pos = pycode.find('def lazyParse') + if pos < 0: + return play_url + pyenv = safePython(self.lazy,pycode[pos:]) + # print(pyenv) + jsp = jsoup(self.url) + lazy_url = pyenv.action_task_exec('lazyParse',[play_url,jsp,self.getParse,self.saveParse,self.headers,self.encoding]) + logger.info(f'播放免嗅结果:{lazy_url}') + if isinstance(lazy_url,str) and lazy_url.startswith('http'): + play_url = lazy_url + return play_url + else: + logger.info(f'播放重定向到:{play_url}') + return play_url + if __name__ == '__main__': from utils import parser # js_path = f'js/玩偶姐姐.js' diff --git "a/js/555\345\275\261\350\247\206.js" "b/js/555\345\275\261\350\247\206.js" index a9df19b..becabad 100644 --- "a/js/555\345\275\261\350\247\206.js" +++ "b/js/555\345\275\261\350\247\206.js" @@ -12,6 +12,7 @@ var rule = { // class_url:'1&2&124&4&3', class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;/(\\d+).html', play_parse:true, + lazy:'', limit:10, 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git a/js/cokemv.js b/js/cokemv.js index 8d094d4..851c4da 100644 --- a/js/cokemv.js +++ b/js/cokemv.js @@ -11,6 +11,7 @@ var rule = { timeout:5000, class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;/(\\d+).html', play_parse:true, + lazy:'', limit:8, 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\345\226\265\345\226\265.js" "b/js/\345\226\265\345\226\265.js" index 5aea615..e42073c 100644 --- "a/js/\345\226\265\345\226\265.js" +++ "b/js/\345\226\265\345\226\265.js" @@ -10,6 +10,7 @@ var rule = { timeout:5000, class_parse:'.bm-item-list a:gt(0):lt(7);a&&Text;a&&href;/(\\d+).html', play_parse:true, + lazy:'', limit:5, 推荐:'.movie-list-body;.movie-list-item;.movie-title&&Text;.movie-post-lazyload&&data-original;.movie-rating&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\345\244\234\347\251\272.js" "b/js/\345\244\234\347\251\272.js" index 79921cb..86f1864 100644 --- "a/js/\345\244\234\347\251\272.js" +++ "b/js/\345\244\234\347\251\272.js" @@ -12,6 +12,7 @@ var rule = { // class_url:'1&2&124&4&3', class_parse:'.navbar-items li:gt(1):lt(7);a&&Text;a&&href;.*v/(.*?)/', play_parse:true, + lazy:'', limit:10, 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" "b/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" new file mode 100644 index 0000000..4e82ee8 --- /dev/null +++ "b/js/\345\271\262\351\245\255\345\275\261\350\247\206.js" @@ -0,0 +1,21 @@ +var rule = { + title:'干饭影视', + host:'https://www.gfysys.com/', + // homeUrl:'/', + url:'/vodtype/fyclass-fypage.html', + headers:{ + 'User-Agent':'MOBILE_UA' + }, + searchUrl:'/search/**-fypage.html', + // class_name:'电影&网剧&剧集&动漫&综艺&记录', + // class_url:'20&1&2&3&4&23', + class_parse:'.stui-header__menu li:gt(0):lt(5);a&&Text;a&&href;/(\\d+).html', + play_parse:true, + // lazy:'干饭', + limit:5, + 推荐:'ul.stui-vodlist.clearfix;li;a&&title;.lazyload&&data-original;;a&&href', + double:true, // 推荐内容是否双层定位 + 一级:'body .stui-vodlist__box;a&&title;.lazyload&&data-original;;a&&href', + 二级:{"title":"h1.title&&Text;.stui-content__detail p:eq(1)&&Text","img":".lazyload&&data-original","desc":".stui-content__detail p:eq(-3)&&Text;.stui-content__detail p:eq(-2)&&Text","content":".stui-content__detail p.detail&&Text","tabs":".stui-pannel__head ul li","lists":".stui-content__playlist:eq(#id) li"}, + // 搜索:'body .module-item;.module-card-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href;.module-info-item-content&&Text', +} \ No newline at end of file diff --git "a/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" "b/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" index dce2f3a..3b52486 100644 --- "a/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" +++ "b/js/\347\216\251\345\201\266\345\247\220\345\247\220.js" @@ -9,6 +9,7 @@ var rule = { timeout:5000, class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/', play_parse:true, + lazy:'', 一级:'.col-sm-6;h3&&Text;img&&data-src;.date&&Text;a&&href', 二级:'*', } \ No newline at end of file diff --git "a/js/\347\223\234\347\232\256TV.js" "b/js/\347\223\234\347\232\256TV.js" index 99024a3..d022fe4 100644 --- "a/js/\347\223\234\347\232\256TV.js" +++ "b/js/\347\223\234\347\232\256TV.js" @@ -12,6 +12,7 @@ var rule = { // class_url:'1&2&124&4&3', class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;.*-(.*?).html', play_parse:true, + lazy:'', limit:10, 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\350\216\253\346\211\216\345\205\224.js" "b/js/\350\216\253\346\211\216\345\205\224.js" index 232f7ca..d6a2c0c 100644 --- "a/js/\350\216\253\346\211\216\345\205\224.js" +++ "b/js/\350\216\253\346\211\216\345\205\224.js" @@ -12,6 +12,7 @@ var rule = { // class_url:'1&2&124&4&3', class_parse:'.navbar-items li:gt(2):lt(8);a&&Text;a&&href;.*/(.*?).html', play_parse:true, + lazy:'', limit:10, 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\350\223\235\350\216\223\345\275\261\350\247\206.js" "b/js/\350\223\235\350\216\223\345\275\261\350\247\206.js" index 8ad39f8..ede431e 100644 --- "a/js/\350\223\235\350\216\223\345\275\261\350\247\206.js" +++ "b/js/\350\223\235\350\216\223\345\275\261\350\247\206.js" @@ -11,6 +11,7 @@ var rule = { // class_url:'20&1&2&3&4&23', class_parse:'.navbar-items li:gt(1):lt(8);a&&Text;a&&href;/(\\d+).html', play_parse:true, + // lazy:'通用免嗅', limit:30, 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 diff --git "a/js/\351\270\255\345\245\210\351\243\236.js" "b/js/\351\270\255\345\245\210\351\243\236.js" index 7d942a4..ac7bbdf 100644 --- "a/js/\351\270\255\345\245\210\351\243\236.js" +++ "b/js/\351\270\255\345\245\210\351\243\236.js" @@ -9,6 +9,7 @@ var rule = { // class_url:'dianying&lianxuju&zongyi&dongman', class_parse:'.navbar-items li:gt(1):lt(6);a&&Text;a&&href;.*/(.*?).html', play_parse:true, + lazy:'', 推荐:'.tab-list.active;a.module-poster-item.module-item;.module-poster-item-title&&Text;.lazyload&&data-original;.module-item-note&&Text;a&&href', double:true, // 推荐内容是否双层定位 一级:'body a.module-poster-item.module-item;a&&title;.lazyload&&data-original;.module-item-note&&Text;a&&href', diff --git a/models/__init__.py b/models/__init__.py index 147bef4..ad5d31f 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -4,4 +4,5 @@ # Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ # Date : 2022/8/25 -from . import rule_classes \ No newline at end of file +from . import rule_classes +from . import play_parse \ No newline at end of file diff --git a/models/play_parse.py b/models/play_parse.py new file mode 100644 index 0000000..e5cb136 --- /dev/null +++ b/models/play_parse.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# File : play_parse.py +# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ +# Date : 2022/8/28 + + +def init(db): + class PlayParse(db.Model): + __tablename__ = 'play_parse' + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + play_url = db.Column(db.String(255)) + real_url = db.Column(db.String(255)) + + def __repr__(self): + return "" % ( + self.play_url, self.real_url) + + # db.create_all() + db.create_all() + return PlayParse \ No newline at end of file diff --git a/models/rules.db b/models/rules.db index cef7602b3b0d907555e3eeb0f3264276979b051c..2158a3d5269ef3c111351a7db83455ab397369f7 100644 GIT binary patch delta 699 zcmZojXlP)ZAT6lEz`(!)#4x}#QO8(Vg+b4I0x$m$24=o=27X5V!+hx*3%~MOH;OZ} ziz_NJHddA-Cgr3S*Q5&kmkk9)0eM z+!OelHWrp}JKA%zGDs%NC+BCU=9WF{n)Y)3=I2wkKHa-f4M@FM-uAR}Ifw-kc-pn% z*`C#E&v);6(Yec1%}C8q%|y+3^F`^6%qGS>tPHBo(oc78dbxBhP%%&^Ojqafm$TcT zx}atl0yP_}P2MK&$H&E=$H2djzn=dte;0q=Wsu9B<(3^89+VlG6>92dsuz4FL18^4S0Y delta 151 zcmZo@U~EX3AT21*z`(!^#4x}(QO6i4sOLR_m;VO?6F(yZKO_I)&58;W`4%(sGm7)q zGjMQUWZ<{v_2J*n%f_>VCznT``{KsNo!px*N^fN5vye=dPtMQQVPj>Gx0asFE33d} vsAi;QXf%17yq^dMe-{J)KK{G>UHoVG7xPc$f6u=GXt*!`#0jdCyY$NdDv2bW diff --git "a/py/\345\271\262\351\245\255.py" "b/py/\345\271\262\351\245\255.py" new file mode 100644 index 0000000..3449712 --- /dev/null +++ "b/py/\345\271\262\351\245\255.py" @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# File : 干饭.py +# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ +# Date : 2022/8/28 + +import requests +import re +import json +from urllib.parse import urljoin,quote,unquote +import base64 + +def lazyParse(input,jsp,getParse,saveParse,headers,encoding): + cacheUrl = getParse(input) + print(f'cacheUrl:{cacheUrl}') + if cacheUrl: + return cacheUrl + r = requests.get(input, headers=headers) + r.encoding = encoding + html = r.text + # print(html) + # js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html') + # print(js) + try: + ret = re.search('var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1] + ret = json.loads(ret) + url = ret.get('url','') + if len(url) > 10: + real_url = 'https://player.buyaotou.xyz/?url='+url + saveParse(input,real_url) + return real_url + else: + return input + except Exception as e: + print(f'错误:{e}') + return input \ No newline at end of file diff --git "a/py/\346\265\213\350\257\225.py" "b/py/\346\265\213\350\257\225.py" new file mode 100644 index 0000000..849d3b6 --- /dev/null +++ "b/py/\346\265\213\350\257\225.py" @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# File : 测试.py +# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ +# Date : 2022/8/28 + +import re +txt = 'var player_aaaa={"flag":"play","encrypt":3,"trysee":0,"points":0,"link":"\/vodplay\/44640-1-1.html","link_next":"","link_pre":"","url":"Zd2fZg56c6y10828ZDRiNzZjNzk1Y2E3OWQzNmQzYWEyM2IwODM0ZjM3MgO0O0OO0O0O","url_next":"d","from":"vip","server":"no","note":"","id":"44640","sid":1,"nid":1}' +ret = re.search('var player_(.*?)=(.*?)<',txt,re.M|re.I) +print(ret) \ No newline at end of file diff --git "a/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" "b/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" new file mode 100644 index 0000000..67b2f26 --- /dev/null +++ "b/py/\351\200\232\347\224\250\345\205\215\345\227\205.py" @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# File : 通用免嗅.py +# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ +# Date : 2022/8/28 +import requests +import re +import json +from urllib.parse import urljoin,quote,unquote +import base64 + +def lazyParse(input,jsp,getParse,saveParse,headers,encoding): + print('lazyParse:',input) + r = requests.get(input,headers=headers) + r.encoding = encoding + html = r.text + # print(html) + # js = jsp.pdfh(html,'.stui-player__video script:eq(0)&&Html') + # print(js) + try: + ret = re.search('var player_(.*?)=(.*?)<', html, re.M | re.I).groups()[1] + ret = json.loads(ret) + url = ret.get('url','') + if len(url) > 10: + if url.find('.m3u8') > -1 or url.find('.mp4') > -1: + return url + elif url.find('http') < 0: + try: + l = unquote(base64.b64decode(url).decode("utf-8")) + print(l) + return l + except Exception as e: + print(f'非url和base64编码:{e}') + return input + else: + return input + except Exception as e: + print(f'错误:{e}') + return input \ No newline at end of file diff --git a/readme.md b/readme.md index ca91dee..bb92cff 100644 --- a/readme.md +++ b/readme.md @@ -4,6 +4,7 @@ - [X] 1.增加linux进程启动,命令 supervisord -c manager.conf - [X] 2.转移文本文件到txt目录 - [X] 3.增加服务器解析播放(全局配置和js分别配置.后期可以针对性运行解析) +- [X] 4.增加自定义免嗅(基于道长任务仓库核心逻辑实现云函数) ###### 2022/08/27 - [X] 1.增加PC_UA变量 - [X] 2.首页增加更多功能按钮 @@ -76,6 +77,8 @@ var rule = { class_parse:'#side-menu:lt(1) li;a&&Text;a&&href;com/(.*?)/', // 服务器解析播放 play_parse:true, + // 自定义免嗅 + lazy:'', // 首页推荐显示数量 limit:6, double:true,//是否双层列表定位,默认false diff --git a/requirements.txt b/requirements.txt index ee0a957..c7d56bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ requests flask-sqlalchemy netifaces gevent ; python_version < '3.9' -supervisor ; sys_platform != 'win32' and python_version < '3.9' \ No newline at end of file +supervisor ; sys_platform != 'win32' and python_version < '3.9' +func_timeout \ No newline at end of file diff --git a/utils/htmlParser.py b/utils/htmlParser.py index 32570f0..85a1799 100644 --- a/utils/htmlParser.py +++ b/utils/htmlParser.py @@ -12,6 +12,8 @@ class jsoup: self.MY_URL = MY_URL def pdfh(self,html,parse,pd=False): + if not parse: + return '' doc = pq(html) option = None if parse.find('&&') > -1: @@ -39,6 +41,8 @@ class jsoup: return ret def pdfa(self,html,parse): + if not parse: + return [] doc = pq(html) # return [item.html() for item in doc(parse).items()] return [str(item) for item in doc(parse).items()] diff --git a/utils/parser.py b/utils/parser.py index 4552ba4..f73eecc 100644 --- a/utils/parser.py +++ b/utils/parser.py @@ -66,3 +66,37 @@ def toHtml(jsPath): response = make_response(js) response.headers['Content-Type'] = 'text/html; charset=utf-8' return response + +def runPy(pyPath): + # base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 + # base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录 + # base_path = os.path.dirname(os.path.abspath('.')) # 上级目录 + # js_code = 'var rule={}' + if pyPath and not str(pyPath).endswith('.py'): + pyPath += '.py' + base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录 + if str(pyPath).startswith('http'): + py_name = pyPath.split('/')[-1] + cache_path = os.path.join(base_path, f'cache/{py_name}') + print('远程免嗅:',py_name) + if not os.path.exists(cache_path): + try: + py_code = requests.get(pyPath,timeout=2).text + with open(cache_path,mode='w+',encoding='utf-8') as f: + f.write(py_code) + except Exception as e: + print('发生了错误:',e) + return None, '' + else: + with open(cache_path, 'r', encoding='UTF-8') as fp: + py_code = fp.read() + else: + py_root = os.path.join(base_path, 'py/') + os.makedirs(py_root,exist_ok=True) + py_path = os.path.join(py_root, pyPath) + if not os.path.exists(py_path): + return '' + with open(py_path, 'r', encoding='UTF-8') as fp: + py_code = fp.read() + # print(js_code) + return py_code \ No newline at end of file diff --git a/utils/safePython.py b/utils/safePython.py new file mode 100644 index 0000000..070b764 --- /dev/null +++ b/utils/safePython.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# File : safePython.py +# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ +# Date : 2022/8/28 + +import io +import tokenize + +from func_timeout import func_set_timeout +from func_timeout.exceptions import FunctionTimedOut +from urllib.parse import urljoin,quote,unquote +import requests +import time +import json +import re +from lxml import etree +import datetime +import base64 +from utils.log import logger + +time_out_sec = 8 +class my_exception(Exception): + def __init__(self, message): + self.message = message + + def __str__(self): + message = f'函数执行超时: "{self.message}"' + return message + +@func_set_timeout(time_out_sec) +def excute(*args): + exec(*args) + +def check_unsafe_attributes(string): + """ + 安全检测需要exec执行的python代码 + :param string: + :return: + """ + g = tokenize.tokenize(io.BytesIO(string.encode('utf-8')).readline) + pre_op = '' + for toktype, tokval, _, _, _ in g: + if toktype == tokenize.NAME and pre_op == '.' and tokval.startswith('_'): + attr = tokval + msg = "access to attribute '{0}' is unsafe.".format(attr) + raise AttributeError(msg) + elif toktype == tokenize.OP: + pre_op = tokval + +DEFAULT_PYTHON_CODE = """# 可用内置环境变量: +# - log: log(message): 打印日志功能 +# - error: 弹出用户错误的弹窗 +# 返回变量值: result = {...}\n\n +zyw_lists = env['hikerule.zyw.list'].with_context(active_test=True).sudo().search( + [('option', '=', 'zy'), ('cate_id.name', '!=', '18+'),('cate_id.is_bad', '!=', True)]) +result = env['hikerule.zyw.list2data.wizard'].sudo().get_publish_value(zyw_lists) +""" + +class safePython: + def __init__(self,name, code): + self.name = name or '未定义' + self.code = code + + def action_task_exec(self,call=None,params=None): + """ + 接口调用执行函数 + :return: + """ + if not params: + params = [] + builtins = __builtins__ + builtins = dict(builtins).copy() + for key in ['__import__','eval','exec','globals','dir','copyright','open','quit']: + del builtins[key] # 删除不安全的关键字 + # print(builtins) + global_dict = {'__builtins__': builtins, + 'requests': requests, 'urljoin':urljoin,'quote':quote,'unquote': unquote, + 'log': logger.info, 'json': json,'print':print, + 're':re,'etree':etree,'time':time,'datetime':datetime,'base64':base64 + } # 禁用内置函数,不允许导入包 + try: + check_unsafe_attributes(self.code) + localdict = {'result': None} + # 待解决windows下运行超时的问题 + base_code = self.code.strip() + if call: + logger.info(f'开始执行:{call}') + try: + # excute(to_run_code, global_dict, localdict) + excute(base_code, global_dict, localdict) + run = localdict.get(call) + if run: + localdict['result'] = run(*params) + except FunctionTimedOut: + raise my_exception(f'函数[{self.name}]运行时间超过{time_out_sec}秒,疑似死循环,已被系统切断') + except Exception as e: + ret = f'执行报错:{e}' + logger.info(ret) + return ret + else: + # print(global_dict) + # print(localdict) + ret = localdict['result'] + return ret \ No newline at end of file -- GitLab