From a8f273816c0b630febe22ac9c23a75ebd4afe9ba Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Thu, 25 Aug 2022 17:17:10 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=94=99=E8=AF=AF=E8=AE=B0?= =?UTF-8?q?=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 8 +++++--- utils/parser.py | 18 ++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/app.py b/app.py index 458d0a5..68be761 100644 --- a/app.py +++ b/app.py @@ -46,14 +46,16 @@ def index(): # put application's code here def vod(): rule = getParmas('rule') ext = getParmas('ext') - if not rule: + if not ext.startswith('http') and not rule: return jsonify(error.failed('规则字段必填')) - if not rule in rule_list: - msg = f'仅支持以下规则:{",".join(rule_list)}' + if not ext.startswith('http') and not rule in rule_list: + msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}' return jsonify(error.failed(msg)) js_path = f'js/{rule}.js' if not ext.startswith('http') else ext ctx,js_code = parser.runJs(js_path) + if not js_code: + return jsonify(error.failed('爬虫规则加载失败')) rule = ctx.eval('rule') cms = CMS(rule) wd = getParmas('wd') diff --git a/utils/parser.py b/utils/parser.py index b47f174..e3cb7ca 100644 --- a/utils/parser.py +++ b/utils/parser.py @@ -21,10 +21,24 @@ def runJs(jsPath): # base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 # base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录 # base_path = os.path.dirname(os.path.abspath('.')) # 上级目录 + # js_code = 'var rule={}' + base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录 if str(jsPath).startswith('http'): - jscode = requests.get(jsPath).text + js_name = jsPath.split('/')[-1] + cache_path = os.path.join(base_path, f'cache/{js_name}') + print('远程规则:',js_name) + if not os.path.exists(cache_path): + try: + js_code = requests.get(jsPath,timeout=2).text + with open(cache_path,mode='w+',encoding='utf-8') as f: + f.write(js_code) + except Exception as e: + print('发生了错误:',e) + return None, '' + else: + with open(cache_path, 'r', encoding='UTF-8') as fp: + js_code = fp.read() else: - base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录 js_path = os.path.join(base_path, jsPath) with open(js_path, 'r', encoding='UTF-8') as fp: js_code = fp.read() -- GitLab