diff --git a/app.py b/app.py index 458d0a5ca4430ece590b7cb2315592a6ee36306b..68be7619c8ffc78085c2ebdaa7675fce3114d812 100644 --- a/app.py +++ b/app.py @@ -46,14 +46,16 @@ def index(): # put application's code here def vod(): rule = getParmas('rule') ext = getParmas('ext') - if not rule: + if not ext.startswith('http') and not rule: return jsonify(error.failed('规则字段必填')) - if not rule in rule_list: - msg = f'仅支持以下规则:{",".join(rule_list)}' + if not ext.startswith('http') and not rule in rule_list: + msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}' return jsonify(error.failed(msg)) js_path = f'js/{rule}.js' if not ext.startswith('http') else ext ctx,js_code = parser.runJs(js_path) + if not js_code: + return jsonify(error.failed('爬虫规则加载失败')) rule = ctx.eval('rule') cms = CMS(rule) wd = getParmas('wd') diff --git a/utils/parser.py b/utils/parser.py index b47f17457cca1fcabdd1bd046393a9f40f172d09..e3cb7ca9b3d6937d856d151420aaf19bc4acbc10 100644 --- a/utils/parser.py +++ b/utils/parser.py @@ -21,10 +21,24 @@ def runJs(jsPath): # base_path = os.path.dirname(os.path.abspath(__file__)) # 当前文件所在目录 # base_path = os.path.dirname(os.getcwd()) # 当前主程序所在工作目录 # base_path = os.path.dirname(os.path.abspath('.')) # 上级目录 + # js_code = 'var rule={}' + base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录 if str(jsPath).startswith('http'): - jscode = requests.get(jsPath).text + js_name = jsPath.split('/')[-1] + cache_path = os.path.join(base_path, f'cache/{js_name}') + print('远程规则:',js_name) + if not os.path.exists(cache_path): + try: + js_code = requests.get(jsPath,timeout=2).text + with open(cache_path,mode='w+',encoding='utf-8') as f: + f.write(js_code) + except Exception as e: + print('发生了错误:',e) + return None, '' + else: + with open(cache_path, 'r', encoding='UTF-8') as fp: + js_code = fp.read() else: - base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目录 js_path = os.path.join(base_path, jsPath) with open(js_path, 'r', encoding='UTF-8') as fp: js_code = fp.read()