vod.py 12.5 KB
Newer Older
H
hjdhnx 已提交
1 2 3 4 5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : vod.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date  : 2022/9/6
H
hjdhnx 已提交
6
import json
H
hjdhnx 已提交
7

H
hjdhnx 已提交
8
from flask import Blueprint,abort,request,render_template,render_template_string,jsonify,make_response,redirect
H
hjdhnx 已提交
9
from time import time
H
hjdhnx 已提交
10 11
from utils.web import getParmas,get_interval
from utils.cfg import cfg
H
hjdhnx 已提交
12
from utils.env import get_env
H
hjdhnx 已提交
13 14 15 16 17 18 19 20
from js.rules import getRuleLists,getJxs
from base.R import R
from utils.log import logger
from utils import parser
from controllers.cms import CMS
from base.database import db
from models.ruleclass import RuleClass
from models.playparse import PlayParse
H
hjdhnx 已提交
21
from js.rules import getRules
H
hjdhnx 已提交
22
from controllers.service import storage_service,rules_service
H
hjdhnx 已提交
23
from concurrent.futures import ThreadPoolExecutor,as_completed,thread  # 引入线程池
H
hjdhnx 已提交
24 25
from quickjs import Function,Context
import ujson
H
hjdhnx 已提交
26 27
# Flask blueprint that the routes in this module (e.g. `/vod`) are registered on.
vod = Blueprint("vod", __name__)

H
hjdhnx 已提交
28

H
hjdhnx 已提交
29
def search_one_py(rule, wd, before: str = ''):
    """Search a single rule for ``wd`` using the ``parser.runJs`` loader.

    Args:
        rule: Rule name; the rule source is read from ``js/{rule}.js``.
        wd: Search keyword handed to ``CMS.searchContent``.
        before: JavaScript prelude passed to ``parser.runJs``. When empty it
            is read from ``js/模板.js`` (everything before the first
            ``export``).

    Returns:
        The value returned by ``CMS.searchContent(wd, show_name=True)``, or
        ``None`` when the rule yields no code or any step raises.
    """
    t1 = time()
    if not before:
        with open('js/模板.js', encoding='utf-8') as f:
            before = f.read().split('export')[0]
    js_path = f'js/{rule}.js'
    try:
        ctx, js_code = parser.runJs(js_path, before=before)
        if not js_code:
            return None
        ruleDict = ctx.rule.to_dict()
        # Store the requested rule id in the dict; playback sniffing needs it later.
        ruleDict['id'] = rule
        logger.info(f'规则{rule}装载耗时:{get_interval(t1)}毫秒')
        cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
        return cms.searchContent(wd, show_name=True)
    except Exception as e:
        # Best-effort: one broken rule must not break an aggregated search.
        # Report through the module logger (not stdout) like the sibling loaders.
        logger.info(f'{rule}发生错误:{e}')
        return None

H
hjdhnx 已提交
49 50 51 52 53 54 55 56 57 58 59
def search_one(rule, wd, before: str = ''):
    """Load one rule inside a fresh QuickJS context and search it for ``wd``.

    Args:
        rule: Rule name; the rule source is read from ``js/{rule}.js``.
        wd: Search keyword handed to ``CMS.searchContent``.
        before: JavaScript prelude prepended to the rule source. When empty
            it is read from ``js/模板.js`` (everything before the first
            ``export``).

    Returns:
        The value returned by ``CMS.searchContent(wd, show_name=True)``, or
        ``R.failed('爬虫规则加载失败')`` when any step inside the guarded
        section raises.
    """
    started = time()
    if not before:
        with open('js/模板.js', encoding='utf-8') as template_file:
            template_source = template_file.read()
        before = template_source.split('export')[0]
    # Epilogue appended to the rule: merges the named template into `rule` when one is set.
    epilogue = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    js_path = f'js/{rule}.js'
    ctx = Context()
    try:
        with open(js_path, encoding='utf-8') as rule_file:
            rule_source = rule_file.read()
        env = get_env()
        if env:
            # The rule file is rendered as a template with the environment values.
            rule_source = render_template_string(rule_source, **env)
        ctx.eval(before + rule_source + epilogue)
        ruleDict = ujson.loads(ctx.get('rule').json())
        # Store the requested rule id in the dict; playback sniffing needs it later.
        ruleDict['id'] = rule
        logger.info(f'规则{rule}装载耗时:{get_interval(started)}毫秒')
        searcher = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
        return searcher.searchContent(wd, show_name=True)
    except Exception as err:
        logger.info(f'{err}')
        return R.failed('爬虫规则加载失败')

H
hjdhnx 已提交
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
def multi_search2(wd):
    """Aggregate search over every searchable rule, waiting for all workers.

    Each searchable rule (excluding ``drpy`` itself) is searched with
    ``search_one`` on its own worker thread; the ``list`` entries of every
    dict result are concatenated.

    Note: the ``SEARCH_TIMEOUT`` setting is only logged here. This variant
    blocks until every worker has finished.

    Args:
        wd: Search keyword.

    Returns:
        ``jsonify({'list': [...]})``. The list is empty when no rule is
        searchable or when collecting the results raises.
    """
    t1 = time()
    lsg = storage_service()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        # Best-effort setting lookup: fall back to 5 seconds on any failure.
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [item['name'] for item in rules]
    rules_exclude = ['drpy']
    search_sites = [
        item['name'] for item in rules
        if item.get('searchable', 0) and item.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard, not remove: a rule named 'drpy' is not guaranteed to exist.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    res = []
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    logger.info(f'聚搜准备工作耗时:{get_interval(t1)}毫秒')
    t2 = time()
    try:
        # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely
        # when nothing is searchable.
        if search_sites:
            # Leaving the `with` block waits for every submitted search.
            with ThreadPoolExecutor(len(search_sites)) as thread_pool:
                futures = [thread_pool.submit(search_one, site, wd, before) for site in search_sites]
            for future in futures:
                found = future.result()
                if found and isinstance(found, dict) and found.get('list'):
                    res.extend(found['list'])
        result = {
            'list': res
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时{get_interval(t1)}毫秒')
    except Exception as e:
        result = {
            'list': []
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时:{get_interval(t1)}毫秒,发生错误:{e}')
    return jsonify(result)

H
hjdhnx 已提交
122

H
hjdhnx 已提交
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
def merged_hide(merged_rules):
    """Return the entries of ``merged_rules`` that are not marked hidden.

    Args:
        merged_rules: Rule names to filter; each entry is compared directly
            against the ``name`` of every hidden rule.

    Returns:
        A new list with the hidden names removed, in the original order.
    """
    started = time()
    hidden_names = [hidden['name'] for hidden in rules_service().getHideRules()]
    total = len(merged_rules)
    visible = [name for name in merged_rules if name not in hidden_names]
    logger.info(f'数据库筛选隐藏规则耗时{get_interval(started)}毫秒,共计{total}条规则,隐藏后可渲染{len(visible)}条规则')
    return visible

H
hjdhnx 已提交
142 143 144 145 146 147 148 149 150 151 152 153
def multi_search(wd):
    """Aggregate search over every searchable, non-hidden rule with a timeout.

    Each remaining rule is searched with ``search_one`` on its own worker
    thread. Results are collected as they complete; when the
    ``SEARCH_TIMEOUT`` setting (milliseconds, default 5000) elapses or a
    worker raises, collection stops and whatever was gathered so far is
    returned without waiting for the remaining workers.

    Args:
        wd: Search keyword.

    Returns:
        ``jsonify({"list": [...]})`` with the concatenated ``list`` entries
        of every dict result collected.
    """
    lsg = storage_service()
    t1 = time()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        # Best-effort setting lookup: fall back to 5 seconds on any failure.
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [item['name'] for item in rules]
    rules_exclude = ['drpy']
    search_sites = [
        item['name'] for item in rules
        if item.get('searchable', 0) and item.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard, not remove: a rule named 'drpy' is not guaranteed to exist.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    search_sites = merged_hide(search_sites)
    res = []
    if search_sites:
        with open('js/模板.js', encoding='utf-8') as f:
            before = f.read().split('export')[0]
        # The executor is managed by hand instead of `with`: leaving a `with`
        # block would wait for every worker and defeat the timeout.
        executor = ThreadPoolExecutor(max_workers=len(search_sites))
        try:
            to_do = [executor.submit(search_one, site, wd, before) for site in search_sites]
            try:
                for future in as_completed(to_do, timeout=timeout):
                    ret = future.result()
                    if ret and isinstance(ret, dict) and ret.get('list'):
                        res.extend(ret['list'])
            except Exception as e:
                logger.info(f'发生错误:{e}')
                # Kept from the original: stop the concurrent.futures exit hook
                # from joining still-running workers at interpreter shutdown.
                # NOTE(review): this is process-wide and relies on a private
                # name; it appears to have no effect on Python versions that
                # do not register this hook through atexit - confirm it is
                # still wanted.
                import atexit
                atexit.unregister(thread._python_exit)
        finally:
            # Never block the request on slow workers.
            executor.shutdown(wait=False)
    logger.info(f'drpy聚搜{len(search_sites)}个源共计耗时{get_interval(t1)}毫秒')
    return jsonify({
        "list": res
    })
H
hjdhnx 已提交
187

H
hjdhnx 已提交
188 189
@vod.route('/vod')
def vod_home():
    """Entry point of the ``/vod`` API: load a rule and dispatch the request.

    After the access checks (``JS0_DISABLE`` / ``JS0_PASSWORD``) the rule
    named by the ``rule`` parameter (or the file given by ``ext``) is
    evaluated in QuickJS and wrapped in a ``CMS`` object. The request is then
    dispatched, in this order of precedence:

    * ``play``      -> ``cms.playContent`` (result wrapped as JSON),
    * ``play_url``  -> ``cms.playContent`` (string result is a redirect),
    * ``ac`` + ``t``   -> ``cms.categoryContent`` (first level),
    * ``ac`` + ``ids`` -> ``cms.detailContent`` (second level),
    * ``wd``        -> ``multi_search`` for rule ``drpy``, else
      ``cms.searchContent``,
    * otherwise     -> ``cms.homeContent``.

    Returns:
        A Flask response; ``R.failed(...)`` on invalid parameters or when the
        rule cannot be loaded. Aborts with 403 when access is disabled or the
        password does not match.
    """
    lsg = storage_service()
    js0_disable = lsg.getItem('JS0_DISABLE', cfg.get('JS0_DISABLE', 0))
    if js0_disable:
        abort(403)
    js0_password = lsg.getItem('JS0_PASSWORD', cfg.get('JS0_PASSWORD', ''))
    if js0_password:
        pwd = getParmas('pwd')
        if pwd != js0_password:
            abort(403)
    t0 = time()
    rule = getParmas('rule')
    ac = getParmas('ac')
    ids = getParmas('ids')
    if ac and ids and '#' in ids:
        # Second level of an aggregated search: ids look like "<id>#<rule>".
        # Take the rule from the first id that carries a '#', so a plain
        # leading id cannot raise IndexError.
        tagged_id = next(item for item in ids.split(',') if '#' in item)
        rule = tagged_id.split('#')[1]

    ext = getParmas('ext')
    filters = getParmas('f')
    if not rule:
        return R.failed('规则字段必填')
    rule_list = getRuleLists()
    if not ext and rule not in rule_list:
        msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
        return R.failed(msg)
    # NOTE(review): `ext` is taken from the request parameters and used below
    # as a file path without validation; the file content is then rendered
    # with render_template_string and evaluated. Flagged for a security
    # review, behaviour intentionally left unchanged here.
    js_path = f'js/{rule}.js' if not ext else ext
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    # Epilogue appended to the rule: merges the named template into `rule` when one is set.
    end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    logger.info(f'参数检验js读取共计耗时:{get_interval(t0)}毫秒')
    t2 = time()

    ctx = Context()
    try:
        with open(js_path, encoding='utf-8') as f2:
            jscode = f2.read()
        env = get_env()
        if env:
            jscode = render_template_string(jscode, **env)
        jscode = before + jscode + end_code
        ctx.eval(jscode)
        js_ret = ctx.get('rule')
        ruleDict = ujson.loads(js_ret.json())
    except Exception as e:
        logger.info(f'{e}')
        return R.failed('爬虫规则加载失败')

    # Store the requested rule id in the dict; playback sniffing needs it later.
    ruleDict['id'] = rule
    logger.info(f'js装载耗时:{get_interval(t2)}毫秒')
    cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg, ext)
    wd = getParmas('wd')
    play = getParmas('play')  # sent along when playback is clicked for type 4
    flag = getParmas('flag')  # sent along when playback is clicked for type 4
    t = getParmas('t')
    try:
        pg = int(getParmas('pg', '1'))
    except (TypeError, ValueError):
        # A non-numeric page number falls back to the first page.
        pg = 1
    play_url = getParmas('play_url')

    if play:
        play_parts = play.split('play_url=')
        if len(play_parts) < 2:
            # Without the marker there is nothing to resolve.
            return R.failed('play参数格式错误')
        jxs = getJxs()
        play_url = cms.playContent(play_parts[1], jxs, flag)
        if isinstance(play_url, str):
            return jsonify({'parse': 0, 'playUrl': '', 'jx': 0, 'url': play_url})
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if play_url:  # playback
        jxs = getJxs()
        play_url = cms.playContent(play_url, jxs)
        if isinstance(play_url, str):
            return redirect(play_url)
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if ac and t:  # first level (category listing)
        fl = {}
        if filters and '{' in filters and '}' in filters:
            try:
                fl = json.loads(filters)
            except ValueError as e:
                # Malformed filter JSON: list the category unfiltered.
                logger.info(f'筛选参数解析失败:{e}')
                fl = {}
        data = cms.categoryContent(t, pg, fl)
        return jsonify(data)
    if ac and ids:  # second level (detail)
        id_list = ids.split(',')
        show_name = False
        if '#' in ids:
            # Strip the "#<rule>" suffix added by aggregated search results.
            id_list = [item.split('#')[0] for item in id_list]
            show_name = True
        data = cms.detailContent(pg, id_list, show_name)
        return jsonify(data)
    if wd:  # search
        if rule == 'drpy':
            # The 'drpy' rule means an aggregated search across all rules.
            return multi_search(wd)
        data = cms.searchContent(wd)
        return jsonify(data)
    home_data = cms.homeContent(pg)
    return jsonify(home_data)