vod.py 12.5 KB
Newer Older
H
hjdhnx 已提交
1 2 3 4 5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : vod.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date  : 2022/9/6
H
hjdhnx 已提交
6
import json
H
hjdhnx 已提交
7

H
hjdhnx 已提交
8
from flask import Blueprint,abort,request,render_template,render_template_string,jsonify,make_response,redirect
H
hjdhnx 已提交
9
from time import time
H
hjdhnx 已提交
10 11
from utils.web import getParmas,get_interval
from utils.cfg import cfg
H
hjdhnx 已提交
12
from utils.env import get_env
H
hjdhnx 已提交
13 14 15 16 17 18 19 20
from js.rules import getRuleLists,getJxs
from base.R import R
from utils.log import logger
from utils import parser
from controllers.cms import CMS
from base.database import db
from models.ruleclass import RuleClass
from models.playparse import PlayParse
H
hjdhnx 已提交
21
from js.rules import getRules
H
hjdhnx 已提交
22
from controllers.service import storage_service,rules_service
H
hjdhnx 已提交
23
from concurrent.futures import ThreadPoolExecutor,as_completed,thread  # 引入线程池
H
hjdhnx 已提交
24 25
from quickjs import Function,Context
import ujson
H
hjdhnx 已提交
26 27
vod = Blueprint("vod", __name__)

H
hjdhnx 已提交
28

H
hjdhnx 已提交
29
def search_one_py(rule, wd, before: str = ''):
    """Search a single rule for ``wd`` using ``parser.runJs`` to load the rule.

    Args:
        rule: Rule name; the rule source is read from ``js/{rule}.js``.
        wd: Search keyword handed to ``CMS.searchContent``.
        before: JavaScript prelude passed to ``parser.runJs``. When empty it
            is taken from ``js/模板.js`` (everything before the first
            ``export``).

    Returns:
        Whatever ``CMS.searchContent(wd, show_name=True)`` returns, or ``None``
        when ``parser.runJs`` yields no code or any exception is raised while
        loading or searching.
    """
    started = time()
    if not before:
        with open('js/模板.js', encoding='utf-8') as template_file:
            before = template_file.read().split('export')[0]
    js_path = f'js/{rule}.js'
    try:
        ctx, js_code = parser.runJs(js_path, before=before)
        if js_code:
            rule_dict = ctx.rule.to_dict()
            # Store the requested rule id in the dict; later play sniffing needs it.
            rule_dict['id'] = rule
            logger.info(f'规则{rule}装载耗时:{get_interval(started)}毫秒')
            searcher = CMS(rule_dict, db, RuleClass, PlayParse, cfg)
            return searcher.searchContent(wd, show_name=True)
    except Exception as e:
        print(f'{rule}发生错误:{e}')
    return None

H
hjdhnx 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
def search_one(rule, wd, before: str = ''):
    """Search a single rule for ``wd`` by evaluating its JS in a fresh ``Context``.

    The evaluated script is ``before`` + the contents of ``js/{rule}.js`` + a
    trailing snippet that merges the rule into its template (``muban``) entry
    when the rule names one.

    Args:
        rule: Rule name; the rule source is read from ``js/{rule}.js``.
        wd: Search keyword handed to ``CMS.searchContent``.
        before: JavaScript prelude. When empty it is taken from
            ``js/模板.js`` (everything before the first ``export``).

    Returns:
        Whatever ``CMS.searchContent(wd, show_name=True)`` returns, or
        ``R.failed('爬虫规则加载失败')`` when reading, evaluating or searching
        raises.
    """
    started = time()
    if not before:
        with open('js/模板.js', encoding='utf-8') as template_file:
            before = template_file.read().split('export')[0]
    js_path = f'js/{rule}.js'
    template_merge = '\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}'
    ctx = Context()
    try:
        with open(js_path, encoding='utf-8') as rule_file:
            rule_source = rule_file.read()
        ctx.eval(before + rule_source + template_merge)
        rule_dict = ujson.loads(ctx.get('rule').json())
        # Store the requested rule id in the dict; later play sniffing needs it.
        rule_dict['id'] = rule
        logger.info(f'规则{rule}装载耗时:{get_interval(started)}毫秒')
        searcher = CMS(rule_dict, db, RuleClass, PlayParse, cfg)
        return searcher.searchContent(wd, show_name=True)
    except Exception as e:
        logger.info(f'{e}')
        return R.failed('爬虫规则加载失败')

H
hjdhnx 已提交
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
def multi_search2(wd):
    """Aggregate search results for ``wd`` from every searchable rule.

    Each searchable rule (excluding ``drpy`` itself) is searched with
    ``search_one`` on its own worker thread; this variant waits for all
    workers to finish before collecting results. The configured
    ``SEARCH_TIMEOUT`` is only reported in the log here, it is not enforced.

    Args:
        wd: Search keyword.

    Returns:
        ``jsonify({'list': [...]})`` with the concatenated ``list`` entries of
        every worker that returned a dict. If anything fails while searching
        or collecting, the list is empty.
    """
    t1 = time()
    lsg = storage_service()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        # Best-effort: any problem reading/parsing the setting falls back to 5s.
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [x['name'] for x in rules]
    rules_exclude = ['drpy']
    search_sites = [
        x['name'] for x in rules
        if x.get('searchable', 0) and x.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard() instead of remove(): there may be no rule named 'drpy' at all.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    res = []
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    logger.info(f'聚搜准备工作耗时:{get_interval(t1)}毫秒')
    t2 = time()
    try:
        obj_list = []
        # ThreadPoolExecutor rejects max_workers == 0, so skip the pool
        # entirely when there is nothing to search.
        if search_sites:
            # Leaving the with-block waits for every worker to finish and
            # guarantees the pool is shut down even if submit() raises.
            with ThreadPoolExecutor(len(search_sites)) as thread_pool:
                obj_list = [
                    thread_pool.submit(search_one, site, wd, before)
                    for site in search_sites
                ]
        site_results = [obj.result() for obj in obj_list]
        for site_result in site_results:
            if (site_result and isinstance(site_result, dict)
                    and site_result.get('list') and len(site_result['list']) > 0):
                res.extend(site_result['list'])
        result = {
            'list': res
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时{get_interval(t1)}毫秒')
    except Exception as e:
        result = {
            'list': []
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时:{get_interval(t1)}毫秒,发生错误:{e}')
    return jsonify(result)

H
hjdhnx 已提交
119

H
hjdhnx 已提交
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
def merged_hide(merged_rules):
    """Drop every entry of ``merged_rules`` that is marked hidden.

    Hidden rules come from ``rules_service().getHideRules()``; an entry is
    removed when it equals the ``name`` of one of them.

    Args:
        merged_rules: Sequence of rule names to filter.

    Returns:
        A new list with the hidden names removed, original order preserved.
    """
    started = time()
    hidden = rules_service().getHideRules()
    hide_rule_names = [hidden_rule['name'] for hidden_rule in hidden]
    total = len(merged_rules)
    visible = [name for name in merged_rules if name not in hide_rule_names]
    logger.info(f'数据库筛选隐藏规则耗时{get_interval(started)}毫秒,共计{total}条规则,隐藏后可渲染{len(visible)}条规则')
    return visible

H
hjdhnx 已提交
139 140 141 142 143 144 145 146 147 148 149 150
def multi_search(wd):
    """Aggregate search results for ``wd`` from every searchable, non-hidden rule.

    Each rule is searched with ``search_one`` on its own worker thread.
    Results are collected as workers finish, for at most ``SEARCH_TIMEOUT``
    milliseconds (read from storage, default 5000). Whatever has been
    collected when the timeout fires is returned; unfinished workers are not
    waited for.

    Args:
        wd: Search keyword.

    Returns:
        ``jsonify({"list": [...]})`` with the concatenated ``list`` entries of
        every worker that returned a dict in time.
    """
    lsg = storage_service()
    t1 = time()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        # Best-effort: any problem reading/parsing the setting falls back to 5s.
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [x['name'] for x in rules]
    rules_exclude = ['drpy']
    search_sites = [
        x['name'] for x in rules
        if x.get('searchable', 0) and x.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard() instead of remove(): there may be no rule named 'drpy' at all.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    search_sites = merged_hide(search_sites)
    res = []
    if search_sites:
        with open('js/模板.js', encoding='utf-8') as f:
            before = f.read().split('export')[0]
        # The executor is managed by hand (not via `with`) so that leaving
        # this block never blocks on workers that outlived the timeout.
        executor = ThreadPoolExecutor(max_workers=len(search_sites))
        try:
            to_do = [
                executor.submit(search_one, site, wd, before)
                for site in search_sites
            ]
            try:
                for future in as_completed(to_do, timeout=timeout):
                    try:
                        ret = future.result()
                    except Exception as e:
                        # One failing site must not discard the other sites.
                        logger.info(f'发生错误:{e}')
                        continue
                    if ret and isinstance(ret, dict) and ret.get('list'):
                        res.extend(ret['list'])
            except Exception as e:
                # Typically the as_completed() timeout: keep what we have.
                logger.info(f'发生错误:{e}')
                import atexit
                # Kept from the original: intended to stop interpreter exit
                # from joining still-running workers.
                # NOTE(review): on newer Python versions this hook may not be
                # registered through atexit, in which case this is a no-op.
                atexit.unregister(thread._python_exit)
        finally:
            # Signal idle workers to exit without waiting for running ones.
            executor.shutdown(wait=False)
    logger.info(f'drpy聚搜{len(search_sites)}个源共计耗时{get_interval(t1)}毫秒')
    return jsonify({
        "list": res
    })
H
hjdhnx 已提交
184

H
hjdhnx 已提交
185 186
@vod.route('/vod')
def vod_home():
    """Handle ``/vod``: load one rule's JS and dispatch to the matching CMS call.

    Access is refused with 403 when ``JS0_DISABLE`` is set, or when
    ``JS0_PASSWORD`` is set and the ``pwd`` parameter does not match it.

    After the rule is loaded, the first matching case below is served:

    1. ``play``      -> ``cms.playContent(url, jxs, flag)``; a string result is
       wrapped as ``{'parse': 0, 'playUrl': '', 'jx': 0, 'url': <result>}``.
    2. ``play_url``  -> ``cms.playContent(url, jxs)``; a string result is
       returned as a redirect.
    3. ``ac`` + ``t``   -> ``cms.categoryContent(t, pg, filters)``.
    4. ``ac`` + ``ids`` -> ``cms.detailContent(pg, id_list, show_name)``.
    5. ``wd``        -> ``multi_search(wd)`` when the rule is ``drpy``,
       otherwise ``cms.searchContent(wd)``.
    6. otherwise     -> ``cms.homeContent(pg)``.

    Returns:
        A Flask response (JSON or redirect), ``R.failed(...)`` on validation or
        rule-loading errors, or whatever ``cms.playContent`` returned when it
        is neither a string nor a dict.
    """
    lsg = storage_service()
    js0_disable = lsg.getItem('JS0_DISABLE', cfg.get('JS0_DISABLE', 0))
    if js0_disable:
        abort(403)
    js0_password = lsg.getItem('JS0_PASSWORD', cfg.get('JS0_PASSWORD', ''))
    if js0_password:
        pwd = getParmas('pwd')
        if pwd != js0_password:
            abort(403)

    t0 = time()
    rule = getParmas('rule')
    ac = getParmas('ac')
    ids = getParmas('ids')
    if ac and ids and ids.find('#') > -1:
        # Detail request coming from an aggregated search: ids look like
        # "<id>#<rule>", and the rule of the first id overrides `rule`.
        first_id = ids.split(',')[0]
        # '#' may only occur in a later id; then keep the requested rule
        # instead of failing with IndexError.
        if '#' in first_id:
            rule = first_id.split('#')[1]

    ext = getParmas('ext')
    filters = getParmas('f')
    if not rule:
        return R.failed('规则字段必填')
    rule_list = getRuleLists()
    if not ext and rule not in rule_list:
        msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
        return R.failed(msg)

    # NOTE(review): `ext` appears to be request-supplied and is used as-is as
    # the path passed to open(); its content is then rendered with
    # render_template_string when an env is configured. Confirm it is trusted.
    js_path = f'js/{rule}.js' if not ext else ext
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    # Appended after the rule source: merge the rule into its template entry.
    end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    logger.info(f'参数检验js读取共计耗时:{get_interval(t0)}毫秒')
    t2 = time()

    ctx = Context()
    try:
        with open(js_path, encoding='utf-8') as f2:
            jscode = f2.read()
        env = get_env()
        if env:
            jscode = render_template_string(jscode, env=env)
        jscode = before + jscode + end_code
        ctx.eval(jscode)
        js_ret = ctx.get('rule')
        ruleDict = ujson.loads(js_ret.json())
    except Exception as e:
        logger.info(f'{e}')
        return R.failed('爬虫规则加载失败')

    # Store the requested rule id in the dict; later play sniffing needs it.
    ruleDict['id'] = rule
    logger.info(f'js装载耗时:{get_interval(t2)}毫秒')
    cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg, ext)
    wd = getParmas('wd')
    play = getParmas('play')  # sent when a type-4 source is played
    flag = getParmas('flag')  # sent when a type-4 source is played
    t = getParmas('t')
    try:
        pg = int(getParmas('pg', '1'))
    except (TypeError, ValueError):
        # A non-numeric page falls back to the default first page.
        pg = 1
    play_url = getParmas('play_url')

    if play:
        jxs = getJxs()
        # NOTE(review): assumes `play` contains 'play_url='; otherwise this
        # raises IndexError. Confirm the expected format with the client.
        play_url = play.split('play_url=')[1]
        play_url = cms.playContent(play_url, jxs, flag)
        if isinstance(play_url, str):
            return jsonify({'parse': 0, 'playUrl': '', 'jx': 0, 'url': play_url})
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if play_url:  # playback
        jxs = getJxs()
        play_url = cms.playContent(play_url, jxs)
        if isinstance(play_url, str):
            return redirect(play_url)
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if ac and t:  # category listing
        fl = {}
        if filters and filters.find('{') > -1 and filters.find('}') > -1:
            try:
                fl = json.loads(filters)
            except ValueError as e:
                # Malformed filter JSON: ignore the filter rather than fail.
                logger.info(f'筛选参数解析失败:{e}')
                fl = {}
        data = cms.categoryContent(t, pg, fl)
        return jsonify(data)

    if ac and ids:  # detail
        id_list = ids.split(',')
        show_name = False
        if ids.find('#') > -1:
            # Strip the "#<rule>" suffix added by the aggregated search.
            id_list = [x.split('#')[0] for x in id_list]
            show_name = True
        data = cms.detailContent(pg, id_list, show_name)
        return jsonify(data)

    if wd:  # search
        if rule == 'drpy':
            return multi_search(wd)
        data = cms.searchContent(wd)
        return jsonify(data)

    home_data = cms.homeContent(pg)
    return jsonify(home_data)