vod.py 15.7 KB
Newer Older
H
hjdhnx 已提交
1 2 3 4 5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : vod.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date  : 2022/9/6
H
hjdhnx 已提交
6
import functools
H
hjdhnx 已提交
7
import json
H
hjdhnx 已提交
8

9
from flask import Blueprint,abort,request,render_template,render_template_string,jsonify,make_response,redirect,current_app
H
hjdhnx 已提交
10
from time import time
H
hjdhnx 已提交
11 12
from utils.web import getParmas,get_interval
from utils.cfg import cfg
H
hjdhnx 已提交
13
from utils.env import get_env
H
hjdhnx 已提交
14 15 16 17 18 19 20 21
from js.rules import getRuleLists,getJxs
from base.R import R
from utils.log import logger
from utils import parser
from controllers.cms import CMS
from base.database import db
from models.ruleclass import RuleClass
from models.playparse import PlayParse
H
hjdhnx 已提交
22
from js.rules import getRules
H
hjdhnx 已提交
23
from controllers.service import storage_service,rules_service
H
hjdhnx 已提交
24
from concurrent.futures import ThreadPoolExecutor,as_completed,thread  # 引入线程池
H
hjdhnx 已提交
25 26
from quickjs import Function,Context
import ujson
H
hjdhnx 已提交
27 28
vod = Blueprint("vod", __name__)

H
hjdhnx 已提交
29
def search_one_py(rule, wd, before: str = ''):
    """
    Search a single rule for ``wd`` using ``parser.runJs`` to load the rule.

    :param rule: rule name; the script ``js/{rule}.js`` is loaded and the name
        is stored under ``id`` in the rule dict
    :param wd: search keyword handed to ``CMS.searchContent``
    :param before: JS prelude passed to ``parser.runJs``; when empty it is read
        from ``js/模板.js`` (everything before the first ``export``)
    :return: the value of ``CMS.searchContent(wd, show_name=True)``, or ``None``
        when ``runJs`` yields no code or any exception is raised
    """
    started = time()
    if not before:
        with open('js/模板.js', encoding='utf-8') as template_file:
            before = template_file.read().split('export')[0]
    script_path = f'js/{rule}.js'
    try:
        ctx, js_code = parser.runJs(script_path, before=before)
        if js_code:
            rule_dict = ctx.rule.to_dict()
            # Keep the requested rule id in the dict; play sniffing needs it later.
            rule_dict['id'] = rule
            logger.info(f'规则{rule}装载耗时:{get_interval(started)}毫秒')
            searcher = CMS(rule_dict, db, RuleClass, PlayParse, cfg)
            return searcher.searchContent(wd, show_name=True)
    except Exception as e:
        print(f'{rule}发生错误:{e}')
    # Reached when the script produced no code or when anything above failed.
    return None

49
def search_one(rule, wd, before: str = '',env:dict=None,app=None):
    """
    Load ``js/{rule}.js`` in a fresh quickjs ``Context`` and search it for ``wd``.

    :param rule: rule name; also stored under ``id`` in the resulting rule dict
    :param wd: search keyword handed to ``CMS.searchContent``
    :param before: JS prelude prepended to the rule source; when empty it is
        read from ``js/模板.js`` (everything before the first ``export``)
    :param env: optional mapping; every ``$key`` occurrence in the rule source
        is replaced by the string form of its value
    :param app: optional Flask application; when given, its application context
        stays active for the whole call
    :return: the value of ``CMS.searchContent(wd, show_name=True)``, or
        ``R.failed('爬虫规则加载失败')`` when loading or searching raises
    """
    from contextlib import ExitStack

    t1 = time()
    if not before:
        with open('js/模板.js', encoding='utf-8') as f:
            before = f.read().split('export')[0]
    # Appended after the rule source: merges the referenced template into `rule`.
    end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    js_path = f'js/{rule}.js'
    ctx = Context()
    with ExitStack() as stack:
        # The context used to wrap only the `$key` substitution (which does not
        # need it) and crashed with AttributeError when `app` was left at None.
        # NOTE(review): CMS (db access) and R.failed are presumably the parts
        # that need a Flask context inside a worker thread - confirm.
        if app is not None:
            stack.enter_context(app.app_context())
        try:
            with open(js_path, encoding='utf-8') as f2:
                jscode = f2.read()
            if env:
                for k in env:
                    jscode = jscode.replace(f'${k}', f'{env[k]}')
            jscode = before + jscode + end_code
            ctx.eval(jscode)
            js_ret = ctx.get('rule')
            ruleDict = ujson.loads(js_ret.json())
            # Keep the requested rule id in the dict; play sniffing needs it later.
            ruleDict['id'] = rule
            logger.info(f'规则{rule}装载耗时:{get_interval(t1)}毫秒')
            cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
            data = cms.searchContent(wd, show_name=True)
            return data
        except Exception as e:
            logger.info(f'{e}')
            return R.failed('爬虫规则加载失败')

H
hjdhnx 已提交
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
def multi_search2(wd):
    """
    Aggregate search: run ``search_one`` for every searchable rule and merge
    the ``list`` entries of all results.

    The ``SEARCH_TIMEOUT`` setting is read and logged only; this variant always
    waits for every worker to finish.

    :param wd: search keyword
    :return: ``jsonify({'list': [...]})``; the list is empty when any error
        occurs while collecting results
    """
    t1 = time()
    lsg = storage_service()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [x['name'] for x in rules]
    rules_exclude = ['drpy']
    search_sites = [
        x['name'] for x in rules
        if x.get('searchable', 0) and x.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard() instead of remove(): no KeyError when there is no 'drpy' rule.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    res = []
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    logger.info(f'聚搜准备工作耗时:{get_interval(t1)}毫秒')
    t2 = time()
    try:
        # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely
        # when no rule is searchable.
        if search_sites:
            with ThreadPoolExecutor(len(search_sites)) as thread_pool:
                obj_list = [thread_pool.submit(search_one, site, wd, before) for site in search_sites]
            # Leaving the with-block has waited for every worker to finish.
            for obj in obj_list:
                found = obj.result()
                if found and isinstance(found, dict) and found.get('list'):
                    res.extend(found['list'])
        result = {
            'list': res
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时{get_interval(t1)}毫秒')
    except Exception as e:
        result = {
            'list': []
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时:{get_interval(t1)}毫秒,发生错误:{e}')
    return jsonify(result)

H
hjdhnx 已提交
129

H
hjdhnx 已提交
130 131 132 133 134 135 136
def merged_hide(merged_rules):
    """
    Return the entries of ``merged_rules`` that are not hidden.

    Hidden names are the ``name`` values of ``rules_service().getHideRules()``.

    :param merged_rules: sized iterable of rule names
    :return: a new list holding only the names that are not hidden
    """
    started = time()
    hidden_names = [hidden['name'] for hidden in rules_service().getHideRules()]
    total = len(merged_rules)
    visible = [name for name in merged_rules if name not in hidden_names]
    logger.info(f'数据库筛选隐藏规则耗时{get_interval(started)}毫秒,共计{total}条规则,隐藏后可渲染{len(visible)}条规则')
    return visible

151 152 153 154 155
def disable_exit_for_threadpool_executor():
    """
    Unregister the ``concurrent.futures.thread._python_exit`` hook from ``atexit``.

    The submodule is imported explicitly: a bare ``import concurrent.futures``
    does not guarantee that ``concurrent.futures.thread`` is available as an
    attribute. A missing hook is tolerated.

    NOTE(review): newer CPython releases appear to register this hook through
    ``threading`` rather than ``atexit``, in which case this call has no
    effect - confirm against the interpreter in use.
    """
    import atexit
    from concurrent.futures import thread as futures_thread

    exit_hook = getattr(futures_thread, '_python_exit', None)
    if exit_hook is not None:
        # atexit.unregister() is silent when the function was never registered.
        atexit.unregister(exit_hook)

H
hjdhnx 已提交
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
def sort_lsg_rules(sites:list):
    """
    Sort ``sites`` in place by ascending ``order``; entries with the same
    ``order`` are placed by descending ``write_date``.

    :param sites: list of mappings providing ``order`` and ``write_date``
    :return: the same list object, now sorted
    """
    def three_way(left, right):
        # Classic three-way comparison: -1, 0 or 1.
        return (left > right) - (left < right)

    def by_order_then_newest(a, b):
        verdict = three_way(a['order'], b['order'])
        if verdict == 0:
            # Operands are swapped on purpose: the larger write_date sorts first.
            verdict = three_way(b['write_date'], a['write_date'])
        return verdict

    sites.sort(key=functools.cmp_to_key(by_order_then_newest))
    return sites

def sort_lsg_rules2(sites:list,lsg_rule_names:list):
    """
    Sort ``sites`` in place so that they follow the order of ``lsg_rule_names``.

    Names missing from ``lsg_rule_names`` are placed after all known names and
    keep their original relative order. When a name occurs more than once in
    ``lsg_rule_names``, its first position counts.

    :param sites: list of rule names to reorder (must be hashable; presumably
        plain strings)
    :param lsg_rule_names: rule names in the desired order
    :return: the same ``sites`` list object, now sorted
    """
    rank = {}
    for position, name in enumerate(lsg_rule_names):
        rank.setdefault(name, position)
    # One past the last known position, so unknown names always sort last no
    # matter how long the reference list is (the old fixed rank was 999).
    unknown_rank = len(lsg_rule_names)
    sites.sort(key=lambda site: rank.get(site, unknown_rank))
    return sites

H
hjdhnx 已提交
203 204
def getSearchSites():
    """
    Collect the settings and rule lists used by the aggregate search.

    :return: dict with the keys
        ``timeout`` - ``SEARCH_TIMEOUT`` converted from milliseconds to
        seconds (5 when it cannot be read or converted);
        ``total_search`` - names of all searchable rules except ``drpy``;
        ``nosearch_sites`` - names of the remaining rules, without ``drpy``;
        ``search_limit`` - ``SEARCH_LIMIT`` as an int, 0 when invalid or below 1;
        ``search_sites`` - non-hidden searchable rules, ordered by
        ``sort_lsg_rules2`` and cut to ``search_limit`` entries (so a limit
        of 0 yields an empty list)
    """
    val = {}
    lsg = storage_service()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT',5000))/1000,2)
    except Exception:
        timeout = 5
    val['timeout'] = timeout
    rules = getRules('js')['list']
    rule_names = [x['name'] for x in rules]
    rules_exclude = ['drpy']
    total_search = [
        x['name'] for x in rules
        if x.get('searchable', 0) and x.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(total_search)
    # discard() instead of remove(): no KeyError when there is no 'drpy' rule.
    nosearch_sites.discard('drpy')
    val['total_search'] = total_search
    val['nosearch_sites'] = list(nosearch_sites)
    search_sites = merged_hide(total_search)
    lsg_rules = rules_service()
    # Names of the stored rules that are still searchable, in query_all() order;
    # they define the ordering applied to search_sites below.
    lsg_rule_names = [x['name'] for x in lsg_rules.query_all() if x['name'] in search_sites]

    search_sites = sort_lsg_rules2(search_sites, lsg_rule_names)
    search_limit = lsg.getItem('SEARCH_LIMIT', 24)
    try:
        search_limit = int(search_limit)
    except (TypeError, ValueError, OverflowError):
        search_limit = 0
    if search_limit < 1:
        search_limit = 0
    search_sites = search_sites[:search_limit]
    val['search_limit'] = search_limit
    val['search_sites'] = search_sites
    return val

def multi_search(wd):
    """
    Aggregate search: run ``search_one`` concurrently for every site returned
    by ``getSearchSites()`` and merge the ``list`` entries of the results that
    arrive within the configured timeout.

    A worker that raises is logged and skipped; it no longer discards the
    results of the other workers. When the timeout expires, the results
    gathered so far are returned and still-running workers are not waited for.

    :param wd: search keyword
    :return: ``jsonify({"list": [...]})``
    """
    t1 = time()
    val = getSearchSites()
    timeout = val['timeout']
    total_search = val['total_search']
    nosearch_sites = val['nosearch_sites']
    search_limit = val['search_limit']
    search_sites = val['search_sites']

    env = get_env()
    logger.info(f'开始聚搜{wd},共计{len(total_search)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    msearch_msg = f'搜索限制条数:{search_limit}/{len(search_sites)} {search_sites}'
    logger.info(msearch_msg)
    print(msearch_msg)
    res = []
    if len(search_sites) > 0:
        with open('js/模板.js', encoding='utf-8') as f:
            before = f.read().split('export')[0]
        # Resolve the proxy once: worker threads need the real app object.
        app = current_app._get_current_object()
        executor = ThreadPoolExecutor(max_workers=len(search_sites))
        to_do = [executor.submit(search_one, site, wd, before, env, app) for site in search_sites]
        try:
            for future in as_completed(to_do, timeout=timeout):
                try:
                    ret = future.result()
                except Exception as e:
                    # One broken rule must not drop every other result.
                    logger.info(f'发生错误:{e}')
                    continue
                if ret and isinstance(ret, dict) and ret.get('list'):
                    res.extend(ret['list'])
        except Exception as e:
            # Typically the as_completed() timeout: keep what has arrived so far.
            logger.info(f'发生错误:{e}')
            import atexit
            # Best effort so interpreter exit does not block on slow workers.
            atexit.unregister(thread._python_exit)
        finally:
            # Never block the request on unfinished workers; this replaces the
            # old trick of overwriting executor.shutdown with a no-op.
            executor.shutdown(wait=False)
    logger.info(f'drpy聚搜{len(search_sites)}个源共计耗时{get_interval(t1)}毫秒')
    return jsonify({
        "list": res
    })
H
hjdhnx 已提交
281

H
hjdhnx 已提交
282 283 284 285 286 287 288 289
@vod.route('/vods')
def vods_search():
    """Render ``show_search.html`` with the aggregate-search settings from ``getSearchSites()``."""
    search_info = getSearchSites()
    print(search_info)
    return render_template('show_search.html', val=search_info)

H
hjdhnx 已提交
290 291
@vod.route('/vod')
def vod_home():
    """
    Main entry point of the ``/vod`` API.

    Access is refused with 403 when ``JS0_DISABLE`` is truthy or when
    ``JS0_PASSWORD`` is set and the ``pwd`` parameter does not match it.
    The rule script is then loaded into a quickjs context and the request is
    dispatched on its parameters, in this order:

    1. ``play``      - resolve a play address (the ``flag`` parameter is passed on)
    2. ``play_url``  - resolve a play address; a string result is a redirect
    3. ``ac`` + ``t``   - category listing (``pg`` page, ``f`` JSON filters)
    4. ``ac`` + ``ids`` - detail; ids of the form ``id#rule`` come from the
       aggregate search and select the rule
    5. ``wd``        - search (aggregate search when the rule is ``drpy``)
    6. otherwise     - home content

    Malformed ``pg`` falls back to page 1 and malformed ``f`` to no filters,
    so neither turns into a server error.

    :return: a Flask response (JSON or redirect), or whatever
        ``cms.playContent`` returned when it is neither a str nor a dict
    """
    lsg = storage_service()
    js0_disable = lsg.getItem('JS0_DISABLE',cfg.get('JS0_DISABLE',0))
    if js0_disable:
        abort(403)
    js0_password = lsg.getItem('JS0_PASSWORD', cfg.get('JS0_PASSWORD', ''))
    if js0_password:
        pwd = getParmas('pwd')
        if pwd != js0_password:
            abort(403)
    t0 = time()
    rule = getParmas('rule')
    ac = getParmas('ac')
    ids = getParmas('ids')
    if ac and ids and ids.find('#') > -1:  # detail request coming from the aggregate search
        first_id = ids.split(',')[0]
        # Only the first id selects the rule; guard against a first id without
        # '#', which used to raise IndexError.
        if '#' in first_id:
            rule = first_id.split('#')[1]

    ext = getParmas('ext')
    filters = getParmas('f')
    tp = getParmas('type')  # read but currently unused
    if not rule:
        return R.failed('规则字段必填')
    rule_list = getRuleLists()
    if not ext and not rule in rule_list:
        msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
        return R.failed(msg)
    # NOTE(review): when `ext` is supplied, the rule name is not validated and
    # `ext` is opened verbatim as a local file path taken from the request -
    # confirm this is intended and restricted upstream.
    js_path = f'js/{rule}.js' if not ext else ext
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    # Appended after the rule source: merges the referenced template into `rule`.
    end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    logger.info(f'参数检验js读取共计耗时:{get_interval(t0)}毫秒')
    t2 = time()

    ctx = Context()
    try:
        with open(js_path,encoding='utf-8') as f2:
            jscode = f2.read()
        env = get_env()
        # Substitute every `$key` placeholder with its environment value.
        for k in env:
            if f'${k}' in jscode:
                jscode = jscode.replace(f'${k}',f'{env[k]}')
        jscode = before + jscode + end_code
        ctx.eval(jscode)
        js_ret = ctx.get('rule')
        ruleDict = ujson.loads(js_ret.json())
    except Exception as e:
        logger.info(f'{e}')
        return R.failed('爬虫规则加载失败')

    # Keep the requested rule id in the dict; play sniffing needs it later.
    ruleDict['id'] = rule
    logger.info(f'js装载耗时:{get_interval(t2)}毫秒')
    cms = CMS(ruleDict,db,RuleClass,PlayParse,cfg,ext)
    wd = getParmas('wd')
    quick = getParmas('quick')  # read but currently unused
    play = getParmas('play') # sent along when play is clicked on a type-4 source
    flag = getParmas('flag') # sent along when play is clicked on a type-4 source
    t = getParmas('t')
    try:
        pg = int(getParmas('pg','1'))
    except (TypeError, ValueError):
        # A non-numeric page used to raise and produce a 500.
        pg = 1
    q = getParmas('q')  # read but currently unused
    play_url = getParmas('play_url')

    if play:
        jxs = getJxs()
        play_url = play.split('play_url=')[1]
        play_url = cms.playContent(play_url, jxs,flag)
        if isinstance(play_url, str):
            return jsonify({'parse': 0, 'playUrl': '', 'jx': 0, 'url': play_url})
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if play_url:  # play
        jxs = getJxs()
        play_url = cms.playContent(play_url,jxs)
        if isinstance(play_url,str):
            return redirect(play_url)
        elif isinstance(play_url,dict):
            return jsonify(play_url)
        else:
            return play_url

    if ac and t:  # category listing
        fl = {}
        if filters and filters.find('{') > -1 and filters.find('}') > -1:
            try:
                fl = json.loads(filters)
            except ValueError as e:
                # Malformed filter JSON: continue without filters.
                logger.info(f'{e}')
                fl = {}
        data = cms.categoryContent(t,pg,fl)
        return jsonify(data)
    if ac and ids: # detail
        id_list = ids.split(',')
        show_name = False
        if ids.find('#') > -1:
            # Strip the `#rule` suffix added by the aggregate search.
            id_list = list(map(lambda x:x.split('#')[0],id_list))
            show_name = True
        data = cms.detailContent(pg,id_list,show_name)
        return jsonify(data)
    if wd: # search
        if rule == 'drpy':
            print(f'准备单独处理聚合搜索:{wd}')
            return multi_search(wd)
        else:
            data = cms.searchContent(wd)
            return jsonify(data)
    home_data = cms.homeContent(pg)
    return jsonify(home_data)