vod.py 11.0 KB
Newer Older
H
hjdhnx 已提交
1 2 3 4 5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : vod.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date  : 2022/9/6
H
hjdhnx 已提交
6
import json
H
hjdhnx 已提交
7 8 9

from flask import Blueprint,request,render_template,jsonify,make_response,redirect
from time import time
H
hjdhnx 已提交
10 11
from utils.web import getParmas,get_interval
from utils.cfg import cfg
H
hjdhnx 已提交
12 13 14 15 16 17 18 19
from js.rules import getRuleLists,getJxs
from base.R import R
from utils.log import logger
from utils import parser
from controllers.cms import CMS
from base.database import db
from models.ruleclass import RuleClass
from models.playparse import PlayParse
H
hjdhnx 已提交
20
from js.rules import getRules
H
hjdhnx 已提交
21
from controllers.service import storage_service
H
hjdhnx 已提交
22
from concurrent.futures import ThreadPoolExecutor,as_completed,thread  # 引入线程池
H
hjdhnx 已提交
23 24
from quickjs import Function,Context
import ujson
H
hjdhnx 已提交
25 26
# Flask blueprint for this module; the ``/vod`` route below is registered on it.
vod = Blueprint("vod", __name__)

H
hjdhnx 已提交
27

H
hjdhnx 已提交
28
def search_one_py(rule, wd, before: str = ''):
    """Search one rule for a keyword, loading the rule through ``parser.runJs``.

    Args:
        rule: Rule name; the rule source is read from ``js/{rule}.js``.
        wd: Search keyword forwarded to ``CMS.searchContent``.
        before: JavaScript prelude handed to ``parser.runJs``. When empty it is
            read from ``js/模板.js`` (everything before the first ``export``).

    Returns:
        The value of ``cms.searchContent(wd, show_name=True)``, or ``None``
        when the rule produces no code or any step fails.
    """
    t1 = time()
    js_path = f'js/{rule}.js'
    try:
        # Reading the template is part of the guarded region so that a missing
        # or unreadable template follows the same "return None" failure path
        # as every other loading error.
        if not before:
            with open('js/模板.js', encoding='utf-8') as f:
                before = f.read().split('export')[0]
        ctx, js_code = parser.runJs(js_path, before=before)
        if not js_code:
            return None
        ruleDict = ctx.rule.to_dict()
        # Store the requested rule id in the dict; playback sniffing needs it later.
        ruleDict['id'] = rule
        logger.info(f'规则{rule}装载耗时:{get_interval(t1)}毫秒')
        cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
        return cms.searchContent(wd, show_name=True)
    except Exception as e:
        # Report through the module logger (not stdout) like the rest of the file.
        logger.info(f'{rule}发生错误:{e}')
        return None

H
hjdhnx 已提交
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
def search_one(rule, wd, before: str = ''):
    """Search one rule for a keyword, evaluating the rule in a quickjs ``Context``.

    The rule source ``js/{rule}.js`` is concatenated between the template
    prelude and a snippet that merges the selected template into ``rule``,
    evaluated, and the resulting ``rule`` object is handed to ``CMS``.

    Args:
        rule: Rule name; selects ``js/{rule}.js``.
        wd: Search keyword forwarded to ``CMS.searchContent``.
        before: JavaScript prelude. When empty it is read from ``js/模板.js``
            (everything before the first ``export``).

    Returns:
        The value of ``cms.searchContent(wd, show_name=True)`` on success,
        otherwise ``R.failed('爬虫规则加载失败')``.
    """
    t1 = time()
    end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    js_path = f'js/{rule}.js'
    try:
        # Template loading and context creation are inside the guarded region
        # so every loading problem ends in the same failure result.
        if not before:
            with open('js/模板.js', encoding='utf-8') as f:
                before = f.read().split('export')[0]
        ctx = Context()
        with open(js_path, encoding='utf-8') as f2:
            jscode = f2.read()
        ctx.eval(before + jscode + end_code)
        js_ret = ctx.get('rule')
        ruleDict = ujson.loads(js_ret.json())
        # Store the requested rule id in the dict; playback sniffing needs it later.
        ruleDict['id'] = rule
        logger.info(f'规则{rule}装载耗时:{get_interval(t1)}毫秒')
        cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg)
        return cms.searchContent(wd, show_name=True)
    except Exception as e:
        # Name the rule: this function runs concurrently for many rules and a
        # bare exception text cannot be attributed to any of them.
        logger.info(f'{rule}发生错误:{e}')
        # NOTE(review): this function is submitted to worker threads by the
        # aggregated search; if R.failed builds a Flask response it may need an
        # application context there — confirm against base.R.
        return R.failed('爬虫规则加载失败')

H
hjdhnx 已提交
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
def multi_search2(wd):
    """Aggregated search over every searchable rule, waiting for all workers.

    One ``search_one`` task per searchable rule is submitted to a thread pool.
    Unlike ``multi_search`` this variant waits for every task to finish; the
    configured ``SEARCH_TIMEOUT`` is only reported in the log, not enforced.

    Args:
        wd: Search keyword.

    Returns:
        ``jsonify({'list': [...]})`` with the concatenated ``list`` entries of
        every rule that returned a dict with a non-empty ``list``.
    """
    t1 = time()
    lsg = storage_service()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        # Any problem reading or converting the setting falls back to 5 seconds.
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [item['name'] for item in rules]
    rules_exclude = ['drpy']
    search_sites = [
        item['name'] for item in rules
        if item.get('searchable', 0) and item.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard() instead of remove(): a rule list without 'drpy' must not raise.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    res = []
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    logger.info(f'聚搜准备工作耗时:{get_interval(t1)}毫秒')
    t2 = time()
    result = {
        'list': res
    }
    if not search_sites:
        # ThreadPoolExecutor rejects max_workers=0, so answer with an empty list.
        return jsonify(result)
    try:
        # The context manager shuts the pool down (waiting for all workers)
        # even when submitting fails part-way.
        with ThreadPoolExecutor(len(search_sites)) as thread_pool:
            submitted = [
                (site, thread_pool.submit(search_one, site, wd, before))
                for site in search_sites
            ]
        for site, task in submitted:
            try:
                site_data = task.result()
            except Exception as e:
                # One failing rule must not discard the results of the others.
                logger.info(f'{site}发生错误:{e}')
                continue
            if site_data and isinstance(site_data, dict) and site_data.get('list'):
                res.extend(site_data['list'])
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时{get_interval(t1)}毫秒')
    except Exception as e:
        result = {
            'list': []
        }
        logger.info(f'drpy聚搜{len(search_sites)}个源耗时{get_interval(t2)}毫秒,含准备共计耗时:{get_interval(t1)}毫秒,发生错误:{e}')
    return jsonify(result)

H
hjdhnx 已提交
118

H
hjdhnx 已提交
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
def multi_search(wd):
    """Aggregated search over every searchable rule, bounded by a timeout.

    One ``search_one`` task per searchable rule is submitted to a thread pool
    and results are collected as they complete. When the ``SEARCH_TIMEOUT``
    setting (milliseconds, default 5000) elapses, collection stops and the
    results gathered so far are returned without waiting for the stragglers.

    Args:
        wd: Search keyword.

    Returns:
        ``jsonify({"list": [...]})`` with the concatenated ``list`` entries of
        every rule that finished in time with a dict holding a non-empty
        ``list``.
    """
    import atexit
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    lsg = storage_service()
    t1 = time()
    try:
        timeout = round(int(lsg.getItem('SEARCH_TIMEOUT', 5000)) / 1000, 2)
    except Exception:
        # Any problem reading or converting the setting falls back to 5 seconds.
        timeout = 5
    rules = getRules('js')['list']
    rule_names = [item['name'] for item in rules]
    rules_exclude = ['drpy']
    search_sites = [
        item['name'] for item in rules
        if item.get('searchable', 0) and item.get('name', '') not in rules_exclude
    ]
    nosearch_sites = set(rule_names) ^ set(search_sites)
    # discard() instead of remove(): a rule list without 'drpy' must not raise.
    nosearch_sites.discard('drpy')
    logger.info(f'开始聚搜{wd},共计{len(search_sites)}个规则,聚搜超时{timeout}秒')
    logger.info(f'不支持聚搜的规则,共计{len(nosearch_sites)}个规则:{",".join(nosearch_sites)}')
    res = []
    # ThreadPoolExecutor rejects max_workers=0, so skip the pool entirely when
    # there is nothing to search.
    if search_sites:
        with open('js/模板.js', encoding='utf-8') as f:
            before = f.read().split('export')[0]
        executor = ThreadPoolExecutor(max_workers=len(search_sites))
        try:
            to_do = [
                executor.submit(search_one, site, wd, before)
                for site in search_sites
            ]
            for future in as_completed(to_do, timeout=timeout):
                try:
                    ret = future.result()
                except Exception as e:
                    # One failing rule must not stop collection of the others.
                    logger.info(f'发生错误:{e}')
                    continue
                if ret and isinstance(ret, dict) and ret.get('list'):
                    res.extend(ret['list'])
        except FuturesTimeoutError as e:
            logger.info(f'发生错误:{e}')
            # HACK kept from the original: drop the interpreter-exit hook that
            # joins pool workers so that hung searches cannot delay shutdown.
            # It relies on a private attribute, hence the guarded lookup.
            exit_hook = getattr(thread, '_python_exit', None)
            if exit_hook is not None:
                atexit.unregister(exit_hook)
        finally:
            # Never block the response on workers that are still running.
            executor.shutdown(wait=False)
    logger.info(f'drpy聚搜{len(search_sites)}个源共计耗时{get_interval(t1)}毫秒')
    return jsonify({
        "list": res
    })
H
hjdhnx 已提交
160

H
hjdhnx 已提交
161 162
@vod.route('/vod')
def vod_home():
    """Entry point of the ``/vod`` route: load a rule and dispatch the request.

    The rule named by ``rule`` (or the file given by ``ext``) is evaluated in a
    quickjs ``Context`` and wrapped in a ``CMS`` object. The request is then
    dispatched on its parameters, first match wins:

    1. ``play``      -> ``cms.playContent`` (result wrapped as JSON)
    2. ``play_url``  -> ``cms.playContent`` (string result becomes a redirect)
    3. ``ac`` + ``t``   -> ``cms.categoryContent`` (category listing)
    4. ``ac`` + ``ids`` -> ``cms.detailContent`` (detail page)
    5. ``wd``        -> search; rule ``drpy`` uses the aggregated ``multi_search``
    6. otherwise     -> ``cms.homeContent``

    Returns:
        A Flask response; ``R.failed(...)`` when the rule is missing, unknown
        or cannot be loaded, or when ``play`` lacks its ``play_url=`` marker.
    """
    t0 = time()
    rule = getParmas('rule')
    ac = getParmas('ac')
    ids = getParmas('ids')
    if ac and ids and ids.find('#') > -1:  # detail request coming from aggregated search
        first_id = ids.split(',')[0]
        # Only take the rule from the first id when that id really carries a
        # '#'; the '#' may sit in a later id, which used to raise IndexError.
        if '#' in first_id:
            rule = first_id.split('#')[1]

    ext = getParmas('ext')
    filters = getParmas('f')
    if not rule:
        return R.failed('规则字段必填')
    rule_list = getRuleLists()
    if not ext and rule not in rule_list:
        msg = f'服务端本地仅支持以下规则:{",".join(rule_list)}'
        return R.failed(msg)
    # NOTE(review): when `ext` is given it is used verbatim as the path of the
    # file that is opened and evaluated as JavaScript below, and the rule name
    # is not validated. `ext` presumably comes straight from the request —
    # confirm this route is not reachable by untrusted clients or restrict
    # the accepted paths.
    js_path = f'js/{rule}.js' if not ext else ext
    with open('js/模板.js', encoding='utf-8') as f:
        before = f.read().split('export')[0]
    end_code = """\nif (rule.模板 && muban.hasOwnProperty(rule.模板)) {rule = Object.assign(muban[rule.模板], rule);}"""
    logger.info(f'参数检验js读取共计耗时:{get_interval(t0)}毫秒')
    t2 = time()

    ctx = Context()
    try:
        with open(js_path, encoding='utf-8') as f2:
            jscode = f2.read()
        jscode = before + jscode + end_code
        ctx.eval(jscode)
        js_ret = ctx.get('rule')
        ruleDict = ujson.loads(js_ret.json())
    except Exception as e:
        logger.info(f'{e}')
        return R.failed('爬虫规则加载失败')

    # Store the requested rule id in the dict; playback sniffing needs it later.
    ruleDict['id'] = rule
    logger.info(f'js装载耗时:{get_interval(t2)}毫秒')
    cms = CMS(ruleDict, db, RuleClass, PlayParse, cfg, ext)
    wd = getParmas('wd')
    play = getParmas('play')  # sent along when playing an item of type 4
    flag = getParmas('flag')  # sent along when playing an item of type 4
    t = getParmas('t')
    pg = getParmas('pg', '1')
    try:
        pg = int(pg)
    except (TypeError, ValueError):
        # A malformed page number falls back to the default first page instead
        # of failing the whole request.
        logger.info(f'pg参数无效:{pg},按第1页处理')
        pg = 1
    play_url = getParmas('play_url')

    if play:
        parts = play.split('play_url=')
        if len(parts) < 2:
            # Without the marker there is nothing to play.
            return R.failed('play参数缺少play_url')
        jxs = getJxs()
        play_url = cms.playContent(parts[1], jxs, flag)
        if isinstance(play_url, str):
            return jsonify({'parse': 0, 'playUrl': '', 'jx': 0, 'url': play_url})
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if play_url:  # playback
        jxs = getJxs()
        play_url = cms.playContent(play_url, jxs)
        if isinstance(play_url, str):
            return redirect(play_url)
        elif isinstance(play_url, dict):
            return jsonify(play_url)
        else:
            return play_url

    if ac and t:  # category listing
        fl = {}
        if filters and filters.find('{') > -1 and filters.find('}') > -1:
            try:
                fl = json.loads(filters)
            except ValueError:
                # Malformed filter JSON is ignored rather than failing the request.
                logger.info(f'筛选参数解析失败:{filters}')
                fl = {}
        data = cms.categoryContent(t, pg, fl)
        return jsonify(data)
    if ac and ids:  # detail page
        id_list = ids.split(',')
        show_name = False
        if ids.find('#') > -1:
            # Ids from aggregated search look like "<id>#<rule>"; keep only the id part.
            id_list = [item.split('#')[0] for item in id_list]
            show_name = True
        data = cms.detailContent(pg, id_list, show_name)
        return jsonify(data)
    if wd:  # search
        if rule == 'drpy':
            # The 'drpy' rule is handled by the aggregated search.
            return multi_search(wd)
        else:
            data = cms.searchContent(wd)
            return jsonify(data)
    home_data = cms.homeContent(pg)
    return jsonify(home_data)