parse.py 4.9 KB
Newer Older
H
hjdhnx 已提交
1 2 3 4 5
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : parse.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Date  : 2022/9/24
H
hjdhnx 已提交
6
from flask import Blueprint, jsonify,redirect,make_response
H
hjdhnx 已提交
7 8
from utils.web import getParmas,get_interval
import os
H
hjdhnx 已提交
9
from utils.cfg import cfg
H
hjdhnx 已提交
10
from utils.log import logger
H
hjdhnx 已提交
11
from utils.encode import OcrApi,base64ToImage
H
hjdhnx 已提交
12
from controllers.service import storage_service
H
hjdhnx 已提交
13
from utils.pyctx import py_ctx,getPreJs,runJScode,JsObjectWrapper,PyJsString,parseText,jsoup,time
H
hjdhnx 已提交
14
from utils.env import get_env
H
hjdhnx 已提交
15
import base64
H
hjdhnx 已提交
16 17 18 19 20 21 22 23 24

# Flask blueprint named "parse"; the route handlers defined below in this
# module register themselves on it via @parse.route.
# NOTE(review): the URL prefix it is mounted under is set wherever the
# blueprint is registered, which is not visible in this file.
parse = Blueprint("parse", __name__)

class R(object):
    """Factory for the JSON response envelopes returned by this module's routes.

    Every method returns the result of ``jsonify``. The ``code`` value is
    carried inside the JSON body; none of these helpers sets an HTTP status
    on the response.
    """

    @classmethod
    def ok(cls, msg='操作成功', url=None, extra=None):
        """Build a success envelope.

        Args:
            msg: Human-readable message placed under ``"msg"``.
            url: Value placed under ``"url"``.
            extra: Optional mapping merged into the envelope last, so its
                keys override the defaults (including ``"code"``).

        Returns:
            The ``jsonify`` response for a dict with ``code`` 200, ``msg``,
            ``url`` and a fixed ``header`` carrying a user-agent string.
        """
        result = {"code": 200, "msg": msg, "url": url, "header": {"user-agent": "Mozilla/5.0"}}
        if extra is not None:
            result.update(extra)
        return jsonify(result)

    @classmethod
    def error(cls, msg="系统异常", code=404, extra=None):
        """Build a failure envelope.

        Args:
            msg: Human-readable message placed under ``"msg"``.
            code: Value placed under ``"code"`` in the JSON body.
            extra: Optional mapping merged into the envelope last.

        Returns:
            The ``jsonify`` response for the resulting dict.
        """
        result = {"code": code, "msg": msg}
        if extra is not None:
            result.update(extra)
        return jsonify(result)

    @classmethod
    def success(cls, msg='操作成功', url=None, extra=None):
        """Alias of :meth:`ok`; arguments are passed through unchanged."""
        return cls.ok(msg, url, extra)

    @classmethod
    def failed(cls, msg="系统异常", code=404, extra=None):
        """Alias of :meth:`error`; arguments are passed through unchanged."""
        return cls.error(msg, code, extra)

H
hjdhnx 已提交
45 46 47 48 49 50 51 52
def 重定向(url:str):
    """Tag *url* as a redirect target when it looks like an http(s) address.

    A ``PyJsString`` argument is first converted with ``parseText``.

    Returns:
        ``redirect://<url>`` when the text form of the value starts with
        ``http``; otherwise the text form of the value itself.
    """
    target = parseText(str(url)) if isinstance(url, PyJsString) else url
    text = str(target)
    if not text.startswith('http'):
        return text
    return f'redirect://{target}'

H
hjdhnx 已提交
53 54 55 56 57
def toast(url:str):
    """Return *url* prefixed with the ``toast://`` marker.

    A ``PyJsString`` argument is first converted with ``parseText``; any
    other value is formatted as-is.
    """
    message = parseText(str(url)) if isinstance(url, PyJsString) else url
    return f'toast://{message}'

H
hjdhnx 已提交
58 59 60 61 62
def image(text:str):
    """Return *text* prefixed with the ``image://`` marker.

    A ``PyJsString`` argument is first converted with ``parseText``; any
    other value is formatted as-is.
    """
    payload = parseText(str(text)) if isinstance(text, PyJsString) else text
    return f'image://{payload}'

H
hjdhnx 已提交
63 64 65 66
@parse.route('/api/<path:filename>')
def parse_home(filename):
    """Run the parser script ``jiexi/<filename>`` against the ``url`` request parameter.

    The script text is prefixed with ``getPreJs()`` and executed through
    ``runJScode`` with a context exposing the target URL and a few helper
    callables. The script's result is read from a variable named ``realUrl``
    and dispatched on its prefix:

    * ``redirect://<target>`` -> HTTP redirect to ``<target>``
    * ``toast://<text>``      -> failure JSON with ``<text>`` as the message
    * ``image://<data>``      -> ``base64ToImage(<data>)`` served as image/jpeg
    * anything else           -> success JSON with the value as ``url``

    Args:
        filename: Script path relative to the ``jiexi`` directory, taken
            from the request URL.

    Returns:
        A Flask response: a redirect, an image response, or one of the
        ``R`` JSON envelopes. Any exception raised while running the script
        is logged and reported as a failure envelope.
    """
    url = getParmas('url')
    # Example requests:
    # http://localhost:5705/parse/api/%E6%97%A0%E5%90%8D.js?url=https://www.iqiyi.com/v_ik3832z0go.html
    # http://localhost:5705/parse/api/哔哩.js?url=https://www.bilibili.com/bangumi/play/ep704873
    if not url or not url.startswith('http'):
        return R.failed(f'url必填!{url},且必须是http开头')
    base_path = 'jiexi'
    os.makedirs(base_path, exist_ok=True)
    file_path = os.path.join(base_path, filename)
    # filename is taken from the request path, so refuse anything that
    # resolves outside base_path (e.g. via ".." segments); otherwise an
    # arbitrary file could be read and executed as a script. Directories are
    # rejected too, since open() below would raise on them.
    base_dir = os.path.abspath(base_path)
    inside_base = os.path.abspath(file_path).startswith(base_dir + os.sep)
    if not inside_base or not os.path.isfile(file_path):
        return R.failed(f'{file_path}文件不存在')
    logger.info(f'开始尝试通过{filename}解析:{url}')

    jsp = jsoup(url)
    env = get_env()
    # NOTE(review): py_ctx is imported from utils.pyctx and is updated in
    # place here, so these per-request values land on a module-level object.
    # Confirm this is safe if requests can be served concurrently.
    py_ctx.update({
        'vipUrl': url,
        'fetch_params': {'headers': {'Referer': url}, 'timeout': 10, 'encoding': 'utf-8'},
        'jsp': jsp,
        '重定向': 重定向,
        'toast': toast,
        'env': env,
        'image': image,
        'print': print,
        'log': logger.info,
        'getParmas': getParmas,
        'params': getParmas(),
    })
    ctx = py_ctx
    with open(file_path, encoding='utf-8') as f:
        code = f.read()
    # Drops the first 'js:' occurrence in the script text (presumably a
    # leading marker -- verify against the script format).
    jscode = getPreJs() + code.strip().replace('js:', '', 1)
    t1 = time()
    try:
        loader, _ = runJScode(jscode, ctx=ctx)
        realUrl = loader.eval('realUrl')
        if not realUrl:
            return R.failed(f'解析失败:{realUrl}')
        if isinstance(realUrl, PyJsString):
            realUrl = parseText(str(realUrl))
        # A result equal to the input URL means the script resolved nothing.
        if not realUrl or realUrl == url:
            return R.failed('解析失败', extra={'from': realUrl})
        # Strip the marker by slicing rather than split(...)[1]: split would
        # truncate the payload if the marker text occurs again inside it.
        result = str(realUrl)
        if result.startswith('redirect://'):
            return redirect(result[len('redirect://'):])
        elif result.startswith('toast://'):
            return R.failed(result[len('toast://'):], extra={'from': url})
        elif result.startswith('image://'):
            img_data = base64ToImage(result[len('image://'):])
            response = make_response(img_data)
            response.headers['Content-Type'] = 'image/jpeg'
            return response
        return R.success(f'{filename}解析成功', realUrl, {'time': f'{get_interval(t1)}毫秒', 'from': url})
    except Exception as e:
        # Top-level boundary for script execution: report the failure to the
        # caller instead of letting it surface as a server error.
        msg = f'{filename}解析出错:{e}'
        logger.info(msg)
        return R.failed(msg, extra={'time': f'{get_interval(t1)}毫秒', 'from': url})

@parse.route('/ocr',methods=['POST'])
def base64_ocr():
    """Decode the base64 ``img`` request parameter and run it through the OCR service.

    The OCR endpoint is read from storage under ``OCR_API``, with
    ``cfg.OCR_API`` passed as the fallback argument. The decoded bytes are
    also written to ``txt/pluto/yzm.png`` before recognition.

    Returns:
        A failure envelope when ``img`` is missing or is not valid base64;
        otherwise a success envelope whose ``url`` field holds the value
        returned by ``OcrApi.classification``.
    """
    lsg = storage_service()
    ocr_api = lsg.getItem('OCR_API', cfg.OCR_API)
    img = getParmas('img')
    if not img:
        return R.failed('识别失败:缺少img参数')
    try:
        img_bytes = base64.b64decode(img)
    except (ValueError, TypeError):
        # binascii.Error (bad padding / invalid data) is a ValueError
        # subclass; TypeError covers a value that is not str or bytes-like.
        # A bare except here would also swallow KeyboardInterrupt/SystemExit.
        return R.failed('识别失败:img参数不是正确的base64格式')
    img_path = 'txt/pluto'
    os.makedirs(img_path, exist_ok=True)
    # Keeps a copy of the most recent image on disk; the file is only
    # written here, never read back, so plain 'wb' is sufficient.
    with open(f'{img_path}/yzm.png', 'wb') as f:
        f.write(img_bytes)
    ocr = OcrApi(ocr_api)
    code = ocr.classification(img_bytes)
    resp = R.success('识别成功', code)
    print(resp.json)
    return resp