调试真不卡

69e781d1 · hjdhnx · cd9b48db · 69e781d1 · 69e781d1 · 69e781d1
隐藏空白更改
内联并排

Showing with 92 additions and 26 deletions

base/rules.db base/rules.db +0 -0

controllers/parse.py controllers/parse.py +20 -1

txt/pluto/drpy.js txt/pluto/drpy.js +72 -25

未找到文件。
--- a/base/rules.db
+++ b/base/rules.db
--- a/controllers/parse.py
+++ b/controllers/parse.py
@@ -7,7 +7,9 @@ from flask import Blueprint, jsonify,redirect
 from utils.web import getParmas,get_interval
 import os
 from utils.log import logger
+from utils.encode import OcrApi
 from utils.pyctx import py_ctx,getPreJs,runJScode,JsObjectWrapper,PyJsString,parseText,jsoup,time
+import base64

 parse = Blueprint("parse", __name__)

@@ -89,4 +91,21 @@ def parse_home(filename):
    except Exception as e:
        msg = f'{filename}解析出错:{e}'
        logger.info(msg)
-        return R.failed(msg,extra={'time':f'{get_interval(t1)}毫秒','from':url})
\ No newline at end of file
+        return R.failed(msg,extra={'time':f'{get_interval(t1)}毫秒','from':url})
+
+@parse.route('/ocr',methods=['POST'])
+def base64_ocr():
+    # print('params:',getParmas())
+    img = getParmas('img')
+    # print(img)
+    img_bytes = base64.b64decode(img)
+    # print(img_bytes)
+    img_path = 'txt/pluto'
+    os.makedirs(img_path,exist_ok=True)
+    with open(f'{img_path}/yzm.png','wb+') as f:
+        f.write(img_bytes)
+    ocr = OcrApi('http://dm.mudery.com:10000')
+    code = ocr.classification(img_bytes)
+    resp = R.success('识别成功',code)
+    print(resp.json)
+    return resp
\ No newline at end of file
--- a/txt/pluto/drpy.js
+++ b/txt/pluto/drpy.js
@@ -8,13 +8,15 @@ import ch from './cheerio.min.js';
 const key = 'drpy_zbk';

 function init_test(){
+    console.log("init_test_start");
+    // clearItem(RULE_CK);
    console.log(JSON.stringify(rule));
    // console.log(request('https://www.baidu.com',{withHeaders:true}));
-    console.log(request('https://www.baidu.com/favicon.ico',{toBase64:true}));
-    console.log("init_test");
-    require('http://192.168.10.99:5705/txt/pluto/drT.js');
-    console.log(typeof(drT));
-    console.log(drT.renderText('{{fl.cate}},hi, {{fl}}哈哈.{{fl}}',{sort: 1,cate:'movie'},'fl'));
+    // console.log(request('https://www.baidu.com/favicon.ico',{toBase64:true}));
+    // require('http://192.168.10.99:5705/txt/pluto/drT.js');
+    // console.log(typeof(drT));
+    // console.log(drT.renderText('{{fl.cate}},hi, {{fl}}哈哈.{{fl}}',{sort: 1,cate:'movie'},'fl'));
+    console.log("init_test_end");
 }

 let rule = {
@@ -71,8 +73,9 @@ const RULE_CK = 'cookie'; // 源cookie的key值
 const KEY = typeof(key)!=='undefined'&&key?key:'drpy_'+rule.title; // 源的唯一标识
 const CATE_EXCLUDE = '首页|留言|APP|下载|资讯|新闻|动态';
 const TAB_EXCLUDE = '猜你|喜欢|APP|下载|剧情|热播';
-const OCR_RETRY = 3;//ocr验证重试次数
-const OCR_API = 'http://dm.mudery.com:10000';//ocr在线识别接口
+const OCR_RETRY = 1;//ocr验证重试次数
+// const OCR_API = 'http://dm.mudery.com:10000';//ocr在线识别接口
+const OCR_API = 'http://192.168.3.239:5705/parse/ocr';//ocr在线识别接口
 var MY_URL; // 全局注入变量,pd函数需要

 /** 处理一下 rule规则关键字段没传递的情况 **/
@@ -115,7 +118,9 @@ var OcrApi={
    classification:function (img){ // img是byte类型,这里不方便搞啊
        let code = '';
        try {
-            code = request(this.api,{data:img,headers:{'user-agent':PC_UA},'method':'POST'});
+            let html = request(this.api,{data:{img:img},headers:{'User-Agent':PC_UA},'method':'POST'});
+            html = JSON.parse(html);
+            code = html.url||'';
        }catch (e) {}
        return code
    }
@@ -132,17 +137,35 @@ function verifyCode(url){
    while (cnt < OCR_RETRY){
        try{
            // let obj = {headers:headers,timeout:timeout};
-            let img = request(`${host}/index.php/verify/index.html`);
+            let yzm_url = `${host}/index.php/verify/index.html`;
+            console.log(`验证码链接:${yzm_url}`);
+            let hhtml = request(yzm_url,{withHeaders:true,toBase64:true});
+            let json = JSON.parse(hhtml);
+            if(!cookie){
+                cookie = json['set-cookie']?json['set-cookie'].split(';')[0]:'';
+            }
+            // console.log(hhtml);
+            console.log('cookie:'+cookie);
+            let img = json.body;
+            // console.log(img);
            let code = OcrApi.classification(img);
            console.log(`第${cnt+1}次验证码识别结果:${code}`);
-            let html = request(`${host}/index.php/ajax/verify_check?type=search&verify=${code}`,{'method':'POST'});
+            let submit_url = `${host}/index.php/ajax/verify_check?type=search&verify=${code}`;
+            console.log(submit_url);
+            let html = request(submit_url,{headers:{Cookie:cookie,'User-Agent':MOBILE_UA},'method':'POST'});
+            console.log(html);
            html = JSON.parse(html);
            if(html.msg === 'ok'){
-                cookie = '';
+                console.log(`第${cnt+1}次验证码提交成功`);
                return cookie // 需要返回cookie
+            }else if(html.msg!=='ok'&&cnt+1>=OCR_RETRY){
+                cookie = ''; // 需要清空返回cookie
            }
        }catch (e) {
-            console.log(`第${cnt+1}次验证码提交失败`)
+            console.log(`第${cnt+1}次验证码提交失败:${e.message}`);
+            if(cnt+1>=OCR_RETRY){
+                cookie = '';
+            }
        }
        cnt+=1
    }
@@ -459,13 +482,13 @@ function homeVodParse(homeVodObj){
        console.log('double:'+homeVodObj.double);
        if(homeVodObj.double){
            p[0] = p[0].trim().startsWith('json:')?p[0].replace('json:',''):p[0];
-            console.log(p[0]);
+            // console.log(p[0]);
            let items = pdfa(html, p[0]);
-            console.log(items.length);
+            // console.log(items.length);
            for(let item of items){
-                console.log(p[1]);
+                // console.log(p[1]);
                let items2 = pdfa(item,p[1]);
-                console.log(items2.length);
+                // console.log(items2.length);
                for(let item2 of items2){
                    try {
                        let title = pdfh(item2, p[2]);
@@ -552,7 +575,32 @@ function categoryParse(cateObj) {
        return '{}'
    }
    let d = [];
-    let url = cateObj.url.replaceAll('fyclass', cateObj.tid).replaceAll('fypage', cateObj.pg);
+    // let url = cateObj.url.replaceAll('fyclass', cateObj.tid).replaceAll('fypage', cateObj.pg);
+    let url = cateObj.url.replaceAll('fyclass', cateObj.tid);
+    if(rule.filter_url){
+        if(!/fyfilter/.test(url)){
+            if(!url.endsWith('&')&&!rule.filter_url.startsWith('&')){
+                url+='&'
+            }
+            url+=rule.filter_url;
+        }else{
+            url = url.replace('fyfilter', rule.filter_url);
+        }
+        url = drT.renderText(url,cateObj.filter);
+    }
+    if(/fypage/.test(url)){
+        if(url.includes('(')&&url.includes(')')){
+            let url_rep = url.match(/.*?\((.*)\)/)[1];
+            let cnt_page = url_rep.replaceAll('fypage', cateObj.pg);
+            eval(`let cnt_pg=${cnt_page}`);
+            url = url.replaceAll(url_rep,cnt_pg).replaceAll('(','').replaceAll(')','');
+        }else{
+            url = url.replaceAll('fypage',cateObj.pg);
+        }
+    }
+    if(cateObj.pg === 1 && url.includes('[')&&url.includes(']')){
+        url = url.split('[')[1].split(']')[0];
+    }
    MY_URL = url;
    // setItem('MY_URL',MY_URL);
    console.log(MY_URL);
@@ -750,20 +798,19 @@ function detailParse(detailObj){
                let p1 = p.lists.replaceAll('#idv', tab_name).replaceAll('#id', i);
                tab_ext = tab_ext.replaceAll('#idv', tab_name).replaceAll('#id', i);
                console.log(p1);
-                console.log(645);
-                console.log(html);
+                // console.log(html);
                let vodList = [];
                try {
-                    vodList =  pdfa(html, p1)
+                    vodList =  pdfa(html, p1);
+                    console.log('len(vodList):'+vodList.length);
                }catch (e) {
-                    console.log(e.message)
+                    // console.log(e.message);
                }
-                console.log(647);
-                console.log('len(vodList):'+vodList.length);
                let new_vod_list = [];
                let tabName = tab_ext?pdfh(html, tab_ext):tab_name;
+                console.log(tabName);
                vodList.forEach(it=>{
-                    new_vod_list.push(tabName+'$'+pD(it,'a&&href',MY_URL));
+                    new_vod_list.push(pdfh(it,'body&&Text')+'$'+pD(it,'a&&href',MY_URL));
                });
                let vlist = new_vod_list.join('#');
                vod_tab_list.push(vlist);
@@ -871,7 +918,7 @@ function category(tid, pg, filter, extend) {
        url: urljoin(rule.host, rule.url),
        一级: rule.一级,
        tid: tid,
-        pg: pg,
+        pg: parseInt(pg),
        filter: filter,
        extend: extend
    };