去除ddddocr依赖,增加在线验证码接口配置

38092847 · hjdhnx · 21d52490 · 38092847 · 38092847 · 38092847
隐藏空白更改
内联并排

Showing 4 changed files with 167 additions and 5 deletions

classes/cms.py classes/cms.py +2 -1

config.py config.py +1 -0

py/flaskOcrDz.py py/flaskOcrDz.py +148 -0

utils/encode.py utils/encode.py +16 -4

未找到文件。
--- a/classes/cms.py
+++ b/classes/cms.py
@@ -38,6 +38,7 @@ class CMS:
        self.play_disable = new_conf.get('PLAY_DISABLE',False)
        self.retry_count = new_conf.get('RETRY_CNT',3)
        self.lazy_mode = new_conf.get('LAZYPARSE_MODE')
+        self.ocr_api = new_conf.get('OCR_API')
        try:
            self.vod = redirect(url_for('vod')).headers['Location']
        except:
@@ -679,7 +680,7 @@ class CMS:
            html = r.text
            # print(html)
            if html.find('输入验证码') > -1:
-                cookie = verifyCode(url,self.headers,self.timeout,self.retry_count)
+                cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api)
                # cookie = ''
                if not cookie:
                    return {

--- a/config.py
+++ b/config.py
@@ -25,4 +25,5 @@ WALL_PAPER_ENABLE = True  # 启用自定义壁纸
 WALL_PAPER = "https://picsum.photos/1280/720/?blur=10"  # 自定义壁纸,可注释
 SUP_PORT = 9001  # supervisord 服务端口
 RETRY_CNT = 3 # 验证码重试次数
+OCR_API = 'http://192.168.3.224:9000/api/ocr_img' # 验证码识别接口,传参数data
 # {% if config.WALL_PAPER %}"wallpaper":"{{ config.WALL_PAPER }}",{% endif %}
\ No newline at end of file
--- a/py/flaskOcrDz.py
+++ b/py/flaskOcrDz.py
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# File  : flaskOcrDz.py
+# Author: DaShenHan&道长-----先苦后甜，任凭晚风拂柳颜------
+# Date  : 2021/11/1
+import json
+
+from flask import Flask, jsonify, request,Response
+import requests
+import ddddocr
+# Module-level ddddocr recognizer, created once at import time and used by
+# docr(), ocr_img_fast() and test() below.
+ocr = ddddocr.DdddOcr()
+
+app = Flask(__name__)
+app.config["JSON_AS_ASCII"] = False  # let jsonify return Chinese text readably instead of ASCII-escaped
+
+@app.route("/",methods=['GET'])
+def index():
+    """Landing page: return a fixed plain-text welcome banner."""
+    banner = '欢迎使用简单验证码文字识别,海阔视界道长专用'
+    return banner
+
+def hexStringTobytes(str):
+    """Decode a hexadecimal string into raw bytes.
+
+    All whitespace between digits (spaces, tabs, newlines) is removed before
+    decoding, so line-wrapped or space-separated hex dumps are accepted.
+
+    :param str: hex text such as ``"FF D8 FF"``. The name shadows the builtin
+        inside this function but is kept so existing callers are unaffected.
+    :return: the decoded ``bytes``
+    :raises ValueError: if the remaining text contains non-hex characters or
+        has an odd number of digits
+    """
+    # split() without arguments cuts on any whitespace run; the previous code
+    # removed only ' ' and also printed the whole payload on every call.
+    compact = ''.join(str.split())
+    return bytes.fromhex(compact)
+
+def bytesToHexString(bs):
+    """Render every byte of ``bs`` as two uppercase hex digits followed by a space.
+
+    Because each byte contributes its own trailing space, a non-empty result
+    always ends with a space.
+    """
+    pieces = []
+    for byte in bs:
+        pieces.append(format(byte, '02X') + ' ')
+    return ''.join(pieces)
+
+class LocalOcr:
+    """Send a payload to an HTTP OCR service and keep the text it answers with.
+
+    The request is issued immediately in ``__init__``; ``read()`` only returns
+    the stored result.
+    """
+
+    def __init__(self,file=None,url='http://127.0.0.1:10000'):
+        """
+        :param file: request body to POST (``ocr2`` passes decoded image bytes)
+        :param url: endpoint of the OCR service
+        """
+        self.file = file
+        self.url = url
+        self.yzm = self.yzm_ocr(0)
+
+    def read(self):
+        """Return the text obtained at construction time (``''`` if every attempt failed)."""
+        return self.yzm
+
+    def yzm_ocr(self,count=0):
+        """POST ``self.file`` to ``self.url`` and return the response text.
+
+        :param count: attempts regarded as already used; retries continue until
+            the counter reaches 3, so ``count=0`` allows up to 4 requests
+        :return: the response body text, or ``""`` when every attempt fails
+        """
+        # Bounded loop instead of recursion; at least one attempt is always made.
+        for _ in range(max(1, 4 - count)):
+            try:
+                # timeout is (connect, read) in seconds
+                return requests.post(self.url, data=self.file,timeout=(0.5,2)).text
+            except requests.RequestException:
+                # Retry only on network/HTTP failures. The former bare ``except``
+                # also swallowed KeyboardInterrupt and SystemExit.
+                continue
+        return ""
+
+def ocr2(hex):
+    """Recognise a captcha by forwarding it to the OCR service at dm.mudery.com.
+
+    :param hex: hex-encoded image, either one string or a list of string pieces
+    :return: Flask JSON response with keys ``msg``, ``ret`` (the service's
+        answer), ``code`` and ``detail``
+    """
+    hex_text = ''.join(hex) if type(hex) is list else hex
+    image = hexStringTobytes(hex_text)
+    remote = LocalOcr(image, 'http://dm.mudery.com:10000')
+    payload = {'msg': 'ok','ret':remote.read(),'code':0,'detail':'验证码识别成功'}
+    print(payload)
+    return jsonify(payload)
+
+def docr(hex):
+    """Recognise a hex-encoded captcha image with the module-level ddddocr engine.
+
+    :param hex: hex-encoded image, either one string or a list of string pieces
+    :return: Flask JSON response; ``code`` is 0 on success, -1 when the payload
+        contains ``html`` (a web page was sent instead of an image), and -2 for
+        any other failure, including malformed hex
+    """
+    if isinstance(hex, list):
+        hex = ''.join(hex)
+    try:
+        # Decode inside the try so malformed hex produces the -2 JSON error
+        # rather than an unhandled exception.
+        img_bytes = hexStringTobytes(hex)
+        # latin1 maps every byte value to a character, so this decode cannot fail.
+        img_str = img_bytes.decode("latin1")
+        if img_str.find('html') > -1:
+            ret = {'msg': '拜托,我收到你传过来的数据是个网页而不是图片,麻烦解密后把图片的hex给我', 'ret': img_str[:500], 'code': -1, 'detail': '图片识别失败'}
+            return jsonify(ret)
+        res = ocr.classification(img_bytes)
+        ret = {'msg': 'ok', 'ret': res, 'code': 0, 'detail': '图片识别成功'}
+        return jsonify(ret)
+    except Exception as e:
+        ret = {'msg': 'error', 'ret': f'{e}', 'code': -2, 'detail': '发生了意外的错误'}
+        # Wrapped with jsonify like the other branches so every path returns
+        # the same kind of response (the original returned the bare dict here).
+        return jsonify(ret)
+
+@app.route("/api/ocr",methods=['GET', 'POST'])
+def ocr_fast():
+    """Recognise a captcha supplied as the ``hex`` parameter.
+
+    GET reads the query string. POST reads a JSON body when the content type
+    contains ``application/json`` and form fields otherwise.
+
+    :return: the response built by ``docr``; a plain-text notice when ``hex``
+        is missing; or the "illegal call" JSON when the parameters cannot be read
+    """
+    args = {}
+    try:
+        if request.method == 'POST':
+            # content_type is None when the client sends no Content-Type header;
+            # treat that as "not JSON" instead of failing on None.find().
+            ctp = request.content_type or ''
+            if 'application/json' in ctp:
+                # silent=True yields None for an unparsable body instead of raising.
+                args = request.get_json(silent=True) or {}
+            else:
+                args = request.form
+        elif request.method == 'GET':
+            args = request.args
+        if not args.get('hex'):
+            return '缺少必传参数:hex!'
+    except Exception:
+        # e.g. a JSON body that is not an object and therefore has no .get()
+        return jsonify({'msg':'非法调用','code':'-1'})
+    return docr(args.get('hex'))
+
+@app.route("/api/hex2img",methods=['GET'])
+def ocr_hex2img():
+    """Decode the ``hex`` query parameter and serve the bytes as an image.
+
+    :return: an ``image/jpeg`` response carrying the decoded bytes; a plain-text
+        notice when ``hex`` is missing; or the "illegal call" JSON when the
+        parameters cannot be read or ``hex`` is not valid hexadecimal
+    """
+    try:
+        args = request.args
+        if not args.get('hex'):
+            return '缺少必传参数:hex!'
+    except Exception:
+        return jsonify({'msg':'非法调用','code':'-1'})
+    hex = args.get('hex')
+    if isinstance(hex, list):
+        hex = ''.join(hex)
+    try:
+        img_bytes = hexStringTobytes(hex)
+    except ValueError:
+        # Non-hex characters or an odd digit count: answer with the endpoint's
+        # existing JSON error instead of letting the request end in HTTP 500.
+        return jsonify({'msg':'非法调用','code':'-1'})
+    return Response(img_bytes, mimetype='image/jpeg')
+
+@app.route("/api/ocr_img",methods=['POST'])
+def ocr_img_fast():
+    """Recognise the image sent as the raw POST body.
+
+    :return: the recognised text as a plain string, or ``''`` when recognition fails
+    """
+    try:
+        return ocr.classification(request.data)
+    except Exception as e:
+        # The response stays an empty body, but the failure is now reported on
+        # stdout instead of being discarded silently.
+        print(f'ocr识别发生错误:{e}')
+        return ''
+
+def test():
+    """Manual smoke test: recognise the local file ``yzm1.png`` and print the result."""
+    sample = 'yzm1.png'
+    with open(sample, 'rb') as fh:
+        raw = fh.read()
+    print(ocr.classification(raw))
+
+if __name__ == '__main__':
+    # Start the Flask development server on all network interfaces, port 9000.
+    app.run(host="0.0.0.0", port=9000)
+    # test()
\ No newline at end of file
--- a/utils/encode.py
+++ b/utils/encode.py
@@ -10,7 +10,6 @@ import requests.utils
 from time import sleep
 import os
 from utils.web import UC_UA,PC_UA
-# import ddddocr

 def getPreJs():
    base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))  # 上级目
@@ -36,15 +35,28 @@ def getHome(url):
    homeUrl = urls[0] + '//' + urls[1].split('/')[0]
    return homeUrl

-def verifyCode(url,headers,timeout=5,total_cnt=3):
+class OcrApi:
+    """Thin client for an HTTP captcha-recognition endpoint.
+
+    Provides ``classification(img)`` so ``verifyCode`` can use it in place of
+    the ddddocr object it created before.
+    """
+
+    def __init__(self,api,timeout=10):
+        """
+        :param api: URL of the OCR endpoint; the payload is POSTed as the request body
+        :param timeout: seconds to wait for the endpoint (optional, added so an
+            unreachable OCR server cannot block the caller indefinitely)
+        """
+        self.api = api
+        self.timeout = timeout
+
+    def classification(self,img):
+        """POST ``img`` to the endpoint and return the response text.
+
+        :param img: request body; presumably the captcha image bytes — confirm
+            against the caller
+        :return: the text returned by the endpoint, or ``''`` if the request fails
+        """
+        try:
+            code = requests.post(self.api,data=img,headers={'user-agent':PC_UA},timeout=self.timeout).text
+        except Exception as e:
+            print(f'ocr识别发生错误:{e}')
+            code = ''
+        return code
+
+def verifyCode(url,headers,timeout=5,total_cnt=3,api=None):
+    if not api:
+        api = 'http://192.168.3.224:9000/api/ocr_img'
    lower_keys = list(map(lambda x: x.lower(), headers.keys()))
    host = getHome(url)
    if not 'referer' in lower_keys:
        headers['Referer'] = host
    print(f'开始自动过验证,请求头:{headers}')
    cnt = 0
-    import ddddocr
-    ocr = ddddocr.DdddOcr()
+    ocr = OcrApi(api)
    while cnt < total_cnt:
        s = requests.session()
        try: