From 3809284778e1a5f87e497b42c23174e0175ec57e Mon Sep 17 00:00:00 2001 From: hjdhnx Date: Wed, 31 Aug 2022 01:28:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E9=99=A4ddddocr=E4=BE=9D=E8=B5=96,?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9C=A8=E7=BA=BF=E9=AA=8C=E8=AF=81=E7=A0=81?= =?UTF-8?q?=E6=8E=A5=E5=8F=A3=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- classes/cms.py | 3 +- config.py | 1 + py/flaskOcrDz.py | 148 +++++++++++++++++++++++++++++++++++++++++++++++ utils/encode.py | 20 +++++-- 4 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 py/flaskOcrDz.py diff --git a/classes/cms.py b/classes/cms.py index e25bc2a..a17db11 100644 --- a/classes/cms.py +++ b/classes/cms.py @@ -38,6 +38,7 @@ class CMS: self.play_disable = new_conf.get('PLAY_DISABLE',False) self.retry_count = new_conf.get('RETRY_CNT',3) self.lazy_mode = new_conf.get('LAZYPARSE_MODE') + self.ocr_api = new_conf.get('OCR_API') try: self.vod = redirect(url_for('vod')).headers['Location'] except: @@ -679,7 +680,7 @@ class CMS: html = r.text # print(html) if html.find('输入验证码') > -1: - cookie = verifyCode(url,self.headers,self.timeout,self.retry_count) + cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api) # cookie = '' if not cookie: return { diff --git a/config.py b/config.py index 9b7263d..25388e6 100644 --- a/config.py +++ b/config.py @@ -25,4 +25,5 @@ WALL_PAPER_ENABLE = True # 启用自定义壁纸 WALL_PAPER = "https://picsum.photos/1280/720/?blur=10" # 自定义壁纸,可注释 SUP_PORT = 9001 # supervisord 服务端口 RETRY_CNT = 3 # 验证码重试次数 +OCR_API = 'http://192.168.3.224:9000/api/ocr_img' # 验证码识别接口,传参数data # {% if config.WALL_PAPER %}"wallpaper":"{{ config.WALL_PAPER }}",{% endif %} \ No newline at end of file diff --git a/py/flaskOcrDz.py b/py/flaskOcrDz.py new file mode 100644 index 0000000..abf08f2 --- /dev/null +++ b/py/flaskOcrDz.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# File : flaskOcrDz.py +# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------ +# Date : 2021/11/1 +import json + +from flask import Flask, jsonify, request,Response +import requests +import ddddocr +ocr = ddddocr.DdddOcr() + +app = Flask(__name__) +app.config["JSON_AS_ASCII"] = False # jsonify返回的中文正常显示 + +@app.route("/",methods=['GET']) +def index(): + return '欢迎使用简单验证码文字识别,海阔视界道长专用' + +def hexStringTobytes(str): + str = str.replace(" ", "") + print(str) + return bytes.fromhex(str) + +def bytesToHexString(bs): + return ''.join(['%02X ' % b for b in bs]) + +class LocalOcr: + def __init__(self,file=None,url='http://127.0.0.1:10000'): + self.file = file + self.url = url + self.yzm = self.yzm_ocr(0) + + def read(self): + return self.yzm + + def yzm_ocr(self,count=0): + try: + r = requests.post(self.url, data=self.file,timeout=(0.5,2)) + yzm = r.text + return yzm + except: + if count < 3: + count += 1 + return self.yzm_ocr(count) + else: + return "" + +def ocr2(hex): + if type(hex) == list: + hex = ''.join(hex) + img_bytes = hexStringTobytes(hex) + # print(img_bytes) + # with open('1.png','wb+') as f: + # f.write(img_bytes) + + dm_url = 'http://dm.mudery.com:10000' + im_ocr = LocalOcr(img_bytes, dm_url) + yzm = im_ocr.read() + ret = {'msg': 'ok','ret':yzm,'code':0,'detail':'验证码识别成功'} + print(ret) + return jsonify(ret) + +def docr(hex): + if type(hex) == list: + hex = ''.join(hex) + img_bytes = hexStringTobytes(hex) + try: + img_str = img_bytes.decode("latin1") + if img_str.find('html') > -1: + ret = {'msg': '拜托,我收到你传过来的数据是个网页而不是图片,麻烦解密后把图片的hex给我', 'ret': img_str[:500], 'code': -1, 'detail': '图片识别失败'} + return jsonify(ret) + else: + res = ocr.classification(img_bytes) + ret = {'msg': 'ok', 'ret': res, 'code': 0, 'detail': '图片识别成功'} + # print(ret) + return jsonify(ret) + except Exception as e: + # print(f'{e}') + ret = {'msg': 'error', 'ret': f'{e}', 'code': -2, 'detail': '发生了意外的错误'} + return ret + +@app.route("/api/ocr",methods=['GET', 'POST']) +def ocr_fast(): + args = {} + try: + ctp = request.content_type + if request.method == 'POST': + if ctp.find('application/json') > -1: + try: + args = request.json + except Exception as e: + # args = request.get_data(as_text=True) + args = {} + else: + args = request.form + elif request.method == 'GET': + args = request.args + if not args.get('hex'): + return '缺少必传参数:hex!' + except Exception as e: + return jsonify({'msg':'非法调用','code':'-1'}) + # print(args.get('hex')) + # return ocr2(args.get('hex')) + return docr(args.get('hex')) + +@app.route("/api/hex2img",methods=['GET']) +def ocr_hex2img(): + try: + args = request.args + if not args.get('hex'): + return '缺少必传参数:hex!' + except Exception as e: + return jsonify({'msg':'非法调用','code':'-1'}) + # print(args.get('hex')) + # return ocr2(args.get('hex')) + hex = args.get('hex') + if type(hex) == list: + hex = ''.join(hex) + img_bytes = hexStringTobytes(hex) + resp = Response(img_bytes, mimetype='image/jpeg') + return resp + +@app.route("/api/ocr_img",methods=['POST']) +def ocr_img_fast(): + # print(request.values) + # print(request.files) + # print(request.data) + try: + img_bytes = request.data + ret = ocr.classification(img_bytes) + return ret + # return jsonify({'ret':ret,'code':0,'msg':'识别完毕'}) + except Exception as e: + return '' + # return jsonify({'msg':'请求出错','code':-1,'detail':f'{e}'}) + +def test(): + pic = 'yzm1.png' + # pic = '2.png' + with open(pic, 'rb') as f: + img_bytes = f.read() + res = ocr.classification(img_bytes) + print(res) + +if __name__ == '__main__': + app.run(host="0.0.0.0", port=9000) + # test() \ No newline at end of file diff --git a/utils/encode.py b/utils/encode.py index 84d76fa..38f2fe6 100644 --- a/utils/encode.py +++ b/utils/encode.py @@ -10,7 +10,6 @@ import requests.utils from time import sleep import os from utils.web import UC_UA,PC_UA -# import ddddocr def getPreJs(): base_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) # 上级目 @@ -36,15 +35,28 @@ def getHome(url): homeUrl = urls[0] + '//' + urls[1].split('/')[0] return homeUrl -def verifyCode(url,headers,timeout=5,total_cnt=3): +class OcrApi: + def __init__(self,api): + self.api = api + + def classification(self,img): + try: + code = requests.post(self.api,data=img,headers={'user-agent':PC_UA}).text + except Exception as e: + print(f'ocr识别发生错误:{e}') + code = '' + return code + +def verifyCode(url,headers,timeout=5,total_cnt=3,api=None): + if not api: + api = 'http://192.168.3.224:9000/api/ocr_img' lower_keys = list(map(lambda x: x.lower(), headers.keys())) host = getHome(url) if not 'referer' in lower_keys: headers['Referer'] = host print(f'开始自动过验证,请求头:{headers}') cnt = 0 - import ddddocr - ocr = ddddocr.DdddOcr() + ocr = OcrApi(api) while cnt < total_cnt: s = requests.session() try: -- GitLab