提交 322efb3d 编写于 作者: L LDOUBLEV

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dygraph

...@@ -54,7 +54,10 @@ PPOCRLabel can be started in two ways: whl package and Python script. The whl pa ...@@ -54,7 +54,10 @@ PPOCRLabel can be started in two ways: whl package and Python script. The whl pa
```bash ```bash
pip install PPOCRLabel # install pip install PPOCRLabel # install
PPOCRLabel # run
# Select label mode and run
PPOCRLabel # [Normal mode] for [detection + recognition] labeling
PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
> If you get the error `OSError: [WinError 126] The specified module could not be found` when installing shapely on Windows, please try downloading the Shapely whl file from http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely. > If you get the error `OSError: [WinError 126] The specified module could not be found` when installing shapely on Windows, please try downloading the Shapely whl file from http://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely.
...@@ -67,13 +70,18 @@ PPOCRLabel # run ...@@ -67,13 +70,18 @@ PPOCRLabel # run
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install trash-cli pip3 install trash-cli
PPOCRLabel
# Select label mode and run
PPOCRLabel # [Normal mode] for [detection + recognition] labeling
PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
#### MacOS #### MacOS
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install opencv-contrib-python-headless==4.2.0.32 pip3 install opencv-contrib-python-headless==4.2.0.32
# Select label mode and run
PPOCRLabel # [Normal mode] for [detection + recognition] labeling PPOCRLabel # [Normal mode] for [detection + recognition] labeling
PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
...@@ -90,6 +98,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl ...@@ -90,6 +98,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl
```bash ```bash
cd ./PPOCRLabel # Switch to the PPOCRLabel directory cd ./PPOCRLabel # Switch to the PPOCRLabel directory
# Select label mode and run
python PPOCRLabel.py # [Normal mode] for [detection + recognition] labeling python PPOCRLabel.py # [Normal mode] for [detection + recognition] labeling
python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling
``` ```
......
...@@ -57,7 +57,10 @@ PPOCRLabel可通过whl包与Python脚本两种方式启动,whl包形式启动 ...@@ -57,7 +57,10 @@ PPOCRLabel可通过whl包与Python脚本两种方式启动,whl包形式启动
```bash ```bash
pip install PPOCRLabel # 安装 pip install PPOCRLabel # 安装
PPOCRLabel --lang ch # 运行
# 选择标签模式来启动
PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
> 注意:通过whl包安装PPOCRLabel会自动下载 `paddleocr` whl包,其中shapely依赖可能会出现 `[WinError 126] 找不到指定模块的问题。` 的错误,建议从[这里](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载并安装 > 注意:通过whl包安装PPOCRLabel会自动下载 `paddleocr` whl包,其中shapely依赖可能会出现 `[WinError 126] 找不到指定模块的问题。` 的错误,建议从[这里](https://www.lfd.uci.edu/~gohlke/pythonlibs/#shapely)下载并安装
##### Ubuntu Linux ##### Ubuntu Linux
...@@ -65,13 +68,18 @@ PPOCRLabel --lang ch # 运行 ...@@ -65,13 +68,18 @@ PPOCRLabel --lang ch # 运行
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install trash-cli pip3 install trash-cli
PPOCRLabel --lang ch
# 选择标签模式来启动
PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
##### MacOS ##### MacOS
```bash ```bash
pip3 install PPOCRLabel pip3 install PPOCRLabel
pip3 install opencv-contrib-python-headless==4.2.0.32 # 如果下载过慢请添加"-i https://mirror.baidu.com/pypi/simple" pip3 install opencv-contrib-python-headless==4.2.0.32 # 如果下载过慢请添加"-i https://mirror.baidu.com/pypi/simple"
# 选择标签模式来启动
PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签 PPOCRLabel --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签 PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
...@@ -92,6 +100,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu. ...@@ -92,6 +100,8 @@ pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu.
```bash ```bash
cd ./PPOCRLabel # 切换到PPOCRLabel目录 cd ./PPOCRLabel # 切换到PPOCRLabel目录
# 选择标签模式来启动
python PPOCRLabel.py --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签 python PPOCRLabel.py --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签
python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签 python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签
``` ```
......
...@@ -45,8 +45,9 @@ public: ...@@ -45,8 +45,9 @@ public:
const double &det_db_thresh, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &visualize, const bool &use_polygon_score, const bool &use_dilation,
const bool &use_tensorrt, const std::string &precision) { const bool &visualize, const bool &use_tensorrt,
const std::string &precision) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
...@@ -59,6 +60,7 @@ public: ...@@ -59,6 +60,7 @@ public:
this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_box_thresh_ = det_db_box_thresh;
this->det_db_unclip_ratio_ = det_db_unclip_ratio; this->det_db_unclip_ratio_ = det_db_unclip_ratio;
this->use_polygon_score_ = use_polygon_score; this->use_polygon_score_ = use_polygon_score;
this->use_dilation_ = use_dilation;
this->visualize_ = visualize; this->visualize_ = visualize;
this->use_tensorrt_ = use_tensorrt; this->use_tensorrt_ = use_tensorrt;
...@@ -71,7 +73,8 @@ public: ...@@ -71,7 +73,8 @@ public:
void LoadModel(const std::string &model_dir); void LoadModel(const std::string &model_dir);
// Run predictor // Run predictor
void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes, std::vector<double> *times); void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes,
std::vector<double> *times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<Predictor> predictor_;
...@@ -88,6 +91,7 @@ private: ...@@ -88,6 +91,7 @@ private:
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 2.0;
bool use_polygon_score_ = false; bool use_polygon_score_ = false;
bool use_dilation_ = false;
bool visualize_ = true; bool visualize_ = true;
bool use_tensorrt_ = false; bool use_tensorrt_ = false;
......
...@@ -4,16 +4,20 @@ ...@@ -4,16 +4,20 @@
C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成 C++在性能计算上优于python,因此,在大多数CPU、GPU部署场景,多采用C++的部署方式,本节将介绍如何在Linux\Windows (CPU\GPU)环境下配置C++环境并完成
PaddleOCR模型部署。 PaddleOCR模型部署。
* [1. 准备环境](#1) - [服务器端C++预测](#服务器端c预测)
+ [1.0 运行准备](#10) - [1. 准备环境](#1-准备环境)
+ [1.1 编译opencv库](#11) - [1.0 运行准备](#10-运行准备)
+ [1.2 下载或者编译Paddle预测库](#12) - [1.1 编译opencv库](#11-编译opencv库)
- [1.2.1 直接下载安装](#121) - [1.2 下载或者编译Paddle预测库](#12-下载或者编译paddle预测库)
- [1.2.2 预测库源码编译](#122) - [1.2.1 直接下载安装](#121-直接下载安装)
* [2 开始运行](#2) - [1.2.2 预测库源码编译](#122-预测库源码编译)
+ [2.1 将模型导出为inference model](#21) - [2 开始运行](#2-开始运行)
+ [2.2 编译PaddleOCR C++预测demo](#22) - [2.1 将模型导出为inference model](#21-将模型导出为inference-model)
+ [2.3运行demo](#23) - [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo)
- [2.3 运行demo](#23-运行demo)
- [1. 只调用检测:](#1-只调用检测)
- [2. 只调用识别:](#2-只调用识别)
- [3. 调用串联:](#3-调用串联)
<a name="1"></a> <a name="1"></a>
...@@ -103,7 +107,7 @@ opencv3/ ...@@ -103,7 +107,7 @@ opencv3/
#### 1.2.1 直接下载安装 #### 1.2.1 直接下载安装
* [Paddle预测库官网](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。 * [Paddle预测库官网](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#linux) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。
* 下载之后使用下面的方法解压。 * 下载之后使用下面的方法解压。
...@@ -249,7 +253,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -249,7 +253,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|gpu_id|int|0|GPU id,使用GPU时有效| |gpu_id|int|0|GPU id,使用GPU时有效|
|gpu_mem|int|4000|申请的GPU内存| |gpu_mem|int|4000|申请的GPU内存|
|cpu_math_library_num_threads|int|10|CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快| |cpu_math_library_num_threads|int|10|CPU预测时的线程数,在机器核数充足的情况下,该值越大,预测速度越快|
|use_mkldnn|bool|true|是否使用mkldnn库| |enable_mkldnn|bool|true|是否使用mkldnn库|
- 检测模型相关 - 检测模型相关
......
...@@ -78,7 +78,7 @@ opencv3/ ...@@ -78,7 +78,7 @@ opencv3/
#### 1.2.1 Direct download and installation #### 1.2.1 Direct download and installation
[Paddle inference library official website](https://paddle-inference.readthedocs.io/en/latest/user_guides/download_lib.html). You can review and select the appropriate version of the inference library on the official website. [Paddle inference library official website](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#linux). You can review and select the appropriate version of the inference library on the official website.
* After downloading, use the following command to extract files. * After downloading, use the following command to extract files.
...@@ -231,7 +231,7 @@ More parameters are as follows, ...@@ -231,7 +231,7 @@ More parameters are as follows,
|gpu_id|int|0|GPU id when use_gpu is true| |gpu_id|int|0|GPU id when use_gpu is true|
|gpu_mem|int|4000|GPU memory requested| |gpu_mem|int|4000|GPU memory requested|
|cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. When there are enough machine cores, the larger the value, the faster the inference speed| |cpu_math_library_num_threads|int|10|Number of threads when using CPU inference. When there are enough machine cores, the larger the value, the faster the inference speed|
|use_mkldnn|bool|true|Whether to use mkldnn library| |enable_mkldnn|bool|true|Whether to use mkldnn library|
- Detection related parameters - Detection related parameters
......
...@@ -28,14 +28,14 @@ ...@@ -28,14 +28,14 @@
#include <numeric> #include <numeric>
#include <glog/logging.h> #include <glog/logging.h>
#include <include/ocr_det.h>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h> #include <include/ocr_rec.h>
#include <include/utility.h> #include <include/utility.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <gflags/gflags.h>
#include "auto_log/autolog.h" #include "auto_log/autolog.h"
#include <gflags/gflags.h>
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU."); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute."); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
...@@ -51,9 +51,10 @@ DEFINE_string(image_dir, "", "Dir of input image."); ...@@ -51,9 +51,10 @@ DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_string(det_model_dir, "", "Path of det inference model."); DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image."); DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh."); DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.5, "Threshold of det_db_box_thresh."); DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.6, "Threshold of det_db_unclip_ratio."); DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio.");
DEFINE_bool(use_polygon_score, false, "Whether use polygon score."); DEFINE_bool(use_polygon_score, false, "Whether use polygon score.");
DEFINE_bool(use_dilation, false, "Whether use the dilation on output map.");
DEFINE_bool(visualize, true, "Whether show the detection results."); DEFINE_bool(visualize, true, "Whether show the detection results.");
// classification related // classification related
DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls."); DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
...@@ -62,281 +63,260 @@ DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh."); ...@@ -62,281 +63,260 @@ DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
// recognition related // recognition related
DEFINE_string(rec_model_dir, "", "Path of rec inference model."); DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
DEFINE_int32(rec_batch_num, 6, "rec_batch_num."); DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary."); DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt",
"Path of dictionary.");
using namespace std; using namespace std;
using namespace cv; using namespace cv;
using namespace PaddleOCR; using namespace PaddleOCR;
static bool PathExists(const std::string &path) {
static bool PathExists(const std::string& path){
#ifdef _WIN32 #ifdef _WIN32
struct _stat buffer; struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0); return (_stat(path.c_str(), &buffer) == 0);
#else #else
struct stat buffer; struct stat buffer;
return (stat(path.c_str(), &buffer) == 0); return (stat(path.c_str(), &buffer) == 0);
#endif // !_WIN32 #endif // !_WIN32
} }
int main_det(std::vector<cv::String> cv_all_img_names) { int main_det(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0}; std::vector<double> time_info = {0, 0, 0};
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_visualize, FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize,
FLAGS_use_tensorrt, FLAGS_precision); FLAGS_use_tensorrt, FLAGS_precision);
for (int i = 0; i < cv_all_img_names.size(); ++i) { for (int i = 0; i < cv_all_img_names.size(); ++i) {
// LOG(INFO) << "The predict img: " << cv_all_img_names[i]; // LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) { if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; std::cerr << "[ERROR] image read failed! image path: "
exit(1); << cv_all_img_names[i] << endl;
} exit(1);
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
det.Run(srcimg, boxes, &det_times);
time_info[0] += det_times[0];
time_info[1] += det_times[1];
time_info[2] += det_times[2];
if (FLAGS_benchmark) {
cout << cv_all_img_names[i] << '\t';
for (int n = 0; n < boxes.size(); n++) {
for (int m = 0; m < boxes[n].size(); m++) {
cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' ';
}
}
cout << endl;
}
} }
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
det.Run(srcimg, boxes, &det_times);
time_info[0] += det_times[0];
time_info[1] += det_times[1];
time_info[2] += det_times[2];
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
AutoLogger autolog("ocr_det", cout << cv_all_img_names[i] << '\t';
FLAGS_use_gpu, for (int n = 0; n < boxes.size(); n++) {
FLAGS_use_tensorrt, for (int m = 0; m < boxes[n].size(); m++) {
FLAGS_enable_mkldnn, cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' ';
FLAGS_cpu_threads, }
1, }
"dynamic", cout << endl;
FLAGS_precision,
time_info,
cv_all_img_names.size());
autolog.report();
} }
return 0; }
}
if (FLAGS_benchmark) {
AutoLogger autolog("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
FLAGS_precision, time_info, cv_all_img_names.size());
autolog.report();
}
return 0;
}
int main_rec(std::vector<cv::String> cv_all_img_names) { int main_rec(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0}; std::vector<double> time_info = {0, 0, 0};
std::string char_list_file = FLAGS_char_list_file;
if (FLAGS_benchmark)
char_list_file = FLAGS_char_list_file.substr(6);
cout << "label file: " << char_list_file << endl;
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads,
FLAGS_enable_mkldnn, char_list_file,
FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);
std::vector<cv::Mat> img_list; std::string char_list_file = FLAGS_char_list_file;
for (int i = 0; i < cv_all_img_names.size(); ++i) { if (FLAGS_benchmark)
LOG(INFO) << "The predict img: " << cv_all_img_names[i]; char_list_file = FLAGS_char_list_file.substr(6);
cout << "label file: " << char_list_file << endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
if (!srcimg.data) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; char_list_file, FLAGS_use_tensorrt, FLAGS_precision,
exit(1); FLAGS_rec_batch_num);
}
img_list.push_back(srcimg); std::vector<cv::Mat> img_list;
} for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::vector<double> rec_times; LOG(INFO) << "The predict img: " << cv_all_img_names[i];
rec.Run(img_list, &rec_times);
time_info[0] += rec_times[0]; cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
time_info[1] += rec_times[1]; if (!srcimg.data) {
time_info[2] += rec_times[2]; std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
if (FLAGS_benchmark) { exit(1);
AutoLogger autolog("ocr_rec",
FLAGS_use_gpu,
FLAGS_use_tensorrt,
FLAGS_enable_mkldnn,
FLAGS_cpu_threads,
FLAGS_rec_batch_num,
"dynamic",
FLAGS_precision,
time_info,
cv_all_img_names.size());
autolog.report();
} }
return 0; img_list.push_back(srcimg);
} }
std::vector<double> rec_times;
rec.Run(img_list, &rec_times);
time_info[0] += rec_times[0];
time_info[1] += rec_times[1];
time_info[2] += rec_times[2];
if (FLAGS_benchmark) {
AutoLogger autolog("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads,
FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
time_info, cv_all_img_names.size());
autolog.report();
}
return 0;
}
int main_system(std::vector<cv::String> cv_all_img_names) { int main_system(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info_det = {0, 0, 0}; std::vector<double> time_info_det = {0, 0, 0};
std::vector<double> time_info_rec = {0, 0, 0}; std::vector<double> time_info_rec = {0, 0, 0};
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads,
FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_visualize,
FLAGS_use_tensorrt, FLAGS_precision);
Classifier *cls = nullptr;
if (FLAGS_use_angle_cls) {
cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_threads,
FLAGS_enable_mkldnn, FLAGS_cls_thresh,
FLAGS_use_tensorrt, FLAGS_precision);
}
std::string char_list_file = FLAGS_char_list_file; DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
if (FLAGS_benchmark) FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
char_list_file = FLAGS_char_list_file.substr(6); FLAGS_max_side_len, FLAGS_det_db_thresh,
cout << "label file: " << char_list_file << endl; FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_use_dilation, FLAGS_visualize,
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_use_tensorrt, FLAGS_precision);
FLAGS_gpu_mem, FLAGS_cpu_threads,
FLAGS_enable_mkldnn, char_list_file, Classifier *cls = nullptr;
FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num); if (FLAGS_use_angle_cls) {
cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
for (int i = 0; i < cv_all_img_names.size(); ++i) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
LOG(INFO) << "The predict img: " << cv_all_img_names[i]; FLAGS_cls_thresh, FLAGS_use_tensorrt, FLAGS_precision);
}
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) { std::string char_list_file = FLAGS_char_list_file;
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl; if (FLAGS_benchmark)
exit(1); char_list_file = FLAGS_char_list_file.substr(6);
} cout << "label file: " << char_list_file << endl;
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times; CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
std::vector<double> rec_times; FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
char_list_file, FLAGS_use_tensorrt, FLAGS_precision,
det.Run(srcimg, boxes, &det_times); FLAGS_rec_batch_num);
time_info_det[0] += det_times[0];
time_info_det[1] += det_times[1]; for (int i = 0; i < cv_all_img_names.size(); ++i) {
time_info_det[2] += det_times[2]; LOG(INFO) << "The predict img: " << cv_all_img_names[i];
std::vector<cv::Mat> img_list;
for (int j = 0; j < boxes.size(); j++) {
cv::Mat crop_img;
crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
if (cls != nullptr) {
crop_img = cls->Run(crop_img);
}
img_list.push_back(crop_img);
}
rec.Run(img_list, &rec_times); cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
time_info_rec[0] += rec_times[0]; if (!srcimg.data) {
time_info_rec[1] += rec_times[1]; std::cerr << "[ERROR] image read failed! image path: "
time_info_rec[2] += rec_times[2]; << cv_all_img_names[i] << endl;
exit(1);
} }
std::vector<std::vector<std::vector<int>>> boxes;
if (FLAGS_benchmark) { std::vector<double> det_times;
AutoLogger autolog_det("ocr_det", std::vector<double> rec_times;
FLAGS_use_gpu,
FLAGS_use_tensorrt,
FLAGS_enable_mkldnn,
FLAGS_cpu_threads,
1,
"dynamic",
FLAGS_precision,
time_info_det,
cv_all_img_names.size());
AutoLogger autolog_rec("ocr_rec",
FLAGS_use_gpu,
FLAGS_use_tensorrt,
FLAGS_enable_mkldnn,
FLAGS_cpu_threads,
FLAGS_rec_batch_num,
"dynamic",
FLAGS_precision,
time_info_rec,
cv_all_img_names.size());
autolog_det.report();
std::cout << endl;
autolog_rec.report();
}
return 0;
}
det.Run(srcimg, boxes, &det_times);
time_info_det[0] += det_times[0];
time_info_det[1] += det_times[1];
time_info_det[2] += det_times[2];
void check_params(char* mode) { std::vector<cv::Mat> img_list;
if (strcmp(mode, "det")==0) { for (int j = 0; j < boxes.size(); j++) {
if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) { cv::Mat crop_img;
std::cout << "Usage[det]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; if (cls != nullptr) {
exit(1); crop_img = cls->Run(crop_img);
} }
img_list.push_back(crop_img);
} }
if (strcmp(mode, "rec")==0) {
if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) { rec.Run(img_list, &rec_times);
std::cout << "Usage[rec]: ./ppocr --rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " time_info_rec[0] += rec_times[0];
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; time_info_rec[1] += rec_times[1];
exit(1); time_info_rec[2] += rec_times[2];
} }
if (FLAGS_benchmark) {
AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
FLAGS_precision, time_info_det,
cv_all_img_names.size());
AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads,
FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
time_info_rec, cv_all_img_names.size());
autolog_det.report();
std::cout << endl;
autolog_rec.report();
}
return 0;
}
void check_params(char *mode) {
if (strcmp(mode, "det") == 0) {
if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[det]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
} }
if (strcmp(mode, "system")==0) { }
if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) || if (strcmp(mode, "rec") == 0) {
(FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) { if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[system without angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " std::cout << "Usage[rec]: ./ppocr "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
std::cout << "Usage[system with angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " exit(1);
<< "--use_angle_cls=true "
<< "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
}
} }
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" && FLAGS_precision != "int8") { }
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl; if (strcmp(mode, "system") == 0) {
exit(1); if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() ||
FLAGS_image_dir.empty()) ||
(FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) {
std::cout << "Usage[system without angle cls]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
std::cout << "Usage[system with angle cls]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--use_angle_cls=true "
<< "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
} }
}
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" &&
FLAGS_precision != "int8") {
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl;
exit(1);
}
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) { if (argc <= 1 ||
std::cout << "Please choose one mode of [det, rec, system] !" << std::endl; (strcmp(argv[1], "det") != 0 && strcmp(argv[1], "rec") != 0 &&
return -1; strcmp(argv[1], "system") != 0)) {
} std::cout << "Please choose one mode of [det, rec, system] !" << std::endl;
std::cout << "mode: " << argv[1] << endl; return -1;
}
// Parsing command-line std::cout << "mode: " << argv[1] << endl;
google::ParseCommandLineFlags(&argc, &argv, true);
check_params(argv[1]); // Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
if (!PathExists(FLAGS_image_dir)) { check_params(argv[1]);
std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir << endl;
exit(1); if (!PathExists(FLAGS_image_dir)) {
} std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir
<< endl;
std::vector<cv::String> cv_all_img_names; exit(1);
cv::glob(FLAGS_image_dir, cv_all_img_names); }
std::cout << "total images num: " << cv_all_img_names.size() << endl;
std::vector<cv::String> cv_all_img_names;
if (strcmp(argv[1], "det")==0) { cv::glob(FLAGS_image_dir, cv_all_img_names);
return main_det(cv_all_img_names); std::cout << "total images num: " << cv_all_img_names.size() << endl;
}
if (strcmp(argv[1], "rec")==0) {
return main_rec(cv_all_img_names);
}
if (strcmp(argv[1], "system")==0) {
return main_system(cv_all_img_names);
}
if (strcmp(argv[1], "det") == 0) {
return main_det(cv_all_img_names);
}
if (strcmp(argv[1], "rec") == 0) {
return main_rec(cv_all_img_names);
}
if (strcmp(argv[1], "system") == 0) {
return main_system(cv_all_img_names);
}
} }
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <include/ocr_det.h> #include <include/ocr_det.h>
namespace PaddleOCR { namespace PaddleOCR {
void DBDetector::LoadModel(const std::string &model_dir) { void DBDetector::LoadModel(const std::string &model_dir) {
...@@ -30,13 +29,10 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -30,13 +29,10 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->precision_ == "fp16") { if (this->precision_ == "fp16") {
precision = paddle_infer::Config::Precision::kHalf; precision = paddle_infer::Config::Precision::kHalf;
} }
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine( config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
1 << 20, 10, 3,
precision,
false, false);
std::map<std::string, std::vector<int>> min_input_shape = { std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 50, 50}}, {"x", {1, 3, 50, 50}},
{"conv2d_92.tmp_0", {1, 96, 20, 20}}, {"conv2d_92.tmp_0", {1, 96, 20, 20}},
...@@ -105,7 +101,7 @@ void DBDetector::Run(cv::Mat &img, ...@@ -105,7 +101,7 @@ void DBDetector::Run(cv::Mat &img,
cv::Mat srcimg; cv::Mat srcimg;
cv::Mat resize_img; cv::Mat resize_img;
img.copyTo(srcimg); img.copyTo(srcimg);
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w, this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
this->use_tensorrt_); this->use_tensorrt_);
...@@ -116,16 +112,16 @@ void DBDetector::Run(cv::Mat &img, ...@@ -116,16 +112,16 @@ void DBDetector::Run(cv::Mat &img,
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
auto preprocess_end = std::chrono::steady_clock::now(); auto preprocess_end = std::chrono::steady_clock::now();
// Inference. // Inference.
auto input_names = this->predictor_->GetInputNames(); auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]); auto input_t = this->predictor_->GetInputHandle(input_names[0]);
input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
auto inference_start = std::chrono::steady_clock::now(); auto inference_start = std::chrono::steady_clock::now();
input_t->CopyFromCpu(input.data()); input_t->CopyFromCpu(input.data());
this->predictor_->Run(); this->predictor_->Run();
std::vector<float> out_data; std::vector<float> out_data;
auto output_names = this->predictor_->GetOutputNames(); auto output_names = this->predictor_->GetOutputNames();
auto output_t = this->predictor_->GetOutputHandle(output_names[0]); auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
...@@ -136,7 +132,7 @@ void DBDetector::Run(cv::Mat &img, ...@@ -136,7 +132,7 @@ void DBDetector::Run(cv::Mat &img,
out_data.resize(out_num); out_data.resize(out_num);
output_t->CopyToCpu(out_data.data()); output_t->CopyToCpu(out_data.data());
auto inference_end = std::chrono::steady_clock::now(); auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now(); auto postprocess_start = std::chrono::steady_clock::now();
int n2 = output_shape[2]; int n2 = output_shape[2];
int n3 = output_shape[3]; int n3 = output_shape[3];
...@@ -157,24 +153,29 @@ void DBDetector::Run(cv::Mat &img, ...@@ -157,24 +153,29 @@ void DBDetector::Run(cv::Mat &img,
const double maxvalue = 255; const double maxvalue = 255;
cv::Mat bit_map; cv::Mat bit_map;
cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY); cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);
cv::Mat dilation_map; if (this->use_dilation_) {
cv::Mat dila_ele = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2)); cv::Mat dila_ele =
cv::dilate(bit_map, dilation_map, dila_ele); cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
cv::dilate(bit_map, bit_map, dila_ele);
}
boxes = post_processor_.BoxesFromBitmap( boxes = post_processor_.BoxesFromBitmap(
pred_map, dilation_map, this->det_db_box_thresh_, pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_,
this->det_db_unclip_ratio_, this->use_polygon_score_); this->use_polygon_score_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
auto postprocess_end = std::chrono::steady_clock::now(); auto postprocess_end = std::chrono::steady_clock::now();
std::cout << "Detected boxes num: " << boxes.size() << endl; std::cout << "Detected boxes num: " << boxes.size() << endl;
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start; std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000)); times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start; std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() * 1000)); times->push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start; std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000)); times->push_back(double(postprocess_diff.count() * 1000));
//// visualization //// visualization
if (this->visualize_) { if (this->visualize_) {
Utility::VisualizeBboxes(srcimg, boxes); Utility::VisualizeBboxes(srcimg, boxes);
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
* [2.1 启动训练](#21-----) * [2.1 启动训练](#21-----)
* [2.2 断点训练](#22-----) * [2.2 断点训练](#22-----)
* [2.3 更换Backbone 训练](#23---backbone---) * [2.3 更换Backbone 训练](#23---backbone---)
* [2.4 知识蒸馏训练](#24---distill---)
- [3. 模型评估与预测](#3--------) - [3. 模型评估与预测](#3--------)
* [3.1 指标评估](#31-----) * [3.1 指标评估](#31-----)
* [3.2 测试检测效果](#32-------) * [3.2 测试检测效果](#32-------)
...@@ -182,6 +183,15 @@ args1: args1 ...@@ -182,6 +183,15 @@ args1: args1
**注意**:如果要更换网络的其他模块,可以参考[文档](./add_new_algorithm.md)。 **注意**:如果要更换网络的其他模块,可以参考[文档](./add_new_algorithm.md)。
<a name="24---distill---"></a>
## 2.4 知识蒸馏训练
PaddleOCR支持了基于知识蒸馏的检测模型训练过程,更多内容可以参考[知识蒸馏说明文档](./knowledge_distillation.md)。
<a name="3--------"></a> <a name="3--------"></a>
# 3. 模型评估与预测 # 3. 模型评估与预测
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
- [2.1 数据增强](#数据增强) - [2.1 数据增强](#数据增强)
- [2.2 通用模型训练](#通用模型训练) - [2.2 通用模型训练](#通用模型训练)
- [2.3 多语言模型训练](#多语言模型训练) - [2.3 多语言模型训练](#多语言模型训练)
- [2.4 知识蒸馏训练](#知识蒸馏训练)
- [3 评估](#评估) - [3 评估](#评估)
- [4 预测](#预测) - [4 预测](#预测)
- [5 转Inference模型测试](#Inference) - [5 转Inference模型测试](#Inference)
...@@ -368,6 +369,13 @@ Eval: ...@@ -368,6 +369,13 @@ Eval:
label_file_list: ["./train_data/french_val.txt"] label_file_list: ["./train_data/french_val.txt"]
... ...
``` ```
<a name="知识蒸馏训练"></a>
### 2.4 知识蒸馏训练
PaddleOCR支持了基于知识蒸馏的文本识别模型训练过程,更多内容可以参考[知识蒸馏说明文档](./knowledge_distillation.md)。
<a name="评估"></a> <a name="评估"></a>
## 3 评估 ## 3 评估
......
...@@ -9,6 +9,7 @@ This section uses the icdar2015 dataset as an example to introduce the training, ...@@ -9,6 +9,7 @@ This section uses the icdar2015 dataset as an example to introduce the training,
* [2.1 Start Training](#21-start-training) * [2.1 Start Training](#21-start-training)
* [2.2 Load Trained Model and Continue Training](#22-load-trained-model-and-continue-training) * [2.2 Load Trained Model and Continue Training](#22-load-trained-model-and-continue-training)
* [2.3 Training with New Backbone](#23-training-with-new-backbone) * [2.3 Training with New Backbone](#23-training-with-new-backbone)
* [2.4 Training with knowledge distillation](#24-training-with-knowledge-distillation)
- [3. Evaluation and Test](#3-evaluation-and-test) - [3. Evaluation and Test](#3-evaluation-and-test)
* [3.1 Evaluation](#31-evaluation) * [3.1 Evaluation](#31-evaluation)
* [3.2 Test](#32-test) * [3.2 Test](#32-test)
...@@ -174,6 +175,11 @@ After adding the four-part modules of the network, you only need to configure th ...@@ -174,6 +175,11 @@ After adding the four-part modules of the network, you only need to configure th
**NOTE**: More details about replacing the Backbone and other modules can be found in [doc](add_new_algorithm_en.md). **NOTE**: More details about replacing the Backbone and other modules can be found in [doc](add_new_algorithm_en.md).
### 2.4 Training with knowledge distillation
PaddleOCR supports knowledge distillation for training text detection models. For more details, please refer to [doc](./knowledge_distillation_en.md).
## 3. Evaluation and Test ## 3. Evaluation and Test
### 3.1 Evaluation ### 3.1 Evaluation
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
- [2.1 Data Augmentation](#Data_Augmentation) - [2.1 Data Augmentation](#Data_Augmentation)
- [2.2 General Training](#Training) - [2.2 General Training](#Training)
- [2.3 Multi-language Training](#Multi_language) - [2.3 Multi-language Training](#Multi_language)
- [2.4 Training with Knowledge Distillation](#kd)
- [3. Evaluation](#EVALUATION) - [3. Evaluation](#EVALUATION)
...@@ -361,6 +362,12 @@ Eval: ...@@ -361,6 +362,12 @@ Eval:
... ...
``` ```
<a name="kd"></a>
### 2.4 Training with Knowledge Distillation
PaddleOCR supports knowledge distillation for training text recognition models. For more details, please refer to [doc](./knowledge_distillation_en.md).
<a name="EVALUATION"></a> <a name="EVALUATION"></a>
## 3. Evaluation ## 3. Evaluation
......
...@@ -95,9 +95,15 @@ class DMLLoss(nn.Layer): ...@@ -95,9 +95,15 @@ class DMLLoss(nn.Layer):
self.act = None self.act = None
self.use_log = use_log self.use_log = use_log
self.jskl_loss = KLJSLoss(mode="js") self.jskl_loss = KLJSLoss(mode="js")
def _kldiv(self, x, target):
eps = 1.0e-10
loss = target * (paddle.log(target + eps) - x)
# batch mean loss
loss = paddle.sum(loss) / loss.shape[0]
return loss
def forward(self, out1, out2): def forward(self, out1, out2):
if self.act is not None: if self.act is not None:
out1 = self.act(out1) out1 = self.act(out1)
...@@ -106,9 +112,8 @@ class DMLLoss(nn.Layer): ...@@ -106,9 +112,8 @@ class DMLLoss(nn.Layer):
# for recognition distillation, log is needed for feature map # for recognition distillation, log is needed for feature map
log_out1 = paddle.log(out1) log_out1 = paddle.log(out1)
log_out2 = paddle.log(out2) log_out2 = paddle.log(out2)
loss = (F.kl_div( loss = (
log_out1, out2, reduction='batchmean') + F.kl_div( self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
log_out2, out1, reduction='batchmean')) / 2.0
else: else:
# for detection distillation log is not needed # for detection distillation log is not needed
loss = self.jskl_loss(out1, out2) loss = self.jskl_loss(out1, out2)
......
...@@ -49,5 +49,5 @@ inference:tools/infer/predict_rec.py ...@@ -49,5 +49,5 @@ inference:tools/infer/predict_rec.py
null:null null:null
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,320]}]
...@@ -49,5 +49,5 @@ inference:tools/infer/predict_rec.py ...@@ -49,5 +49,5 @@ inference:tools/infer/predict_rec.py
null:null null:null
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,320]}]
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
...@@ -48,4 +48,6 @@ inference:tools/infer/predict_rec.py ...@@ -48,4 +48,6 @@ inference:tools/infer/predict_rec.py
--image_dir:./inference/rec_inference --image_dir:./inference/rec_inference
null:null null:null
--benchmark:True --benchmark:True
null:null null:null
\ No newline at end of file ===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,320]}]
\ No newline at end of file
...@@ -48,4 +48,6 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_ke ...@@ -48,4 +48,6 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_ke
--image_dir:./inference/rec_inference --image_dir:./inference/rec_inference
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
\ No newline at end of file ===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,320]}]
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/EN_symbo ...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/EN_symbo
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[1,32,100]}]
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
\ No newline at end of file
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
\ No newline at end of file
...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict90.t ...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict90.t
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,48,48,160]}]
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
\ No newline at end of file
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
\ No newline at end of file
...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,3 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic ...@@ -49,4 +49,5 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
--save_log_path:./test/output/ --save_log_path:./test/output/
--benchmark:True --benchmark:True
null:null null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[1,64,256]}]
...@@ -16,7 +16,7 @@ import sys ...@@ -16,7 +16,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -16,7 +16,7 @@ import sys ...@@ -16,7 +16,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -16,7 +16,7 @@ import sys ...@@ -16,7 +16,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -16,7 +16,7 @@ import sys ...@@ -16,7 +16,7 @@ import sys
from PIL import Image from PIL import Image
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -17,7 +17,7 @@ import subprocess ...@@ -17,7 +17,7 @@ import subprocess
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -23,7 +23,7 @@ import sys ...@@ -23,7 +23,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -23,7 +23,7 @@ import sys ...@@ -23,7 +23,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -23,7 +23,7 @@ import sys ...@@ -23,7 +23,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -24,7 +24,7 @@ import sys ...@@ -24,7 +24,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -24,7 +24,7 @@ import json ...@@ -24,7 +24,7 @@ import json
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -24,7 +24,7 @@ import json ...@@ -24,7 +24,7 @@ import json
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
......
...@@ -23,7 +23,7 @@ import sys ...@@ -23,7 +23,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2 import cv2
......
...@@ -23,7 +23,7 @@ import sys ...@@ -23,7 +23,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth' os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2 import cv2
......
...@@ -21,7 +21,7 @@ import sys ...@@ -21,7 +21,7 @@ import sys
__dir__ = os.path.dirname(os.path.abspath(__file__)) __dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__) sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))
import yaml import yaml
import paddle import paddle
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册