diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py index af7c2642de896de82a86db8f6302e68beca0aa72..9ffa909e98f9c4c6156b8cae0675ef8aa5141620 100644 --- a/PPOCRLabel/PPOCRLabel.py +++ b/PPOCRLabel/PPOCRLabel.py @@ -92,7 +92,7 @@ class WindowMixin(object): class MainWindow(QMainWindow, WindowMixin): FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = list(range(3)) - def __init__(self, lang="ch", defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None): + def __init__(self, lang="ch", gpu=False, defaultFilename=None, defaultPrefdefClassFile=None, defaultSaveDir=None): super(MainWindow, self).__init__() self.setWindowTitle(__appname__) @@ -108,7 +108,7 @@ class MainWindow(QMainWindow, WindowMixin): getStr = lambda strId: self.stringBundle.getString(strId) self.defaultSaveDir = defaultSaveDir - self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang) + self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=gpu, lang=lang) if os.path.exists('./data/paddle.png'): result = self.ocr.ocr('./data/paddle.png', cls=True, det=True) @@ -1239,6 +1239,8 @@ class MainWindow(QMainWindow, WindowMixin): def loadFile(self, filePath=None): """Load the specified file, or the last opened file if None.""" + if self.dirty: + self.mayContinue() self.resetState() self.canvas.setEnabled(False) if filePath is None: @@ -2037,6 +2039,8 @@ def read(filename, default=None): except: return default +def str2bool(v): + return v.lower() in ("true", "t", "1") def get_main_app(argv=[]): """ @@ -2048,13 +2052,14 @@ def get_main_app(argv=[]): app.setWindowIcon(newIcon("app")) # Tzutalin 201705+: Accept extra agruments to change predefined class file argparser = argparse.ArgumentParser() - argparser.add_argument("--lang", default='en', nargs="?") + argparser.add_argument("--lang", type=str, default='en', nargs="?") + argparser.add_argument("--gpu", type=str2bool, default=False, nargs="?") argparser.add_argument("--predefined_classes_file", default=os.path.join(os.path.dirname(__file__), "data", "predefined_classes.txt"), nargs="?") args = argparser.parse_args(argv[1:]) # Usage : labelImg.py image predefClassFile saveDir - win = MainWindow(lang=args.lang, + win = MainWindow(lang=args.lang, gpu=args.gpu, defaultPrefdefClassFile=args.predefined_classes_file) win.show() return app, win diff --git a/deploy/cpp_infer/include/utility.h b/deploy/cpp_infer/include/utility.h index 367e37e434b396ac1eae28961f366dc397ed446f..6e8173e007279319657250b376de022240bc6f62 100644 --- a/deploy/cpp_infer/include/utility.h +++ b/deploy/cpp_infer/include/utility.h @@ -44,6 +44,9 @@ public: inline static size_t argmax(ForwardIterator first, ForwardIterator last) { return std::distance(first, std::max_element(first, last)); } + + static void GetAllFiles(const char *dir_name, + std::vector &all_inputs); }; } // namespace PaddleOCR \ No newline at end of file diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index ee5a9ed4b9aa16b76836dc01096ae132fead56dd..6a57044b0ef81c4600c13180bb33c45b2bf0bc01 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -77,7 +77,7 @@ opencv3/ #### 1.2.1 直接下载安装 -* [Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。 +* 
[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 上提供了不同cuda版本的Linux预测库,可以在官网查看并选择合适的预测库版本(*建议选择paddle版本>=2.0.1版本的预测库* )。 * 下载之后使用下面的方法解压。 @@ -89,10 +89,11 @@ tar -xf paddle_inference.tgz #### 1.2.2 预测库源码编译 * 如果希望获取最新预测库特性,可以从Paddle github上克隆最新代码,源码编译预测库。 -* 可以参考[Paddle预测库官网](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。 +* 可以参考[Paddle预测库安装编译说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi) 的说明,从github上获取Paddle代码,然后进行编译,生成最新的预测库。使用git获取代码方法如下。 ```shell git clone https://github.com/PaddlePaddle/Paddle.git +git checkout release/2.1 ``` * 进入Paddle目录后,编译方法如下。 @@ -115,7 +116,7 @@ make -j make inference_lib_dist ``` -更多编译参数选项可以参考Paddle C++预测库官网:[https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/build_and_install_lib_cn.html)。 +更多编译参数选项介绍可以参考[文档说明](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi)。 * 编译完成之后,可以在`build/paddle_inference_install_dir/`文件下看到生成了以下文件及文件夹。 @@ -140,11 +141,11 @@ build/paddle_inference_install_dir/ ``` inference/ |-- det_db -| |--inference.pdparams -| |--inference.pdimodel +| |--inference.pdiparams +| |--inference.pdmodel |-- rec_rcnn -| |--inference.pdparams -| |--inference.pdparams +| |--inference.pdiparams +| |--inference.pdmodel ``` diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index 913ba1f91668d682c7c3fa614f8997293d52db89..6c0a18db4f76d4e2971cea16130216434ff01d7b 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -78,8 +78,7 @@ opencv3/ #### 1.2.1 Direct download and installation -* Different cuda versions of the Linux inference library (based on GCC 4.8.2) are provided on the -[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html). You can view and select the appropriate version of the inference library on the official website. +[Paddle inference library official website](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html). You can view and select the appropriate version of the inference library on the official website. * After downloading, use the following method to uncompress. @@ -97,9 +96,10 @@ Finally you can see the following files in the folder of `paddle_inference/`. ```shell git clone https://github.com/PaddlePaddle/Paddle.git +git checkout release/2.1 ``` -* After entering the Paddle directory, the compilation method is as follows. +* After entering the Paddle directory, the commands to compile the paddle inference library are as follows. 
```shell rm -rf build @@ -119,7 +119,7 @@ make -j make inference_lib_dist ``` -For more compilation parameter options, please refer to the official website of the Paddle C++ inference library:[https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/guides/05_inference_deployment/inference/build_and_install_lib_en.html). +For more compilation parameter options, please refer to the [document](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0/guides/05_inference_deployment/inference/build_and_install_lib_cn.html#congyuanmabianyi). * After the compilation process, you can see the following files in the folder of `build/paddle_inference_install_dir/`. @@ -144,11 +144,11 @@ Among them, `paddle` is the Paddle library required for C++ prediction later, an ``` inference/ |-- det_db -| |--inference.pdparams -| |--inference.pdimodel +| |--inference.pdiparams +| |--inference.pdmodel |-- rec_rcnn -| |--inference.pdparams -| |--inference.pdparams +| |--inference.pdiparams +| |--inference.pdmodel ``` diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 588c8374ab341163835aea2ba6c7132640c74c64..f25e674b489ea92118fe45c63939fca203ce3823 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -27,9 +27,12 @@ #include #include +#include #include #include #include +#include +#include using namespace std; using namespace cv; @@ -47,13 +50,8 @@ int main(int argc, char **argv) { config.PrintConfigInfo(); std::string img_path(argv[2]); - - cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); - - if (!srcimg.data) { - std::cerr << "[ERROR] image read failed! image path: " << img_path << "\n"; - exit(1); - } + std::vector all_img_names; + Utility::GetAllFiles((char *)img_path.c_str(), all_img_names); DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem, config.cpu_math_library_num_threads, @@ -76,18 +74,30 @@ int main(int argc, char **argv) { config.use_tensorrt, config.use_fp16); auto start = std::chrono::system_clock::now(); - std::vector>> boxes; - det.Run(srcimg, boxes); - - rec.Run(boxes, srcimg, cls); - auto end = std::chrono::system_clock::now(); - auto duration = - std::chrono::duration_cast(end - start); - std::cout << "Cost " - << double(duration.count()) * - std::chrono::microseconds::period::num / - std::chrono::microseconds::period::den - << "s" << std::endl; + + for (auto img_dir : all_img_names) { + LOG(INFO) << "The predict img: " << img_dir; + + cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR); + if (!srcimg.data) { + std::cerr << "[ERROR] image read failed! image path: " << img_path + << "\n"; + exit(1); + } + std::vector>> boxes; + + det.Run(srcimg, boxes); + + rec.Run(boxes, srcimg, cls); + auto end = std::chrono::system_clock::now(); + auto duration = + std::chrono::duration_cast(end - start); + std::cout << "Cost " + << double(duration.count()) * + std::chrono::microseconds::period::num / + std::chrono::microseconds::period::den + << "s" << std::endl; + } return 0; } diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index 9bfee6138577288156496d9b533b4da906ae7268..33ad468a33b42c3d9f25beb19452f2fa6a81db9e 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -30,6 +30,42 @@ void DBDetector::LoadModel(const std::string &model_dir) { this->use_fp16_ ? 
paddle_infer::Config::Precision::kHalf : paddle_infer::Config::Precision::kFloat32, false, false); + std::map> min_input_shape = { + {"x", {1, 3, 50, 50}}, + {"conv2d_92.tmp_0", {1, 96, 20, 20}}, + {"conv2d_91.tmp_0", {1, 96, 10, 10}}, + {"nearest_interp_v2_1.tmp_0", {1, 96, 10, 10}}, + {"nearest_interp_v2_2.tmp_0", {1, 96, 20, 20}}, + {"nearest_interp_v2_3.tmp_0", {1, 24, 20, 20}}, + {"nearest_interp_v2_4.tmp_0", {1, 24, 20, 20}}, + {"nearest_interp_v2_5.tmp_0", {1, 24, 20, 20}}, + {"elementwise_add_7", {1, 56, 2, 2}}, + {"nearest_interp_v2_0.tmp_0", {1, 96, 2, 2}}}; + std::map> max_input_shape = { + {"x", {1, 3, this->max_side_len_, this->max_side_len_}}, + {"conv2d_92.tmp_0", {1, 96, 400, 400}}, + {"conv2d_91.tmp_0", {1, 96, 200, 200}}, + {"nearest_interp_v2_1.tmp_0", {1, 96, 200, 200}}, + {"nearest_interp_v2_2.tmp_0", {1, 96, 400, 400}}, + {"nearest_interp_v2_3.tmp_0", {1, 24, 400, 400}}, + {"nearest_interp_v2_4.tmp_0", {1, 24, 400, 400}}, + {"nearest_interp_v2_5.tmp_0", {1, 24, 400, 400}}, + {"elementwise_add_7", {1, 56, 400, 400}}, + {"nearest_interp_v2_0.tmp_0", {1, 96, 400, 400}}}; + std::map> opt_input_shape = { + {"x", {1, 3, 640, 640}}, + {"conv2d_92.tmp_0", {1, 96, 160, 160}}, + {"conv2d_91.tmp_0", {1, 96, 80, 80}}, + {"nearest_interp_v2_1.tmp_0", {1, 96, 80, 80}}, + {"nearest_interp_v2_2.tmp_0", {1, 96, 160, 160}}, + {"nearest_interp_v2_3.tmp_0", {1, 24, 160, 160}}, + {"nearest_interp_v2_4.tmp_0", {1, 24, 160, 160}}, + {"nearest_interp_v2_5.tmp_0", {1, 24, 160, 160}}, + {"elementwise_add_7", {1, 56, 40, 40}}, + {"nearest_interp_v2_0.tmp_0", {1, 96, 40, 40}}}; + + config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, + opt_input_shape); } } else { config.DisableGpu(); @@ -48,7 +84,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { config.SwitchIrOptim(true); config.EnableMemoryOptim(); - config.DisableGlogInfo(); + // config.DisableGlogInfo(); this->predictor_ = CreatePredictor(config); } diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index 76873dad3c871a027c7fccd88409227639edefdf..b09282b0283743b530cd5477dbe9c5ff751de93c 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -25,8 +25,9 @@ void CRNNRecognizer::Run(std::vector>> boxes, std::cout << "The predicted text is :" << std::endl; int index = 0; - for (int i = boxes.size() - 1; i >= 0; i--) { + for (int i = 0; i < boxes.size(); i++) { crop_img = GetRotateCropImage(srcimg, boxes[i]); + if (cls != nullptr) { crop_img = cls->Run(crop_img); } @@ -105,6 +106,15 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { this->use_fp16_ ? 
paddle_infer::Config::Precision::kHalf : paddle_infer::Config::Precision::kFloat32, false, false); + std::map> min_input_shape = { + {"x", {1, 3, 32, 10}}}; + std::map> max_input_shape = { + {"x", {1, 3, 32, 2000}}}; + std::map> opt_input_shape = { + {"x", {1, 3, 32, 320}}}; + + config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, + opt_input_shape); } } else { config.DisableGpu(); diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp old mode 100755 new mode 100644 index fb7590e359da81e27c52c5a0037b93e19edb77df..28590e18520e6cc6f4e13260605366ec9bd93ed3 --- a/deploy/cpp_infer/src/preprocess_op.cpp +++ b/deploy/cpp_infer/src/preprocess_op.cpp @@ -77,19 +77,13 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img, int resize_h = int(float(h) * ratio); int resize_w = int(float(w) * ratio); - + resize_h = max(int(round(float(resize_h) / 32) * 32), 32); resize_w = max(int(round(float(resize_w) / 32) * 32), 32); - if (!use_tensorrt) { - cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); - ratio_h = float(resize_h) / float(h); - ratio_w = float(resize_w) / float(w); - } else { - cv::resize(img, resize_img, cv::Size(640, 640)); - ratio_h = float(640) / float(h); - ratio_w = float(640) / float(w); - } + cv::resize(img, resize_img, cv::Size(resize_w, resize_h)); + ratio_h = float(resize_h) / float(h); + ratio_w = float(resize_w) / float(w); } void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, @@ -108,23 +102,12 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio, resize_w = imgW; else resize_w = int(ceilf(imgH * ratio)); - if (!use_tensorrt) { - cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, - cv::INTER_LINEAR); - cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, - int(imgW - resize_img.cols), cv::BORDER_CONSTANT, - {127, 127, 127}); - } else { - int k = int(img.cols * 32 / img.rows); - if (k >= 100) { - cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, - cv::INTER_LINEAR); - } else { - cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR); - cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k), - cv::BORDER_CONSTANT, {127, 127, 127}); - } - } + + cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, + cv::INTER_LINEAR); + cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, + int(imgW - resize_img.cols), cv::BORDER_CONSTANT, + {127, 127, 127}); } void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, @@ -142,15 +125,11 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, else resize_w = int(ceilf(imgH * ratio)); - if (!use_tensorrt) { - cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, - cv::INTER_LINEAR); - if (resize_w < imgW) { - cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w, - cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); - } - } else { - cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR); + cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, + cv::INTER_LINEAR); + if (resize_w < imgW) { + cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w, + cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0)); } } diff --git a/deploy/cpp_infer/src/utility.cpp b/deploy/cpp_infer/src/utility.cpp index c1c9d9382a06432daca71eb7b08acb8b19b8ee98..2cd84f7e8dbdd8144b5337f55b3f3a62ed43d5b3 100644 --- a/deploy/cpp_infer/src/utility.cpp +++ b/deploy/cpp_infer/src/utility.cpp @@ -12,12 +12,14 @@ // See the License for the 
specific language governing permissions and // limitations under the License. +#include +#include #include #include +#include +#include #include -#include - namespace PaddleOCR { std::vector Utility::ReadDict(const std::string &path) { @@ -57,4 +59,37 @@ void Utility::VisualizeBboxes( << std::endl; } +// list all files under a directory +void Utility::GetAllFiles(const char *dir_name, + std::vector &all_inputs) { + if (NULL == dir_name) { + std::cout << " dir_name is null ! " << std::endl; + return; + } + struct stat s; + lstat(dir_name, &s); + if (!S_ISDIR(s.st_mode)) { + std::cout << "dir_name is not a valid directory !" << std::endl; + all_inputs.push_back(dir_name); + return; + } else { + struct dirent *filename; // return value for readdir() + DIR *dir; // return value for opendir() + dir = opendir(dir_name); + if (NULL == dir) { + std::cout << "Can not open dir " << dir_name << std::endl; + return; + } + std::cout << "Successfully opened the dir !" << std::endl; + while ((filename = readdir(dir)) != NULL) { + if (strcmp(filename->d_name, ".") == 0 || + strcmp(filename->d_name, "..") == 0) + continue; + // img_dir + std::string("/") + all_inputs[0]; + all_inputs.push_back(dir_name + std::string("/") + + std::string(filename->d_name)); + } + } +} + } // namespace PaddleOCR \ No newline at end of file diff --git a/deploy/cpp_infer/tools/build.sh b/deploy/cpp_infer/tools/build.sh index 606539487fce82adf817e7a3ee300e3bf890643b..79611300584755e531e6a2f645ab1a9420d3c5ad 100755 --- a/deploy/cpp_infer/tools/build.sh +++ b/deploy/cpp_infer/tools/build.sh @@ -12,9 +12,10 @@ cmake .. \ -DWITH_MKL=ON \ -DWITH_GPU=OFF \ -DWITH_STATIC_LIB=OFF \ - -DUSE_TENSORRT=OFF \ + -DWITH_TENSORRT=OFF \ -DOPENCV_DIR=${OPENCV_DIR} \ -DCUDNN_LIB=${CUDNN_LIB_DIR} \ -DCUDA_LIB=${CUDA_LIB_DIR} \ + -DTENSORRT_DIR=${TENSORRT_DIR} \ make -j diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt index 0e5f8472ab62f5fc646738bc2974736a0564b343..d4d66d65225bc9d1d4d62f45550db71fb5d8414e 100644 --- a/deploy/cpp_infer/tools/config.txt +++ b/deploy/cpp_infer/tools/config.txt @@ -20,10 +20,10 @@ cls_thresh 0.9 # rec config rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/ -char_list_file ../../ppocr/utils/ppocr_keys_v1.txt +char_list_file ../../ppocr/utils/ppocr_keys_v1.txt # show the detection results -visualize 1 +visualize 0 # use_tensorrt use_tensorrt 0 diff --git a/deploy/lite/readme_en.md b/deploy/lite/readme_en.md index ec36de35b8882d1b81d8e2e2f9aa5f68d510d28d..2c04c5673b1d7afec99d4e1219a20462c55be203 100644 --- a/deploy/lite/readme_en.md +++ b/deploy/lite/readme_en.md @@ -6,7 +6,7 @@ paddle-lite is a lightweight inference engine for PaddlePaddle. It provides effi ## 1. 
Preparation
 
-### 运行准备
+### Environment preparation
 
 - Computer (for Compiling Paddle Lite)
 - Mobile phone (arm7 or arm8)
@@ -87,8 +87,8 @@ The following table also provides a series of models that can be deployed on mob
 |Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
 |---|---|---|---|---|---|---|
-|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download lin](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download lin](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
-|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9|
+|V2.0|extra-lightweight chinese OCR optimized model|7.8M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_opt.nb)|v2.9|
+|V2.0(slim)|extra-lightweight chinese OCR optimized model|3.3M|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_rec_slim_opt.nb)|v2.9|
 
 If you directly use the model in the above table for deployment, you can skip the following steps and directly read [Section 2.2](#2.2 Run optimized model on Phone).
diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md
index 2e93c487c2f2071c7c89c753cf86eef61ce20805..c341b49a7b12aa10f0f3187bc861306fcae05c29 100644
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -59,7 +59,7 @@ im_show.save('result.jpg')
 from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR() # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
-result = ocr.ocr(img_path)
+result = ocr.ocr(img_path, cls=False)
 for line in result:
     print(line)
diff --git a/doc/doc_en/distributed_training.md b/doc/doc_en/distributed_training.md
new file mode 100644
index 0000000000000000000000000000000000000000..7a8b71ce308837568c84bf56292f78e9979d3907
--- /dev/null
+++ b/doc/doc_en/distributed_training.md
@@ -0,0 +1,50 @@
+# Distributed training
+
+## Introduction
+
+The high performance of distributed training is one of the core advantages of PaddlePaddle. In classification tasks, distributed training can achieve an almost linear speedup ratio. OCR training tasks generally require massive amounts of training data; for example, the PP-OCR v2.0 recognition model is trained on an 18-million-sample (1800W) dataset, which is very time-consuming on a single machine. Distributed training is therefore used in PaddleOCR to speed up training. For more information about distributed training, please refer to the [distributed training quick start tutorial](https://fleet-x.readthedocs.io/en/latest/paddle_fleet_rst/parameter_server/ps_quick_start.html).
+
+## Quick Start
+
+### Training with a single machine
+
+Take recognition as an example. After the data is prepared locally, start the training task with the `paddle.distributed.launch` interface. The start command is as follows:
+
+```shell
+python3 -m paddle.distributed.launch \
+    --log_dir=./log/ \
+    --gpus '0,1,2,3,4,5,6,7' \
+    tools/train.py \
+    -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+```
+
+### Training with multiple machines
+
+Compared with single-machine training, training on multiple machines only requires adding the `--ips` parameter to the start command, which specifies the IP list of the machines used for distributed training; the IPs of the machines are separated by commas. The start command is as follows:
+
+```shell
+ip_list="192.168.0.1,192.168.0.2"
+python3 -m paddle.distributed.launch \
+    --log_dir=./log/ \
+    --ips="${ip_list}" \
+    --gpus="0,1,2,3,4,5,6,7" \
+    tools/train.py \
+    -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+```
+
+**Notice:**
+* The IP addresses of the machines need to be separated by commas; they can be queried through `ifconfig` or `ipconfig`.
+* Passwordless SSH must be configured between the machines, and they must be able to `ping` each other directly; otherwise, communication between them cannot be established.
+* The code, data, and start command must be completely consistent across the machines, and the start command then needs to be run on every machine. The first machine in `ip_list` is set to `trainer0`, and so on.
+
+
+## Performance comparison
+
+* Based on a 26W-sample public recognition dataset (LSVT, RCTW, MTWI), training on a single 8-card P40 machine and on two 8-card P40 machines yields the following training times.
+
+| Model | Config file | Number of machines | Number of GPUs per machine | Training time | Recognition acc | Speedup ratio |
+| :-------: | :------------: | :----------------: | :----------------------------: | :------------------: | :--------------: | :-----------: |
+| CRNN | configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml | 1 | 8 | 60h | 66.7% | - |
+| CRNN | configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml | 2 | 8 | 40h | 67.0% | 150% |
+
+It can be seen that the training time is shortened from 60h to 40h: the speedup ratio reaches 150% (60h / 40h), and the scaling efficiency is 75% (60h / (40h * 2)).
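+
+As a quick sanity check, the speedup and efficiency figures can be reproduced from the table in a few lines (an illustrative snippet, not part of the training code):
+
+```python
+# speedup    = single-machine time / multi-machine time
+# efficiency = speedup / number of machines
+t_single, t_multi, machines = 60.0, 40.0, 2
+speedup = t_single / t_multi       # 60h / 40h = 1.5  -> 150%
+efficiency = speedup / machines    # 1.5 / 2   = 0.75 -> 75%
+print(f"speedup={speedup:.0%}, efficiency={efficiency:.0%}")
+```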
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md index 5e5a6cfab0383c10414b7ae8cd77324f19848b97..9bee4aef5121b1964a9bdbdeeaad4e81dd9ff6d4 100644 --- a/doc/doc_en/models_list_en.md +++ b/doc/doc_en/models_list_en.md @@ -103,14 +103,14 @@ python3 generate_multi_language_configs.py -l it \ | german_mobile_v2.0_rec | ppocr/utils/dict/german_dict.txt | Lightweight model for German recognition|[rec_german_lite_train.yml](../../configs/rec/multi_language/rec_german_lite_train.yml)|2.65M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_train.tar) | | korean_mobile_v2.0_rec | ppocr/utils/dict/korean_dict.txt | Lightweight model for Korean recognition|[rec_korean_lite_train.yml](../../configs/rec/multi_language/rec_korean_lite_train.yml)|3.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_train.tar) | | japan_mobile_v2.0_rec | ppocr/utils/dict/japan_dict.txt | Lightweight model for Japanese recognition|[rec_japan_lite_train.yml](../../configs/rec/multi_language/rec_japan_lite_train.yml)|4.23M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_train.tar) | -| chinese_cht_mobile_v2.0_rec | ppocr/utils/dict/chinese_cht_dict.txt | Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) | +| chinese_cht_mobile_v2.0_rec | ppocr/utils/dict/chinese_cht_dict.txt | Lightweight model for chinese cht recognition|rec_chinese_cht_lite_train.yml|5.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_train.tar) | | te_mobile_v2.0_rec | ppocr/utils/dict/te_dict.txt | Lightweight model for Telugu recognition|rec_te_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_train.tar) | | ka_mobile_v2.0_rec | ppocr/utils/dict/ka_dict.txt | Lightweight model for Kannada recognition|rec_ka_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_train.tar) | | ta_mobile_v2.0_rec | ppocr/utils/dict/ta_dict.txt | Lightweight model for Tamil recognition|rec_ta_lite_train.yml|2.63M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_train.tar) | -| latin_mobile_v2.0_rec | ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) 
|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) | -| arabic_mobile_v2.0_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) | -| cyrillic_mobile_v2.0_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) | -| devanagari_mobile_v2.0_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) | +| latin_mobile_v2.0_rec | ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition | [rec_latin_lite_train.yml](../../configs/rec/multi_language/rec_latin_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_train.tar) | +| arabic_mobile_v2.0_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition | [rec_arabic_lite_train.yml](../../configs/rec/multi_language/rec_arabic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_train.tar) | +| cyrillic_mobile_v2.0_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition | [rec_cyrillic_lite_train.yml](../../configs/rec/multi_language/rec_cyrillic_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_train.tar) | +| devanagari_mobile_v2.0_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [rec_devanagari_lite_train.yml](../../configs/rec/multi_language/rec_devanagari_lite_train.yml) |2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_train.tar) | For more supported languages, please refer to : [Multi-language model](./multi_languages_en.md) diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 69abf085556f466853798077bb116b3986582bcc..eeaf1347dc77a24f158ba8ba2c6f013b1fd89b81 100644 --- a/doc/doc_en/whl_en.md +++ 
b/doc/doc_en/whl_en.md @@ -59,7 +59,7 @@ Visualization of results from paddleocr import PaddleOCR,draw_ocr ocr = PaddleOCR(lang='en') # need to run only once to download and load model into memory img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg' -result = ocr.ocr(img_path) +result = ocr.ocr(img_path, cls=False) for line in result: print(line) diff --git a/doc/joinus.PNG b/doc/joinus.PNG index 571232b4734ca52d74dcd346b1c981470663e8f8..6e299a2ebe0eb52aa799ba9fa924bd685cd248de 100644 Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ diff --git a/paddleocr.py b/paddleocr.py index c5da7248d2cc7d778758a87309cfeaedcbd8ceb5..1e4d94ff4e72da951e1ffb92edb50715482581ae 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -30,7 +30,7 @@ from ppocr.utils.logging import get_logger logger = get_logger() from ppocr.utils.utility import check_and_read_gif, get_image_file_list -from tools.infer.utility import draw_ocr +from tools.infer.utility import draw_ocr, init_args, str2bool __all__ = ['PaddleOCR'] @@ -167,106 +167,24 @@ def maybe_download(model_storage_directory, url): os.remove(tmp_path) -def parse_args(mMain=True, add_help=True): +def parse_args(mMain=True): import argparse - - def str2bool(v): - return v.lower() in ("true", "t", "1") - + parser = init_args() + parser.add_help = mMain + parser.add_argument("--lang", type=str, default='ch') + parser.add_argument("--det", type=str2bool, default=True) + parser.add_argument("--rec", type=str2bool, default=True) + + for action in parser._actions: + if action.dest == 'rec_char_dict_path': + action.default = None if mMain: - parser = argparse.ArgumentParser(add_help=add_help) - # params for prediction engine - parser.add_argument("--use_gpu", type=str2bool, default=True) - parser.add_argument("--ir_optim", type=str2bool, default=True) - parser.add_argument("--use_tensorrt", type=str2bool, default=False) - parser.add_argument("--gpu_mem", type=int, default=8000) - - # params for text detector - parser.add_argument("--image_dir", type=str) - parser.add_argument("--det_algorithm", type=str, default='DB') - parser.add_argument("--det_model_dir", type=str, default=None) - parser.add_argument("--det_limit_side_len", type=float, default=960) - parser.add_argument("--det_limit_type", type=str, default='max') - - # DB parmas - parser.add_argument("--det_db_thresh", type=float, default=0.3) - parser.add_argument("--det_db_box_thresh", type=float, default=0.5) - parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6) - parser.add_argument("--use_dilation", type=bool, default=False) - parser.add_argument("--det_db_score_mode", type=str, default="fast") - - # EAST parmas - parser.add_argument("--det_east_score_thresh", type=float, default=0.8) - parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) - parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) - - # params for text recognizer - parser.add_argument("--rec_algorithm", type=str, default='CRNN') - parser.add_argument("--rec_model_dir", type=str, default=None) - parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") - parser.add_argument("--rec_char_type", type=str, default='ch') - parser.add_argument("--rec_batch_num", type=int, default=6) - parser.add_argument("--max_text_length", type=int, default=25) - parser.add_argument("--rec_char_dict_path", type=str, default=None) - parser.add_argument("--use_space_char", type=bool, default=True) - parser.add_argument("--drop_score", type=float, default=0.5) - - # params for text classifier - 
parser.add_argument("--cls_model_dir", type=str, default=None) - parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") - parser.add_argument("--label_list", type=list, default=['0', '180']) - parser.add_argument("--cls_batch_num", type=int, default=6) - parser.add_argument("--cls_thresh", type=float, default=0.9) - - parser.add_argument("--enable_mkldnn", type=bool, default=False) - parser.add_argument("--use_zero_copy_run", type=bool, default=False) - parser.add_argument("--use_pdserving", type=str2bool, default=False) - - parser.add_argument("--lang", type=str, default='ch') - parser.add_argument("--det", type=str2bool, default=True) - parser.add_argument("--rec", type=str2bool, default=True) - parser.add_argument("--use_angle_cls", type=str2bool, default=False) return parser.parse_args() else: - return argparse.Namespace( - use_gpu=True, - ir_optim=True, - use_tensorrt=False, - gpu_mem=8000, - image_dir='', - det_algorithm='DB', - det_model_dir=None, - det_limit_side_len=960, - det_limit_type='max', - det_db_thresh=0.3, - det_db_box_thresh=0.5, - det_db_unclip_ratio=1.6, - use_dilation=False, - det_db_score_mode="fast", - det_east_score_thresh=0.8, - det_east_cover_thresh=0.1, - det_east_nms_thresh=0.2, - rec_algorithm='CRNN', - rec_model_dir=None, - rec_image_shape="3, 32, 320", - rec_char_type='ch', - rec_batch_num=6, - max_text_length=25, - rec_char_dict_path=None, - use_space_char=True, - drop_score=0.5, - cls_model_dir=None, - cls_image_shape="3, 48, 192", - label_list=['0', '180'], - cls_batch_num=6, - cls_thresh=0.9, - enable_mkldnn=False, - use_zero_copy_run=False, - use_pdserving=False, - lang='ch', - det=True, - rec=True, - use_angle_cls=False) + inference_args_dict = {} + for action in parser._actions: + inference_args_dict[action.dest] = action.default + return argparse.Namespace(**inference_args_dict) class PaddleOCR(predict_system.TextSystem): @@ -276,7 +194,7 @@ class PaddleOCR(predict_system.TextSystem): args: **kwargs: other params show in paddleocr --help """ - postprocess_params = parse_args(mMain=False, add_help=False) + postprocess_params = parse_args(mMain=False) postprocess_params.__dict__.update(**kwargs) self.use_angle_cls = postprocess_params.use_angle_cls lang = postprocess_params.lang @@ -346,7 +264,7 @@ class PaddleOCR(predict_system.TextSystem): # init det_model and rec_model super().__init__(postprocess_params) - def ocr(self, img, det=True, rec=True, cls=False): + def ocr(self, img, det=True, rec=True, cls=True): """ ocr with paddleocr args: @@ -358,9 +276,7 @@ class PaddleOCR(predict_system.TextSystem): if isinstance(img, list) and det == True: logger.error('When input a list of images, det must be false') exit(0) - if cls == False: - self.use_angle_cls = False - elif cls == True and self.use_angle_cls == False: + if cls == True and self.use_angle_cls == False: logger.warning( 'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process' ) @@ -382,7 +298,7 @@ class PaddleOCR(predict_system.TextSystem): if isinstance(img, np.ndarray) and len(img.shape) == 2: img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if det and rec: - dt_boxes, rec_res = self.__call__(img) + dt_boxes, rec_res = self.__call__(img, cls) return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] elif det and not rec: dt_boxes, elapse = self.text_detector(img) @@ -392,7 +308,7 @@ class PaddleOCR(predict_system.TextSystem): else: if not isinstance(img, list): img = [img] - if self.use_angle_cls: + if 
self.use_angle_cls and cls: img, cls_res, elapse = self.text_classifier(img) if not rec: return cls_res diff --git a/ppstructure/README.md b/ppstructure/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppstructure/README_ch.md b/ppstructure/README_ch.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppstructure/layout/README.md b/ppstructure/layout/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppstructure/layout/README_ch.md b/ppstructure/layout/README_ch.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppstructure/predict_system.py b/ppstructure/predict_system.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py index 391779e6641657a81b9fffbcee1018e4f85023cd..8f1b8f4812a6ae47a2456b7da9d456befa374183 100755 --- a/tools/infer/predict_system.py +++ b/tools/infer/predict_system.py @@ -84,7 +84,7 @@ class TextSystem(object): cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno]) logger.info(bno, rec_res[bno]) - def __call__(self, img): + def __call__(self, img, cls=True): ori_im = img.copy() dt_boxes, elapse = self.text_detector(img) @@ -98,7 +98,7 @@ class TextSystem(object): tmp_box = copy.deepcopy(dt_boxes[bno]) img_crop = self.get_rotate_crop_image(ori_im, tmp_box) img_crop_list.append(img_crop) - if self.use_angle_cls: + if self.use_angle_cls and cls: img_crop_list, angle_list, elapse = self.text_classifier( img_crop_list) diff --git a/tools/infer/utility.py b/tools/infer/utility.py index 5de4e69a787f23f4205ab9d35dccd3f45f350d44..20fcfe4bc7475822a5e382f8908b289b614a9fe1 100755 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -23,13 +23,15 @@ import math from paddle import inference import time from ppocr.utils.logging import get_logger + logger = get_logger() -def parse_args(): - def str2bool(v): - return v.lower() in ("true", "t", "1") +def str2bool(v): + return v.lower() in ("true", "t", "1") + +def init_args(): parser = argparse.ArgumentParser() # params for prediction engine parser.add_argument("--use_gpu", type=str2bool, default=True) @@ -110,6 +112,12 @@ def parse_args(): parser.add_argument("--benchmark", type=bool, default=False) parser.add_argument("--save_log_path", type=str, default="./log_output/") + + return parser + + +def parse_args(): + parser = init_args() return parser.parse_args() @@ -221,22 +229,97 @@ def create_predictor(args, mode, logger): config.enable_use_gpu(args.gpu_mem, 0) if args.use_tensorrt: config.enable_tensorrt_engine( - precision_mode=inference.PrecisionType.Half - if args.use_fp16 else inference.PrecisionType.Float32, - max_batch_size=args.max_batch_size) + precision_mode=inference.PrecisionType.Float32, + max_batch_size=args.max_batch_size, + min_subgraph_size=3) # skip the minmum trt subgraph + if mode == "det" and 
"mobile" in model_file_path: + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_92.tmp_0": [1, 96, 20, 20], + "conv2d_91.tmp_0": [1, 96, 10, 10], + "nearest_interp_v2_1.tmp_0": [1, 96, 10, 10], + "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20], + "elementwise_add_7": [1, 56, 2, 2], + "nearest_interp_v2_0.tmp_0": [1, 96, 2, 2] + } + max_input_shape = { + "x": [1, 3, 2000, 2000], + "conv2d_92.tmp_0": [1, 96, 400, 400], + "conv2d_91.tmp_0": [1, 96, 200, 200], + "nearest_interp_v2_1.tmp_0": [1, 96, 200, 200], + "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400], + "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400], + "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400], + "nearest_interp_v2_5.tmp_0": [1, 24, 400, 400], + "elementwise_add_7": [1, 56, 400, 400], + "nearest_interp_v2_0.tmp_0": [1, 96, 400, 400] + } + opt_input_shape = { + "x": [1, 3, 640, 640], + "conv2d_92.tmp_0": [1, 96, 160, 160], + "conv2d_91.tmp_0": [1, 96, 80, 80], + "nearest_interp_v2_1.tmp_0": [1, 96, 80, 80], + "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160], + "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160], + "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160], + "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160], + "elementwise_add_7": [1, 56, 40, 40], + "nearest_interp_v2_0.tmp_0": [1, 96, 40, 40] + } + if mode == "det" and "server" in model_file_path: + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_59.tmp_0": [1, 96, 20, 20], + "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20] + } + max_input_shape = { + "x": [1, 3, 2000, 2000], + "conv2d_59.tmp_0": [1, 96, 400, 400], + "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400], + "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400], + "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400], + "nearest_interp_v2_5.tmp_0": [1, 24, 400, 400] + } + opt_input_shape = { + "x": [1, 3, 640, 640], + "conv2d_59.tmp_0": [1, 96, 160, 160], + "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160], + "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160], + "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160], + "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160] + } + elif mode == "rec": + min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]} + max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]} + opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]} + elif mode == "cls": + min_input_shape = {"x": [args.rec_batch_num, 3, 48, 10]} + max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]} + opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]} + else: + min_input_shape = {"x": [1, 3, 10, 10]} + max_input_shape = {"x": [1, 3, 1000, 1000]} + opt_input_shape = {"x": [1, 3, 500, 500]} + config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, + opt_input_shape) + else: config.disable_gpu() if hasattr(args, "cpu_threads"): config.set_cpu_math_library_num_threads(args.cpu_threads) else: + # default cpu threads as 10 config.set_cpu_math_library_num_threads(10) if args.enable_mkldnn: # cache 10 different shapes for mkldnn to avoid memory leak config.set_mkldnn_cache_capacity(10) config.enable_mkldnn() - # TODO LDOUBLEV: fix mkldnn bug when bach_size > 1 - #config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'}) - args.rec_batch_num = 1 # enable memory optim config.enable_memory_optim() @@ -299,7 +382,7 @@ def draw_ocr(image, 
txts=None, scores=None, drop_score=0.5, - font_path="./doc/simfang.ttf"): + font_path="./doc/fonts/simfang.ttf"): """ Visualize the results of OCR detection and recognition args: @@ -532,22 +615,4 @@ def get_current_memory_mb(gpu_id=None): if __name__ == '__main__': - test_img = "./doc/test_v2" - predict_txt = "./doc/predict.txt" - f = open(predict_txt, 'r') - data = f.readlines() - img_path, anno = data[0].strip().split('\t') - img_name = os.path.basename(img_path) - img_path = os.path.join(test_img, img_name) - image = Image.open(img_path) - - data = json.loads(anno) - boxes, txts, scores = [], [], [] - for dic in data: - boxes.append(dic['points']) - txts.append(dic['transcription']) - scores.append(round(dic['scores'], 3)) - - new_img = draw_ocr(image, boxes, txts, scores) - - cv2.imwrite(img_name, new_img) + pass
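
With the `init_args`/`parse_args` split introduced above, other entry points can reuse the complete inference flag set without going through the command line; `paddleocr.py` in this patch builds its default `Namespace` exactly this way. Below is a minimal sketch of the pattern (the `default_namespace` helper is illustrative, not part of this patch):

```python
import argparse

from tools.infer.utility import init_args, str2bool  # exported by this patch

def default_namespace(**overrides):
    """Build an argparse.Namespace from the init_args() defaults,
    optionally overriding individual flags (mirrors paddleocr.py)."""
    parser = init_args()
    # Every registered action contributes its default value.
    defaults = {action.dest: action.default for action in parser._actions}
    defaults.update(overrides)
    return argparse.Namespace(**defaults)

args = default_namespace(use_gpu=str2bool("false"), rec_batch_num=1)
```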