Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into fixocr

afecc497 · LDOUBLEV · 75d2c47d · c63624b3 · afecc497 · afecc497
5 changed files
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 import os
+import sys
 import math
 import random
 import functools
@@ -42,6 +43,10 @@ class TrainReader(object):
            img_num = len(label_infor_list)
            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
+            if sys.platform == "win32":
+                print("multiprocess is not fully compatible with Windows."
+                      "num_workers will be 1.")
+                self.num_workers = 1
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
                outs = self.process(label_infor)

--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
    reader_function = params['reader_function']
    function = create_module(reader_function)(params)
    if mode == "train":
+        if sys.platform == "win32":
+            return function(0)
        readers = []
        num_workers = params['num_workers']
        for process_id in range(num_workers):

--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 import os
+import sys
 import math
 import random
 import numpy as np
@@ -191,16 +192,21 @@ class SimpleReader(object):
                img_num = len(label_infor_list)
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
+                if sys.platform=="win32":
+                    print("multiprocess is not fully compatible with Windows."
+                          "num_workers will be 1.")
+                    self.num_workers = 1
                for img_id in range(process_id, img_num, self.num_workers):
                    label_infor = label_infor_list[img_id_list[img_id]]
                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
                    img_path = self.img_set_dir + "/" + substr[0]
                    img = cv2.imread(img_path)
-                    if img.shape[-1]==1 or len(list(img.shape))==2:
-                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    if img is None:
                        logger.info("{} does not exist!".format(img_path))
                        continue
+                    if img.shape[-1]==1 or len(list(img.shape))==2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    label = substr[1]
                    outs = process_image(img, self.image_shape, label,
                                         self.char_ops, self.loss_type,

--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
@@ -34,7 +34,7 @@ class CharacterOps(object):
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
-                    line = line.decode('utf-8').strip("\n")
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
                    self.character_str += line
            dict_character = list(self.character_str)
        elif self.character_type == "en_sensitive":

--- a/tools/eval_utils/eval_rec_utils.py
+++ b/tools/eval_utils/eval_rec_utils.py
@@ -48,7 +48,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
    total_sample_num = 0
    total_acc_num = 0
    total_batch_num = 0
-    if mode == "eval":
+    if mode == "test":
        is_remove_duplicate = False
    else:
        is_remove_duplicate = True
@@ -91,11 +91,11 @@ def test_rec_benchmark(exe, config, eval_info_dict):
    total_correct_number = 0
    eval_data_acc_info = {}
    for eval_data in eval_data_list:
-        config['TestReader']['lmdb_sets_dir'] = \
+        config['EvalReader']['lmdb_sets_dir'] = \
            eval_data_dir + "/" + eval_data
-        eval_reader = reader_main(config=config, mode="test")
+        eval_reader = reader_main(config=config, mode="eval")
        eval_info_dict['reader'] = eval_reader
-        metrics = eval_rec_run(exe, config, eval_info_dict, "test")
+        metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
        total_evaluation_data_number += metrics['total_sample_num']
        total_correct_number += metrics['total_acc_num']
        eval_data_acc_info[eval_data] = metrics