Merge pull request #7960 from andyjpaddle/fix_vl

[cp]7957

Merge pull request #7960 from andyjpaddle/fix_vl
[cp]7957
ef9e8702 · andyj · GitHub · 4d3cd67c · bbc5048e · ef9e8702
4 changed files
--- a/doc/doc_ch/algorithm_rec_visionlan.md
+++ b/doc/doc_ch/algorithm_rec_visionlan.md
@@ -139,7 +139,7 @@ Predicts of ./doc/imgs_words/en/word_2.png:('yourself', 0.9999493)
 ## 5. FAQ
 1. MJSynth和SynthText两种数据集来自于[VisionLAN源repo](https://github.com/wangyuxin87/VisionLAN) 。
-2. 我们使用VisionLAN作者提供的预训练模型进行finetune训练。
+2. 我们使用VisionLAN作者提供的预训练模型进行finetune训练，预训练模型配套字典为'ppocr/utils/ic15_dict.txt'。
 ## 引用

--- a/doc/doc_en/algorithm_rec_visionlan_en.md
+++ b/doc/doc_en/algorithm_rec_visionlan_en.md
@@ -120,7 +120,7 @@ Not supported
 ## 5. FAQ
 1. Note that the MJSynth and SynthText datasets come from [VisionLAN repo](https://github.com/wangyuxin87/VisionLAN).
-2. We use the pre-trained model provided by the VisionLAN authors for finetune training.
+2. We use the pre-trained model provided by the VisionLAN authors for finetune training. The dictionary for the pre-trained model is 'ppocr/utils/ic15_dict.txt'.
 ## Citation

--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -107,6 +107,7 @@ class BaseRecLabelEncode(object):
        self.beg_str = "sos"
        self.end_str = "eos"
        self.lower = lower
+        self.use_default_dict = False
        if character_dict_path is None:
            logger = get_logger()
@@ -116,8 +117,11 @@ class BaseRecLabelEncode(object):
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
            self.lower = True
+            self.use_default_dict = True
        else:
            self.character_str = []
+            if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
+                self.use_default_dict = True
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
@@ -1344,8 +1348,9 @@ class VLLabelEncode(BaseRecLabelEncode):
                 **kwargs):
        super(VLLabelEncode, self).__init__(
            max_text_length, character_dict_path, use_space_char, lower)
-        self.character = self.character[10:] + self.character[
+        if self.use_default_dict:
-            1:10] + [self.character[0]]
+            self.character = self.character[10:] + self.character[
+                1:10] + [self.character[0]]
        self.dict = {}
        for i, char in enumerate(self.character):
            self.dict[char] = i

--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -26,10 +26,15 @@ class BaseRecLabelDecode(object):
        self.end_str = "eos"
        self.reverse = False
        self.character_str = []
+        self.use_default_dict = False
        if character_dict_path is None:
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
+            self.use_default_dict = True
        else:
+            if 'ppocr/utils/ic15_dict.txt' in character_dict_path:
+                self.use_default_dict = True
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
@@ -720,8 +725,9 @@ class VLLabelDecode(BaseRecLabelDecode):
        super(VLLabelDecode, self).__init__(character_dict_path, use_space_char)
        self.max_text_length = kwargs.get('max_text_length', 25)
        self.nclass = len(self.character) + 1
-        self.character = self.character[10:] + self.character[
+        if self.use_default_dict:
-            1:10] + [self.character[0]]
+            self.character = self.character[10:] + self.character[
+                1:10] + [self.character[0]]
    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """ convert text-index into text-label. """