提交 edeb12b1 编写于 作者: T tink2123

rename en_sensitive EN_symbol

上级 d9ae86f4
Global: Global:
use_gpu: True use_gpu: False
epoch_num: 500 epoch_num: 500
log_smooth_window: 20 log_smooth_window: 20
print_batch_step: 10 print_batch_step: 10
...@@ -16,7 +16,7 @@ Global: ...@@ -16,7 +16,7 @@ Global:
infer_img: infer_img:
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/en_dict.txt character_dict_path: ppocr/utils/dict/en_dict.txt
character_type: En character_type: EN
max_text_length: 25 max_text_length: 25
infer_mode: False infer_mode: False
use_space_char: False use_space_char: False
...@@ -63,8 +63,8 @@ Metric: ...@@ -63,8 +63,8 @@ Metric:
Train: Train:
dataset: dataset:
name: SimpleDataSet name: SimpleDataSet
data_dir: ./train_data/ data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/train_list.txt"] label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms: transforms:
- DecodeImage: # load image - DecodeImage: # load image
img_mode: BGR img_mode: BGR
...@@ -77,15 +77,15 @@ Train: ...@@ -77,15 +77,15 @@ Train:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader: loader:
shuffle: True shuffle: True
batch_size_per_card: 256 batch_size_per_card: 1
drop_last: True drop_last: True
num_workers: 8 num_workers: 1
Eval: Eval:
dataset: dataset:
name: SimpleDataSet name: SimpleDataSet
data_dir: ./train_data/ data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/eval_list.txt"] label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms: transforms:
- DecodeImage: # load image - DecodeImage: # load image
img_mode: BGR img_mode: BGR
......
...@@ -348,7 +348,7 @@ PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi ...@@ -348,7 +348,7 @@ PaddleOCR目前已支持26种(除中文外)语种识别,`configs/rec/multi
| 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type | | 配置文件 | 算法名称 | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht| | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 中文繁体 | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语 | En | | rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 英语(区分大小写) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 法语 | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 德语 | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | 日语 | japan |
......
...@@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are: ...@@ -350,7 +350,7 @@ Currently, the multi-language algorithms supported by PaddleOCR are:
| Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type | | Configuration file | Algorithm name | backbone | trans | seq | pred | language | character_type |
| :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: | | :--------: | :-------: | :-------: | :-------: | :-----: | :-----: | :-----: | :-----: |
| rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht| | rec_chinese_cht_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | chinese traditional | chinese_cht|
| rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English | En | | rec_en_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | English(Case sensitive) | EN |
| rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french | | rec_french_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | French | french |
| rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german | | rec_ger_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | German | german |
| rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan | | rec_japan_lite_train.yml | CRNN | Mobilenet_v3 small 0.5 | None | BiLSTM | ctc | Japanese | japan |
......
...@@ -18,6 +18,7 @@ from __future__ import print_function ...@@ -18,6 +18,7 @@ from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
import numpy as np import numpy as np
import string
class ClsLabelEncode(object): class ClsLabelEncode(object):
...@@ -92,8 +93,8 @@ class BaseRecLabelEncode(object): ...@@ -92,8 +93,8 @@ class BaseRecLabelEncode(object):
character_type='ch', character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = [ support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'En', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'EN', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs',
'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'oc', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi',
'mr', 'ne' 'mr', 'ne'
] ]
...@@ -104,9 +105,8 @@ class BaseRecLabelEncode(object): ...@@ -104,9 +105,8 @@ class BaseRecLabelEncode(object):
if character_type == "en": if character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "en_sensitive": elif character_type == "EN_symbol":
# same with ASTER setting (use 94 char). # same with ASTER setting (use 94 char).
import string
self.character_str = string.printable[:-6] self.character_str = string.printable[:-6]
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type in support_character_type: elif character_type in support_character_type:
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
import string
import paddle import paddle
from paddle.nn import functional as F from paddle.nn import functional as F
...@@ -24,10 +25,10 @@ class BaseRecLabelDecode(object): ...@@ -24,10 +25,10 @@ class BaseRecLabelDecode(object):
character_type='ch', character_type='ch',
use_space_char=False): use_space_char=False):
support_character_type = [ support_character_type = [
'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean', 'ch', 'en', 'EN_symbol', 'french', 'german', 'japan', 'korean',
'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc', 'it', 'xi', 'pu', 'ru', 'ar', 'ta', 'ug', 'fa', 'ur', 'rs', 'oc',
'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr', 'rsc', 'bg', 'uk', 'be', 'te', 'ka', 'chinese_cht', 'hi', 'mr',
'ne', 'En' 'ne', 'EN'
] ]
assert character_type in support_character_type, "Only {} are supported now but get {}".format( assert character_type in support_character_type, "Only {} are supported now but get {}".format(
support_character_type, character_type) support_character_type, character_type)
...@@ -35,9 +36,8 @@ class BaseRecLabelDecode(object): ...@@ -35,9 +36,8 @@ class BaseRecLabelDecode(object):
if character_type == "en": if character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type == "en_sensitive": elif character_type == "EN_symbol":
# same with ASTER setting (use 94 char). # same with ASTER setting (use 94 char).
import string
self.character_str = string.printable[:-6] self.character_str = string.printable[:-6]
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type in support_character_type: elif character_type in support_character_type:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册