diff --git a/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yaml b/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..94a22e5c6e738429939d8df7b9d1c556abba7f6c --- /dev/null +++ b/configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yaml @@ -0,0 +1,102 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v1.1 + save_epoch_step: 3 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + # if pretrained_model is saved in static mode, load_static_weights must be set to True + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + character_type: ch + max_text_length: 25 + infer_mode: False + use_space_char: False + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: ["./train_data/train_list.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 
256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data + label_file_list: ["./train_data/val_list.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml new file mode 100644 index 0000000000000000000000000000000000000000..74937138aac1ae155ce8754d861d21a85b35e031 --- /dev/null +++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml @@ -0,0 +1,96 @@ +Global: + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/r34_vd_none_bilstm_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + # if pretrained_model is saved in static mode, load_static_weights must be set to True + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + character_type: en + max_text_length: 25 + infer_mode: False + use_space_char: False + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 256 + Head: + name: CTCHead + fc_decay: 0 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: LMDBDateSet + data_dir: 
./train_data/data_lmdb_release/training/ + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: LMDBDateSet + data_dir: ./train_data/data_lmdb_release/validation/ + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 diff --git a/ppocr/data/simple_dataset.py b/ppocr/data/simple_dataset.py index e9a0a2ae4c2b77b5a390e7f196f364c529847d71..2d175c2107678264668f57861e6b1ab3789b5a3a 100644 --- a/ppocr/data/simple_dataset.py +++ b/ppocr/data/simple_dataset.py @@ -22,6 +22,7 @@ from .imaug import transform, create_operators class SimpleDataSet(Dataset): def __init__(self, config, mode, logger): super(SimpleDataSet, self).__init__() + self.logger = logger global_config = config['Global'] dataset_config = config[mode]['dataset'] @@ -100,16 +101,22 @@ class SimpleDataSet(Dataset): def __getitem__(self, idx): dataset_idx, file_idx = self.data_idx_order_list[idx] data_line = self.data_lines_list[dataset_idx][file_idx] - data_line = data_line.decode('utf-8') - substr = data_line.strip("\n").split(self.delimiter) - file_name = substr[0] - label = substr[1] - img_path = os.path.join(self.data_dir, file_name) - data = {'img_path': img_path, 'label': label} - with open(data['img_path'], 'rb') as f: - img = f.read() - data['image'] = img - outs = transform(data, self.ops) + try: + data_line = data_line.decode('utf-8') + substr = 
data_line.strip("\n").split(self.delimiter) + file_name = substr[0]  # first field: image path, joined with data_dir below + label = substr[1]  # second field: text label + img_path = os.path.join(self.data_dir, file_name) + data = {'img_path': img_path, 'label': label} + with open(data['img_path'], 'rb') as f: + img = f.read() + data['image'] = img + outs = transform(data, self.ops) + except Exception as e: + self.logger.error( + "When parsing line {}, error happened with msg: {}".format( + data_line, e)) + outs = None if outs is None: return self.__getitem__(np.random.randint(self.__len__())) return outs