diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml index 0f08909add17d8c73ad6e1b00e17d4c351def7e5..ab484a44833a405513d7f2b4079a4da4c2e403c8 100644 --- a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml +++ b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml @@ -141,6 +141,7 @@ Train: img_mode: BGR channel_first: False - DetLabelEncode: # Class handling label + - CopyPaste: - IaaAugment: augmenter_args: - { 'type': Fliplr, 'args': { 'p': 0.5 } } diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml index 1159d71bf94c330e26c3009b38c5c2b4a9c96f52..46daeeb86d004772a6fb964d602369dcd53b3a01 100644 --- a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml +++ b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml @@ -68,8 +68,7 @@ Loss: ohem_ratio: 3 - DistillationDBLoss: weight: 1.0 - model_name_list: ["Student", "Teacher"] - # key: maps + model_name_list: ["Student"] name: DBLoss balance_loss: true main_loss_type: DiceLoss @@ -116,6 +115,7 @@ Train: img_mode: BGR channel_first: False - DetLabelEncode: # Class handling label + - CopyPaste: - IaaAugment: augmenter_args: - { 'type': Fliplr, 'args': { 'p': 0.5 } } diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml index 7fe2d2e1a065b54d0e2479475f5f67ac5e38a166..bfbc3b6268cf521acb035be33ced9141046fc430 100644 --- a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml +++ b/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml @@ -118,6 +118,7 @@ Train: img_mode: BGR channel_first: False - DetLabelEncode: # Class handling label + - CopyPaste: - IaaAugment: augmenter_args: - { 'type': Fliplr, 'args': { 'p': 0.5 } } diff --git a/configs/e2e/e2e_r50_vd_pg.yml b/configs/e2e/e2e_r50_vd_pg.yml index 4a6e19f4461c7236f3a9a5253437eff97fa72f67..c4c5226e796a42db723ce78ef65473e357c25dc6 100644 --- a/configs/e2e/e2e_r50_vd_pg.yml +++ b/configs/e2e/e2e_r50_vd_pg.yml @@ -94,7 +94,7 @@ Eval: label_file_list: [./train_data/total_text/test/test.txt] transforms: - DecodeImage: # load image - img_mode: RGB + img_mode: BGR channel_first: False - E2ELabelEncodeTest: - E2EResizeForTest: @@ -111,4 +111,4 @@ Eval: shuffle: False drop_last: False batch_size_per_card: 1 # must be 1 - num_workers: 2 \ No newline at end of file + num_workers: 2 diff --git a/ppocr/losses/rec_sar_loss.py b/ppocr/losses/rec_sar_loss.py index 9e1c6495fb5ffa274662a3d1031b174d7297ee30..c8bd8bb0ca395fa4658e57b8dcac52a3e94aadce 100644 --- a/ppocr/losses/rec_sar_loss.py +++ b/ppocr/losses/rec_sar_loss.py @@ -9,11 +9,14 @@ from paddle import nn class SARLoss(nn.Layer): def __init__(self, **kwargs): super(SARLoss, self).__init__() - self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="mean", ignore_index=96) + self.loss_func = paddle.nn.loss.CrossEntropyLoss( + reduction="mean", ignore_index=92) def forward(self, predicts, batch): - predict = predicts[:, :-1, :] # ignore last index of outputs to be in same seq_len with targets - label = batch[1].astype("int64")[:, 1:] # ignore first index of target in loss calculation + predict = predicts[:, : + -1, :] # ignore last index of outputs to be in same seq_len with targets + label = batch[1].astype( + "int64")[:, 1:] # ignore first index of target in loss calculation batch_size, num_steps, num_classes = predict.shape[0], predict.shape[ 1], predict.shape[2] assert len(label.shape) == len(list(predict.shape)) - 1, \ diff --git a/ppocr/modeling/necks/rnn.py b/ppocr/modeling/necks/rnn.py index de87b3d9895168657f8c9722177c026b992c2966..86e649028f8fbb76cb5a1fd85381bd361277c6ee 100644 --- a/ppocr/modeling/necks/rnn.py +++ b/ppocr/modeling/necks/rnn.py @@ -51,7 +51,7 @@ class EncoderWithFC(nn.Layer): super(EncoderWithFC, self).__init__() self.out_channels = hidden_size weight_attr, bias_attr = get_para_bias_attr( - l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea') + l2_decay=0.00001, k=in_channels) self.fc = nn.Linear( in_channels, hidden_size, diff --git a/ppocr/utils/dict90.txt b/ppocr/utils/dict90.txt new file mode 100644 index 0000000000000000000000000000000000000000..a945ae9c526e4faa68852eb3fb47d078a2f3f6ce --- /dev/null +++ b/ppocr/utils/dict90.txt @@ -0,0 +1,90 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +: +; +< += +> +? +@ +[ +\ +] +_ +` +~ \ No newline at end of file diff --git a/ppstructure/layout/train_layoutparser_model.md b/ppstructure/layout/train_layoutparser_model.md index 08f5ebbf1aa276e4a3ecf27af46442161afcda1f..58975d71606e45b2f68a7f68565459042ef32775 100644 --- a/ppstructure/layout/train_layoutparser_model.md +++ b/ppstructure/layout/train_layoutparser_model.md @@ -4,9 +4,9 @@ ​ [1.1 Requirements](#Requirements) -​ [1.2 Install PaddleDetection](#Install PaddleDetection) +​ [1.2 Install PaddleDetection](#Install_PaddleDetection) -[2. Data preparation](#Data preparation) +[2. Data preparation](#Data_reparation) [3. Configuration](#Configuration) @@ -16,7 +16,7 @@ [6. Deployment](#Deployment) -​ [6.1 Export model](#Export model) +​ [6.1 Export model](#Export_model) ​ [6.2 Inference](#Inference) @@ -35,7 +35,7 @@ - CUDA >= 10.1 - cuDNN >= 7.6 - + ### 1.2 Install PaddleDetection @@ -51,7 +51,7 @@ pip install -r requirements.txt For more installation tutorials, please refer to: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md) - + ## 2. Data preparation @@ -165,7 +165,7 @@ python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer Use your trained model in Layout Parser - + ### 6.1 Export model diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py index 8ff279d7437965f725082a9eb1c83e05a7ffc8a8..5029d6059346a00062418d8d1b6cb029b0110643 100755 --- a/tools/infer/predict_e2e.py +++ b/tools/infer/predict_e2e.py @@ -141,7 +141,6 @@ if __name__ == "__main__": img, flag = check_and_read_gif(image_file) if not flag: img = cv2.imread(image_file) - img = img[:, :, ::-1] if img is None: logger.info("error in loading image:{}".format(image_file)) continue