From f875556541cf4374287257a3864fcf4fb9d6bcac Mon Sep 17 00:00:00 2001 From: dorren Date: Mon, 17 Oct 2022 17:18:14 +0800 Subject: [PATCH] update can transform method and add copyright info for new file --- configs/rec/rec_d28_can.yml | 23 +++++++------ ppocr/data/imaug/__init__.py | 2 +- ppocr/data/imaug/label_ops.py | 4 +-- ppocr/data/imaug/operators.py | 24 ++++++++++++++ ppocr/data/imaug/rec_img_aug.py | 30 ----------------- ppocr/losses/rec_can_loss.py | 18 +++++++++++ ppocr/modeling/backbones/rec_densenet.py | 32 +++++++++++-------- ppocr/modeling/heads/rec_can_head.py | 27 +++++++++++++++- ppocr/postprocess/__init__.py | 4 +-- ppocr/postprocess/rec_postprocess.py | 4 +-- test_tipc/configs/rec_d28_can/rec_d28_can.yml | 31 ++++++++++-------- .../rec_d28_can/train_infer_python.txt | 4 +-- test_tipc/prepare.sh | 1 - tools/infer/predict_rec.py | 2 +- 14 files changed, 129 insertions(+), 77 deletions(-) diff --git a/configs/rec/rec_d28_can.yml b/configs/rec/rec_d28_can.yml index 9fe936ae..2149100d 100644 --- a/configs/rec/rec_d28_can.yml +++ b/configs/rec/rec_d28_can.yml @@ -42,7 +42,6 @@ Architecture: bottleneck: True use_dropout: True input_channel: 1 - Head: name: CANHead in_channel: 684 @@ -66,8 +65,8 @@ Loss: name: CANLoss PostProcess: - name: SeqLabelDecode - character: 111 + name: CANLabelDecode + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt Metric: name: CANMetric @@ -75,15 +74,18 @@ Metric: Train: dataset: - name: PGDataSet + name: SimpleDataSet data_dir: ./train_data/CROHME/training/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: @@ -98,15 +100,18 @@ Train: Eval: dataset: - name: PGDataSet + name: SimpleDataSet data_dir: ./train_data/CROHME/evaluation/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py index a6409228..93d97446 100644 --- a/ppocr/data/imaug/__init__.py +++ b/ppocr/data/imaug/__init__.py @@ -27,7 +27,7 @@ from .make_pse_gt import MakePseGt from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \ SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \ ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \ - RFLRecResizeImg, GrayImageChannelFormat + RFLRecResizeImg from .ssl_img_aug import SSLRotateResize from .randaugment import RandAugment from .copy_paste import CopyPaste diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py index ae916b2e..e1389639 100644 --- a/ppocr/data/imaug/label_ops.py +++ b/ppocr/data/imaug/label_ops.py @@ -1479,14 +1479,14 @@ class CTLabelEncode(object): return data -class SeqLabelEncode(BaseRecLabelEncode): +class CANLabelEncode(BaseRecLabelEncode): def __init__(self, character_dict_path, max_text_length=100, use_space_char=False, lower=True, **kwargs): - super(SeqLabelEncode, self).__init__( + super(CANLabelEncode, self).__init__( max_text_length, character_dict_path, use_space_char, lower) def encode(self, text_seq): diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py index 5e84b1aa..4ff2d29e 100644 --- a/ppocr/data/imaug/operators.py +++ b/ppocr/data/imaug/operators.py @@ -498,3 +498,27 @@ class ResizeNormalize(object): img_numpy = np.array(img).astype("float32") img_numpy = img_numpy.transpose((2, 0, 1)) / 255 return img_numpy + + +class GrayImageChannelFormat(object): + """ + format gray scale image's channel: (3,h,w) -> (1,h,w) + Args: + inverse: inverse gray image + """ + + def __init__(self, inverse=False, **kwargs): + self.inverse = inverse + + def __call__(self, data): + img = data['image'] + img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + img_expanded = np.expand_dims(img_single_channel, 0) + + if self.inverse: + data['image'] = np.abs(img_expanded - 1) + else: + data['image'] = img_expanded + + data['src_image'] = img + return data \ No newline at end of file diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py index bc7fbc60..e22153bd 100644 --- a/ppocr/data/imaug/rec_img_aug.py +++ b/ppocr/data/imaug/rec_img_aug.py @@ -465,36 +465,6 @@ class RobustScannerRecResizeImg(object): return data -class GrayImageChannelFormat(object): - """ - format gray scale image's channel: (3,h,w) -> (1,h,w) - Args: - normalize: True/False - when True convert image dynamic range [0,255]->[0,1] - inverse: inverse gray image - """ - - def __init__(self, normalize=True, inverse=False, **kwargs): - self.normalize = normalize - self.inverse = inverse - - def __call__(self, data): - img = data['image'] - img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - img_single_channel = np.expand_dims(img_single_channel, 0) - - if self.normalize: - img_single_channel = img_single_channel / 255.0 - - if self.inverse: - data['image'] = np.abs(img_single_channel - 1).astype('float32') - else: - data['image'] = img_single_channel.astype('float32') - - data['src_image'] = img - return data - - def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25): imgC, imgH, imgW_min, imgW_max = image_shape h = img.shape[0] diff --git a/ppocr/losses/rec_can_loss.py b/ppocr/losses/rec_can_loss.py index a6c655e0..227e17f5 100644 --- a/ppocr/losses/rec_can_loss.py +++ b/ppocr/losses/rec_can_loss.py @@ -1,3 +1,21 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/LBH1024/CAN/models/can.py +""" + import paddle import paddle.nn as nn import numpy as np diff --git a/ppocr/modeling/backbones/rec_densenet.py b/ppocr/modeling/backbones/rec_densenet.py index d3391d40..b9fab765 100644 --- a/ppocr/modeling/backbones/rec_densenet.py +++ b/ppocr/modeling/backbones/rec_densenet.py @@ -1,3 +1,21 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import math import paddle import paddle.nn as nn @@ -5,14 +23,6 @@ import paddle.nn.functional as F class Bottleneck(nn.Layer): - ''' - ratio: 16 - growthRate: 24 - reduction: 0.5 - bottleneck: True - use_dropout: True - ''' - def __init__(self, nChannels, growthRate, use_dropout): super(Bottleneck, self).__init__() interChannels = 4 * growthRate @@ -78,11 +88,7 @@ class DenseNet(nn.Layer): def __init__(self, growthRate, reduction, bottleneck, use_dropout, input_channel, **kwargs): super(DenseNet, self).__init__() - ''' - ratio: 16 - growthRate: 24 - reduction: 0.5 - ''' + nDenseBlocks = 16 nChannels = 2 * growthRate diff --git a/ppocr/modeling/heads/rec_can_head.py b/ppocr/modeling/heads/rec_can_head.py index afd78ee9..732dbfe2 100644 --- a/ppocr/modeling/heads/rec_can_head.py +++ b/ppocr/modeling/heads/rec_can_head.py @@ -1,4 +1,29 @@ -from turtle import forward +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/LBH1024/CAN/models/can.py +https://github.com/LBH1024/CAN/models/counting.py +https://github.com/LBH1024/CAN/models/decoder.py +https://github.com/LBH1024/CAN/models/attention.py + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import paddle.nn as nn import paddle import math diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index e86a7ea7..36a3152f 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -37,7 +37,7 @@ from .table_postprocess import TableMasterLabelDecode, TableLabelDecode from .picodet_postprocess import PicoDetPostProcess from .ct_postprocess import CTPostProcess from .drrg_postprocess import DRRGPostprocess -from .rec_postprocess import SeqLabelDecode +from .rec_postprocess import CANLabelDecode def build_post_process(config, global_config=None): @@ -52,7 +52,7 @@ def build_post_process(config, global_config=None): 'TableMasterLabelDecode', 'SPINLabelDecode', 'DistillationSerPostProcess', 'DistillationRePostProcess', 'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess', - 'RFLLabelDecode', 'DRRGPostprocess', 'SeqLabelDecode' + 'RFLLabelDecode', 'DRRGPostprocess', 'CANLabelDecode' ] if config['name'] == 'PSEPostProcess': diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py index 4d88c278..0664ac6d 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -899,12 +899,12 @@ class VLLabelDecode(BaseRecLabelDecode): return text, label -class SeqLabelDecode(BaseRecLabelDecode): +class CANLabelDecode(BaseRecLabelDecode): """ Convert between latex-symbol and symbol-index """ def __init__(self, character_dict_path=None, use_space_char=False, **kwargs): - super(SeqLabelDecode, self).__init__(character_dict_path, + super(CANLabelDecode, self).__init__(character_dict_path, use_space_char) def decode(self, text_index, preds_prob=None): diff --git a/test_tipc/configs/rec_d28_can/rec_d28_can.yml b/test_tipc/configs/rec_d28_can/rec_d28_can.yml index ac7b0771..2149100d 100644 --- a/test_tipc/configs/rec_d28_can/rec_d28_can.yml +++ b/test_tipc/configs/rec_d28_can/rec_d28_can.yml @@ -42,7 +42,6 @@ Architecture: bottleneck: True use_dropout: True input_channel: 1 - Head: name: CANHead in_channel: 684 @@ -66,8 +65,8 @@ Loss: name: CANLoss PostProcess: - name: SeqLabelDecode - character: 111 + name: CANLabelDecode + character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt Metric: name: CANMetric @@ -75,20 +74,23 @@ Metric: Train: dataset: - name: PGDataSet - data_dir: ./train_data/CROHME_lite/training/images/ + name: SimpleDataSet + data_dir: ./train_data/CROHME/training/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME_lite/training/labels.txt"] + label_file_list: ["./train_data/CROHME/training/labels.txt"] loader: shuffle: True batch_size_per_card: 8 @@ -98,20 +100,23 @@ Train: Eval: dataset: - name: PGDataSet - data_dir: ./train_data/CROHME_lite/evaluation/images/ + name: SimpleDataSet + data_dir: ./train_data/CROHME/evaluation/images/ transforms: - DecodeImage: channel_first: False + - NormalizeImage: + mean: [0,0,0] + std: [1,1,1] + order: 'hwc' - GrayImageChannelFormat: - normalize: True inverse: True - - SeqLabelEncode: + - CANLabelEncode: character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt lower: False - KeepKeys: keep_keys: ['image', 'label'] - label_file_list: ["./train_data/CROHME_lite/evaluation/labels.txt"] + label_file_list: ["./train_data/CROHME/evaluation/labels.txt"] loader: shuffle: False drop_last: False diff --git a/test_tipc/configs/rec_d28_can/train_infer_python.txt b/test_tipc/configs/rec_d28_can/train_infer_python.txt index 731d327c..1794e78c 100644 --- a/test_tipc/configs/rec_d28_can/train_infer_python.txt +++ b/test_tipc/configs/rec_d28_can/train_infer_python.txt @@ -1,7 +1,7 @@ ===========================train_params=========================== model_name:rec_d28_can -python:python3.7 -gpu_list:0|0,1 +python:python +gpu_list:0|0 Global.use_gpu:True|True Global.auto_cast:null Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=240 diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index 4aab1701..dc0d2fdb 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -262,7 +262,6 @@ if [ ${MODE} = "lite_train_lite_infer" ];then cd ./pretrain_models/ && tar xf can_train.tar && cd ../ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/CROHME_lite.tar --no-check-certificate cd ./train_data/ && tar xf CROHME_lite.tar && cd ../ - fi if [ ${model_name} == "layoutxlm_ser" ]; then ${python_name} -m pip install -r ppstructure/kie/requirements.txt diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index c1604798..b3ef557c 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -111,7 +111,7 @@ class TextRecognizer(object): elif self.rec_algorithm == "CAN": self.inverse = args.rec_image_inverse postprocess_params = { - 'name': 'SeqLabelDecode', + 'name': 'CANLabelDecode', "character_dict_path": args.rec_char_dict_path, "use_space_char": args.use_space_char } -- GitLab