diff --git a/deploy/pdserving/README.md b/deploy/pdserving/README.md index de7965bac752f6bc9cd1de224b791b0a84f0e699..cb2845c581d244e80ca597e0eb485a16ad369f20 100644 --- a/deploy/pdserving/README.md +++ b/deploy/pdserving/README.md @@ -114,7 +114,7 @@ The recognition model is the same. git clone https://github.com/PaddlePaddle/PaddleOCR # Enter the working directory - cd PaddleOCR/deploy/pdserver/ + cd PaddleOCR/deploy/pdserving/ ``` The pdserver directory contains the code to start the pipeline service and send prediction requests, including: diff --git a/deploy/pdserving/README_CN.md b/deploy/pdserving/README_CN.md index 5106fd9b2d03e6169cf0c723b241cb1385a22906..067be8bbda10d971b709afdf822aea96a979d000 100644 --- a/deploy/pdserving/README_CN.md +++ b/deploy/pdserving/README_CN.md @@ -112,7 +112,7 @@ python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_rec_in git clone https://github.com/PaddlePaddle/PaddleOCR # 进入到工作目录 - cd PaddleOCR/deploy/pdserver/ + cd PaddleOCR/deploy/pdserving/ ``` pdserver目录包含启动pipeline服务和发送预测请求的代码,包括: ``` @@ -206,7 +206,7 @@ pip3 install paddle-serving-app==0.3.1 1. 启动服务端程序 ``` -cd win +cd win python3 ocr_web_server.py gpu(使用gpu方式) 或者 python3 ocr_web_server.py cpu(使用cpu方式) diff --git a/ppocr/data/imaug/iaa_augment.py b/ppocr/data/imaug/iaa_augment.py index 9ce6bd4209034389df04334a83717142ca8c7b40..0aac7877c257f3e7532ca2806891775913d416b7 100644 --- a/ppocr/data/imaug/iaa_augment.py +++ b/ppocr/data/imaug/iaa_augment.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/data/imaug/make_border_map.py b/ppocr/data/imaug/make_border_map.py index cc2c9034e147eb7bb6a70e43eda4903337a523f0..abab38368db2de84e54b060598fc509a65219296 100644 --- a/ppocr/data/imaug/make_border_map.py +++ b/ppocr/data/imaug/make_border_map.py @@ -1,4 +1,20 @@ -# -*- coding:utf-8 -*- +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py +""" from __future__ import absolute_import from __future__ import division diff --git a/ppocr/data/imaug/make_pse_gt.py b/ppocr/data/imaug/make_pse_gt.py index 55abc8970784fd00843d2e91f259c58b65ae8579..255d076bde848d53f3b2fb04e80594872f4ae8c7 100644 --- a/ppocr/data/imaug/make_pse_gt.py +++ b/ppocr/data/imaug/make_pse_gt.py @@ -1,4 +1,16 @@ -# -*- coding:utf-8 -*- +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from __future__ import absolute_import from __future__ import division @@ -12,12 +24,8 @@ from shapely.geometry import Polygon __all__ = ['MakePseGt'] -class MakePseGt(object): - r''' - Making binary mask from detection data with ICDAR format. - Typically following the process of class `MakeICDARData`. - ''' +class MakePseGt(object): def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs): self.kernel_num = kernel_num self.min_shrink_ratio = min_shrink_ratio @@ -38,16 +46,20 @@ class MakePseGt(object): text_polys *= scale gt_kernels = [] - for i in range(1,self.kernel_num+1): + for i in range(1, self.kernel_num + 1): # s1->sn, from big to small - rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1) * i - text_kernel, ignore_tags = self.generate_kernel(image.shape[0:2], rate, text_polys, ignore_tags) + rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1 + ) * i + text_kernel, ignore_tags = self.generate_kernel( + image.shape[0:2], rate, text_polys, ignore_tags) gt_kernels.append(text_kernel) training_mask = np.ones(image.shape[0:2], dtype='uint8') for i in range(text_polys.shape[0]): if ignore_tags[i]: - cv2.fillPoly(training_mask, text_polys[i].astype(np.int32)[np.newaxis, :, :], 0) + cv2.fillPoly(training_mask, + text_polys[i].astype(np.int32)[np.newaxis, :, :], + 0) gt_kernels = np.array(gt_kernels) gt_kernels[gt_kernels > 0] = 1 @@ -59,16 +71,25 @@ class MakePseGt(object): data['mask'] = training_mask.astype('float32') return data - def generate_kernel(self, img_size, shrink_ratio, text_polys, ignore_tags=None): + def generate_kernel(self, + img_size, + shrink_ratio, + text_polys, + ignore_tags=None): + """ + Refer to part of the code: + https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py + """ + h, w = img_size text_kernel = np.zeros((h, w), dtype=np.float32) for i, poly in enumerate(text_polys): polygon = Polygon(poly) - distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (polygon.length + 1e-6) + distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / ( + polygon.length + 1e-6) subject = [tuple(l) for l in poly] pco = pyclipper.PyclipperOffset() - pco.AddPath(subject, pyclipper.JT_ROUND, - pyclipper.ET_CLOSEDPOLYGON) + pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) shrinked = np.array(pco.Execute(-distance)) if len(shrinked) == 0 or shrinked.size == 0: diff --git a/ppocr/data/imaug/make_shrink_map.py b/ppocr/data/imaug/make_shrink_map.py index 15e8afa05bb9f7315a2e9342c78cb98718a54df9..6c65c20e5621f91a5b1fba549b059c92923fca6f 100644 --- a/ppocr/data/imaug/make_shrink_map.py +++ b/ppocr/data/imaug/make_shrink_map.py @@ -1,4 +1,20 @@ -# -*- coding:utf-8 -*- +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_shrink_map.py +""" from __future__ import absolute_import from __future__ import division diff --git a/ppocr/data/imaug/random_crop_data.py b/ppocr/data/imaug/random_crop_data.py index 7c1c25abb56a0cf7d4d59b8523962bd5d81c873a..64aa110de4e3df950ce21e6d657877081b0fdd13 100644 --- a/ppocr/data/imaug/random_crop_data.py +++ b/ppocr/data/imaug/random_crop_data.py @@ -1,4 +1,20 @@ -# -*- coding:utf-8 -*- +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py +""" from __future__ import absolute_import from __future__ import division diff --git a/ppocr/data/imaug/text_image_aug/augment.py b/ppocr/data/imaug/text_image_aug/augment.py index 1aeff3733a4521c56dd5972fc058f6e0c245e4b7..2d15dd5f353c72a6cc3876481c423d81a8175c95 100644 --- a/ppocr/data/imaug/text_image_aug/augment.py +++ b/ppocr/data/imaug/text_image_aug/augment.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py +""" import numpy as np from .warp_mls import WarpMLS diff --git a/ppocr/data/imaug/text_image_aug/warp_mls.py b/ppocr/data/imaug/text_image_aug/warp_mls.py index d6cbe749b61aa4cf3163927c096868c83f4a4cdd..75de11115cf9ba824a7cd62b8b880ea7f99e4cb2 100644 --- a/ppocr/data/imaug/text_image_aug/warp_mls.py +++ b/ppocr/data/imaug/text_image_aug/warp_mls.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/warp_mls.py +""" import numpy as np @@ -161,4 +165,4 @@ class WarpMLS: dst = np.clip(dst, 0, 255) dst = np.array(dst, dtype=np.uint8) - return dst \ No newline at end of file + return dst diff --git a/ppocr/losses/ace_loss.py b/ppocr/losses/ace_loss.py index bf15f8e3a7b355bd9e8b69435a5dae01fc75a892..915b99e6ec1d6cb4641d8032fa188c61006dfbb3 100644 --- a/ppocr/losses/ace_loss.py +++ b/ppocr/losses/ace_loss.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# This code is refer from: https://github.com/viig99/LS-ACELoss + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -32,7 +35,7 @@ class ACELoss(nn.Layer): def __call__(self, predicts, batch): if isinstance(predicts, (list, tuple)): predicts = predicts[-1] - + B, N = predicts.shape[:2] div = paddle.to_tensor([N]).astype('float32') diff --git a/ppocr/losses/center_loss.py b/ppocr/losses/center_loss.py index cbef4df965e2659c6aa63c0c69cd8798143df485..f8c57fdd5c9b3f0dec5c3d0a811e5532abd2e45a 100644 --- a/ppocr/losses/center_loss.py +++ b/ppocr/losses/center_loss.py @@ -12,6 +12,8 @@ #See the License for the specific language governing permissions and #limitations under the License. +# This code is refer from: https://github.com/KaiyangZhou/pytorch-center-loss + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -27,6 +29,7 @@ class CenterLoss(nn.Layer): """ Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. """ + def __init__(self, num_classes=6625, feat_dim=96, diff --git a/ppocr/losses/det_basic_loss.py b/ppocr/losses/det_basic_loss.py index 7017236c284e55710f242275a413d56d32158d34..61ea579b41d3cdf7831c168f563a1e3cd72463a0 100644 --- a/ppocr/losses/det_basic_loss.py +++ b/ppocr/losses/det_basic_loss.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +""" +This code is refer from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -147,4 +150,4 @@ class BCELoss(nn.Layer): def forward(self, input, label, mask=None, weight=None, name=None): loss = F.binary_cross_entropy(input, label, reduction=self.reduction) - return loss \ No newline at end of file + return loss diff --git a/ppocr/losses/det_db_loss.py b/ppocr/losses/det_db_loss.py index b079aabff7c7deccc7e365b91c9407f7e894bcb9..708ffbdb47f349304e2bfd781a836e79348475f4 100755 --- a/ppocr/losses/det_db_loss.py +++ b/ppocr/losses/det_db_loss.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py +""" from __future__ import absolute_import from __future__ import division diff --git a/ppocr/losses/det_pse_loss.py b/ppocr/losses/det_pse_loss.py index 78423091f841f29b1217f73f79beb26fe1575844..9b8ac4b5a5dfac176c398dd0a9e490e5ca67ad5f 100644 --- a/ppocr/losses/det_pse_loss.py +++ b/ppocr/losses/det_pse_loss.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py +""" import paddle from paddle import nn diff --git a/ppocr/modeling/backbones/rec_mv1_enhance.py b/ppocr/modeling/backbones/rec_mv1_enhance.py index 04a909b8ccafd8e62f9a7076c7dedf63ff745303..d8a7f4b5646eb70b5202aa3b3ac6494318b424ad 100644 --- a/ppocr/modeling/backbones/rec_mv1_enhance.py +++ b/ppocr/modeling/backbones/rec_mv1_enhance.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# This code is refer from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/modeling/backbones/rec_resnet_31.py b/ppocr/modeling/backbones/rec_resnet_31.py index f60729cdcced2af7626e5615ca323e32c99432ec..965170138d00a53fca720b3b5f535a3dd34272d9 100644 --- a/ppocr/modeling/backbones/rec_resnet_31.py +++ b/ppocr/modeling/backbones/rec_resnet_31.py @@ -1,3 +1,22 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py +https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py +""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -18,12 +37,12 @@ def conv3x3(in_channel, out_channel, stride=1): kernel_size=3, stride=stride, padding=1, - bias_attr=False - ) + bias_attr=False) class BasicBlock(nn.Layer): expansion = 1 + def __init__(self, in_channels, channels, stride=1, downsample=False): super().__init__() self.conv1 = conv3x3(in_channels, channels, stride) @@ -34,9 +53,13 @@ class BasicBlock(nn.Layer): self.downsample = downsample if downsample: self.downsample = nn.Sequential( - nn.Conv2D(in_channels, channels * self.expansion, 1, stride, bias_attr=False), - nn.BatchNorm2D(channels * self.expansion), - ) + nn.Conv2D( + in_channels, + channels * self.expansion, + 1, + stride, + bias_attr=False), + nn.BatchNorm2D(channels * self.expansion), ) else: self.downsample = nn.Sequential() self.stride = stride @@ -57,7 +80,7 @@ class BasicBlock(nn.Layer): out += residual out = self.relu(out) - return out + return out class ResNet31(nn.Layer): @@ -69,12 +92,13 @@ class ResNet31(nn.Layer): out_indices (None | Sequence[int]): Indices of output stages. last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage. ''' - def __init__(self, - in_channels=3, - layers=[1, 2, 5, 3], - channels=[64, 128, 256, 256, 512, 512, 512], - out_indices=None, - last_stage_pool=False): + + def __init__(self, + in_channels=3, + layers=[1, 2, 5, 3], + channels=[64, 128, 256, 256, 512, 512, 512], + out_indices=None, + last_stage_pool=False): super(ResNet31, self).__init__() assert isinstance(in_channels, int) assert isinstance(last_stage_pool, bool) @@ -83,46 +107,56 @@ class ResNet31(nn.Layer): self.last_stage_pool = last_stage_pool # conv 1 (Conv Conv) - self.conv1_1 = nn.Conv2D(in_channels, channels[0], kernel_size=3, stride=1, padding=1) + self.conv1_1 = nn.Conv2D( + in_channels, channels[0], kernel_size=3, stride=1, padding=1) self.bn1_1 = nn.BatchNorm2D(channels[0]) self.relu1_1 = nn.ReLU() - self.conv1_2 = nn.Conv2D(channels[0], channels[1], kernel_size=3, stride=1, padding=1) + self.conv1_2 = nn.Conv2D( + channels[0], channels[1], kernel_size=3, stride=1, padding=1) self.bn1_2 = nn.BatchNorm2D(channels[1]) self.relu1_2 = nn.ReLU() # conv 2 (Max-pooling, Residual block, Conv) - self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self.pool2 = nn.MaxPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) self.block2 = self._make_layer(channels[1], channels[2], layers[0]) - self.conv2 = nn.Conv2D(channels[2], channels[2], kernel_size=3, stride=1, padding=1) + self.conv2 = nn.Conv2D( + channels[2], channels[2], kernel_size=3, stride=1, padding=1) self.bn2 = nn.BatchNorm2D(channels[2]) self.relu2 = nn.ReLU() # conv 3 (Max-pooling, Residual block, Conv) - self.pool3 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self.pool3 = nn.MaxPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) self.block3 = self._make_layer(channels[2], channels[3], layers[1]) - self.conv3 = nn.Conv2D(channels[3], channels[3], kernel_size=3, stride=1, padding=1) + self.conv3 = nn.Conv2D( + channels[3], channels[3], kernel_size=3, stride=1, padding=1) self.bn3 = nn.BatchNorm2D(channels[3]) self.relu3 = nn.ReLU() # conv 4 (Max-pooling, Residual block, Conv) - self.pool4 = nn.MaxPool2D(kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True) + self.pool4 = nn.MaxPool2D( + kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True) self.block4 = self._make_layer(channels[3], channels[4], layers[2]) - self.conv4 = nn.Conv2D(channels[4], channels[4], kernel_size=3, stride=1, padding=1) + self.conv4 = nn.Conv2D( + channels[4], channels[4], kernel_size=3, stride=1, padding=1) self.bn4 = nn.BatchNorm2D(channels[4]) self.relu4 = nn.ReLU() # conv 5 ((Max-pooling), Residual block, Conv) self.pool5 = None if self.last_stage_pool: - self.pool5 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self.pool5 = nn.MaxPool2D( + kernel_size=2, stride=2, padding=0, ceil_mode=True) self.block5 = self._make_layer(channels[4], channels[5], layers[3]) - self.conv5 = nn.Conv2D(channels[5], channels[5], kernel_size=3, stride=1, padding=1) + self.conv5 = nn.Conv2D( + channels[5], channels[5], kernel_size=3, stride=1, padding=1) self.bn5 = nn.BatchNorm2D(channels[5]) self.relu5 = nn.ReLU() self.out_channels = channels[-1] - + def _make_layer(self, input_channels, output_channels, blocks): layers = [] for _ in range(blocks): @@ -130,19 +164,19 @@ class ResNet31(nn.Layer): if input_channels != output_channels: downsample = nn.Sequential( nn.Conv2D( - input_channels, - output_channels, - kernel_size=1, - stride=1, + input_channels, + output_channels, + kernel_size=1, + stride=1, bias_attr=False), - nn.BatchNorm2D(output_channels), - ) - - layers.append(BasicBlock(input_channels, output_channels, downsample=downsample)) + nn.BatchNorm2D(output_channels), ) + + layers.append( + BasicBlock( + input_channels, output_channels, downsample=downsample)) input_channels = output_channels return nn.Sequential(*layers) - def forward(self, x): x = self.conv1_1(x) x = self.bn1_1(x) @@ -166,11 +200,11 @@ class ResNet31(nn.Layer): x = block_layer(x) x = conv_layer(x) x = bn_layer(x) - x= relu_layer(x) + x = relu_layer(x) outs.append(x) - + if self.out_indices is not None: return tuple([outs[i] for i in self.out_indices]) - + return x diff --git a/ppocr/modeling/backbones/rec_resnet_aster.py b/ppocr/modeling/backbones/rec_resnet_aster.py index bdecaf46af98f9b967d9a339f82d4e938abdc6d9..6a2710dfa079b4d910146c10ca2cff31321b2513 100644 --- a/ppocr/modeling/backbones/rec_resnet_aster.py +++ b/ppocr/modeling/backbones/rec_resnet_aster.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +""" +This code is refer from: +https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/resnet_aster.py +""" import paddle import paddle.nn as nn diff --git a/ppocr/modeling/heads/det_pse_head.py b/ppocr/modeling/heads/det_pse_head.py index db800f57a216ab437b724988ce692a9ac0c545d9..32a5b48e190b7566411b19841b6aa14455b5d41d 100644 --- a/ppocr/modeling/heads/det_pse_head.py +++ b/ppocr/modeling/heads/det_pse_head.py @@ -1,4 +1,4 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py +""" + from paddle import nn class PSEHead(nn.Layer): - def __init__(self, - in_channels, - hidden_dim=256, - out_channels=7, - **kwargs): + def __init__(self, in_channels, hidden_dim=256, out_channels=7, **kwargs): super(PSEHead, self).__init__() - self.conv1 = nn.Conv2D(in_channels, hidden_dim, kernel_size=3, stride=1, padding=1) + self.conv1 = nn.Conv2D( + in_channels, hidden_dim, kernel_size=3, stride=1, padding=1) self.bn1 = nn.BatchNorm2D(hidden_dim) self.relu1 = nn.ReLU() - self.conv2 = nn.Conv2D(hidden_dim, out_channels, kernel_size=1, stride=1, padding=0) - + self.conv2 = nn.Conv2D( + hidden_dim, out_channels, kernel_size=1, stride=1, padding=0) def forward(self, x, **kwargs): out = self.conv1(x) diff --git a/ppocr/modeling/heads/rec_aster_head.py b/ppocr/modeling/heads/rec_aster_head.py index 4961897b409020fe6cff72eb96f3257156fa33ac..9240f002d3a8bcbde517142be6b45559430de610 100644 --- a/ppocr/modeling/heads/rec_aster_head.py +++ b/ppocr/modeling/heads/rec_aster_head.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/attention_recognition_head.py +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/modeling/heads/rec_att_head.py b/ppocr/modeling/heads/rec_att_head.py index 4286d7691d1abcf80c283d1c1ab76f8cd1f4a634..6d77e42eb5def579052687ab6fdc265159311884 100644 --- a/ppocr/modeling/heads/rec_att_head.py +++ b/ppocr/modeling/heads/rec_att_head.py @@ -75,7 +75,7 @@ class AttentionHead(nn.Layer): probs_step, axis=1)], axis=1) next_input = probs_step.argmax(axis=1) targets = next_input - + probs = paddle.nn.functional.softmax(probs, axis=2) return probs diff --git a/ppocr/modeling/heads/rec_sar_head.py b/ppocr/modeling/heads/rec_sar_head.py index 2f15801b7057001a8af92ec676ef49f5ac7a9f78..a46cce7de2c8e59cf797db96fc6fcb7e25fa549a 100644 --- a/ppocr/modeling/heads/rec_sar_head.py +++ b/ppocr/modeling/heads/rec_sar_head.py @@ -1,3 +1,22 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is refer from: +https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/encoders/sar_encoder.py +https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/decoders/sar_decoder.py +""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/modeling/necks/fpn.py b/ppocr/modeling/necks/fpn.py index 8728a5c9ded5b9c174fd34f088d8012961f65ec0..48c85b1e53bd889bc887e8fedcd33b1b12cb734b 100644 --- a/ppocr/modeling/necks/fpn.py +++ b/ppocr/modeling/necks/fpn.py @@ -11,64 +11,102 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/whai362/PSENet/blob/python3/models/neck/fpn.py +""" import paddle.nn as nn import paddle import math import paddle.nn.functional as F + class Conv_BN_ReLU(nn.Layer): - def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, padding=0): + def __init__(self, + in_planes, + out_planes, + kernel_size=1, + stride=1, + padding=0): super(Conv_BN_ReLU, self).__init__() - self.conv = nn.Conv2D(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, - bias_attr=False) + self.conv = nn.Conv2D( + in_planes, + out_planes, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias_attr=False) self.bn = nn.BatchNorm2D(out_planes, momentum=0.1) self.relu = nn.ReLU() for m in self.sublayers(): if isinstance(m, nn.Conv2D): n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels - m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', default_initializer=paddle.nn.initializer.Normal(0, math.sqrt(2. / n))) + m.weight = paddle.create_parameter( + shape=m.weight.shape, + dtype='float32', + default_initializer=paddle.nn.initializer.Normal( + 0, math.sqrt(2. / n))) elif isinstance(m, nn.BatchNorm2D): - m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', default_initializer=paddle.nn.initializer.Constant(1.0)) - m.bias = paddle.create_parameter(shape=m.bias.shape, dtype='float32', default_initializer=paddle.nn.initializer.Constant(0.0)) + m.weight = paddle.create_parameter( + shape=m.weight.shape, + dtype='float32', + default_initializer=paddle.nn.initializer.Constant(1.0)) + m.bias = paddle.create_parameter( + shape=m.bias.shape, + dtype='float32', + default_initializer=paddle.nn.initializer.Constant(0.0)) def forward(self, x): return self.relu(self.bn(self.conv(x))) + class FPN(nn.Layer): def __init__(self, in_channels, out_channels): super(FPN, self).__init__() # Top layer - self.toplayer_ = Conv_BN_ReLU(in_channels[3], out_channels, kernel_size=1, stride=1, padding=0) + self.toplayer_ = Conv_BN_ReLU( + in_channels[3], out_channels, kernel_size=1, stride=1, padding=0) # Lateral layers - self.latlayer1_ = Conv_BN_ReLU(in_channels[2], out_channels, kernel_size=1, stride=1, padding=0) + self.latlayer1_ = Conv_BN_ReLU( + in_channels[2], out_channels, kernel_size=1, stride=1, padding=0) - self.latlayer2_ = Conv_BN_ReLU(in_channels[1], out_channels, kernel_size=1, stride=1, padding=0) + self.latlayer2_ = Conv_BN_ReLU( + in_channels[1], out_channels, kernel_size=1, stride=1, padding=0) - self.latlayer3_ = Conv_BN_ReLU(in_channels[0], out_channels, kernel_size=1, stride=1, padding=0) + self.latlayer3_ = Conv_BN_ReLU( + in_channels[0], out_channels, kernel_size=1, stride=1, padding=0) # Smooth layers - self.smooth1_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1) - - self.smooth2_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + self.smooth1_ = Conv_BN_ReLU( + out_channels, out_channels, kernel_size=3, stride=1, padding=1) - self.smooth3_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + self.smooth2_ = Conv_BN_ReLU( + out_channels, out_channels, kernel_size=3, stride=1, padding=1) + self.smooth3_ = Conv_BN_ReLU( + out_channels, out_channels, kernel_size=3, stride=1, padding=1) self.out_channels = out_channels * 4 for m in self.sublayers(): if isinstance(m, nn.Conv2D): n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels - m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', - default_initializer=paddle.nn.initializer.Normal(0, - math.sqrt(2. / n))) + m.weight = paddle.create_parameter( + shape=m.weight.shape, + dtype='float32', + default_initializer=paddle.nn.initializer.Normal( + 0, math.sqrt(2. / n))) elif isinstance(m, nn.BatchNorm2D): - m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', - default_initializer=paddle.nn.initializer.Constant(1.0)) - m.bias = paddle.create_parameter(shape=m.bias.shape, dtype='float32', - default_initializer=paddle.nn.initializer.Constant(0.0)) + m.weight = paddle.create_parameter( + shape=m.weight.shape, + dtype='float32', + default_initializer=paddle.nn.initializer.Constant(1.0)) + m.bias = paddle.create_parameter( + shape=m.bias.shape, + dtype='float32', + default_initializer=paddle.nn.initializer.Constant(0.0)) def _upsample(self, x, scale=1): return F.upsample(x, scale_factor=scale, mode='bilinear') @@ -81,15 +119,15 @@ class FPN(nn.Layer): p5 = self.toplayer_(f5) f4 = self.latlayer1_(f4) - p4 = self._upsample_add(p5, f4,2) + p4 = self._upsample_add(p5, f4, 2) p4 = self.smooth1_(p4) f3 = self.latlayer2_(f3) - p3 = self._upsample_add(p4, f3,2) + p3 = self._upsample_add(p4, f3, 2) p3 = self.smooth2_(p3) f2 = self.latlayer3_(f2) - p2 = self._upsample_add(p3, f2,2) + p2 = self._upsample_add(p3, f2, 2) p2 = self.smooth3_(p2) p3 = self._upsample(p3, 2) @@ -97,4 +135,4 @@ class FPN(nn.Layer): p5 = self._upsample(p5, 8) fuse = paddle.concat([p2, p3, p4, p5], axis=1) - return fuse \ No newline at end of file + return fuse diff --git a/ppocr/modeling/transforms/stn.py b/ppocr/modeling/transforms/stn.py index 215895f4c4c719f407f4998f7429d965e0529ddc..6f2bdda050f217d8253740001901fbff4065782a 100644 --- a/ppocr/modeling/transforms/stn.py +++ b/ppocr/modeling/transforms/stn.py @@ -11,7 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +""" +This code is refer from: +https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/stn_head.py +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/modeling/transforms/tps.py b/ppocr/modeling/transforms/tps.py index 6cd68555369dd1ddbd6ccf5236688a4b957b8525..9bdab0f85112b90d8da959dce4e258188a812052 100644 --- a/ppocr/modeling/transforms/tps.py +++ b/ppocr/modeling/transforms/tps.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/modules/transformation.py +""" from __future__ import absolute_import from __future__ import division diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py index b510acb0d4012c9a4d90c7ca07cac895f0bf242e..4db34f7b4833c1c9b2901c68899bfb294b5843c4 100644 --- a/ppocr/modeling/transforms/tps_spatial_transformer.py +++ b/ppocr/modeling/transforms/tps_spatial_transformer.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/tps_spatial_transformer.py +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function diff --git a/ppocr/postprocess/locality_aware_nms.py b/ppocr/postprocess/locality_aware_nms.py index 53280cc13ed7e41859e23e2517938d4f6eb07076..d305ef681882b4a393a73190bcbd20a65d1f0c15 100644 --- a/ppocr/postprocess/locality_aware_nms.py +++ b/ppocr/postprocess/locality_aware_nms.py @@ -1,5 +1,6 @@ """ Locality aware nms. +This code is refered from: https://github.com/songdejia/EAST/blob/master/locality_aware_nms.py """ import numpy as np diff --git a/ppocr/postprocess/pse_postprocess/pse/README.md b/ppocr/postprocess/pse_postprocess/pse/README.md index 9c2d9eaeaa5f93550358ebdd4d9161330b78a86f..6a19d5d1b6b1d8e6952eb054d74c6672ed10bc48 100644 --- a/ppocr/postprocess/pse_postprocess/pse/README.md +++ b/ppocr/postprocess/pse_postprocess/pse/README.md @@ -1,5 +1,6 @@ ## 编译 -code from https://github.com/whai362/pan_pp.pytorch +This code is refer from: +https://github.com/whai362/PSENet/blob/python3/models/post_processing/pse ```python python3 setup.py build_ext --inplace ``` diff --git a/ppocr/postprocess/pse_postprocess/pse_postprocess.py b/ppocr/postprocess/pse_postprocess/pse_postprocess.py index 4b89d221d284602933ab3d4f21468fcae79ef310..0234d592d6dde8419b1d623e33b9ca5bb251fb97 100755 --- a/ppocr/postprocess/pse_postprocess/pse_postprocess.py +++ b/ppocr/postprocess/pse_postprocess/pse_postprocess.py @@ -1,16 +1,20 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py +""" from __future__ import absolute_import from __future__ import division @@ -47,7 +51,8 @@ class PSEPostProcess(object): pred = outs_dict['maps'] if not isinstance(pred, paddle.Tensor): pred = paddle.to_tensor(pred) - pred = F.interpolate(pred, scale_factor=4 // self.scale, mode='bilinear') + pred = F.interpolate( + pred, scale_factor=4 // self.scale, mode='bilinear') score = F.sigmoid(pred[:, 0, :, :]) @@ -60,7 +65,9 @@ class PSEPostProcess(object): boxes_batch = [] for batch_index in range(pred.shape[0]): - boxes, scores = self.boxes_from_bitmap(score[batch_index], kernels[batch_index], shape_list[batch_index]) + boxes, scores = self.boxes_from_bitmap(score[batch_index], + kernels[batch_index], + shape_list[batch_index]) boxes_batch.append({'points': boxes, 'scores': scores}) return boxes_batch @@ -98,15 +105,14 @@ class PSEPostProcess(object): mask = np.zeros((box_height, box_width), np.uint8) mask[points[:, 1], points[:, 0]] = 255 - contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, + cv2.CHAIN_APPROX_SIMPLE) bbox = np.squeeze(contours[0], 1) else: raise NotImplementedError - bbox[:, 0] = np.clip( - np.round(bbox[:, 0] / ratio_w), 0, src_w) - bbox[:, 1] = np.clip( - np.round(bbox[:, 1] / ratio_h), 0, src_h) + bbox[:, 0] = np.clip(np.round(bbox[:, 0] / ratio_w), 0, src_w) + bbox[:, 1] = np.clip(np.round(bbox[:, 1] / ratio_h), 0, src_h) boxes.append(bbox) scores.append(score_i) return boxes, scores diff --git a/ppocr/utils/iou.py b/ppocr/utils/iou.py index 20529dee2d14083f3de4ac034668d004136c56e2..35459f5f053cde0a74f76c5652bfb723a48ca890 100644 --- a/ppocr/utils/iou.py +++ b/ppocr/utils/iou.py @@ -1,4 +1,4 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,18 +11,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/whai362/PSENet/blob/python3/models/loss/iou.py +""" import paddle EPS = 1e-6 + def iou_single(a, b, mask, n_class): valid = mask == 1 a = a.masked_select(valid) b = b.masked_select(valid) miou = [] for i in range(n_class): - if a.shape == [0] and a.shape==b.shape: + if a.shape == [0] and a.shape == b.shape: inter = paddle.to_tensor(0.0) union = paddle.to_tensor(0.0) else: @@ -32,6 +37,7 @@ def iou_single(a, b, mask, n_class): miou = sum(miou) / len(miou) return miou + def iou(a, b, mask, n_class=2, reduce=True): batch_size = a.shape[0] @@ -39,10 +45,10 @@ def iou(a, b, mask, n_class=2, reduce=True): b = b.reshape([batch_size, -1]) mask = mask.reshape([batch_size, -1]) - iou = paddle.zeros((batch_size,), dtype='float32') + iou = paddle.zeros((batch_size, ), dtype='float32') for i in range(batch_size): iou[i] = iou_single(a[i], b[i], mask[i], n_class) if reduce: iou = paddle.mean(iou) - return iou \ No newline at end of file + return iou diff --git a/ppocr/utils/logging.py b/ppocr/utils/logging.py index 11896c37d9285e19a9526caa9c637d7eda7b1979..ce827e8b10c4b63b736886a2f72106c7570576b1 100644 --- a/ppocr/utils/logging.py +++ b/ppocr/utils/logging.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +This code is refer from: +https://github.com/WenmuZhou/PytorchOCR/blob/master/torchocr/utils/logging.py +""" import os import sys