diff --git a/configs/rec/rec_r31_robustscanner.yml b/configs/rec/rec_r31_robustscanner.yml
index 271ae8fd327acb514ba5eafd60205a0c8d120364..40d39aee3c42c18085ace035944dba057b923245 100644
--- a/configs/rec/rec_r31_robustscanner.yml
+++ b/configs/rec/rec_r31_robustscanner.yml
@@ -39,6 +39,7 @@ Architecture:
   Transform:
   Backbone:
     name: ResNet31
+    init_type: KaimingNormal
   Head:
     name: RobustScannerHead
     enc_outchannles: 128
@@ -64,9 +65,8 @@ Metric:
 
 Train:
   dataset:
-    name: SimpleDataSet
-    label_file_list: ['./train_data/train_list.txt']
-    data_dir: ./train_data/
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/training/
     transforms:
       - DecodeImage: # load image
           img_mode: BGR
diff --git a/ppocr/modeling/backbones/rec_resnet_31.py b/ppocr/modeling/backbones/rec_resnet_31.py
index b7990b67a248e2b22750f117a41e01820d3e83cc..46dc374008b56a20dbd4be257775368e9cbbace4 100644
--- a/ppocr/modeling/backbones/rec_resnet_31.py
+++ b/ppocr/modeling/backbones/rec_resnet_31.py
@@ -29,11 +29,7 @@ import numpy as np
 
 __all__ = ["ResNet31"]
 
-
-conv_weight_attr = nn.initializer.KaimingNormal()
-bn_weight_attr = ParamAttr(initializer=nn.initializer.Uniform(), learning_rate=1)
-
-def conv3x3(in_channel, out_channel, stride=1):
+def conv3x3(in_channel, out_channel, stride=1, conv_weight_attr=None):
     return nn.Conv2D(
         in_channel,
         out_channel,
@@ -47,12 +43,14 @@ class BasicBlock(nn.Layer):
 
     expansion = 1
 
-    def __init__(self, in_channels, channels, stride=1, downsample=False):
+    def __init__(self, in_channels, channels, stride=1, downsample=False, conv_weight_attr=None, bn_weight_attr=None):
         super().__init__()
-        self.conv1 = conv3x3(in_channels, channels, stride)
+        self.conv1 = conv3x3(in_channels, channels, stride,
+            conv_weight_attr=conv_weight_attr)
         self.bn1 = nn.BatchNorm2D(channels, weight_attr=bn_weight_attr)
         self.relu = nn.ReLU()
-        self.conv2 = conv3x3(channels, channels)
+        self.conv2 = conv3x3(channels, channels,
+            conv_weight_attr=conv_weight_attr)
         self.bn2 = nn.BatchNorm2D(channels, weight_attr=bn_weight_attr)
         self.downsample = downsample
         if downsample:
@@ -96,6 +94,7 @@ class ResNet31(nn.Layer):
         channels (list[int]): List of out_channels of Conv2d layer.
         out_indices (None | Sequence[int]): Indices of output stages.
         last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
+        init_type (None | str): The config to control the initialization.
     '''
 
     def __init__(self,
@@ -103,7 +102,8 @@ class ResNet31(nn.Layer):
                  layers=[1, 2, 5, 3],
                  channels=[64, 128, 256, 256, 512, 512, 512],
                  out_indices=None,
-                 last_stage_pool=False):
+                 last_stage_pool=False,
+                 init_type=None):
         super(ResNet31, self).__init__()
         assert isinstance(in_channels, int)
         assert isinstance(last_stage_pool, bool)
@@ -111,6 +111,16 @@ class ResNet31(nn.Layer):
         self.out_indices = out_indices
         self.last_stage_pool = last_stage_pool
 
+        conv_weight_attr = None
+        bn_weight_attr = None
+
+        if init_type is not None:
+            support_dict = ['KaimingNormal']
+            assert init_type in support_dict, Exception(
+                "resnet31 only support {}".format(support_dict))
+            conv_weight_attr = nn.initializer.KaimingNormal()
+            bn_weight_attr = ParamAttr(initializer=nn.initializer.Uniform(), learning_rate=1)
+
         # conv 1 (Conv Conv)
         self.conv1_1 = nn.Conv2D(
             in_channels, channels[0], kernel_size=3, stride=1, padding=1, weight_attr=conv_weight_attr)
@@ -125,7 +135,8 @@ class ResNet31(nn.Layer):
         # conv 2 (Max-pooling, Residual block, Conv)
         self.pool2 = nn.MaxPool2D(
             kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
+        self.block2 = self._make_layer(channels[1], channels[2], layers[0],
+            conv_weight_attr=conv_weight_attr, bn_weight_attr=bn_weight_attr)
         self.conv2 = nn.Conv2D(
             channels[2], channels[2], kernel_size=3, stride=1, padding=1, weight_attr=conv_weight_attr)
         self.bn2 = nn.BatchNorm2D(channels[2], weight_attr=bn_weight_attr)
@@ -134,7 +145,8 @@ class ResNet31(nn.Layer):
         # conv 3 (Max-pooling, Residual block, Conv)
         self.pool3 = nn.MaxPool2D(
             kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
+        self.block3 = self._make_layer(channels[2], channels[3], layers[1],
+            conv_weight_attr=conv_weight_attr, bn_weight_attr=bn_weight_attr)
         self.conv3 = nn.Conv2D(
             channels[3], channels[3], kernel_size=3, stride=1, padding=1, weight_attr=conv_weight_attr)
         self.bn3 = nn.BatchNorm2D(channels[3], weight_attr=bn_weight_attr)
@@ -143,7 +155,8 @@ class ResNet31(nn.Layer):
         # conv 4 (Max-pooling, Residual block, Conv)
         self.pool4 = nn.MaxPool2D(
             kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
-        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
+        self.block4 = self._make_layer(channels[3], channels[4], layers[2],
+            conv_weight_attr=conv_weight_attr, bn_weight_attr=bn_weight_attr)
         self.conv4 = nn.Conv2D(
             channels[4], channels[4], kernel_size=3, stride=1, padding=1, weight_attr=conv_weight_attr)
         self.bn4 = nn.BatchNorm2D(channels[4], weight_attr=bn_weight_attr)
@@ -154,7 +167,8 @@ class ResNet31(nn.Layer):
         if self.last_stage_pool:
             self.pool5 = nn.MaxPool2D(
                 kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
+        self.block5 = self._make_layer(channels[4], channels[5], layers[3],
+            conv_weight_attr=conv_weight_attr, bn_weight_attr=bn_weight_attr)
         self.conv5 = nn.Conv2D(
             channels[5], channels[5], kernel_size=3, stride=1, padding=1, weight_attr=conv_weight_attr)
         self.bn5 = nn.BatchNorm2D(channels[5], weight_attr=bn_weight_attr)
@@ -162,7 +176,7 @@ class ResNet31(nn.Layer):
 
         self.out_channels = channels[-1]
 
-    def _make_layer(self, input_channels, output_channels, blocks):
+    def _make_layer(self, input_channels, output_channels, blocks, conv_weight_attr=None, bn_weight_attr=None):
         layers = []
         for _ in range(blocks):
             downsample = None
@@ -179,7 +193,8 @@ class ResNet31(nn.Layer):
 
             layers.append(
                 BasicBlock(
-                    input_channels, output_channels, downsample=downsample))
+                    input_channels, output_channels, downsample=downsample,
+                    conv_weight_attr=conv_weight_attr, bn_weight_attr=bn_weight_attr))
             input_channels = output_channels
 
         return nn.Sequential(*layers)
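
Note (not part of the patch): a minimal usage sketch of the new init_type argument, assuming it is run from a PaddleOCR checkout with ppocr importable; the input shape and variable names are illustrative only. In the config path, the Backbone section's keys are forwarded to the backbone constructor, which is how `init_type: KaimingNormal` in rec_r31_robustscanner.yml reaches this argument.

    import paddle
    from ppocr.modeling.backbones.rec_resnet_31 import ResNet31

    # Default behaviour is unchanged: with init_type=None both weight_attr
    # values stay None and Paddle's default initializers are used.
    backbone_default = ResNet31(in_channels=3)

    # With the new option, conv layers use KaimingNormal and BN weights use Uniform.
    backbone_kaiming = ResNet31(in_channels=3, init_type="KaimingNormal")

    x = paddle.randn([1, 3, 48, 160])   # illustrative NCHW text-line input
    feat = backbone_kaiming(x)          # final feature map fed to RobustScannerHead
    print(feat.shape)

Any init_type other than "KaimingNormal" fails the support_dict assertion added in ResNet31.__init__ above.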