From ea2f81d856bad1304164680371d58a3c9a674c85 Mon Sep 17 00:00:00 2001
From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com>
Date: Mon, 20 Dec 2021 17:36:11 +0800
Subject: [PATCH] refine sync bn (#4361)

* refine sync bn

* fix bugs of batch_norm

* fix bugs while deploying and modify BatchNorm to BatchNorm2D

* param_attr -> weight_attr in BatchNorm2D

* modify BatchNorm to BatchNorm2D
---
 ppdet/engine/trainer.py                   |  6 +++++
 ppdet/modeling/backbones/blazenet.py      |  7 ++----
 ppdet/modeling/backbones/hrnet.py         |  6 ++---
 ppdet/modeling/backbones/lcnet.py         |  4 ++--
 ppdet/modeling/backbones/lite_hrnet.py    | 28 +++++++++++------------
 ppdet/modeling/backbones/mobilenet_v1.py  | 11 ++-------
 ppdet/modeling/backbones/mobilenet_v3.py  | 12 ++++------
 ppdet/modeling/backbones/resnet.py        | 12 ++++------
 ppdet/modeling/backbones/shufflenet_v2.py | 10 ++++----
 ppdet/modeling/layers.py                  |  5 +---
 ppdet/modeling/necks/bifpn.py             |  4 +---
 ppdet/modeling/necks/blazeface_fpn.py     |  7 ++----
 ppdet/modeling/ops.py                     | 15 +++++-------
 ppdet/modeling/reid/pplcnet_embedding.py  |  6 ++---
 ppdet/modeling/reid/resnet.py             |  6 +++--
 15 files changed, 60 insertions(+), 79 deletions(-)

diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 455e74474..9d3774cfc 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -336,6 +336,12 @@ class Trainer(object):
         assert self.mode == 'train', "Model not in 'train' mode"
         Init_mark = False

+        sync_bn = (getattr(self.cfg, 'norm_type', None) == 'sync_bn' and
+                   self.cfg.use_gpu and self._nranks > 1)
+        if sync_bn:
+            self.model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(
+                self.model)
+
         model = self.model
         if self.cfg.get('fleet', False):
             model = fleet.distributed_model(model)
diff --git a/ppdet/modeling/backbones/blazenet.py b/ppdet/modeling/backbones/blazenet.py
index 425f2a86e..fbfdcec9d 100644
--- a/ppdet/modeling/backbones/blazenet.py
+++ b/ppdet/modeling/backbones/blazenet.py
@@ -58,11 +58,8 @@ class ConvBNLayer(nn.Layer):
                 learning_rate=conv_lr, initializer=KaimingNormal()),
             bias_attr=False)

-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+        if norm_type in ['bn', 'sync_bn']:
+            self._batch_norm = nn.BatchNorm2D(out_channels)

     def forward(self, x):
         x = self._conv(x)
diff --git a/ppdet/modeling/backbones/hrnet.py b/ppdet/modeling/backbones/hrnet.py
index d92aa95f5..0f09aedca 100644
--- a/ppdet/modeling/backbones/hrnet.py
+++ b/ppdet/modeling/backbones/hrnet.py
@@ -62,11 +62,11 @@ class ConvNormLayer(nn.Layer):
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-        global_stats = True if freeze_norm else False
+        global_stats = True if freeze_norm else None
         if norm_type in ['bn', 'sync_bn']:
-            self.norm = nn.BatchNorm(
+            self.norm = nn.BatchNorm2D(
                 ch_out,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
         elif norm_type == 'gn':
diff --git a/ppdet/modeling/backbones/lcnet.py b/ppdet/modeling/backbones/lcnet.py
index fd8ad4e46..d4e3a2c15 100644
--- a/ppdet/modeling/backbones/lcnet.py
+++ b/ppdet/modeling/backbones/lcnet.py
@@ -81,9 +81,9 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)

-        self.bn = BatchNorm(
+        self.bn = BatchNorm2D(
             num_filters,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
         self.hardswish = nn.Hardswish()

diff --git a/ppdet/modeling/backbones/lite_hrnet.py b/ppdet/modeling/backbones/lite_hrnet.py
index 52bad3cbb..d6832c509 100644
--- a/ppdet/modeling/backbones/lite_hrnet.py
+++ b/ppdet/modeling/backbones/lite_hrnet.py
@@ -56,11 +56,11 @@ class ConvNormLayer(nn.Layer):
             regularizer=L2Decay(norm_decay), )
         bias_attr = ParamAttr(
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-        global_stats = True if freeze_norm else False
+        global_stats = True if freeze_norm else None
         if norm_type in ['bn', 'sync_bn']:
-            self.norm = nn.BatchNorm(
+            self.norm = nn.BatchNorm2D(
                 ch_out,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats, )
         elif norm_type == 'gn':
@@ -582,7 +582,7 @@ class LiteHRNetModule(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[i]),
+                            nn.BatchNorm2D(self.in_channels[i]),
                             nn.Upsample(
                                 scale_factor=2**(j - i), mode='nearest')))
                 elif j == i:
@@ -601,7 +601,7 @@ class LiteHRNetModule(nn.Layer):
                                 padding=1,
                                 groups=self.in_channels[j],
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[j]),
+                            nn.BatchNorm2D(self.in_channels[j]),
                             L.Conv2d(
                                 self.in_channels[j],
                                 self.in_channels[i],
@@ -609,7 +609,7 @@ class LiteHRNetModule(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[i])))
+                            nn.BatchNorm2D(self.in_channels[i])))
                 else:
                     conv_downsamples.append(
                         nn.Sequential(
@@ -621,7 +621,7 @@ class LiteHRNetModule(nn.Layer):
                                 padding=1,
                                 groups=self.in_channels[j],
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[j]),
+                            nn.BatchNorm2D(self.in_channels[j]),
                             L.Conv2d(
                                 self.in_channels[j],
                                 self.in_channels[j],
@@ -629,7 +629,7 @@ class LiteHRNetModule(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[j]),
+                            nn.BatchNorm2D(self.in_channels[j]),
                             nn.ReLU()))

                 fuse_layer.append(nn.Sequential(*conv_downsamples))
@@ -777,7 +777,7 @@ class LiteHRNet(nn.Layer):
                                 padding=1,
                                 groups=num_channels_pre_layer[i],
                                 bias=False),
-                            nn.BatchNorm(num_channels_pre_layer[i]),
+                            nn.BatchNorm2D(num_channels_pre_layer[i]),
                             L.Conv2d(
                                 num_channels_pre_layer[i],
                                 num_channels_cur_layer[i],
@@ -785,7 +785,7 @@ class LiteHRNet(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]),
+                            nn.BatchNorm2D(num_channels_cur_layer[i]),
                             nn.ReLU()))
                 else:
                     transition_layers.append(None)
@@ -802,7 +802,7 @@ class LiteHRNet(nn.Layer):
                                 stride=2,
                                 padding=1,
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_pre_layer[-1]),
+                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
                             L.Conv2d(
                                 num_channels_pre_layer[-1],
                                 num_channels_cur_layer[i]
@@ -812,9 +812,9 @@ class LiteHRNet(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]
-                                         if j == i - num_branches_pre else
-                                         num_channels_pre_layer[-1]),
+                            nn.BatchNorm2D(num_channels_cur_layer[i]
+                                           if j == i - num_branches_pre else
+                                           num_channels_pre_layer[-1]),
                             nn.ReLU()))
                 transition_layers.append(nn.Sequential(*conv_downsamples))
         return nn.LayerList(transition_layers)
diff --git a/ppdet/modeling/backbones/mobilenet_v1.py b/ppdet/modeling/backbones/mobilenet_v1.py
index 7b9fa80eb..a39435be5 100644
--- a/ppdet/modeling/backbones/mobilenet_v1.py
+++ b/ppdet/modeling/backbones/mobilenet_v1.py
@@ -59,16 +59,9 @@ class ConvBNLayer(nn.Layer):
         param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))

-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(
+        if norm_type in ['sync_bn', 'bn']:
+            self._batch_norm = nn.BatchNorm2D(
                 out_channels, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels,
-                act=None,
-                param_attr=param_attr,
-                bias_attr=bias_attr,
-                use_global_stats=False)

     def forward(self, x):
         x = self._conv(x)
diff --git a/ppdet/modeling/backbones/mobilenet_v3.py b/ppdet/modeling/backbones/mobilenet_v3.py
index 02021e87c..2bd88567a 100644
--- a/ppdet/modeling/backbones/mobilenet_v3.py
+++ b/ppdet/modeling/backbones/mobilenet_v3.py
@@ -74,15 +74,11 @@ class ConvBNLayer(nn.Layer):
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
             trainable=False if freeze_norm else True)
-        global_stats = True if freeze_norm else False
-        if norm_type == 'sync_bn':
-            self.bn = nn.SyncBatchNorm(
-                out_c, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self.bn = nn.BatchNorm(
+        global_stats = True if freeze_norm else None
+        if norm_type in ['sync_bn', 'bn']:
+            self.bn = nn.BatchNorm2D(
                 out_c,
-                act=None,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
         norm_params = self.bn.parameters()
diff --git a/ppdet/modeling/backbones/resnet.py b/ppdet/modeling/backbones/resnet.py
index d4bc878ea..6f8eb0b89 100755
--- a/ppdet/modeling/backbones/resnet.py
+++ b/ppdet/modeling/backbones/resnet.py
@@ -100,15 +100,11 @@ class ConvNormLayer(nn.Layer):
             regularizer=L2Decay(norm_decay),
             trainable=False if freeze_norm else True)

-        global_stats = True if freeze_norm else False
-        if norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(
-                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self.norm = nn.BatchNorm(
+        global_stats = True if freeze_norm else None
+        if norm_type in ['sync_bn', 'bn']:
+            self.norm = nn.BatchNorm2D(
                 ch_out,
-                act=None,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
         norm_params = self.norm.parameters()
diff --git a/ppdet/modeling/backbones/shufflenet_v2.py b/ppdet/modeling/backbones/shufflenet_v2.py
index 59b0502a1..059b15ed7 100644
--- a/ppdet/modeling/backbones/shufflenet_v2.py
+++ b/ppdet/modeling/backbones/shufflenet_v2.py
@@ -51,15 +51,17 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)

-        self._batch_norm = BatchNorm(
+        self._batch_norm = BatchNorm2D(
             out_channels,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            bias_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            act=act)
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        self.act = act

     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._batch_norm(y)
+        if self.act:
+            y = getattr(F, self.act)(y)
         return y


diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index 73da16a14..894fa3c8f 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -174,12 +174,9 @@ class ConvNormLayer(nn.Layer):
         bias_attr = ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
-        if norm_type == 'bn':
+        if norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
-        elif norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(
-                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
         elif norm_type == 'gn':
             self.norm = nn.GroupNorm(
                 num_groups=norm_groups,
diff --git a/ppdet/modeling/necks/bifpn.py b/ppdet/modeling/necks/bifpn.py
index c60760893..9e794b8f5 100644
--- a/ppdet/modeling/necks/bifpn.py
+++ b/ppdet/modeling/necks/bifpn.py
@@ -52,10 +52,8 @@ class SeparableConvLayer(nn.Layer):
         self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)

         # norm type
-        if self.norm_type == 'bn':
+        if self.norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(self.out_channels)
-        elif self.norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(self.out_channels)
         elif self.norm_type == 'gn':
             self.norm = nn.GroupNorm(
                 num_groups=self.norm_groups, num_channels=self.out_channels)
diff --git a/ppdet/modeling/necks/blazeface_fpn.py b/ppdet/modeling/necks/blazeface_fpn.py
index 18d7f3cf1..b903c97b2 100644
--- a/ppdet/modeling/necks/blazeface_fpn.py
+++ b/ppdet/modeling/necks/blazeface_fpn.py
@@ -54,11 +54,8 @@ class ConvBNLayer(nn.Layer):
                 learning_rate=conv_lr, initializer=KaimingNormal()),
             bias_attr=False)

-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+        if norm_type in ['sync_bn', 'bn']:
+            self._batch_norm = nn.BatchNorm2D(out_channels)

     def forward(self, x):
         x = self._conv(x)
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index 294bcf2a1..b157da5a2 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -50,10 +50,6 @@ def batch_norm(ch,
                freeze_norm=False,
                initializer=None,
                data_format='NCHW'):
-    if norm_type == 'sync_bn':
-        batch_norm = nn.SyncBatchNorm
-    else:
-        batch_norm = nn.BatchNorm2D

     norm_lr = 0. if freeze_norm else 1.
     weight_attr = ParamAttr(
@@ -66,11 +62,12 @@ def batch_norm(ch,
         regularizer=L2Decay(norm_decay),
         trainable=False if freeze_norm else True)

-    norm_layer = batch_norm(
-        ch,
-        weight_attr=weight_attr,
-        bias_attr=bias_attr,
-        data_format=data_format)
+    if norm_type in ['sync_bn', 'bn']:
+        norm_layer = nn.BatchNorm2D(
+            ch,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr,
+            data_format=data_format)

     norm_params = norm_layer.parameters()
     if freeze_norm:
diff --git a/ppdet/modeling/reid/pplcnet_embedding.py b/ppdet/modeling/reid/pplcnet_embedding.py
index cad9f85be..d360f8914 100644
--- a/ppdet/modeling/reid/pplcnet_embedding.py
+++ b/ppdet/modeling/reid/pplcnet_embedding.py
@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, Constant
 from paddle import ParamAttr
-from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Linear
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import KaimingNormal, XavierNormal
 from ppdet.core.workspace import register
@@ -77,9 +77,9 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)

-        self.bn = BatchNorm(
+        self.bn = BatchNorm2D(
             num_filters,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
         self.hardswish = nn.Hardswish()
diff --git a/ppdet/modeling/reid/resnet.py b/ppdet/modeling/reid/resnet.py
index 968fe9774..c2261e0d0 100644
--- a/ppdet/modeling/reid/resnet.py
+++ b/ppdet/modeling/reid/resnet.py
@@ -55,12 +55,14 @@ class ConvBNLayer(nn.Layer):
             bias_attr=False,
             data_format=data_format)

-        self._batch_norm = nn.BatchNorm(
-            num_filters, act=act, data_layout=data_format)
+        self._batch_norm = nn.BatchNorm2D(num_filters, data_layout=data_format)
+        self.act = act

     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._batch_norm(y)
+        if self.act:
+            y = getattr(F, self.act)(y)
         return y
--
GitLab
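
Reviewer note, not part of the patch: the diff removes the per-layer norm_type == 'sync_bn' branches and builds plain BatchNorm2D everywhere, then converts the whole model once in ppdet/engine/trainer.py with paddle.nn.SyncBatchNorm.convert_sync_batchnorm when norm_type is 'sync_bn', use_gpu is set, and more than one rank is in use. A minimal Python sketch of that conversion pattern follows; the TinyNet model is illustrative only, not from PaddleDetection.

import paddle
import paddle.nn as nn

class TinyNet(nn.Layer):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2D(3, 8, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2D(8)  # plain BN, as the refactored backbones now build

    def forward(self, x):
        return self.bn(self.conv(x))

model = TinyNet()
# convert_sync_batchnorm walks the model and replaces every BatchNorm*D layer
# with paddle.nn.SyncBatchNorm, replacing the old per-layer norm_type switch.
model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
print(type(model.bn))  # SyncBatchNorm after conversion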