Unverified · Commit 1f132953 · authored by N nihao, committed by GitHub

mobileone block k>1 bugfix (#6342)

Parent d409ec06
@@ -22,7 +22,7 @@
 import paddle
 import paddle.nn as nn
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
-from paddle.nn.initializer import Normal
+from paddle.nn.initializer import Normal, Constant
 from ppdet.modeling.ops import get_act_fn
 from ppdet.modeling.layers import ConvNormLayer
@@ -57,9 +57,7 @@ class MobileOneBlock(nn.Layer):
         self.depth_conv = nn.LayerList()
         self.point_conv = nn.LayerList()
-        for i in range(self.k):
-            if i > 0:
-                stride = 1
+        for _ in range(self.k):
             self.depth_conv.append(
                 ConvNormLayer(
                     ch_in,
@@ -112,7 +110,8 @@ class MobileOneBlock(nn.Layer):
         self.rbr_identity_st2 = nn.BatchNorm2D(
             num_features=ch_out,
             weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+            bias_attr=ParamAttr(regularizer=L2Decay(
+                0.0))) if ch_in == ch_out and self.stride == 1 else None
         self.act = get_act_fn(act) if act is None or isinstance(act, (
             str, dict)) else act
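Side note (not part of the commit): a BatchNorm-only identity branch keeps the shape of its input, so it can only be added to the conv branches when ch_in == ch_out and stride == 1; the new conditional skips building it otherwise. A minimal sketch of that gating, with a hypothetical helper name:

import paddle.nn as nn

def make_identity_branch(ch_in, ch_out, stride):
    # Hypothetical helper for illustration only: build the BN shortcut just
    # when its output shape can match the conv branches' output shape.
    if ch_in == ch_out and stride == 1:
        return nn.BatchNorm2D(num_features=ch_out)
    return None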
@@ -125,9 +124,10 @@ class MobileOneBlock(nn.Layer):
         else:
             id_out_st1 = self.rbr_identity_st1(x)
-        x1_1 = x.clone()
+        x1_1 = 0
         for i in range(self.k):
-            x1_1 = self.depth_conv[i](x1_1)
+            x1_1 += self.depth_conv[i](x)
         x1_2 = self.rbr_1x1(x)
         x1 = self.act(x1_1 + x1_2 + id_out_st1)
@@ -136,9 +136,9 @@ class MobileOneBlock(nn.Layer):
         else:
             id_out_st2 = self.rbr_identity_st2(x1)
-        x2_1 = x1.clone()
+        x2_1 = 0
         for i in range(self.k):
-            x2_1 = self.point_conv[i](x2_1)
+            x2_1 += self.point_conv[i](x1)
         y = self.act(x2_1 + id_out_st2)
         return y
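For context (not part of the commit): with k > 1 the k over-parameterized depthwise (and pointwise) branches are meant to run in parallel on the same input and be summed, which is what the deploy-time re-parameterization assumes; the old code chained them instead. A minimal sketch of the two patterns with toy layers (names and sizes are illustrative, not from the repo):

import paddle
import paddle.nn as nn

k, ch = 3, 8
# Toy stand-ins for the k depthwise branches of one MobileOneBlock.
branches = nn.LayerList(
    [nn.Conv2D(ch, ch, 3, stride=1, padding=1, groups=ch) for _ in range(k)])
x = paddle.rand([1, ch, 16, 16])

# Buggy pattern (pre-fix): each branch consumes the previous branch's output.
out_seq = x.clone()
for i in range(k):
    out_seq = branches[i](out_seq)

# Fixed pattern: every branch sees the original input and the results are
# summed, matching the MobileOne formulation.
out_sum = 0
for i in range(k):
    out_sum += branches[i](x)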
@@ -151,7 +151,9 @@ class MobileOneBlock(nn.Layer):
                 kernel_size=self.kernel_size,
                 stride=self.stride,
                 padding=self.padding,
-                groups=self.ch_in)
+                groups=self.ch_in,
+                bias_attr=ParamAttr(
+                    initializer=Constant(value=0.), learning_rate=1.))
         if not hasattr(self, 'conv2'):
             self.conv2 = nn.Conv2D(
                 in_channels=self.ch_in,
@@ -159,7 +161,9 @@ class MobileOneBlock(nn.Layer):
                 kernel_size=1,
                 stride=1,
                 padding='SAME',
-                groups=1)
+                groups=1,
+                bias_attr=ParamAttr(
+                    initializer=Constant(value=0.), learning_rate=1.))
         conv1_kernel, conv1_bias, conv2_kernel, conv2_bias = self.get_equivalent_kernel_bias(
         )
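Side note (not part of the commit): convert_to_deploy() later copies the fused kernels and biases into these plain Conv2D layers, so each of them needs an explicit, zero-initialized bias parameter to receive the folded BatchNorm bias. A hypothetical sketch of that copy step, assuming the tensor names returned by get_equivalent_kernel_bias():

def copy_fused_params(block, conv1_kernel, conv1_bias, conv2_kernel,
                      conv2_bias):
    # Hypothetical helper for illustration: write the re-parameterized
    # tensors into the deploy convs built above.
    block.conv1.weight.set_value(conv1_kernel)
    block.conv1.bias.set_value(conv1_bias)
    block.conv2.weight.set_value(conv2_kernel)
    block.conv2.bias.set_value(conv2_bias)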
@@ -211,26 +215,24 @@ class MobileOneBlock(nn.Layer):
             return 0, 0
         if isinstance(branch, nn.LayerList):
-            kernel = 0
-            running_mean = 0
-            running_var = 0
-            gamma = 0
-            beta = 0
-            eps = 0
+            fused_kernels = []
+            fused_bias = []
             for block in branch:
-                kernel += block.conv.weight
-                running_mean += block.norm._mean
-                running_var += block.norm._variance
-                gamma += block.norm.weight
-                beta += block.norm.bias
-                eps += block.norm._epsilon
-            kernel /= len(branch)
-            running_mean /= len(branch)
-            running_var /= len(branch)
-            gamma /= len(branch)
-            beta /= len(branch)
-            eps /= len(branch)
+                kernel = block.conv.weight
+                running_mean = block.norm._mean
+                running_var = block.norm._variance
+                gamma = block.norm.weight
+                beta = block.norm.bias
+                eps = block.norm._epsilon
+                std = (running_var + eps).sqrt()
+                t = (gamma / std).reshape((-1, 1, 1, 1))
+                fused_kernels.append(kernel * t)
+                fused_bias.append(beta - running_mean * gamma / std)
+            return sum(fused_kernels), sum(fused_bias)
         elif isinstance(branch, ConvNormLayer):
             kernel = branch.conv.weight
             running_mean = branch.norm._mean
...
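For context (not part of the commit): the rewritten LayerList branch first folds each Conv + BatchNorm pair into an equivalent conv and then sums the k fused branches, instead of averaging raw BN statistics across branches as the old code did. Per branch the standard fusion is W' = W * gamma / sqrt(var + eps) and b' = beta - mean * gamma / sqrt(var + eps). A small self-contained sketch of that idea in Paddle (shapes and values are illustrative only):

import paddle

def fuse_conv_bn(kernel, mean, var, gamma, beta, eps=1e-5):
    # Fold one branch's BatchNorm into its conv kernel and bias, using the
    # same algebra as the rewritten loop above.
    std = (var + eps).sqrt()
    t = (gamma / std).reshape((-1, 1, 1, 1))
    return kernel * t, beta - mean * gamma / std

# Toy check with two random depthwise 3x3 branches (8 output channels): the
# block's single deploy conv is the sum of the per-branch fused parameters.
fused = []
for _ in range(2):
    fused.append(
        fuse_conv_bn(
            paddle.rand([8, 1, 3, 3]),   # kernel
            paddle.rand([8]),            # running mean
            paddle.rand([8]) + 0.1,      # running variance
            paddle.rand([8]),            # gamma
            paddle.rand([8])))           # beta
kernel_eq = sum(k for k, _ in fused)
bias_eq = sum(b for _, b in fused)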