未验证 提交 8e26493c 编写于 作者: L littletomatodonkey 提交者: GitHub

fix lr mult val (#551)

上级 1d12efa7
...@@ -107,7 +107,7 @@ Note: when finetuning model, which has been trained by SSLD, please use smaller ...@@ -107,7 +107,7 @@ Note: when finetuning model, which has been trained by SSLD, please use smaller
ARCHITECTURE: ARCHITECTURE:
name: 'ResNet50_vd' name: 'ResNet50_vd'
params: params:
- lr_mult_list: [0.1, 0.1, 0.2, 0.2, 0.3]
+ lr_mult_list: [0.5, 0.5, 0.6, 0.6, 0.8]
pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained" pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained"
``` ```
......
...@@ -108,7 +108,7 @@ python -m paddle.distributed.launch \ ...@@ -108,7 +108,7 @@ python -m paddle.distributed.launch \
ARCHITECTURE: ARCHITECTURE:
name: 'ResNet50_vd' name: 'ResNet50_vd'
params: params:
- lr_mult_list: [0.1, 0.1, 0.2, 0.2, 0.3]
+ lr_mult_list: [0.5, 0.5, 0.6, 0.6, 0.8]
pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained" pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained"
``` ```
......
...@@ -64,8 +64,10 @@ class ConvBNLayer(nn.Layer): ...@@ -64,8 +64,10 @@ class ConvBNLayer(nn.Layer):
self._batch_norm = BatchNorm( self._batch_norm = BatchNorm(
num_filters, num_filters,
act=act, act=act,
- param_attr=ParamAttr(name=bn_name + '_scale'),
- bias_attr=ParamAttr(bn_name + '_offset'),
+ param_attr=ParamAttr(
+ name=bn_name + '_scale', learning_rate=lr_mult),
+ bias_attr=ParamAttr(
+ bn_name + '_offset', learning_rate=lr_mult),
moving_mean_name=bn_name + '_mean', moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance') moving_variance_name=bn_name + '_variance')
...@@ -118,6 +120,7 @@ class BottleneckBlock(nn.Layer): ...@@ -118,6 +120,7 @@ class BottleneckBlock(nn.Layer):
filter_size=1, filter_size=1,
stride=1, stride=1,
is_vd_mode=False if if_first else True, is_vd_mode=False if if_first else True,
lr_mult=lr_mult,
name=name + "_branch1") name=name + "_branch1")
self.shortcut = shortcut self.shortcut = shortcut
...@@ -153,12 +156,14 @@ class BasicBlock(nn.Layer): ...@@ -153,12 +156,14 @@ class BasicBlock(nn.Layer):
filter_size=3, filter_size=3,
stride=stride, stride=stride,
act='relu', act='relu',
lr_mult=lr_mult,
name=name + "_branch2a") name=name + "_branch2a")
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
act=None, act=None,
lr_mult=lr_mult,
name=name + "_branch2b") name=name + "_branch2b")
if not shortcut: if not shortcut:
...@@ -168,6 +173,7 @@ class BasicBlock(nn.Layer): ...@@ -168,6 +173,7 @@ class BasicBlock(nn.Layer):
filter_size=1, filter_size=1,
stride=1, stride=1,
is_vd_mode=False if if_first else True, is_vd_mode=False if if_first else True,
lr_mult=lr_mult,
name=name + "_branch1") name=name + "_branch1")
self.shortcut = shortcut self.shortcut = shortcut
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册