提交 cc68cb45 编写于 作者: C chenguowei01

Initialize weights only in the model (FCN), not in the backbone

上级 d22e3e5e
...@@ -36,4 +36,5 @@ learning_rate: ...@@ -36,4 +36,5 @@ learning_rate:
loss: loss:
types: types:
- type: CrossEntropyLoss - type: CrossEntropyLoss
ignore_index: 255
coef: [1] coef: [1]
...@@ -4,6 +4,6 @@ model: ...@@ -4,6 +4,6 @@ model:
type: FCN type: FCN
backbone: backbone:
type: HRNet_W18 type: HRNet_W18
backbone_pretrained: pretrained_model/hrnet_w18_imagenet
num_classes: 19 num_classes: 19
backbone_channels: [270] backbone_channels: [270]
backbone_pretrained: pretrained_model/hrnet_w18_imagenet
...@@ -4,6 +4,6 @@ model: ...@@ -4,6 +4,6 @@ model:
type: FCN type: FCN
backbone: backbone:
type: HRNet_W18 type: HRNet_W18
backbone_pretrained: pretrained_model/hrnet_w18_imagenet
num_classes: 2 num_classes: 2
backbone_channels: [270] backbone_channels: [270]
backbone_pretrained: pretrained_model/hrnet_w18_imagenet
_base_: '../_base_/cityscapes.yml'
model:
type: FCN
backbone:
type: HRNet_W48
num_classes: 19
backbone_channels: [720]
backbone_pretrained: pretrained_model/hrnet_w48_imagenet
...@@ -64,8 +64,8 @@ def train(model, ...@@ -64,8 +64,8 @@ def train(model,
num_classes=None, num_classes=None,
num_workers=8, num_workers=8,
use_vdl=False, use_vdl=False,
losses=None): losses=None,
ignore_index = model.ignore_index ignore_index=255):
nranks = ParallelEnv().nranks nranks = ParallelEnv().nranks
start_iter = 0 start_iter = 0
......
...@@ -56,7 +56,6 @@ class HRNet(fluid.dygraph.Layer): ...@@ -56,7 +56,6 @@ class HRNet(fluid.dygraph.Layer):
""" """
def __init__(self, def __init__(self,
backbone_pretrained=None,
stage1_num_modules=1, stage1_num_modules=1,
stage1_num_blocks=[4], stage1_num_blocks=[4],
stage1_num_channels=[64], stage1_num_channels=[64],
...@@ -146,9 +145,6 @@ class HRNet(fluid.dygraph.Layer): ...@@ -146,9 +145,6 @@ class HRNet(fluid.dygraph.Layer):
has_se=self.has_se, has_se=self.has_se,
name="st4") name="st4")
if self.training:
self.init_weight(backbone_pretrained)
def forward(self, x, label=None, mode='train'): def forward(self, x, label=None, mode='train'):
input_shape = x.shape[2:] input_shape = x.shape[2:]
conv1 = self.conv_layer1_1(x) conv1 = self.conv_layer1_1(x)
...@@ -173,30 +169,6 @@ class HRNet(fluid.dygraph.Layer): ...@@ -173,30 +169,6 @@ class HRNet(fluid.dygraph.Layer):
return [x] return [x]
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
params = self.parameters()
for param in params:
param_name = param.name
if 'batch_norm' in param_name:
if 'w_0' in param_name:
param_init.constant_init(param, 1.0)
elif 'b_0' in param_name:
param_init.constant_init(param, 0.0)
if 'conv' in param_name and 'w_0' in param_name:
param_init.normal_init(param, scale=0.001)
if pretrained_model is not None:
if os.path.exists(pretrained_model):
utils.load_pretrained_model(self, pretrained_model)
else:
raise Exception('Pretrained model is not found: {}'.format(
pretrained_model))
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
......
...@@ -26,6 +26,7 @@ from paddle.nn import SyncBatchNorm as BatchNorm ...@@ -26,6 +26,7 @@ from paddle.nn import SyncBatchNorm as BatchNorm
from dygraph.cvlibs import manager from dygraph.cvlibs import manager
from dygraph import utils from dygraph import utils
from dygraph.cvlibs import param_init from dygraph.cvlibs import param_init
from dygraph.utils import logger
__all__ = [ __all__ = [
"fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18", "fcn_hrnet_w18_small_v1", "fcn_hrnet_w18_small_v2", "fcn_hrnet_w18",
...@@ -52,25 +53,22 @@ class FCN(fluid.dygraph.Layer): ...@@ -52,25 +53,22 @@ class FCN(fluid.dygraph.Layer):
backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index. backbone_channels (tuple): the same length with "backbone_indices". It indicates the channels of corresponding index.
channels (int): channels after conv layer before the last one. channels (int): channels after conv layer before the last one.
ignore_index (int): the value of ground-truth mask would be ignored while computing loss or doing evaluation. Default 255.
""" """
def __init__(self, def __init__(self,
num_classes, num_classes,
backbone, backbone,
backbone_pretrained=None,
model_pretrained=None, model_pretrained=None,
backbone_indices=(-1, ), backbone_indices=(-1, ),
backbone_channels=(270, ), backbone_channels=(270, ),
channels=None, channels=None):
ignore_index=255,
**kwargs):
super(FCN, self).__init__() super(FCN, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.backbone_pretrained = backbone_pretrained
self.model_pretrained = model_pretrained
self.backbone_indices = backbone_indices self.backbone_indices = backbone_indices
self.ignore_index = ignore_index
self.EPS = 1e-5
if channels is None: if channels is None:
channels = backbone_channels[backbone_indices[0]] channels = backbone_channels[backbone_indices[0]]
...@@ -87,7 +85,7 @@ class FCN(fluid.dygraph.Layer): ...@@ -87,7 +85,7 @@ class FCN(fluid.dygraph.Layer):
stride=1, stride=1,
padding=0) padding=0)
if self.training: if self.training:
self.init_weight(model_pretrained) self.init_weight()
def forward(self, x): def forward(self, x):
input_shape = x.shape[2:] input_shape = x.shape[2:]
...@@ -98,40 +96,33 @@ class FCN(fluid.dygraph.Layer): ...@@ -98,40 +96,33 @@ class FCN(fluid.dygraph.Layer):
logit = fluid.layers.resize_bilinear(logit, input_shape) logit = fluid.layers.resize_bilinear(logit, input_shape)
return [logit] return [logit]
# if self.training: def init_weight(self):
# if label is None:
# raise Exception('Label is need during training')
# return self._get_loss(logit, label)
# else:
# score_map = fluid.layers.softmax(logit, axis=1)
# score_map = fluid.layers.transpose(score_map, [0, 2, 3, 1])
# pred = fluid.layers.argmax(score_map, axis=3)
# pred = fluid.layers.unsqueeze(pred, axes=[3])
# return pred, score_map
def init_weight(self, pretrained_model=None):
"""
Initialize the parameters of model parts.
Args:
pretrained_model ([str], optional): the path of pretrained model. Defaults to None.
"""
params = self.parameters() params = self.parameters()
for param in params: for param in params:
param_name = param.name param_name = param.name
if 'batch_norm' in param_name: if 'batch_norm' in param_name:
if 'w_0' in param_name: if 'w_0' in param_name:
param_init.constant_init(param, 1.0) param_init.constant_init(param, value=1.0)
elif 'b_0' in param_name: elif 'b_0' in param_name:
param_init.constant_init(param, 0.0) param_init.constant_init(param, value=0.0)
if 'conv' in param_name and 'w_0' in param_name: if 'conv' in param_name and 'w_0' in param_name:
param_init.normal_init(param, scale=0.001) param_init.normal_init(param, scale=0.001)
if pretrained_model is not None: if self.model_pretrained is not None:
if os.path.exists(pretrained_model): if os.path.exists(self.model_pretrained):
utils.load_pretrained_model(self, pretrained_model) utils.load_pretrained_model(self, self.model_pretrained)
else:
raise Exception('Pretrained model is not found: {}'.format(
self.model_pretrained))
elif self.backbone_pretrained is not None:
if os.path.exists(self.backbone_pretrained):
utils.load_pretrained_model(self.backbone,
self.backbone_pretrained)
else: else:
raise Exception('Pretrained model is not found: {}'.format( raise Exception('Pretrained model is not found: {}'.format(
pretrained_model)) self.backbone_pretrained))
else:
logger.warning('No pretrained model to load, train from scratch')
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
import paddle import paddle
from paddle import nn from paddle import nn
import paddle.nn.functional as F import paddle.nn.functional as F
import paddle.fluid as fluid
from dygraph.cvlibs import manager from dygraph.cvlibs import manager
''' '''
...@@ -106,13 +107,34 @@ class CrossEntropyLoss(nn.Layer): ...@@ -106,13 +107,34 @@ class CrossEntropyLoss(nn.Layer):
if len(label.shape) != len(logit.shape): if len(label.shape) != len(logit.shape):
label = paddle.unsqueeze(label, 1) label = paddle.unsqueeze(label, 1)
loss = F.softmax_with_cross_entropy( # logit = paddle.transpose(logit, [0, 2, 3, 1])
logit, label, ignore_index=self.ignore_index, axis=1) # label = paddle.transpose(label, [0, 2, 3, 1])
loss = paddle.reduce_mean(loss) # loss = F.softmax_with_cross_entropy(
# logit, label, ignore_index=self.ignore_index, axis=-1)
# loss = paddle.reduce_mean(loss)
# mask = label != self.ignore_index
# mask = paddle.cast(mask, 'float32')
# avg_loss = loss / (paddle.mean(mask) + self.EPS)
# label.stop_gradient = True
# mask.stop_gradient = True
# return avg_loss
logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
label = fluid.layers.transpose(label, [0, 2, 3, 1])
mask = label != self.ignore_index mask = label != self.ignore_index
mask = paddle.cast(mask, 'float32') mask = fluid.layers.cast(mask, 'float32')
avg_loss = loss / (paddle.mean(mask) + self.EPS) loss, probs = fluid.layers.softmax_with_cross_entropy(
logit,
label,
ignore_index=self.ignore_index,
return_softmax=True,
axis=-1)
loss = loss * mask
avg_loss = fluid.layers.mean(loss) / (
fluid.layers.mean(mask) + self.EPS)
label.stop_gradient = True label.stop_gradient = True
mask.stop_gradient = True mask.stop_gradient = True
......
...@@ -53,7 +53,7 @@ def parse_args(): ...@@ -53,7 +53,7 @@ def parse_args():
dest='save_interval_iters', dest='save_interval_iters',
help='The interval iters for save a model snapshot', help='The interval iters for save a model snapshot',
type=int, type=int,
default=5) default=1000)
parser.add_argument( parser.add_argument(
'--save_dir', '--save_dir',
dest='save_dir', dest='save_dir',
...@@ -126,7 +126,8 @@ def main(args): ...@@ -126,7 +126,8 @@ def main(args):
num_classes=train_dataset.num_classes, num_classes=train_dataset.num_classes,
num_workers=args.num_workers, num_workers=args.num_workers,
use_vdl=args.use_vdl, use_vdl=args.use_vdl,
losses=losses) losses=losses,
ignore_index=losses['types'][0].ignore_index)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -63,7 +63,7 @@ def load_pretrained_model(model, pretrained_model): ...@@ -63,7 +63,7 @@ def load_pretrained_model(model, pretrained_model):
'The pretrained model directory is not Found: {}'.format( 'The pretrained model directory is not Found: {}'.format(
pretrained_model)) pretrained_model))
else: else:
logger.info('No pretrained model to load, train from scratch') logger.warning('No pretrained model to load, train from scratch')
def resume(model, optimizer, resume_model): def resume(model, optimizer, resume_model):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册