diff --git a/dygraph/tsn/model.py b/dygraph/tsn/model.py index 0477b2c6fed11830796ef5fa3df73167b285a7f3..18bf8f0306b7ec6d9b1dc0602641ec1541c2c3c7 100644 --- a/dygraph/tsn/model.py +++ b/dygraph/tsn/model.py @@ -246,7 +246,7 @@ class TSN_ResNet(paddle.nn.Layer): self.class_dim, weight_attr=ParamAttr( initializer=paddle.nn.initializer.Normal( - loc=0.0, scale=0.01), + mean=0.0, std=0.01), name="fc_0.w_0"), bias_attr=ParamAttr( initializer=paddle.nn.initializer.Constant(value=0.0), diff --git a/dygraph/tsn/train.py b/dygraph/tsn/train.py index 993a16f91087fec832606bb1336615343167d68f..ad5dcb59defc0bc074720ac1b90f9b293fa2b490 100644 --- a/dygraph/tsn/train.py +++ b/dygraph/tsn/train.py @@ -161,7 +161,7 @@ def create_optimizer(cfg, params): momentum = cfg.momentum optimizer = paddle.optimizer.Momentum( - learning_rate=paddle.optimizer.PiecewiseLR( + learning_rate=paddle.optimizer.lr.PiecewiseDecay( boundaries=bd, values=lr), momentum=momentum, weight_decay=paddle.regularizer.L2Decay(l2_weight_decay), @@ -190,7 +190,7 @@ def train(args): video_model = paddle.DataParallel(video_model) - pre_state_dict, _ = paddle.load(args.pretrain) + pre_state_dict = paddle.load(args.pretrain) #if paddle.distributed.parallel.Env().local_rank == 0: video_model = init_model(video_model, pre_state_dict)