diff --git a/dygraph/paddleseg/cvlibs/param_init.py b/dygraph/paddleseg/cvlibs/param_init.py index 567399c0a0c7d2310931b1c0ccae13cd0d5422b1..495a47c189b8204a260014de4ca23300b7cbcc23 100644 --- a/dygraph/paddleseg/cvlibs/param_init.py +++ b/dygraph/paddleseg/cvlibs/param_init.py @@ -23,3 +23,8 @@ def constant_init(param, **kwargs): def normal_init(param, **kwargs): initializer = fluid.initializer.Normal(**kwargs) initializer(param, param.block) + + +def msra_init(param, **kwargs): + initializer = fluid.initializer.MSRA(**kwargs) + initializer(param, param.block) diff --git a/dygraph/paddleseg/models/bisenet.py b/dygraph/paddleseg/models/bisenet.py index 5c1964932b8fc601eabc813910597e914daac33d..a1ae897ef264812dbc3cc623317d290e36e37ff5 100644 --- a/dygraph/paddleseg/models/bisenet.py +++ b/dygraph/paddleseg/models/bisenet.py @@ -259,7 +259,7 @@ class BiSeNet(nn.Layer): else: for sublayer in self.sublayers(): if isinstance(sublayer, nn.Conv2d): - param_init.normal_init(sublayer.weight, scale=0.001) + param_init.msra_init(sublayer.weight) elif isinstance(sublayer, nn.SyncBatchNorm): param_init.constant_init(sublayer.weight, value=1.0) param_init.constant_init(sublayer.bias, value=0.0)