未验证 提交 73733498 编写于 作者: H hong 提交者: GitHub

State dict do not count data parallel layers (#22169)

* DataParallel state dict don't include _layers.; test=develop

* add unitest of data parallel; test=develop

* add load state test; test=develop
上级 5e601a92
...@@ -254,3 +254,116 @@ class DataParallel(layers.Layer): ...@@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
def _is_data_parallel_mode(self): def _is_data_parallel_mode(self):
return self._strategy.nranks > 1 return self._strategy.nranks > 1
def state_dict(self,
destination=None,
include_sublayers=True,
structured_name_prefix=""):
'''
Get all parameters of self._layers and its sub-layers. And set all the parameters into a dict
Parameters:
destination(dict, optional) : If provide, all the parameters will set to this dict . Default: None
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
structured_name_prefix(str, optional): If not empty str, all the key in state dict will start
with structured_name_prefix
Retruns:
dict: a dict contains all the parameters of self._layers
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=dygraph.parallel.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = dygraph.parallel.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
'''
return self._layers.state_dict(
destination=destination,
include_sublayers=include_sublayers,
structured_name_prefix=structured_name_prefix)
def set_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
Parameters:
state_dict(dict) : Dict contains all the parameters
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
Default: True
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=dygraph.parallel.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = dygraph.parallel.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
emb.set_dict( para_state_dict )
'''
self._layers.set_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
This api will be Deprecated. Please use set_dict
Parameters:
state_dict(dict) : Dict contains all the parameters
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
Default: True
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=dygraph.parallel.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = dygraph.parallel.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
emb.load_dict( para_state_dict )
'''
self._layers.load_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
import numpy as np
import six
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.nn import Linear
import paddle.fluid.core as core
class MLP(fluid.Layer):
def __init__(self, param_attr=None, bias_attr=None):
super(MLP, self).__init__()
self._linear1 = Linear(784, 10)
self._linear2 = Linear(10, 10)
def forward(self, inputs):
y = self._linear1(inputs)
y = self._linear2(y)
return y
class TestDataParallelStateDict(unittest.TestCase):
def test_data_parallel_state_dict(self):
with fluid.dygraph.guard():
strategy = dygraph.parallel.prepare_context()
mlp = MLP()
parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)
single_state = mlp.state_dict()
parallel_state = parallel_mlp.state_dict()
base_para = {}
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
for k, v in single_state.items():
self.assertTrue(k in parallel_state)
self.assertTrue(
np.array_equal(v.numpy(), parallel_state[k].numpy()))
base_para[k] = v.numpy()
for k, v in parallel_state.items():
np_t = v.numpy()
var = v.value().get_tensor()
var.set(np.zeros_like(np_t), place)
self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
parallel_mlp.set_dict(base_para)
parallel_state = parallel_mlp.state_dict()
for k, v in parallel_state.items():
self.assertTrue(np.array_equal(v.numpy(), base_para[k]))
parallel_mlp.load_dict(base_para)
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册