From 737334989beaa95bab7f150937994e2a3440cd78 Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Wed, 15 Jan 2020 15:20:58 +0800
Subject: [PATCH] State dict do not count data parallel layers (#22169)

* DataParallel state dict don't include _layers.; test=develop
* add unitest of data parallel; test=develop
* add load state test; test=develop
---
 python/paddle/fluid/dygraph/parallel.py | 113 ++++++++++++++++++
 .../test_imperative_data_parallel.py | 82 +++++++++++++
 2 files changed, 195 insertions(+)
 create mode 100644 python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py

diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py
index 76a3d2c5dcb..ac75a5dded0 100644
--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
 
     def _is_data_parallel_mode(self):
         return self._strategy.nranks > 1
+
+    def state_dict(self,
+                   destination=None,
+                   include_sublayers=True,
+                   structured_name_prefix=""):
+        '''
+        Get all parameters of self._layers and its sub-layers, collected into a dict (forwarded to self._layers.state_dict).
+
+        Parameters:
+            destination(dict, optional) : If provided, all the parameters will be set into this dict. Default: None
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start
+                                                 with structured_name_prefix. Default: ""
+
+        Returns:
+            dict: a dict containing all the parameters of self._layers
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph(state_dict, "paddle_dy")
+
+        '''
+
+        return self._layers.state_dict(
+            destination=destination,
+            include_sublayers=include_sublayers,
+            structured_name_prefix=structured_name_prefix)
+
+    def set_dict(self,
+                 stat_dict,
+                 include_sublayers=True,
+                 use_structured_name=True):
+        '''
+        Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict
+
+        Parameters:
+            stat_dict(dict) : Dict containing all the parameters
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph(state_dict, "paddle_dy")
+
+                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")
+
+                    emb.set_dict(para_state_dict)
+
+        '''
+
+        self._layers.set_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
+
+    def load_dict(self,
+                  stat_dict,
+                  include_sublayers=True,
+                  use_structured_name=True):
+        '''
+        Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict
+
+        This API will be deprecated. Please use set_dict instead.
+
+        Parameters:
+            stat_dict(dict) : Dict containing all the parameters
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph(state_dict, "paddle_dy")
+
+                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")
+
+                    emb.load_dict(para_state_dict)
+
+        '''
+
+        self._layers.load_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
new file mode 100644
index 00000000000..d645a0a5ceb
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import contextlib
+import unittest
+import numpy as np
+import six
+import unittest
+
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.dygraph as dygraph
+from paddle.fluid.dygraph.nn import Linear
+import paddle.fluid.core as core
+
+
+class MLP(fluid.Layer):
+    """Two Linear layers (784 -> 10 -> 10) wrapped by DataParallel below."""
+
+    def __init__(self, param_attr=None, bias_attr=None):
+        # param_attr / bias_attr are accepted but not used by this model.
+        super(MLP, self).__init__()
+        self._linear1 = Linear(784, 10)
+        self._linear2 = Linear(10, 10)
+
+    def forward(self, inputs):
+        y = self._linear1(inputs)
+        y = self._linear2(y)
+        return y
+
+
+class TestDataParallelStateDict(unittest.TestCase):
+    def test_data_parallel_state_dict(self):
+        """DataParallel state_dict/set_dict/load_dict must mirror the wrapped layer."""
+        with fluid.dygraph.guard():
+            strategy = dygraph.parallel.prepare_context()
+            mlp = MLP()
+            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)
+
+            single_state = mlp.state_dict()
+            parallel_state = parallel_mlp.state_dict()
+
+            base_para = {}
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            for k, v in single_state.items():
+                self.assertIn(k, parallel_state)
+
+                self.assertTrue(
+                    np.array_equal(v.numpy(), parallel_state[k].numpy()))
+
+                base_para[k] = v.numpy()
+
+            # set_dict and load_dict must each restore the zeroed parameters.
+            for restore in (parallel_mlp.set_dict, parallel_mlp.load_dict):
+                for k, v in parallel_state.items():
+                    var = v.value().get_tensor()
+                    var.set(np.zeros_like(v.numpy()), place)
+                    self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
+
+                restore(base_para)
+
+                parallel_state = parallel_mlp.state_dict()
+                for k, v in parallel_state.items():
+                    self.assertTrue(np.array_equal(v.numpy(), base_para[k]))
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab