From da24e423cd257e87822dc04d62400a843e13370d Mon Sep 17 00:00:00 2001 From: songyouwei Date: Thu, 16 Jan 2020 11:20:55 +0800 Subject: [PATCH] [cherry-pick] fix save_dygraph & save & DataParallel (#22303) * cherry-pick #22266 * cherry-pick #22169 test=release/1.7, test=develop --- python/paddle/fluid/dygraph/checkpoint.py | 9 +- python/paddle/fluid/dygraph/parallel.py | 113 ++++++++++++++++++ python/paddle/fluid/io.py | 4 + .../test_imperative_data_parallel.py | 82 +++++++++++++ .../unittests/test_imperative_save_load.py | 6 +- .../tests/unittests/test_static_save_load.py | 5 +- 6 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index 5ed4e2d412e..27658ba3d46 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -82,7 +82,12 @@ def save_dygraph(state_dict, model_path): name_table[k] = v.name model_dict["StructuredToParameterName@@"] = name_table - with open(model_path + suffix, 'wb') as f: + file_name = model_path + suffix + dir_name = os.path.dirname(file_name) + if dir_name and not os.path.exists(dir_name): + os.makedirs(dir_name) # NOTE(review): exists() then makedirs() is not atomic; concurrent savers creating the same directory may hit OSError here. TODO confirm callers save from a single process. + + with open(file_name, 'wb') as f: pickle.dump(model_dict, f) @@ -113,7 +118,7 @@ def load_dygraph(model_path, keep_name_table=False): adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000), parameter_list = emb.parameters() ) state_dict = adam.state_dict() - fluid.save_dygraph( state_dict, "padle_dy") + fluid.save_dygraph( state_dict, "paddle_dy") para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy") diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 76a3d2c5dcb..ac75a5dded0 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -254,3 +254,116 @@ class 
DataParallel(layers.Layer): def _is_data_parallel_mode(self): return self._strategy.nranks > 1 + + def state_dict(self, + destination=None, + include_sublayers=True, + structured_name_prefix=""): + ''' + Get all parameters of self._layers and its sub-layers, and put them into a dict. + + Parameters: + destination(dict, optional) : If provided, all the parameters will be set into this dict. Default: None + include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True + structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start + with structured_name_prefix + + Returns: + dict: a dict containing all the parameters of self._layers + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + with fluid.dygraph.guard(): + strategy=fluid.dygraph.parallel.prepare_context() + emb = fluid.dygraph.Embedding([10, 10]) + emb = fluid.dygraph.parallel.DataParallel(emb, strategy) + + state_dict = emb.state_dict() + fluid.save_dygraph( state_dict, "paddle_dy") + + ''' + + return self._layers.state_dict( + destination=destination, + include_sublayers=include_sublayers, + structured_name_prefix=structured_name_prefix) + + def set_dict(self, + stat_dict, + include_sublayers=True, + use_structured_name=True): + ''' + Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict. + + Parameters: + stat_dict(dict) : Dict containing all the parameters + include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True + use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. + Default: True + Returns: + None + + Examples: + .. 
code-block:: python + + import paddle.fluid as fluid + with fluid.dygraph.guard(): + strategy=fluid.dygraph.parallel.prepare_context() + emb = fluid.dygraph.Embedding([10, 10]) + emb = fluid.dygraph.parallel.DataParallel(emb, strategy) + + state_dict = emb.state_dict() + fluid.save_dygraph( state_dict, "paddle_dy") + + para_state_dict, _ = fluid.load_dygraph( "paddle_dy") + + emb.set_dict( para_state_dict ) + + ''' + + self._layers.set_dict( + stat_dict, + include_sublayers=include_sublayers, + use_structured_name=use_structured_name) + + def load_dict(self, + stat_dict, + include_sublayers=True, + use_structured_name=True): + ''' + Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict. + + This API will be deprecated. Please use set_dict instead. + + Parameters: + stat_dict(dict) : Dict containing all the parameters + include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True + use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. + Default: True + Returns: + None + + Examples: + .. 
code-block:: python + + import paddle.fluid as fluid + with fluid.dygraph.guard(): + strategy=fluid.dygraph.parallel.prepare_context() + emb = fluid.dygraph.Embedding([10, 10]) + emb = fluid.dygraph.parallel.DataParallel(emb, strategy) + + state_dict = emb.state_dict() + fluid.save_dygraph( state_dict, "paddle_dy") + + para_state_dict, _ = fluid.load_dygraph( "paddle_dy") + + emb.load_dict( para_state_dict ) + + ''' + + self._layers.load_dict( + stat_dict, + include_sublayers=include_sublayers, + use_structured_name=use_structured_name) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 0436137a80c..73273acf7a2 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -1518,6 +1518,10 @@ def save(program, model_path): assert base_name != "", \ "model_path MUST be format of dirname/filename [dirname\\filename in Window], Now filename is empty str" + dir_name = os.path.dirname(model_path) + if dir_name and not os.path.exists(dir_name): + os.makedirs(dir_name) # NOTE(review): exists() then makedirs() is not atomic; concurrent savers creating the same directory may hit OSError here. TODO confirm callers save from a single process. + def get_tensor(var): t = global_scope().find_var(var.name).get_tensor() return np.array(t) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py new file mode 100644 index 00000000000..d645a0a5ceb --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py @@ -0,0 +1,82 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import contextlib +import unittest +import numpy as np +import six +import unittest # NOTE(review): duplicate of the unittest import two lines above + +import paddle +import paddle.fluid as fluid +import paddle.fluid.dygraph as dygraph +from paddle.fluid.dygraph.nn import Linear +import paddle.fluid.core as core + + +class MLP(fluid.Layer): # Small model used as the layer wrapped by DataParallel in the test below: Linear(784, 10) followed by Linear(10, 10). + def __init__(self, param_attr=None, bias_attr=None): # NOTE(review): param_attr and bias_attr are accepted but never used in this body + super(MLP, self).__init__() + + self._linear1 = Linear(784, 10) + self._linear2 = Linear(10, 10) + + def forward(self, inputs): + y = self._linear1(inputs) + y = self._linear2(y) + return y + + +class TestDataParallelStateDict(unittest.TestCase): # Checks that DataParallel.state_dict matches the wrapped layer and that set_dict restores zeroed parameters. + def test_data_parallel_state_dict(self): + with fluid.dygraph.guard(): + strategy = dygraph.parallel.prepare_context() + mlp = MLP() + parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy) + + single_state = mlp.state_dict() + parallel_state = parallel_mlp.state_dict() + + base_para = {} + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + for k, v in single_state.items(): + self.assertTrue(k in parallel_state) + + self.assertTrue( + np.array_equal(v.numpy(), parallel_state[k].numpy())) + + base_para[k] = v.numpy() # numpy snapshot of the original value, keyed as in state_dict + + for k, v in parallel_state.items(): + np_t = v.numpy() + var = v.value().get_tensor() + var.set(np.zeros_like(np_t), place) # zero the parameter so that set_dict below has something to restore + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + parallel_mlp.set_dict(base_para) + + parallel_state = parallel_mlp.state_dict() + + for k, v in parallel_state.items(): + self.assertTrue(np.array_equal(v.numpy(), base_para[k])) + + parallel_mlp.load_dict(base_para) # NOTE(review): only checks that load_dict does not raise; nothing is asserted afterwards + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 01327ac647f..6a621b8c75c 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -14,6 +14,7 @@ from __future__ import print_function +import os import unittest import paddle.fluid as fluid import paddle.fluid.core as core @@ -879,9 +880,10 @@ class TestDygraphPtbRnn(unittest.TestCase): with fluid.dygraph.guard(): emb = fluid.dygraph.Embedding([10, 10]) state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, "emb_dy") + fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) # NOTE(review): 'saved_dy' is relative to the current working directory and is not removed by the lines shown here - para_state_dict, opti_state_dict = fluid.load_dygraph("emb_dy") + para_state_dict, opti_state_dict = fluid.load_dygraph( + os.path.join('saved_dy', 'emb_dy')) self.assertTrue(opti_state_dict == None) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 0dd767edc4c..24b61f514ce 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -609,7 +609,7 @@ class TestProgramStatePartial(unittest.TestCase): self.assertTrue(np.sum(np.abs(t)) != 0) base_map[var.name] = t - fluid.save(main_program, "./test_1") + fluid.save(main_program, os.path.join('some_dir', 'test_1')) # set var to zero for var in main_program.list_vars(): @@ -623,7 +623,8 @@ class TestProgramStatePartial(unittest.TestCase): self.assertTrue(np.sum(np.abs(new_t)) == 0) #fluid.load(test_program, "./test_1", None ) - program_state = fluid.load_program_state("./test_1") + program_state = fluid.load_program_state( # NOTE(review): the hunks for this file add no "import os"; confirm test_static_save_load.py already imports it + os.path.join('some_dir', 'test_1')) fluid.set_program_state(test_program, program_state) for var in test_program.list_vars(): -- GitLab