提交 da24e423 编写于 作者: S songyouwei 提交者: hong

[cherry-pick] fix save_dygraph & save & DataParallel (#22303)

* cherry-pick #22266

* cherry-pick #22169
test=release/1.7, test=develop
上级 c396f276
...@@ -82,7 +82,12 @@ def save_dygraph(state_dict, model_path): ...@@ -82,7 +82,12 @@ def save_dygraph(state_dict, model_path):
name_table[k] = v.name name_table[k] = v.name
model_dict["StructuredToParameterName@@"] = name_table model_dict["StructuredToParameterName@@"] = name_table
with open(model_path + suffix, 'wb') as f: file_name = model_path + suffix
dir_name = os.path.dirname(file_name)
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
with open(file_name, 'wb') as f:
pickle.dump(model_dict, f) pickle.dump(model_dict, f)
...@@ -113,7 +118,7 @@ def load_dygraph(model_path, keep_name_table=False): ...@@ -113,7 +118,7 @@ def load_dygraph(model_path, keep_name_table=False):
adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000), adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
parameter_list = emb.parameters() ) parameter_list = emb.parameters() )
state_dict = adam.state_dict() state_dict = adam.state_dict()
fluid.save_dygraph( state_dict, "padle_dy") fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy") para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
......
...@@ -254,3 +254,116 @@ class DataParallel(layers.Layer): ...@@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
def _is_data_parallel_mode(self): def _is_data_parallel_mode(self):
return self._strategy.nranks > 1 return self._strategy.nranks > 1
def state_dict(self,
               destination=None,
               include_sublayers=True,
               structured_name_prefix=""):
    '''
    Get all parameters of self._layers and its sub-layers, collected into a dict.

    The call is forwarded unchanged to ``self._layers.state_dict``; whatever
    the wrapped layers return is returned here.

    Parameters:
        destination(dict, optional): If provided, all the parameters will be set into this dict. Default: None
        include_sublayers(bool, optional): If true, also include the parameters from sublayers. Default: True
        structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start
            with structured_name_prefix. Default: ""

    Returns:
        dict: a dict containing all the parameters of self._layers

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            with fluid.dygraph.guard():
                strategy = fluid.dygraph.parallel.prepare_context()
                emb = fluid.dygraph.Embedding([10, 10])
                emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                state_dict = emb.state_dict()
                fluid.save_dygraph(state_dict, "paddle_dy")
    '''
    # Pure delegation: the wrapper holds no parameters of its own here.
    return self._layers.state_dict(
        destination=destination,
        include_sublayers=include_sublayers,
        structured_name_prefix=structured_name_prefix)
def set_dict(self,
             stat_dict,
             include_sublayers=True,
             use_structured_name=True):
    '''
    Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict.

    The call is forwarded unchanged to ``self._layers.set_dict``.

    Parameters:
        stat_dict(dict): Dict containing all the parameters
        include_sublayers(bool, optional): If true, also include the parameters from sublayers. Default: True
        use_structured_name(bool, optional): If true, use structured name as key, otherwise, use parameter name as key.
            Default: True

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            with fluid.dygraph.guard():
                strategy = fluid.dygraph.parallel.prepare_context()
                emb = fluid.dygraph.Embedding([10, 10])
                emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                state_dict = emb.state_dict()
                fluid.save_dygraph(state_dict, "paddle_dy")

                para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                emb.set_dict(para_state_dict)
    '''
    # Pure delegation; the wrapped call's return value is intentionally dropped.
    self._layers.set_dict(
        stat_dict,
        include_sublayers=include_sublayers,
        use_structured_name=use_structured_name)
def load_dict(self,
              stat_dict,
              include_sublayers=True,
              use_structured_name=True):
    '''
    Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict.

    This api will be Deprecated. Please use set_dict.

    The call is forwarded unchanged to ``self._layers.load_dict``.

    Parameters:
        stat_dict(dict): Dict containing all the parameters
        include_sublayers(bool, optional): If true, also include the parameters from sublayers. Default: True
        use_structured_name(bool, optional): If true, use structured name as key, otherwise, use parameter name as key.
            Default: True

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            with fluid.dygraph.guard():
                strategy = fluid.dygraph.parallel.prepare_context()
                emb = fluid.dygraph.Embedding([10, 10])
                emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                state_dict = emb.state_dict()
                fluid.save_dygraph(state_dict, "paddle_dy")

                para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                emb.load_dict(para_state_dict)
    '''
    # Pure delegation; the wrapped call's return value is intentionally dropped.
    self._layers.load_dict(
        stat_dict,
        include_sublayers=include_sublayers,
        use_structured_name=use_structured_name)
...@@ -1518,6 +1518,10 @@ def save(program, model_path): ...@@ -1518,6 +1518,10 @@ def save(program, model_path):
assert base_name != "", \ assert base_name != "", \
"model_path MUST be format of dirname/filename [dirname\\filename in Window], Now filename is empty str" "model_path MUST be format of dirname/filename [dirname\\filename in Window], Now filename is empty str"
dir_name = os.path.dirname(model_path)
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
def get_tensor(var): def get_tensor(var):
t = global_scope().find_var(var.name).get_tensor() t = global_scope().find_var(var.name).get_tensor()
return np.array(t) return np.array(t)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
import numpy as np
import six
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.nn import Linear
import paddle.fluid.core as core
class MLP(fluid.Layer):
    """Two stacked ``Linear`` layers (784 -> 10 -> 10) used as a small test model."""

    def __init__(self, param_attr=None, bias_attr=None):
        # NOTE: param_attr and bias_attr are accepted but not used below.
        super(MLP, self).__init__()

        self._linear1 = Linear(784, 10)
        self._linear2 = Linear(10, 10)

    def forward(self, inputs):
        """Apply ``_linear1`` and then ``_linear2`` to ``inputs``."""
        hidden = self._linear1(inputs)
        return self._linear2(hidden)
class TestDataParallelStateDict(unittest.TestCase):
    """Checks that DataParallel exposes state_dict / set_dict / load_dict of the wrapped layer."""

    def _zero_parameters(self, state, place):
        """Overwrite every tensor in ``state`` with zeros and confirm it took effect."""
        for v in state.values():
            np_t = v.numpy()
            var = v.value().get_tensor()
            var.set(np.zeros_like(np_t), place)

            self.assertTrue(np.sum(np.abs(v.numpy())) == 0)

    def _assert_state_matches(self, state, expected):
        """Assert every tensor in ``state`` equals the numpy array stored under the same key."""
        for k, v in state.items():
            self.assertIn(k, expected)
            self.assertTrue(np.array_equal(v.numpy(), expected[k]))

    def test_data_parallel_state_dict(self):
        with fluid.dygraph.guard():
            strategy = dygraph.parallel.prepare_context()
            mlp = MLP()
            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)

            single_state = mlp.state_dict()
            parallel_state = parallel_mlp.state_dict()

            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)

            # The wrapper must expose the same keys and values as the bare
            # layer; keep numpy snapshots to restore from later.
            base_para = {}
            for k, v in single_state.items():
                self.assertIn(k, parallel_state)
                self.assertTrue(
                    np.array_equal(v.numpy(), parallel_state[k].numpy()))
                base_para[k] = v.numpy()

            # set_dict must restore zeroed parameters.
            self._zero_parameters(parallel_state, place)
            parallel_mlp.set_dict(base_para)
            self._assert_state_matches(parallel_mlp.state_dict(), base_para)

            # load_dict must do the same. Zero again first, otherwise the
            # parameters already hold the expected values and the check
            # would pass even if load_dict did nothing.
            self._zero_parameters(parallel_mlp.state_dict(), place)
            parallel_mlp.load_dict(base_para)
            self._assert_state_matches(parallel_mlp.state_dict(), base_para)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import print_function from __future__ import print_function
import os
import unittest import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
...@@ -879,9 +880,10 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -879,9 +880,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "emb_dy") fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, opti_state_dict = fluid.load_dygraph("emb_dy") para_state_dict, opti_state_dict = fluid.load_dygraph(
os.path.join('saved_dy', 'emb_dy'))
self.assertTrue(opti_state_dict == None) self.assertTrue(opti_state_dict == None)
......
...@@ -609,7 +609,7 @@ class TestProgramStatePartial(unittest.TestCase): ...@@ -609,7 +609,7 @@ class TestProgramStatePartial(unittest.TestCase):
self.assertTrue(np.sum(np.abs(t)) != 0) self.assertTrue(np.sum(np.abs(t)) != 0)
base_map[var.name] = t base_map[var.name] = t
fluid.save(main_program, "./test_1") fluid.save(main_program, os.path.join('some_dir', 'test_1'))
# set var to zero # set var to zero
for var in main_program.list_vars(): for var in main_program.list_vars():
...@@ -623,7 +623,8 @@ class TestProgramStatePartial(unittest.TestCase): ...@@ -623,7 +623,8 @@ class TestProgramStatePartial(unittest.TestCase):
self.assertTrue(np.sum(np.abs(new_t)) == 0) self.assertTrue(np.sum(np.abs(new_t)) == 0)
#fluid.load(test_program, "./test_1", None ) #fluid.load(test_program, "./test_1", None )
program_state = fluid.load_program_state("./test_1") program_state = fluid.load_program_state(
os.path.join('some_dir', 'test_1'))
fluid.set_program_state(test_program, program_state) fluid.set_program_state(test_program, program_state)
for var in test_program.list_vars(): for var in test_program.list_vars():
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册