未验证 提交 29861846 编写于 作者: C Chen Weihang 提交者: GitHub

Update 2.0 Save/Load API names/arguments/doc examples (#27138)

* Update set_dict method name & add aliases (#26700)

* update set_dict method name & add aliases

* fix var name error

* fix alias formats

* use set_state_dict in unittest

* add decorator solve compatible problem

* polish decorator

* replace layer set_state_dict by patched method

* remove import monkey path layer

* fix import function error

* add unittest for coverage

* Support load state dict from `inference model` format save result (#26718)

* support load infer model format state dict

* add unittests

* remove keep name table

* resolve circular import

* fix compatible problem

* recover unittest

* polish doc and comment

* Change jit.save/load configs to config & update code examples (#27056)

* change configs to config & update examples

* fix deprecate decorator conflict
上级 0072490f
......@@ -232,6 +232,7 @@ from .framework import grad #DEFINE_ALIAS
from .framework import no_grad #DEFINE_ALIAS
from .framework import save #DEFINE_ALIAS
from .framework import load #DEFINE_ALIAS
from .framework import SaveLoadConfig #DEFINE_ALIAS
from .framework import DataParallel #DEFINE_ALIAS
from .framework import NoamDecay #DEFINE_ALIAS
......
......@@ -16,13 +16,16 @@ from __future__ import print_function
import os
import collections
import functools
from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase, _varbase_creator, _dygraph_tracer
import pickle
import six
from . import learning_rate_scheduler
import warnings
from .. import core
from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME, _load_persistable_vars
from .base import guard
from paddle.fluid.dygraph.jit import SaveLoadConfig, deprecate_save_load_configs
from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers
__all__ = [
'save_dygraph',
......@@ -30,6 +33,37 @@ __all__ = [
]
# NOTE(chenweihang): load_dygraph's argument keep_name_table is deprecated;
# this keeps calls that still pass the keep_name_table argument working
def deprecate_keep_name_table(func):
    """Keep the deprecated ``keep_name_table`` argument working on ``func``.

    The wrapped callable is expected to accept ``(model_path, config=None)``.
    A legacy ``keep_name_table`` value, passed either as a boolean second
    positional argument or as a keyword argument, is converted into a config
    object whose ``keep_name_table`` attribute carries that value, and a
    ``DeprecationWarning`` is emitted.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper with the metadata of ``func``. Calls that do not use the
        legacy argument are forwarded unchanged.
    """

    def _warn_and_build_config(keep_name_table, config=None):
        # stacklevel=3 skips this helper and `wrapper`, so the warning is
        # attributed to the code that called the decorated function.
        warnings.warn(
            "The argument `keep_name_table` has deprecated, please use `SaveLoadConfig.keep_name_table`.",
            DeprecationWarning,
            stacklevel=3)
        # Reuse a config passed next to the legacy keyword so that its other
        # options are not silently discarded; only create one when needed.
        if config is None:
            config = SaveLoadConfig()
        config.keep_name_table = keep_name_table
        return config

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if len(args) > 1 and isinstance(args[1], bool):
            # legacy positional form: func(model_path, True)
            args = list(args)
            args[1] = _warn_and_build_config(args[1])
        elif 'keep_name_table' in kwargs:
            # legacy keyword form: func(model_path, keep_name_table=True)
            kwargs['config'] = _warn_and_build_config(
                kwargs.pop('keep_name_table'), kwargs.get('config'))
        return func(*args, **kwargs)

    return wrapper
@dygraph_only
def save_dygraph(state_dict, model_path):
'''
......@@ -100,17 +134,28 @@ def save_dygraph(state_dict, model_path):
# TODO(qingqing01): remove dygraph_only to support loading static model.
# maybe need to unify the loading interface after 2.0 API is ready.
#@dygraph_only
def load_dygraph(model_path, keep_name_table=False):
# @dygraph_only
@deprecate_save_load_configs
@deprecate_keep_name_table
def load_dygraph(model_path, config=None):
'''
:api_attr: imperative
Load parameter state_dict from disk.
Load parameter state dict from disk.
.. note::
Due to some historical reasons, if you load ``state_dict`` from the saved
result of `paddle.io.save_inference_model`, the structured variable name
cannot be restored. You need to set the argument `use_structured_name=False`
when using `Layer.set_state_dict` later.
Args:
model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams')
keep_name_table(bool, optional) : Whether keep structed name to parameter name conversion table in output dict.
Default : False
model_path(str) : The file prefix store the state_dict.
(The path should Not contain suffix '.pdparams')
config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig`
object that specifies additional configuration options, these options
are for compatibility with ``jit.save/io.save_inference_model`` formats.
Default None.
Returns:
state_dict(dict) : the dict store the state_dict
......@@ -118,23 +163,27 @@ def load_dygraph(model_path, keep_name_table=False):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
paddle.disable_static()
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
parameter_list = emb.parameters() )
scheduler = paddle.optimizer.lr_scheduler.NoamLR(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,
parameters=emb.parameters())
state_dict = adam.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
para_state_dict, opti_state_dict = paddle.load("paddle_dy")
'''
# deal with argument `model_path`
model_prefix = model_path
if model_prefix.endswith(".pdparams"):
model_prefix = model_prefix[:-9]
......@@ -145,66 +194,45 @@ def load_dygraph(model_path, keep_name_table=False):
opti_dict = None
params_file_path = model_prefix + ".pdparams"
opti_file_path = model_prefix + ".pdopt"
# deal with argument `configs`
configs = config
if configs is None:
configs = SaveLoadConfig()
if not os.path.exists(params_file_path) and not os.path.exists(
opti_file_path):
# Load state dict by `jit.save` save format
# TODO(chenweihang): [Why not support `io.save_infernece_model` save format here]
# Load state dict by `jit.save/io.save_inference_model` save format
# NOTE(chenweihang): [ Compatibility of save_inference_model save format ]
# The model saved by `save_inference_model` does not completely correspond to
# the information required by the `state_dict` under the dygraph.
# Although we reluctantly restore the `state_dict` in some scenarios,
# this may not be complete and there are some limitations, so this function
# will be considered later. The limitations include:
# 1. `save_inference_model` not save structured name, we need to remind
# the user to configure the `use_structured_name` argument when `set_dict`,
# but this argument is currently not public
# 2. if `save_inference_model` save all persistable variables in a single file,
# user need to give the variable name list to load `state_dict`
# `save_inference_model` not save structured name, we need to remind
# the user to configure the `use_structured_name` argument when `set_state_dict`
# NOTE(chenweihang): `jit.save` doesn't save optimizer state
# 1. check model path
if not os.path.isdir(model_prefix):
raise ValueError("Model saved directory '%s' is not exists." %
model_prefix)
# 2. load `__variables.info__`
var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME)
if not os.path.exists(var_info_path):
raise RuntimeError(
"No target can be loaded. Now only supports loading `state_dict` from "
"the result saved by `imperative.save` and `imperative.jit.save`."
)
with open(var_info_path, 'rb') as f:
extra_var_info = pickle.load(f)
# 3. load `__variables__`
# TODO(chenweihang): now only supports loading from default save format:
# - all persistable vars saved in one file named `__variables__`
# for other case, we may need to modify the arguments of this API
var_file_path = os.path.join(model_prefix, VARIABLE_FILENAME)
if not os.path.exists(var_file_path):
raise RuntimeError(
"The parameter file to be loaded was not found. "
"Now only supports loading from the default save format, "
"and does not support custom params_filename and "
"save parameters separately.")
# 4. load all persistable vars
load_var_list = []
for name in sorted(extra_var_info):
var = _varbase_creator(name=name, persistable=True)
load_var_list.append(var)
_dygraph_tracer().trace_op(
type='load_combine',
inputs={},
outputs={'Out': load_var_list},
attrs={'file_path': var_file_path})
# 5. construct state_dict
# 2. load program desc & construct _ProgramHolder
programs = _construct_program_holders(model_path,
configs.model_filename)
# 3. load layer parameters & buffers
# NOTE: using fluid.dygraph.guard() here will cause import error in py2
with guard():
persistable_var_dict = _construct_params_and_buffers(
model_prefix,
programs,
configs.separate_params,
configs.params_filename,
append_suffix=False)
# 4. construct state_dict
para_dict = dict()
for var in load_var_list:
structured_name = extra_var_info[var.name].get('structured_name',
None)
if structured_name is None:
raise RuntimeError(
"Cannot find saved variable (%s)'s structured name in saved model.",
var.name)
para_dict[structured_name] = var.numpy()
# NOTE: `jit.save` doesn't save optimizer state
for var_name in persistable_var_dict:
para_dict[var_name] = persistable_var_dict[var_name].numpy()
else:
# Load state dict by `save_dygraph` save format
para_dict = {}
......@@ -213,7 +241,7 @@ def load_dygraph(model_path, keep_name_table=False):
para_dict = pickle.load(f) if six.PY2 else pickle.load(
f, encoding='latin1')
if not keep_name_table and "StructuredToParameterName@@" in para_dict:
if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict:
del para_dict["StructuredToParameterName@@"]
if os.path.exists(opti_file_path):
......
......@@ -488,6 +488,15 @@ def _load_persistable_vars(model_path,
return load_var_dict
# NOTE(chenweihang): to adapt paddle.load to get state_dict
def _remove_varname_suffix(var_dict, program_holder):
no_suffix_var_dict = dict()
for var_name in var_dict:
no_suffix_name = program_holder._suffix_varname_dict[var_name]
no_suffix_var_dict[no_suffix_name] = var_dict[var_name]
return no_suffix_var_dict
def _construct_program_holders(model_path, model_filename=None):
# make sure the path has been checked
program_holder_dict = dict()
......@@ -517,7 +526,8 @@ def _construct_program_holders(model_path, model_filename=None):
def _construct_params_and_buffers(model_path,
programs,
separate_params=False,
params_filename=None):
params_filename=None,
append_suffix=True):
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
if os.path.exists(var_info_path):
var_dict = _load_persistable_vars(model_path, var_info_path,
......@@ -526,6 +536,10 @@ def _construct_params_and_buffers(model_path,
else:
var_dict = _load_persistable_vars_by_program(
model_path, programs['forward'], params_filename)
if not append_suffix:
var_dict = _remove_varname_suffix(var_dict, programs['forward'])
return var_dict
......@@ -685,7 +699,7 @@ class TranslatedLayer(layers.Layer):
# 1. load program desc & construct _ProgramHolder
programs = _construct_program_holders(model_path, model_filename)
# 2. load layer parameters & parameter attributes
# 2. load layer parameters & buffers
persistable_vars = _construct_params_and_buffers(
model_path, programs, separate_params, params_filename)
......
此差异已折叠。
......@@ -29,6 +29,9 @@ from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard, param_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.framework import _current_expected_place as _get_device
__all__ = ['Layer']
......@@ -797,7 +800,7 @@ class Layer(core.Layer):
raise ValueError(
"super(YourLayer, self).__init__() should be called first")
if len(self._loaddict_holder) > 0:
assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format(
value.name)
value.set_value(self._loaddict_holder[value.name])
......@@ -943,12 +946,13 @@ class Layer(core.Layer):
destination = destination_temp
return destination
def set_dict(self,
stat_dict,
@framework.deprecate_stat_dict
def set_state_dict(self,
state_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters and persistable buffers from stat_dict. All the parameters and buffers will be reset by the tensor in the stat_dict
Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
......@@ -961,72 +965,67 @@ class Layer(core.Layer):
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
emb.set_dict( para_state_dict )
'''
self.load_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters and persistable buffers from stat_dict. All the parameters and persistabl buffers will be reset by the tensor in the stat_dict
This api will be Deprecated. Please use set_dict
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
Default: True
Returns:
None
import paddle
Examples:
.. code-block:: python
paddle.disable_static()
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
para_state_dict, _ = paddle.load("paddle_dy")
emb.load_dict( para_state_dict )
emb.set_state_dict(para_state_dict)
'''
inner_state_dict = self.state_dict()
def _check_match(key, param):
state = state_dict.get(key, None)
if state is None:
raise ValueError("{} is not found in the provided dict.".format(
key))
if list(state.shape) != list(param.shape):
raise ValueError(
"{} receives a shape {}, but the expected shape is {}.".
format(key, list(state.shape), list(param.shape)))
return param, state
matched_param_state = []
for key, param in self.state_dict().items():
key_name = key if use_structured_name else param.name
try:
match_res = _check_match(key_name, param)
matched_param_state.append(match_res)
except ValueError as err:
warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
if in_dygraph_mode():
for param, state in matched_param_state:
param.set_value(state)
else:
for name, param_or_buffer in inner_state_dict.items():
key_name = name if use_structured_name else param_or_buffer.name
if key_name in stat_dict:
param_or_buffer.set_value(stat_dict[key_name])
def _set_var(var, ndarray):
t = global_scope().find_var(var.name).get_tensor()
p = t._place()
if p.is_cpu_place():
place = core.CPUPlace()
elif p.is_cuda_pinned_place():
place = core.CUDAPinnedPlace()
else:
raise RuntimeError(
"Parameter or persistable buffer not found, Can't find [ {} ] in stat_dict"
"use_structured_name is set to [{}]".format(
key_name, use_structured_name))
unused_para_list = []
for k, v in stat_dict.items():
if k not in inner_state_dict:
unused_para_list.append(k)
if len(unused_para_list) > 0:
warnings.warn(
"Variables [ {} ] are not used, because not included in layers state_dict".
format(" ".join(unused_para_list)))
p = core.Place()
p.set_place(t._place())
place = core.CUDAPlace(p.gpu_device_id())
t.set(ndarray, place)
executor = Executor(_get_device())._default_executor
# restore parameter states
core._create_loaded_parameter(
[param for param, state in matched_param_state],
global_scope(), executor)
for param, state in matched_param_state:
_set_var(param, state)
# [aliases] Compatible with old method names
set_dict = set_state_dict
load_dict = set_state_dict
......@@ -97,7 +97,7 @@ class LearningRateDecay(object):
"""
self.keys = ['step_num']
def set_dict(self, state_dict):
def set_state_dict(self, state_dict):
"""
Loads the schedulers state.
"""
......@@ -114,6 +114,9 @@ class LearningRateDecay(object):
"There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict"
)
# [aliases] Compatible with old method names
set_dict = set_state_dict
def step(self):
raise NotImplementedError()
......
......@@ -587,12 +587,13 @@ class DataParallel(layers.Layer):
include_sublayers=include_sublayers,
structured_name_prefix=structured_name_prefix)
def set_dict(self,
stat_dict,
@framework.deprecate_stat_dict
def set_state_dict(self,
state_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
Set parameters of self._layers from state_dict. All the parameters of self._layers will be reset by the tensor in the state_dict
Parameters:
state_dict(dict) : Dict contains all the parameters
......@@ -605,62 +606,27 @@ class DataParallel(layers.Layer):
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=fluid.dygraph.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = fluid.dygraph.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
emb.set_dict( para_state_dict )
'''
self._layers.set_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
This api will be Deprecated. Please use set_dict
Parameters:
state_dict(dict) : Dict contains all the parameters
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
Default: True
Returns:
None
import paddle
Examples:
.. code-block:: python
paddle.disable_static()
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=fluid.dygraph.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding([10, 10])
emb = fluid.dygraph.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
para_state_dict, _ = paddle.load("paddle_dy")
emb.load_dict( para_state_dict )
emb.set_state_dict(para_state_dict)
'''
self._layers.load_dict(
stat_dict,
self._layers.set_state_dict(
state_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
# [aliases] Compatible with old method names
set_dict = set_state_dict
load_dict = set_state_dict
......@@ -36,6 +36,7 @@ from . import core
from . import unique_name
import paddle.version as fluid_version
import warnings
import functools
__all__ = [
'Program',
......@@ -238,6 +239,25 @@ def _fake_interface_only_(func):
return __impl__
# NOTE(chenweihang): The fluid APIs Layer.set_dict and Optimizer.load misspelled an
# argument name as `stat_dict` (the correct name is `state_dict`). This decorator lets
# the argument be renamed without introducing compatibility issues.
# NOTE(chenweihang): `wrap_decorator` is not used here because it moves kwargs
# into args, which doesn't work for this decorator.
def deprecate_stat_dict(func):
    """Accept the misspelled keyword ``stat_dict`` as ``state_dict``.

    When the wrapped callable is invoked with a ``stat_dict`` keyword
    argument, a ``DeprecationWarning`` is emitted and the value is forwarded
    under the name ``state_dict``. Positional arguments and all other keyword
    arguments are passed through unchanged.

    Args:
        func: the callable to wrap; it should take a ``state_dict`` parameter.

    Returns:
        A wrapper with the metadata of ``func``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if 'stat_dict' in kwargs:
            # stacklevel=2 attributes the warning to the caller of the
            # decorated function instead of to this wrapper.
            warnings.warn(
                "The argument `stat_dict` has deprecated, please change it to `state_dict`.",
                DeprecationWarning,
                stacklevel=2)
            kwargs['state_dict'] = kwargs.pop('stat_dict')
        return func(*args, **kwargs)

    return wrapper
dygraph_not_support = wrap_decorator(_dygraph_not_support_)
dygraph_only = wrap_decorator(_dygraph_only_)
fake_interface_only = wrap_decorator(_fake_interface_only_)
......
......@@ -170,7 +170,7 @@ class Optimizer(object):
return state_dict
@framework.dygraph_only
def set_dict(self, state_dict):
def set_state_dict(self, state_dict):
'''
Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be changed.
......@@ -182,20 +182,22 @@ class Optimizer(object):
Examples:
.. code-block:: python
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
import paddle
paddle.disable_static()
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
adam = fluid.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000),
adam = paddle.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000),
parameter_list=emb.parameters())
state_dict = adam.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
para_state_dict, opti_state_dict = paddle.load("paddle_dy")
adam.set_dict(opti_state_dict)
adam.set_state_dict(opti_state_dict)
'''
from paddle.optimizer.lr_scheduler import _LRScheduler
......@@ -257,6 +259,9 @@ class Optimizer(object):
tensor.set(load_para_np, framework._current_expected_place())
# [aliases] Compatible with old method names
set_dict = set_state_dict
def get_opti_var_name_list(self):
return self._opti_name_list
......@@ -4595,7 +4600,8 @@ class RecomputeOptimizer(Optimizer):
), "_checkpoints should be a list of Variable or a list of String"
self._checkpoints = checkpoints
def load(self, stat_dict):
@framework.deprecate_stat_dict
def load(self, state_dict):
"""
:api_attr: Static Graph
......@@ -4603,7 +4609,7 @@ class RecomputeOptimizer(Optimizer):
:return: None
Args:
stat_dict: the dict load by load_persistable method
state_dict: the dict load by load_persistable method
Examples:
.. code-block:: python
......@@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer):
sgd = fluid.optimizer.RecomputeOptimizer(sgd)
sgd._set_checkpoints([fc_1, pred])
try:
stat_dict = {}
sgd.load(stat_dict)
state_dict = {}
sgd.load(state_dict)
except NotImplementedError as e:
print(cpt.get_exception_message(e))
"""
......
......@@ -43,7 +43,7 @@ class TestDirectory(unittest.TestCase):
'paddle.distributed.prepare_context', 'paddle.DataParallel',
'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static',
'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer',
'paddle.jit.save', 'paddle.jit.load', 'paddle.jit.SaveLoadConfig',
'paddle.jit.save', 'paddle.jit.load', 'paddle.SaveLoadConfig',
'paddle.NoamDecay', 'paddle.PiecewiseDecay',
'paddle.NaturalExpDecay', 'paddle.ExponentialDecay',
'paddle.InverseTimeDecay', 'paddle.PolynomialDecay',
......
......@@ -374,8 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
adam._learning_rate.step_num = 0
para_state_dict, opti_state_dict = paddle.load("./test_dy")
print(opti_state_dict['LR_Scheduler'])
adam.set_dict(opti_state_dict)
adam.set_state_dict(opti_state_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
......@@ -393,7 +392,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_dict(para_state_dict)
ptb_model.set_state_dict(stat_dict=para_state_dict)
state_dict = ptb_model.state_dict()
......@@ -483,7 +482,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
adam.set_dict(self.opti_dict)
adam.set_state_dict(self.opti_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
if isinstance(v, core.VarBase):
......@@ -500,7 +499,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_dict(self.state_dict)
ptb_model.set_state_dict(self.state_dict)
state_dict = ptb_model.state_dict()
......@@ -593,7 +592,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
adam.set_dict(np_opti_dict)
adam.set_state_dict(np_opti_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
......@@ -613,7 +612,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_dict(np_state_dict)
ptb_model.set_state_dict(np_state_dict)
state_dict = ptb_model.state_dict()
......@@ -656,8 +655,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_hidden = None
last_cell = None
adam.set_dict(self.opti_dict)
ptb_model.set_dict(self.state_dict)
adam.set_state_dict(self.opti_dict)
ptb_model.set_state_dict(self.state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
......@@ -745,8 +744,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_cell = None
state_dict, opti_dict = fluid.load_dygraph("./test_dy")
adam.set_dict(opti_dict)
ptb_model.set_dict(state_dict)
adam.set_state_dict(opti_dict)
ptb_model.set_state_dict(state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
......@@ -849,8 +848,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in self.state_dict.items():
np_state_dict[k] = v.numpy()
adam.set_dict(np_opti_dict)
ptb_model.set_dict(np_state_dict)
adam.set_state_dict(np_opti_dict)
ptb_model.set_state_dict(np_state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
......@@ -912,6 +911,22 @@ class TestDygraphPtbRnn(unittest.TestCase):
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy.pdopt'))
def test_load_compatible_with_keep_name_table(self):
    """``paddle.load`` must still accept the deprecated ``keep_name_table``.

    Only parameters are saved, so the optimizer part of the loaded result
    is expected to be ``None`` in both call forms.
    """
    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])
        state_dict = emb.state_dict()
        paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))

        # legacy positional form: the boolean is the second argument
        para_state_dict, opti_state_dict = paddle.load(
            os.path.join('saved_dy', 'emb_dy'), True)
        self.assertIsNotNone(para_state_dict)
        self.assertIsNone(opti_state_dict)

        # legacy keyword form
        para_state_dict, opti_state_dict = paddle.load(
            os.path.join('saved_dy', 'emb_dy'), keep_name_table=True)
        self.assertIsNotNone(para_state_dict)
        self.assertIsNone(opti_state_dict)
if __name__ == '__main__':
unittest.main()
......@@ -917,6 +917,29 @@ class TestDygraphPtbRnn(unittest.TestCase):
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy.pdopt'))
# Loads a saved Embedding state dict, removes the 'weight' entry and passes
# the incomplete dict to set_state_dict. The test contains no assertions; it
# only requires that the call completes without raising.
def test_no_state_in_input_dict(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy'))
# drop the only parameter so the layer's 'weight' key is missing
para_state_dict.pop('weight')
emb.set_state_dict(para_state_dict)
# Loads a saved Embedding state dict, gives 'weight' an extra trailing axis
# so its shape no longer matches the layer's parameter, and passes it to
# set_state_dict. The test contains no assertions; it only requires that the
# call completes without raising.
def test_state_shape_mismatch(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy'))
# np.expand_dims(..., axis=-1) appends a new last axis of length 1
para_state_dict['weight'] = np.expand_dims(
para_state_dict['weight'], axis=-1)
emb.set_state_dict(para_state_dict)
if __name__ == '__main__':
unittest.main()
......@@ -183,25 +183,6 @@ class TestJitSaveLoad(unittest.TestCase):
with self.assertRaises(ValueError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
def test_load_dygraph_no_var_info(self):
model_path = "model.test_jit_save_load.no_var_info"
self.train_and_save_model(model_path=model_path)
# remove `__variables.info__`
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
os.remove(var_info_path)
new_layer = LinearNet(784, 1)
with self.assertRaises(RuntimeError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
def test_load_dygraph_not_var_file(self):
model_path = "model.test_jit_save_load.no_var_file"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.params_filename = "__params__"
self.train_and_save_model(model_path=model_path, configs=configs)
new_layer = LinearNet(784, 1)
with self.assertRaises(RuntimeError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
class LinearNetMultiInput(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import six
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from test_imperative_base import new_program_scope
def convolutional_neural_network(img):
    """Build the static-graph classifier used by the tests in this file.

    The network is conv-pool (20 filters) -> batch norm -> conv-pool
    (50 filters) -> fully connected softmax layer of size 10.

    Args:
        img: the input image variable.

    Returns:
        The softmax prediction variable.
    """
    # settings shared by both conv-pool stages
    stage_kwargs = dict(filter_size=5, pool_size=2, pool_stride=2, act="relu")

    first_stage = fluid.nets.simple_img_conv_pool(
        input=img, num_filters=20, **stage_kwargs)
    normalized = fluid.layers.batch_norm(first_stage)
    second_stage = fluid.nets.simple_img_conv_pool(
        input=normalized, num_filters=50, **stage_kwargs)
    return fluid.layers.fc(input=second_stage, size=10, act='softmax')
def static_train_net(img, label):
    """Attach a loss and an SGD optimizer to the classifier.

    Args:
        img: the input image variable.
        label: the label variable.

    Returns:
        tuple: ``(prediction, avg_loss)`` where ``avg_loss`` is the mean
        cross-entropy between ``prediction`` and ``label``.
    """
    prediction = convolutional_neural_network(img)
    avg_loss = fluid.layers.mean(
        fluid.layers.cross_entropy(
            input=prediction, label=label))
    fluid.optimizer.SGD(learning_rate=0.001).minimize(avg_loss)
    return prediction, avg_loss
# Checks that paddle.load can rebuild a parameter dict from a model written by
# fluid.io.save_inference_model, for each combination of default/custom
# model_filename and params_filename.
class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase):
# Settings for the short training run in train_and_save_model.
# NOTE(review): self.seed is assigned but not read in the code shown here.
def setUp(self):
self.seed = 90
self.epoch_num = 1
self.batch_size = 128
self.batch_num = 10
# Trains the static network on MNIST for a few batches, saves it with
# fluid.io.save_inference_model using self.save_dirname / self.model_filename /
# self.params_filename (set by each test), and returns a dict mapping every
# parameter name of the main program to its fetched value.
def train_and_save_model(self):
with new_program_scope():
startup_program = fluid.default_startup_program()
main_program = fluid.default_main_program()
img = fluid.data(
name='img', shape=[None, 1, 28, 28], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
prediction, avg_loss = static_train_net(img, label)
# run on GPU 0 when paddle is compiled with CUDA, otherwise on CPU
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
exe.run(startup_program)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=100),
batch_size=self.batch_size)
for _ in range(0, self.epoch_num):
for batch_id, data in enumerate(train_reader()):
exe.run(main_program,
feed=feeder.feed(data),
fetch_list=[avg_loss])
# stop early: only a handful of batches are trained
if batch_id > self.batch_num:
break
# snapshot the trained parameter values to compare against after loading
static_param_dict = {}
for param in fluid.default_main_program().all_parameters():
static_param_dict[param.name] = fluid.executor._fetch_var(
param.name)
fluid.io.save_inference_model(
self.save_dirname, ["img"], [prediction],
exe,
model_filename=self.model_filename,
params_filename=self.params_filename)
return static_param_dict
# Asserts that every entry of orig_dict is found under the same name in
# load_dict with an equal array (np.array_equal). Extra entries in load_dict
# are not checked.
def check_load_state_dict(self, orig_dict, load_dict):
for var_name, value in six.iteritems(orig_dict):
self.assertTrue(np.array_equal(value, load_dict[var_name]))
# Default model and params file names.
# NOTE(review): separate_params=True presumably matches how
# save_inference_model stores parameters when params_filename is None - confirm.
def test_load_default(self):
self.save_dirname = "static_mnist.load_state_dict.default"
self.model_filename = None
self.params_filename = None
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.separate_params = True
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
# Custom model file name, default params file name.
def test_load_with_model_filename(self):
self.save_dirname = "static_mnist.load_state_dict.model_filename"
self.model_filename = "static_mnist.model"
self.params_filename = None
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.separate_params = True
configs.model_filename = self.model_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
# Default model file name, custom params file name.
def test_load_with_param_filename(self):
self.save_dirname = "static_mnist.load_state_dict.param_filename"
self.model_filename = None
self.params_filename = "static_mnist.params"
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.params_filename = self.params_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
# Custom model file name and custom params file name.
def test_load_with_model_and_param_filename(self):
self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename"
self.model_filename = "static_mnist.model"
self.params_filename = "static_mnist.params"
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.params_filename = self.params_filename
configs.model_filename = self.model_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
# Run the unittest cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
......@@ -832,8 +832,8 @@ class TestRecomputeOptimizer(unittest.TestCase):
recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
recompute_optimizer._set_checkpoints([b1_out])
try:
stat_dict = {}
recompute_optimizer.load(stat_dict)
state_dict = {}
recompute_optimizer.load(state_dict)
except NotImplementedError as e:
self.assertEqual(
"load function is not supported by Recompute Optimizer for now",
......
......@@ -20,8 +20,8 @@ __all__ = [
]
# Dygraph-related public names. Each name is listed exactly once and every
# element is followed by a comma, so two adjacent string literals can never be
# silently concatenated into a bogus export such as 'DataParallelgrad'.
__all__ += [
    'grad', 'LayerList', 'load', 'save', 'SaveLoadConfig', 'to_variable',
    'no_grad', 'DataParallel',
]
__all__ += [
......@@ -50,6 +50,7 @@ from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS
from ..fluid.dygraph.base import grad #DEFINE_ALIAS
from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS
from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS
from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS
from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS
from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS
......
......@@ -19,10 +19,7 @@ from . import model_summary
from . import model
from .model import *
from .model_summary import summary
from .dygraph_layer_patch import monkey_patch_layer
# Runs once at import time.
# NOTE(review): presumably configures the package logger — confirm in `logger`.
logger.setup_logger()
# Public API: the `callbacks` submodule, everything `model` exports, `summary`.
__all__ = ['callbacks'] + model.__all__ + ['summary']
# Import-time side effect: invokes the patch function imported from
# `.dygraph_layer_patch`.
# NOTE(review): the hunk header for this file (-19,10 +19,7) suggests this call
# and its import are being removed — confirm before relying on the patch.
monkey_patch_layer()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import paddle.fluid as fluid
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.framework import _current_expected_place as _get_device
def monkey_patch_layer():
    """Attach a `load_dict` method to `fluid.dygraph.Layer`."""

    def load_dict(self,
                  stat_dict,
                  include_sublayers=True,
                  use_structured_name=True):
        '''
        Set parameters from stat_dict. Every parameter that has a matching
        entry with the same shape is reset to the tensor in stat_dict; any
        other parameter is skipped with a warning.

        This api will be Deprecated. Please use set_dict

        Parameters:
            stat_dict(dict) : Dict contains all the parameters
            include_sublayers(bool, optional) : Accepted for interface
                compatibility; this implementation does not read it.
                Default: True
            use_structured_name(bool, optional) : If true, use structured name
                as key, otherwise, use parameter name as key. Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph( state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
                    emb.load_dict( para_state_dict )

        '''

        def _pair_with_state(lookup_key, target):
            # Return (parameter, new value), or raise ValueError when the
            # entry is absent or its shape differs from the parameter's.
            candidate = stat_dict.get(lookup_key, None)
            if candidate is None:
                raise ValueError(
                    "{} is not found in the providing file.".format(
                        lookup_key))
            got_shape = list(candidate.shape)
            want_shape = list(target.shape)
            if got_shape != want_shape:
                raise ValueError(
                    "{} receives a shape {}, but the expected shape is {}.".
                    format(lookup_key, list(candidate.shape),
                           list(target.shape)))
            return target, candidate

        def _assign_tensor(var, ndarray):
            # Write `ndarray` into the scope tensor of `var`, on the place
            # the tensor currently reports.
            tensor = fluid.global_scope().find_var(var.name).get_tensor()
            current = tensor._place()
            if current.is_cpu_place():
                target_place = fluid.CPUPlace()
            elif current.is_cuda_pinned_place():
                target_place = fluid.CUDAPinnedPlace()
            else:
                # Anything else is handled as a CUDA device place.
                generic = fluid.core.Place()
                generic.set_place(tensor._place())
                target_place = fluid.CUDAPlace(generic.gpu_device_id())
            tensor.set(ndarray, target_place)

        # Collect the (parameter, value) pairs that can be loaded.
        loadable = []
        for structured_key, parameter in self.state_dict().items():
            if use_structured_name:
                lookup = structured_key
            else:
                lookup = parameter.name
            try:
                pair = _pair_with_state(lookup, parameter)
            except ValueError as err:
                warnings.warn(
                    ("Skip loading for {}. ".format(structured_key) +
                     str(err)))
            else:
                loadable.append(pair)

        if in_dygraph_mode():
            for parameter, value in loadable:
                parameter.set_value(value)
            return

        executor = fluid.Executor(_get_device())._default_executor
        # restore parameter states
        fluid.core._create_loaded_parameter(
            [pair[0] for pair in loadable], fluid.global_scope(), executor)
        for parameter, value in loadable:
            _assign_tensor(parameter, value)

    fluid.dygraph.Layer.load_dict = load_dict
......@@ -14,7 +14,6 @@
from ..fluid.dygraph.jit import save #DEFINE_ALIAS
from ..fluid.dygraph.jit import load #DEFINE_ALIAS
from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS
from ..fluid.dygraph.jit import TracedLayer #DEFINE_ALIAS
from ..fluid.dygraph.jit import set_code_level #DEFINE_ALIAS
from ..fluid.dygraph.jit import set_verbosity #DEFINE_ALIAS
......@@ -23,6 +22,6 @@ from ..fluid.dygraph import ProgramTranslator #DEFINE_ALIAS
from ..fluid.dygraph.io import TranslatedLayer #DEFINE_ALIAS
# Public names of this package. Each name is listed exactly once and every
# element is followed by a comma, so two adjacent string literals can never be
# silently concatenated into a bogus export such as 'set_verbositysave'.
# NOTE(review): 'SaveLoadConfig' is kept because its import is still present
# above; drop both together if this package should stop re-exporting it.
__all__ = [
    'save', 'load', 'SaveLoadConfig', 'TracedLayer', 'to_static',
    'ProgramTranslator', 'TranslatedLayer', 'set_code_level', 'set_verbosity',
]
......@@ -109,7 +109,7 @@ class _LRScheduler(object):
"""
self.keys = ['last_epoch', 'last_lr']
def set_dict(self, state_dict):
def set_state_dict(self, state_dict):
"""
Loads the schedulers state.
"""
......@@ -126,8 +126,8 @@ class _LRScheduler(object):
"There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict"
)
# alias for set_dict
set_state_dict = set_dict
# alias for set_state_dict
set_dict = set_state_dict
def get_lr(self):
# calculate by python float
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册