Unverified commit 29861846, authored by Chen Weihang, committed by GitHub

Update 2.0 Save/Load API names/arguments/doc examples (#27138)

* Update set_dict method name & add aliases (#26700)

* update set_dict method name & add aliases

* fix var name error

* fix alias formats

* use set_state_dict in unittest

* add decorator to solve compatibility problem

* polish decorator

* replace layer set_state_dict by patched method

* remove monkey-patched layer import

* fix import function error

* add unittest for coverage

* Support loading state dict from `inference model` format save result (#26718)

* support load infer model format state dict

* add unittests

* remove keep name table

* resolve circular import

* fix compatibility problem

* recover unittest

* polish doc and comment

* Change jit.save/load configs to config & update code examples (#27056)

* change configs to config & update examples

* fix deprecate decorator conflict
Parent 0072490f
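Taken together, these changes give the following user-facing flow. This is a minimal sketch assembled from the doc examples updated in this diff, assuming the 2.0-beta API names:

    import paddle

    paddle.disable_static()

    emb = paddle.nn.Embedding([10, 10])

    # save and load a state dict with the 2.0 names
    state_dict = emb.state_dict()
    paddle.save(state_dict, "paddle_dy")
    para_state_dict, opti_state_dict = paddle.load("paddle_dy")

    # `set_state_dict` replaces `set_dict`/`load_dict`;
    # the old names are kept as aliases for compatibility
    emb.set_state_dict(para_state_dict)
    emb.set_dict(para_state_dict)  # still works via the alias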
@@ -232,6 +232,7 @@ from .framework import grad  #DEFINE_ALIAS
 from .framework import no_grad  #DEFINE_ALIAS
 from .framework import save  #DEFINE_ALIAS
 from .framework import load  #DEFINE_ALIAS
+from .framework import SaveLoadConfig  #DEFINE_ALIAS
 from .framework import DataParallel  #DEFINE_ALIAS
 from .framework import NoamDecay  #DEFINE_ALIAS
...
@@ -16,13 +16,16 @@ from __future__ import print_function
 import os
 import collections
+import functools
 from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase, _varbase_creator, _dygraph_tracer
 import pickle
 import six
 from . import learning_rate_scheduler
 import warnings
 from .. import core
-from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME, _load_persistable_vars
+from .base import guard
+from paddle.fluid.dygraph.jit import SaveLoadConfig, deprecate_save_load_configs
+from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers

 __all__ = [
     'save_dygraph',
@@ -30,6 +33,37 @@ __all__ = [
 ]
+
+# NOTE(chenweihang): deprecate load_dygraph's argument keep_name_table,
+# ensure compatibility when user still uses the keep_name_table argument
+def deprecate_keep_name_table(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        def __warn_and_build_configs__(keep_name_table):
+            warnings.warn(
+                "The argument `keep_name_table` has been deprecated, please use `SaveLoadConfig.keep_name_table`.",
+                DeprecationWarning)
+            config = SaveLoadConfig()
+            config.keep_name_table = keep_name_table
+            return config
+
+        # deal with arg `keep_name_table`
+        if len(args) > 1 and isinstance(args[1], bool):
+            args = list(args)
+            args[1] = __warn_and_build_configs__(args[1])
+        # deal with kwargs
+        elif 'keep_name_table' in kwargs:
+            kwargs['config'] = __warn_and_build_configs__(kwargs[
+                'keep_name_table'])
+            kwargs.pop('keep_name_table')
+        else:
+            # do nothing
+            pass
+
+        return func(*args, **kwargs)
+
+    return wrapper
 @dygraph_only
 def save_dygraph(state_dict, model_path):
     '''
@@ -100,17 +134,28 @@ def save_dygraph(state_dict, model_path):

 # TODO(qingqing01): remove dygraph_only to support loading static model.
 # maybe need to unify the loading interface after 2.0 API is ready.
-#@dygraph_only
-def load_dygraph(model_path, keep_name_table=False):
+# @dygraph_only
+@deprecate_save_load_configs
+@deprecate_keep_name_table
+def load_dygraph(model_path, config=None):
     '''
     :api_attr: imperative

-    Load parameter state_dict from disk.
+    Load parameter state dict from disk.
+
+    .. note::
+        Due to some historical reasons, if you load ``state_dict`` from the saved
+        result of `paddle.io.save_inference_model`, the structured variable name
+        cannot be restored. You need to set the argument `use_structured_name=False`
+        when using `Layer.set_state_dict` later.

     Args:
-        model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams')
-        keep_name_table(bool, optional) : Whether keep structed name to parameter name conversion table in output dict.
-                                          Default : False
+        model_path(str) : The file prefix store the state_dict.
+                          (The path should Not contain suffix '.pdparams')
+        config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig`
+            object that specifies additional configuration options, these options
+            are for compatibility with ``jit.save/io.save_inference_model`` formats.
+            Default None.

     Returns:
         state_dict(dict) : the dict store the state_dict
@@ -118,23 +163,27 @@ def load_dygraph(model_path, keep_name_table=False):
     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
+            import paddle

-            with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+            paddle.disable_static()
+
+            emb = paddle.nn.Embedding([10, 10])

-                state_dict = emb.state_dict()
-                fluid.save_dygraph( state_dict, "paddle_dy")
+            state_dict = emb.state_dict()
+            paddle.save(state_dict, "paddle_dy")

-                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
-                                             parameter_list = emb.parameters() )
-                state_dict = adam.state_dict()
-                fluid.save_dygraph( state_dict, "paddle_dy")
+            scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+                d_model=0.01, warmup_steps=100, verbose=True)
+            adam = paddle.optimizer.Adam(
+                learning_rate=scheduler,
+                parameters=emb.parameters())
+            state_dict = adam.state_dict()
+            paddle.save(state_dict, "paddle_dy")

-                para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
+            para_state_dict, opti_state_dict = paddle.load("paddle_dy")
     '''
+    # deal with argument `model_path`
     model_prefix = model_path
     if model_prefix.endswith(".pdparams"):
         model_prefix = model_prefix[:-9]
@@ -145,66 +194,45 @@ def load_dygraph(model_path, keep_name_table=False):
     opti_dict = None
     params_file_path = model_prefix + ".pdparams"
     opti_file_path = model_prefix + ".pdopt"
+
+    # deal with argument `configs`
+    configs = config
+    if configs is None:
+        configs = SaveLoadConfig()
+
     if not os.path.exists(params_file_path) and not os.path.exists(
             opti_file_path):
-        # Load state dict by `jit.save` save format
-        # TODO(chenweihang): [Why not support `io.save_infernece_model` save format here]
+        # Load state dict by `jit.save/io.save_inference_model` save format
+        # NOTE(chenweihang): [ Compatibility of save_inference_model save format ]
         # The model saved by `save_inference_model` does not completely correspond to
         # the information required by the `state_dict` under the dygraph.
-        # Although we reluctantly restore the `state_dict` in some scenarios,
-        # this may not be complete and there are some limitations, so this function
-        # will be considered later. The limitations include:
-        # 1. `save_inference_model` not save structured name, we need to remind
-        #    the user to configure the `use_structured_name` argument when `set_dict`,
-        #    but this argument is currently not public
-        # 2. if `save_inference_model` save all persistable variables in a single file,
-        #    user need to give the variable name list to load `state_dict`
+        # `save_inference_model` does not save structured names, so we need to remind
+        # the user to configure the `use_structured_name` argument when `set_state_dict`
+        # NOTE(chenweihang): `jit.save` doesn't save optimizer state

         # 1. check model path
         if not os.path.isdir(model_prefix):
             raise ValueError("Model saved directory '%s' is not exists." %
                              model_prefix)
-        # 2. load `__variables.info__`
-        var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME)
-        if not os.path.exists(var_info_path):
-            raise RuntimeError(
-                "No target can be loaded. Now only supports loading `state_dict` from "
-                "the result saved by `imperative.save` and `imperative.jit.save`."
-            )
-        with open(var_info_path, 'rb') as f:
-            extra_var_info = pickle.load(f)
-        # 3. load `__variables__`
-        # TODO(chenweihang): now only supports loading from default save format:
-        # - all persistable vars saved in one file named `__variables__`
-        # for other case, we may need to modify the arguments of this API
-        var_file_path = os.path.join(model_prefix, VARIABLE_FILENAME)
-        if not os.path.exists(var_file_path):
-            raise RuntimeError(
-                "The parameter file to be loaded was not found. "
-                "Now only supports loading from the default save format, "
-                "and does not support custom params_filename and "
-                "save parameters separately.")
-        # 4. load all persistable vars
-        load_var_list = []
-        for name in sorted(extra_var_info):
-            var = _varbase_creator(name=name, persistable=True)
-            load_var_list.append(var)
-        _dygraph_tracer().trace_op(
-            type='load_combine',
-            inputs={},
-            outputs={'Out': load_var_list},
-            attrs={'file_path': var_file_path})
-        # 5. construct state_dict
+
+        # 2. load program desc & construct _ProgramHolder
+        programs = _construct_program_holders(model_path,
+                                              configs.model_filename)
+
+        # 3. load layer parameters & buffers
+        # NOTE: using fluid.dygraph.guard() here will cause import error in py2
+        with guard():
+            persistable_var_dict = _construct_params_and_buffers(
+                model_prefix,
+                programs,
+                configs.separate_params,
+                configs.params_filename,
+                append_suffix=False)
+
+        # 4. construct state_dict
         para_dict = dict()
-        for var in load_var_list:
-            structured_name = extra_var_info[var.name].get('structured_name',
-                                                           None)
-            if structured_name is None:
-                raise RuntimeError(
-                    "Cannot find saved variable (%s)'s structured name in saved model.",
-                    var.name)
-            para_dict[structured_name] = var.numpy()
-        # NOTE: `jit.save` doesn't save optimizer state
+        for var_name in persistable_var_dict:
+            para_dict[var_name] = persistable_var_dict[var_name].numpy()
     else:
         # Load state dict by `save_dygraph` save format
         para_dict = {}
@@ -213,7 +241,7 @@ def load_dygraph(model_path, keep_name_table=False):
             para_dict = pickle.load(f) if six.PY2 else pickle.load(
                 f, encoding='latin1')

-            if not keep_name_table and "StructuredToParameterName@@" in para_dict:
+            if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict:
                 del para_dict["StructuredToParameterName@@"]

     if os.path.exists(opti_file_path):
...
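The `deprecate_keep_name_table` decorator above preserves the old calling convention; a minimal sketch of both call styles, assuming a "paddle_dy" checkpoint saved beforehand:

    import paddle.fluid as fluid

    # old style: `keep_name_table` is intercepted by the decorator, which emits
    # a DeprecationWarning and wraps the flag into a SaveLoadConfig
    para_dict, opti_dict = fluid.load_dygraph("paddle_dy", keep_name_table=True)

    # new style: pass the config object explicitly
    config = fluid.dygraph.jit.SaveLoadConfig()
    config.keep_name_table = True
    para_dict, opti_dict = fluid.load_dygraph("paddle_dy", config)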
@@ -488,6 +488,15 @@ def _load_persistable_vars(model_path,
     return load_var_dict


+# NOTE(chenweihang): to adapt paddle.load to get state_dict
+def _remove_varname_suffix(var_dict, program_holder):
+    no_suffix_var_dict = dict()
+    for var_name in var_dict:
+        no_suffix_name = program_holder._suffix_varname_dict[var_name]
+        no_suffix_var_dict[no_suffix_name] = var_dict[var_name]
+    return no_suffix_var_dict
+
+
 def _construct_program_holders(model_path, model_filename=None):
     # make sure the path has been checked
     program_holder_dict = dict()
@@ -517,7 +526,8 @@ def _construct_program_holders(model_path, model_filename=None):
 def _construct_params_and_buffers(model_path,
                                   programs,
                                   separate_params=False,
-                                  params_filename=None):
+                                  params_filename=None,
+                                  append_suffix=True):
     var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
     if os.path.exists(var_info_path):
         var_dict = _load_persistable_vars(model_path, var_info_path,
@@ -526,6 +536,10 @@ def _construct_params_and_buffers(model_path,
     else:
         var_dict = _load_persistable_vars_by_program(
             model_path, programs['forward'], params_filename)
+
+    if not append_suffix:
+        var_dict = _remove_varname_suffix(var_dict, programs['forward'])
+
     return var_dict
@@ -685,7 +699,7 @@ class TranslatedLayer(layers.Layer):
         # 1. load program desc & construct _ProgramHolder
         programs = _construct_program_holders(model_path, model_filename)

-        # 2. load layer parameters & parameter attributes
+        # 2. load layer parameters & buffers
         persistable_vars = _construct_params_and_buffers(
             model_path, programs, separate_params, params_filename)
...
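The suffix-stripping helper added above is a plain key remapping over the loaded variable dict; a self-contained sketch, where the holder stub and variable names are hypothetical stand-ins for a real `_ProgramHolder`:

    # hypothetical stand-in for a _ProgramHolder that records how each saved
    # (suffixed) variable name maps back to its original name
    class _HolderStub(object):
        _suffix_varname_dict = {
            "linear_0.w_0@@0": "linear_0.w_0",
            "linear_0.b_0@@0": "linear_0.b_0",
        }

    def _remove_varname_suffix(var_dict, program_holder):
        no_suffix_var_dict = dict()
        for var_name in var_dict:
            no_suffix_name = program_holder._suffix_varname_dict[var_name]
            no_suffix_var_dict[no_suffix_name] = var_dict[var_name]
        return no_suffix_var_dict

    var_dict = {"linear_0.w_0@@0": "w-tensor", "linear_0.b_0@@0": "b-tensor"}
    print(_remove_varname_suffix(var_dict, _HolderStub()))
    # {'linear_0.w_0': 'w-tensor', 'linear_0.b_0': 'b-tensor'}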
@@ -17,6 +17,7 @@ from __future__ import print_function
 import os
 import pickle
 import warnings
+import functools

 import six
 import paddle
@@ -228,63 +229,60 @@ class SaveLoadConfig(object):
            .. code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
-                from paddle.fluid.dygraph import Linear
-                from paddle.fluid.dygraph import declarative
+                import paddle
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                class SimpleNet(fluid.dygraph.Layer):
+                class SimpleNet(nn.Layer):
                     def __init__(self, in_size, out_size):
                         super(SimpleNet, self).__init__()
-                        self._linear = Linear(in_size, out_size)
+                        self._linear = nn.Linear(in_size, out_size)

-                    @declarative
+                    @paddle.jit.to_static
                     def forward(self, x):
                         y = self._linear(x)
                         z = self._linear(y)
                         return z

                 # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                 # train model
                 net = SimpleNet(8, 8)
-                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
+                x = paddle.randn([4, 8], 'float32')
                 for i in range(10):
                     out = net(x)
-                    loss = fluid.layers.mean(out)
+                    loss = paddle.tensor.mean(out)
                     loss.backward()
-                    adam.minimize(loss)
-                    net.clear_gradients()
+                    adam.step()
+                    adam.clear_grad()

                 # use SaveLoadconfig when saving model
                 model_path = "simplenet.example.model"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                configs.model_filename = "__simplenet__"
-                fluid.dygraph.jit.save(
+                config = paddle.SaveLoadConfig()
+                config.model_filename = "__simplenet__"
+                paddle.jit.save(
                     layer=net,
                     model_path=model_path,
-                    input_spec=[x],
-                    configs=configs)
+                    config=config)

         2. Using ``SaveLoadConfig`` when loading model

            .. code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
+                import paddle

                 # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                 # use SaveLoadconfig when loading model
                 model_path = "simplenet.example.model"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                configs.model_filename = "__simplenet__"
-                infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
+                config = paddle.SaveLoadConfig()
+                config.model_filename = "__simplenet__"
+                infer_net = paddle.jit.load(model_path, config=config)
                 # inference
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                x = paddle.randn([4, 8], 'float32')
                 pred = infer_net(x)
     """
@@ -293,6 +291,8 @@ class SaveLoadConfig(object):
         self._model_filename = None
         self._params_filename = None
         self._separate_params = False
+        # used for `paddle.load`
+        self._keep_name_table = False

         # NOTE: Users rarely use following configs, so these configs are not open to users,
         # reducing user learning costs, but we retain the configuration capabilities
@@ -322,51 +322,46 @@ class SaveLoadConfig(object):
         Examples:
            .. code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
-                from paddle.fluid.dygraph import Linear
-                from paddle.fluid.dygraph import declarative
+                import paddle
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                class SimpleNet(fluid.dygraph.Layer):
+                class SimpleNet(nn.Layer):
                     def __init__(self, in_size, out_size):
                         super(SimpleNet, self).__init__()
-                        self._linear = Linear(in_size, out_size)
+                        self._linear = nn.Linear(in_size, out_size)

-                    @declarative
+                    @paddle.jit.to_static
                     def forward(self, x):
                         y = self._linear(x)
                         z = self._linear(y)
-                        loss = fluid.layers.mean(z)
+                        loss = paddle.tensor.mean(z)
                         return z, loss

                 # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                 # train model
                 net = SimpleNet(8, 8)
-                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
+                x = paddle.randn([4, 8], 'float32')
                 for i in range(10):
                     out, loss = net(x)
                     loss.backward()
-                    adam.minimize(loss)
-                    net.clear_gradients()
+                    adam.step()
+                    adam.clear_grad()

                 # use SaveLoadconfig.output_spec
                 model_path = "simplenet.example.model.output_spec"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                # only keep the predicted output in saved model, discard loss
-                configs.output_spec = [out]
-                fluid.dygraph.jit.save(
+                config = paddle.SaveLoadConfig()
+                config.output_spec = [out]
+                paddle.jit.save(
                     layer=net,
                     model_path=model_path,
-                    input_spec=[x],
-                    configs=configs)
+                    config=config)

-                infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
-                # only have the predicted output
+                infer_net = paddle.jit.load(model_path)
+                x = paddle.randn([4, 8], 'float32')
                 pred = infer_net(x)
         """
         return self._output_spec
@@ -393,52 +388,47 @@ class SaveLoadConfig(object):
         Examples:
            .. code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
-                from paddle.fluid.dygraph import Linear
-                from paddle.fluid.dygraph import declarative
+                import paddle
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                class SimpleNet(fluid.dygraph.Layer):
+                class SimpleNet(nn.Layer):
                     def __init__(self, in_size, out_size):
                         super(SimpleNet, self).__init__()
-                        self._linear = Linear(in_size, out_size)
+                        self._linear = nn.Linear(in_size, out_size)

-                    @declarative
+                    @paddle.jit.to_static
                     def forward(self, x):
                         y = self._linear(x)
                         z = self._linear(y)
                         return z

                 # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                 # train model
                 net = SimpleNet(8, 8)
-                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
+                x = paddle.randn([4, 8], 'float32')
                 for i in range(10):
                     out = net(x)
-                    loss = fluid.layers.mean(out)
+                    loss = paddle.tensor.mean(out)
                     loss.backward()
-                    adam.minimize(loss)
-                    net.clear_gradients()
+                    adam.step()
+                    adam.clear_grad()

-                model_path = "simplenet.example.model.model_filename"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                configs.model_filename = "__simplenet__"
-
                 # saving with configs.model_filename
-                fluid.dygraph.jit.save(
+                model_path = "simplenet.example.model.model_filename"
+                config = paddle.SaveLoadConfig()
+                config.model_filename = "__simplenet__"
+                paddle.jit.save(
                     layer=net,
                     model_path=model_path,
-                    input_spec=[x],
-                    configs=configs)
-
-                # [result] the saved model directory contains:
-                # __simplenet__  __variables__  __variables.info__
+                    config=config)

                 # loading with configs.model_filename
-                infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                infer_net = paddle.jit.load(model_path, config=config)
+                x = paddle.randn([4, 8], 'float32')
                 pred = infer_net(x)
         """
         return self._model_filename
@@ -463,52 +453,48 @@ class SaveLoadConfig(object):
         Examples:
            .. code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
-                from paddle.fluid.dygraph import Linear
-                from paddle.fluid.dygraph import declarative
+                import paddle
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                class SimpleNet(fluid.dygraph.Layer):
+                class SimpleNet(nn.Layer):
                     def __init__(self, in_size, out_size):
                         super(SimpleNet, self).__init__()
-                        self._linear = Linear(in_size, out_size)
+                        self._linear = nn.Linear(in_size, out_size)

-                    @declarative
+                    @paddle.jit.to_static
                     def forward(self, x):
                         y = self._linear(x)
                         z = self._linear(y)
                         return z

                 # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                 # train model
                 net = SimpleNet(8, 8)
-                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
+                x = paddle.randn([4, 8], 'float32')
                 for i in range(10):
                     out = net(x)
-                    loss = fluid.layers.mean(out)
+                    loss = paddle.tensor.mean(out)
                     loss.backward()
-                    adam.minimize(loss)
-                    net.clear_gradients()
+                    adam.step()
+                    adam.clear_grad()

                 model_path = "simplenet.example.model.params_filename"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                configs.params_filename = "__params__"
+                config = paddle.SaveLoadConfig()
+                config.params_filename = "__params__"

                 # saving with configs.params_filename
-                fluid.dygraph.jit.save(
+                paddle.jit.save(
                     layer=net,
                     model_path=model_path,
-                    input_spec=[x],
-                    configs=configs)
-
-                # [result] the saved model directory contains:
-                # __model__  __params__  __variables.info__
+                    config=config)

                 # loading with configs.params_filename
-                infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                infer_net = paddle.jit.load(model_path, config=config)
+                x = paddle.randn([4, 8], 'float32')
                 pred = infer_net(x)
         """
         return self._params_filename
@@ -542,52 +528,50 @@ class SaveLoadConfig(object):
         Examples:
            .. code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
-                from paddle.fluid.dygraph import Linear
-                from paddle.fluid.dygraph import declarative
+                import paddle
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                class SimpleNet(fluid.dygraph.Layer):
+                class SimpleNet(nn.Layer):
                     def __init__(self, in_size, out_size):
                         super(SimpleNet, self).__init__()
-                        self._linear = Linear(in_size, out_size)
+                        self._linear = nn.Linear(in_size, out_size)

-                    @declarative
+                    @paddle.jit.to_static
                     def forward(self, x):
                         y = self._linear(x)
                         z = self._linear(y)
                         return z

                 # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                 # train model
                 net = SimpleNet(8, 8)
-                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
+                x = paddle.randn([4, 8], 'float32')
                 for i in range(10):
                     out = net(x)
-                    loss = fluid.layers.mean(out)
+                    loss = paddle.tensor.mean(out)
                     loss.backward()
-                    adam.minimize(loss)
-                    net.clear_gradients()
+                    adam.step()
+                    adam.clear_grad()

                 model_path = "simplenet.example.model.separate_params"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                configs.separate_params = True
+                config = paddle.jit.SaveLoadConfig()
+                config.separate_params = True

                 # saving with configs.separate_params
-                fluid.dygraph.jit.save(
+                paddle.jit.save(
                     layer=net,
                     model_path=model_path,
-                    input_spec=[x],
-                    configs=configs)
+                    config=config)

                 # [result] the saved model directory contains:
                 # linear_0.b_0  linear_0.w_0  __model__  __variables.info__

                 # loading with configs.params_filename
-                infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                infer_net = paddle.jit.load(model_path, config=config)
+                x = paddle.randn([4, 8], 'float32')
                 pred = infer_net(x)
         """
         return self._separate_params
@@ -600,9 +584,70 @@ class SaveLoadConfig(object):
                 % type(value))
         self._separate_params = value

+    @property
+    def keep_name_table(self):
+        """
+        Configures whether to keep the ``structured_name -> parameter_name`` dict in the loaded state dict.
+        This dict is the debugging information saved when calling `paddle.save`.
+        It is generally only used for debugging and does not affect the actual training or inference.
+        By default, it will not be retained in the `paddle.load` result. Default: False.
+
+        .. note::
+            Only used for ``paddle.load``.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+
+                paddle.disable_static()
+
+                linear = paddle.nn.Linear(5, 1)
+
+                state_dict = linear.state_dict()
+                paddle.save(state_dict, "paddle_dy")
+
+                configs = paddle.SaveLoadConfig()
+                configs.keep_name_table = True
+                para_state_dict, _ = paddle.load("paddle_dy", configs)
+
+                print(para_state_dict)
+                # the name_table is 'StructuredToParameterName@@'
+                # {'bias': array([0.], dtype=float32),
+                #  'StructuredToParameterName@@':
+                #     {'bias': u'linear_0.b_0', 'weight': u'linear_0.w_0'},
+                #  'weight': array([[ 0.04230034],
+                #     [-0.1222527 ],
+                #     [ 0.7392676 ],
+                #     [-0.8136974 ],
+                #     [ 0.01211023]], dtype=float32)}
+        """
+        return self._keep_name_table
+
+    @keep_name_table.setter
+    def keep_name_table(self, value):
+        if not isinstance(value, bool):
+            raise TypeError(
+                "The SaveLoadConfig.keep_name_table should be bool value, but received input's type is %s."
+                % type(value))
+        self._keep_name_table = value
+
+
+# NOTE(chenweihang): change jit.save/load argument `configs` to `config`
+def deprecate_save_load_configs(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if 'configs' in kwargs:
+            kwargs['config'] = kwargs['configs']
+            kwargs.pop('configs')
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+@deprecate_save_load_configs
 @switch_to_static_graph
-def save(layer, model_path, input_spec=None, configs=None):
+def save(layer, model_path, input_spec=None, config=None):
     """
     Saves input declarative Layer as :ref:`api_imperative_TranslatedLayer`
     format model, which can be used for inference or fine-tuning after loading.
@@ -627,7 +672,7 @@ def save(layer, model_path, input_spec=None, configs=None):
             It is the example inputs that will be passed to saved TranslatedLayer's forward
             function. If None, all input variables of the original Layer's forward function
             would be the inputs of the saved model. Default None.
-        configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object
+        config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object
             that specifies additional configuration options. Default None.
     Returns:
         None
@@ -636,65 +681,76 @@ def save(layer, model_path, input_spec=None, configs=None):
        .. code-block:: python

            import numpy as np
-            import paddle.fluid as fluid
-            from paddle.fluid.dygraph import Linear
-            from paddle.fluid.dygraph import declarative
+            import paddle
+            import paddle.nn as nn
+            import paddle.optimizer as opt

-            BATCH_SIZE = 32
-            BATCH_NUM = 20
+            BATCH_SIZE = 16
+            BATCH_NUM = 4
+            EPOCH_NUM = 4

-            def random_batch_reader():
-                def _get_random_images_and_labels(image_shape, label_shape):
-                    image = np.random.random(size=image_shape).astype('float32')
-                    label = np.random.random(size=label_shape).astype('int64')
-                    return image, label
+            IMAGE_SIZE = 784
+            CLASS_NUM = 10

-                def __reader__():
-                    for _ in range(BATCH_NUM):
-                        batch_image, batch_label = _get_random_images_and_labels(
-                            [BATCH_SIZE, 784], [BATCH_SIZE, 1])
-                        yield batch_image, batch_label
+            # define a random dataset
+            class RandomDataset(paddle.io.Dataset):
+                def __init__(self, num_samples):
+                    self.num_samples = num_samples

-                return __reader__
+                def __getitem__(self, idx):
+                    image = np.random.random([IMAGE_SIZE]).astype('float32')
+                    label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
+                    return image, label
+
+                def __len__(self):
+                    return self.num_samples

-            class LinearNet(fluid.dygraph.Layer):
-                def __init__(self, in_size, out_size):
+            class LinearNet(nn.Layer):
+                def __init__(self):
                     super(LinearNet, self).__init__()
-                    self._linear = Linear(in_size, out_size)
+                    self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)

-                @declarative
+                @paddle.jit.to_static
                 def forward(self, x):
                     return self._linear(x)

+            def train(layer, loader, loss_fn, opt):
+                for epoch_id in range(EPOCH_NUM):
+                    for batch_id, (image, label) in enumerate(loader()):
+                        out = layer(image)
+                        loss = loss_fn(out, label)
+                        loss.backward()
+                        opt.step()
+                        opt.clear_grad()
+                        print("Epoch {} batch {}: loss = {}".format(
+                            epoch_id, batch_id, np.mean(loss.numpy())))
+
             # enable dygraph mode
-            fluid.enable_dygraph()
+            place = paddle.CPUPlace()
+            paddle.disable_static(place)

-            # create network
-            net = LinearNet(784, 1)
-            adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-
-            # create data loader
-            train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-            train_loader.set_batch_generator(random_batch_reader())
-
-            # train
-            for data in train_loader():
-                img, label = data
-                label.stop_gradient = True
-
-                cost = net(img)
-
-                loss = fluid.layers.cross_entropy(cost, label)
-                avg_loss = fluid.layers.mean(loss)
-
-                avg_loss.backward()
-                adam.minimize(avg_loss)
-                net.clear_gradients()
-
-            # save model
-            model_path = "linear.example.model"
-            fluid.dygraph.jit.save(
-                layer=net,
-                model_path=model_path,
-                input_spec=[img])
+            # 1. train & save model.
+
+            # create network
+            layer = LinearNet()
+            loss_fn = nn.CrossEntropyLoss()
+            adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
+
+            # create data loader
+            dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+            loader = paddle.io.DataLoader(dataset,
+                places=place,
+                batch_size=BATCH_SIZE,
+                shuffle=True,
+                drop_last=True,
+                num_workers=2)
+
+            # train
+            train(layer, loader, loss_fn, adam)
+
+            # save
+            model_path = "linear.example.model"
+            paddle.jit.save(layer, model_path)
    """
     def get_inout_spec(all_vars, target_vars, return_name=False):
@@ -728,6 +784,7 @@ def save(layer, model_path, input_spec=None, configs=None):
             "The input layer of paddle.jit.save should be 'Layer', but received layer type is %s."
             % type(layer))

+    configs = config
     if configs is None:
         configs = SaveLoadConfig()
@@ -819,8 +876,9 @@ def save(layer, model_path, input_spec=None, configs=None):
         pickle.dump(extra_var_info, f, protocol=2)


+@deprecate_save_load_configs
 @dygraph_only
-def load(model_path, configs=None):
+def load(model_path, config=None):
     """
     :api_attr: imperative
@@ -837,7 +895,7 @@ def load(model_path, configs=None):
     Args:
         model_path (str): The directory path where the model is saved.
-        configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies
+        config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies
             additional configuration options. Default None.

     Returns:
@@ -849,122 +907,126 @@ def load(model_path, configs=None):
        .. code-block:: python

            import numpy as np
-            import paddle.fluid as fluid
-            from paddle.fluid.dygraph import Linear
-            from paddle.fluid.dygraph import declarative
+            import paddle
+            import paddle.nn as nn
+            import paddle.optimizer as opt

-            BATCH_SIZE = 32
-            BATCH_NUM = 20
+            BATCH_SIZE = 16
+            BATCH_NUM = 4
+            EPOCH_NUM = 4
+
+            IMAGE_SIZE = 784
+            CLASS_NUM = 10

-            def random_batch_reader():
-                def _get_random_images_and_labels(image_shape, label_shape):
-                    image = np.random.random(size=image_shape).astype('float32')
-                    label = np.random.random(size=label_shape).astype('int64')
-                    return image, label
-
-                def __reader__():
-                    for _ in range(BATCH_NUM):
-                        batch_image, batch_label = _get_random_images_and_labels(
-                            [BATCH_SIZE, 784], [BATCH_SIZE, 1])
-                        yield batch_image, batch_label
-
-                return __reader__
+            # define a random dataset
+            class RandomDataset(paddle.io.Dataset):
+                def __init__(self, num_samples):
+                    self.num_samples = num_samples
+
+                def __getitem__(self, idx):
+                    image = np.random.random([IMAGE_SIZE]).astype('float32')
+                    label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
+                    return image, label
+
+                def __len__(self):
+                    return self.num_samples

-            class LinearNet(fluid.dygraph.Layer):
-                def __init__(self, in_size, out_size):
+            class LinearNet(nn.Layer):
+                def __init__(self):
                     super(LinearNet, self).__init__()
-                    self._linear = Linear(in_size, out_size)
+                    self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)

-                @declarative
+                @paddle.jit.to_static
                 def forward(self, x):
                     return self._linear(x)

+            def train(layer, loader, loss_fn, opt):
+                for epoch_id in range(EPOCH_NUM):
+                    for batch_id, (image, label) in enumerate(loader()):
+                        out = layer(image)
+                        loss = loss_fn(out, label)
+                        loss.backward()
+                        opt.step()
+                        opt.clear_grad()
+                        print("Epoch {} batch {}: loss = {}".format(
+                            epoch_id, batch_id, np.mean(loss.numpy())))
+
             # enable dygraph mode
-            fluid.enable_dygraph()
+            place = paddle.CPUPlace()
+            paddle.disable_static(place)

             # 1. train & save model.

             # create network
-            net = LinearNet(784, 1)
-            adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
+            layer = LinearNet()
+            loss_fn = nn.CrossEntropyLoss()
+            adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())

             # create data loader
-            train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-            train_loader.set_batch_generator(random_batch_reader())
-            # train
-            for data in train_loader():
-                img, label = data
-                label.stop_gradient = True
-
-                cost = net(img)
-
-                loss = fluid.layers.cross_entropy(cost, label)
-                avg_loss = fluid.layers.mean(loss)
-
-                avg_loss.backward()
-                adam.minimize(avg_loss)
-                net.clear_gradients()
-
-            model_path = "linear.example.model"
-            fluid.dygraph.jit.save(
-                layer=net,
-                model_path=model_path,
-                input_spec=[img])
-
-            # 2. load model & inference
-            # load model
-            infer_net = fluid.dygraph.jit.load(model_path)
+            dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+            loader = paddle.io.DataLoader(dataset,
+                places=place,
+                batch_size=BATCH_SIZE,
+                shuffle=True,
+                drop_last=True,
+                num_workers=2)
+
+            # train
+            train(layer, loader, loss_fn, adam)
+
+            # save
+            model_path = "linear.example.model"
+            paddle.jit.save(layer, model_path)
+
+            # 2. load model
+
+            # load
+            loaded_layer = paddle.jit.load(model_path)
+
             # inference
-            x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
-            pred = infer_net(x)
-
-            # 3. load model & fine-tune
-            # load model
-            train_net = fluid.dygraph.jit.load(model_path)
-            train_net.train()
-            adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=train_net.parameters())
-            # create data loader
-            train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-            train_loader.set_batch_generator(random_batch_reader())
+            loaded_layer.eval()
+            x = paddle.randn([1, IMAGE_SIZE], 'float32')
+            pred = loaded_layer(x)
+
             # fine-tune
-            for data in train_loader():
-                img, label = data
-                label.stop_gradient = True
-
-                cost = train_net(img)
-
-                loss = fluid.layers.cross_entropy(cost, label)
-                avg_loss = fluid.layers.mean(loss)
-
-                avg_loss.backward()
-                adam.minimize(avg_loss)
-                train_net.clear_gradients()
+            loaded_layer.train()
+            adam = opt.Adam(learning_rate=0.001, parameters=loaded_layer.parameters())
+            train(loaded_layer, loader, loss_fn, adam)
        2. Load model saved by :ref:`api_fluid_io_save_inference_model`, then perform inference and fine-tune training.

            .. code-block:: python

                import numpy as np
+                import paddle
                import paddle.fluid as fluid
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                BATCH_SIZE = 32
-                BATCH_NUM = 20
+                BATCH_SIZE = 16
+                BATCH_NUM = 4
+                EPOCH_NUM = 4

-                def random_batch_reader():
-                    def _get_random_images_and_labels(image_shape, label_shape):
-                        image = np.random.random(size=image_shape).astype('float32')
-                        label = np.random.random(size=label_shape).astype('int64')
-                        return image, label
-
-                    def __reader__():
-                        for _ in range(BATCH_NUM):
-                            batch_image, batch_label = _get_random_images_and_labels(
-                                [BATCH_SIZE, 784], [BATCH_SIZE, 1])
-                            yield batch_image, batch_label
-
-                    return __reader__
+                IMAGE_SIZE = 784
+                CLASS_NUM = 10
+
+                # define a random dataset
+                class RandomDataset(paddle.io.Dataset):
+                    def __init__(self, num_samples):
+                        self.num_samples = num_samples
+
+                    def __getitem__(self, idx):
+                        image = np.random.random([IMAGE_SIZE]).astype('float32')
+                        label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
+                        return image, label
+
+                    def __len__(self):
+                        return self.num_samples

-                img = fluid.data(name='img', shape=[None, 784], dtype='float32')
+                image = fluid.data(name='image', shape=[None, 784], dtype='float32')
                label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-                pred = fluid.layers.fc(input=img, size=10, act='softmax')
+                pred = fluid.layers.fc(input=image, size=10, act='softmax')
                loss = fluid.layers.cross_entropy(input=pred, label=label)
                avg_loss = fluid.layers.mean(loss)
@@ -975,9 +1037,15 @@ def load(model_path, configs=None):
                exe = fluid.Executor(place)
                exe.run(fluid.default_startup_program())

-                loader = fluid.io.DataLoader.from_generator(
-                    feed_list=[img, label], capacity=5, iterable=True)
-                loader.set_batch_generator(random_batch_reader(), places=place)
+                # create data loader
+                dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
+                loader = paddle.io.DataLoader(dataset,
+                    feed_list=[image, label],
+                    places=place,
+                    batch_size=BATCH_SIZE,
+                    shuffle=True,
+                    drop_last=True,
+                    num_workers=2)

                # 1. train and save inference model
                for data in loader():
@@ -988,39 +1056,42 @@ def load(model_path, configs=None):
                model_path = "fc.example.model"
                fluid.io.save_inference_model(
-                    model_path, ["img"], [pred], exe)
+                    model_path, ["image"], [pred], exe)

+                # 2. load model
+
                # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static(place)

-                # 2. load model & inference
-                fc = fluid.dygraph.jit.load(model_path)
-                x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
+                # load
+                fc = paddle.jit.load(model_path)
+
+                # inference
+                fc.eval()
+                x = paddle.randn([1, IMAGE_SIZE], 'float32')
                pred = fc(x)

-                # 3. load model & fine-tune
-                fc = fluid.dygraph.jit.load(model_path)
+                # fine-tune
                fc.train()
-                sgd = fluid.optimizer.SGD(learning_rate=0.001,
-                                          parameter_list=fc.parameters())
-
-                train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-                train_loader.set_batch_generator(
-                    random_batch_reader(), places=place)
-
-                for data in train_loader():
-                    img, label = data
-                    label.stop_gradient = True
-
-                    cost = fc(img)
-
-                    loss = fluid.layers.cross_entropy(cost, label)
-                    avg_loss = fluid.layers.mean(loss)
-
-                    avg_loss.backward()
-                    sgd.minimize(avg_loss)
+                loss_fn = nn.CrossEntropyLoss()
+                adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters())
+                loader = paddle.io.DataLoader(dataset,
+                    places=place,
+                    batch_size=BATCH_SIZE,
+                    shuffle=True,
+                    drop_last=True,
+                    num_workers=2)
+                for epoch_id in range(EPOCH_NUM):
+                    for batch_id, (image, label) in enumerate(loader()):
+                        out = fc(image)
+                        loss = loss_fn(out, label)
+                        loss.backward()
+                        adam.step()
+                        adam.clear_grad()
+                        print("Epoch {} batch {}: loss = {}".format(
+                            epoch_id, batch_id, np.mean(loss.numpy())))
    """
-    return TranslatedLayer._construct(model_path, configs)
+    return TranslatedLayer._construct(model_path, config)


 @dygraph_only
...
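The `deprecate_save_load_configs` wrapper only renames a keyword, so both spellings reach the same code path; a short sketch, assuming the "simplenet.example.model" directory saved earlier in these examples:

    import paddle

    config = paddle.SaveLoadConfig()
    config.model_filename = "__simplenet__"

    # new keyword
    net = paddle.jit.load("simplenet.example.model", config=config)

    # old keyword is silently remapped: the decorator moves `configs` into `config`
    net = paddle.jit.load("simplenet.example.model", configs=config)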
@@ -29,6 +29,9 @@ from .layer_object_helper import LayerObjectHelper
 from .base import program_desc_tracing_guard, param_guard
 from paddle.fluid import framework
 from ..param_attr import ParamAttr
+from paddle.fluid.executor import Executor, global_scope
+from paddle.fluid.framework import in_dygraph_mode
+from paddle.fluid.framework import _current_expected_place as _get_device

 __all__ = ['Layer']
@@ -797,7 +800,7 @@ class Layer(core.Layer):
             raise ValueError(
                 "super(YourLayer, self).__init__() should be called first")
         if len(self._loaddict_holder) > 0:
-            assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
+            assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format(
                 value.name)

             value.set_value(self._loaddict_holder[value.name])
@@ -943,12 +946,13 @@ class Layer(core.Layer):
             destination = destination_temp
         return destination

-    def set_dict(self,
-                 stat_dict,
-                 include_sublayers=True,
-                 use_structured_name=True):
+    @framework.deprecate_stat_dict
+    def set_state_dict(self,
+                       state_dict,
+                       include_sublayers=True,
+                       use_structured_name=True):
         '''
-        Set parameters and persistable buffers from stat_dict. All the parameters and buffers will be reset by the tensor in the stat_dict
+        Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict

         Parameters:
             state_dict(dict) : Dict contains all the parameters and persistable buffers.
@@ -961,72 +965,67 @@ class Layer(core.Layer):
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                with fluid.dygraph.guard():
-                    emb = fluid.dygraph.Embedding([10, 10])
-
-                    state_dict = emb.state_dict()
-                    fluid.save_dygraph( state_dict, "paddle_dy")
-
-                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
-
-                    emb.set_dict( para_state_dict )
-
-        '''
-        self.load_dict(
-            stat_dict,
-            include_sublayers=include_sublayers,
-            use_structured_name=use_structured_name)
-
-    def load_dict(self,
-                  stat_dict,
-                  include_sublayers=True,
-                  use_structured_name=True):
-        '''
-        Set parameters and persistable buffers from stat_dict. All the parameters and persistabl buffers will be reset by the tensor in the stat_dict
-
-        This api will be Deprecated. Please use set_dict
-
-        Parameters:
-            state_dict(dict) : Dict contains all the parameters and persistable buffers.
-            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
-            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
-                                                  Default: True
-        Returns:
-            None
-
-        Examples:
-            .. code-block:: python
-
-                import paddle.fluid as fluid
-                with fluid.dygraph.guard():
-                    emb = fluid.dygraph.Embedding([10, 10])
-
-                    state_dict = emb.state_dict()
-                    fluid.save_dygraph( state_dict, "paddle_dy")
-
-                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
-
-                    emb.load_dict( para_state_dict )
+                import paddle
+
+                paddle.disable_static()
+
+                emb = paddle.nn.Embedding([10, 10])
+
+                state_dict = emb.state_dict()
+                paddle.save(state_dict, "paddle_dy")
+
+                para_state_dict, _ = paddle.load("paddle_dy")
+
+                emb.set_state_dict(para_state_dict)

        '''
-        inner_state_dict = self.state_dict()
+
+        def _check_match(key, param):
+            state = state_dict.get(key, None)
+            if state is None:
+                raise ValueError("{} is not found in the provided dict.".format(
+                    key))
+            if list(state.shape) != list(param.shape):
+                raise ValueError(
+                    "{} receives a shape {}, but the expected shape is {}.".
+                    format(key, list(state.shape), list(param.shape)))
+            return param, state
+
+        matched_param_state = []
+        for key, param in self.state_dict().items():
+            key_name = key if use_structured_name else param.name
+            try:
+                match_res = _check_match(key_name, param)
+                matched_param_state.append(match_res)
+            except ValueError as err:
+                warnings.warn(("Skip loading for {}. ".format(key) + str(err)))

-        for name, param_or_buffer in inner_state_dict.items():
-            key_name = name if use_structured_name else param_or_buffer.name
-            if key_name in stat_dict:
-                param_or_buffer.set_value(stat_dict[key_name])
-            else:
-                raise RuntimeError(
-                    "Parameter or persistable buffer not found, Can't find [ {} ] in stat_dict"
-                    "use_structured_name is set to [{}]".format(
-                        key_name, use_structured_name))
-        unused_para_list = []
-        for k, v in stat_dict.items():
-            if k not in inner_state_dict:
-                unused_para_list.append(k)
-        if len(unused_para_list) > 0:
-            warnings.warn(
-                "Variables [ {} ] are not used, because not included in layers state_dict".
-                format(" ".join(unused_para_list)))
+        if in_dygraph_mode():
+            for param, state in matched_param_state:
+                param.set_value(state)
+        else:
+
+            def _set_var(var, ndarray):
+                t = global_scope().find_var(var.name).get_tensor()
+                p = t._place()
+                if p.is_cpu_place():
+                    place = core.CPUPlace()
+                elif p.is_cuda_pinned_place():
+                    place = core.CUDAPinnedPlace()
+                else:
+                    p = core.Place()
+                    p.set_place(t._place())
+                    place = core.CUDAPlace(p.gpu_device_id())
+                t.set(ndarray, place)
+
+            executor = Executor(_get_device())._default_executor
+            # restore parameter states
+            core._create_loaded_parameter(
+                [param for param, state in matched_param_state],
+                global_scope(), executor)
+            for param, state in matched_param_state:
+                _set_var(param, state)
+
+    # [aliases] Compatible with old method names
+    set_dict = set_state_dict
+    load_dict = set_state_dict
@@ -97,7 +97,7 @@ class LearningRateDecay(object):
         """
         self.keys = ['step_num']

-    def set_dict(self, state_dict):
+    def set_state_dict(self, state_dict):
         """
         Loads the schedulers state.
         """
@@ -114,6 +114,9 @@ class LearningRateDecay(object):
                 "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict"
             )

+    # [aliases] Compatible with old method names
+    set_dict = set_state_dict
+
     def step(self):
         raise NotImplementedError()
...
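The `set_dict = set_state_dict` assignment used here (and in `Layer`, `DataParallel`, and `Optimizer` below) is ordinary class-attribute aliasing: both names bind the same function object, so pre-2.0 call sites keep working. A generic sketch of the pattern:

    class Scheduler(object):
        def set_state_dict(self, state_dict):
            self.state = dict(state_dict)

        # [aliases] keep the pre-2.0 method name working
        set_dict = set_state_dict

    s = Scheduler()
    s.set_dict({"step_num": 3})  # dispatches to set_state_dict
    assert Scheduler.set_dict is Scheduler.set_state_dict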
@@ -587,12 +587,13 @@ class DataParallel(layers.Layer):
             include_sublayers=include_sublayers,
             structured_name_prefix=structured_name_prefix)

-    def set_dict(self,
-                 stat_dict,
-                 include_sublayers=True,
-                 use_structured_name=True):
+    @framework.deprecate_stat_dict
+    def set_state_dict(self,
+                       state_dict,
+                       include_sublayers=True,
+                       use_structured_name=True):
         '''
-        Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
+        Set parameters of self._layers from state_dict. All the parameters of self._layers will be reset by the tensor in the state_dict

         Parameters:
             state_dict(dict) : Dict contains all the parameters
@@ -605,62 +606,27 @@ class DataParallel(layers.Layer):
         Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                with fluid.dygraph.guard():
-                    strategy=fluid.dygraph.prepare_context()
-                    emb = fluid.dygraph.Embedding([10, 10])
-                    emb = fluid.dygraph.DataParallel(emb, strategy)
-
-                    state_dict = emb.state_dict()
-                    fluid.save_dygraph( state_dict, "paddle_dy")
-
-                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
-
-                    emb.set_dict( para_state_dict )
-
-        '''
-        self._layers.set_dict(
-            stat_dict,
-            include_sublayers=include_sublayers,
-            use_structured_name=use_structured_name)
-
-    def load_dict(self,
-                  stat_dict,
-                  include_sublayers=True,
-                  use_structured_name=True):
-        '''
-        Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
-
-        This api will be Deprecated. Please use set_dict
-
-        Parameters:
-            state_dict(dict) : Dict contains all the parameters
-            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
-            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
-                                                  Default: True
-        Returns:
-            None
-
-        Examples:
-            .. code-block:: python
-
-                import paddle.fluid as fluid
-                with fluid.dygraph.guard():
-                    strategy=fluid.dygraph.prepare_context()
-                    emb = fluid.dygraph.Embedding([10, 10])
-                    emb = fluid.dygraph.DataParallel(emb, strategy)
-
-                    state_dict = emb.state_dict()
-                    fluid.save_dygraph( state_dict, "paddle_dy")
-
-                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
-
-                    emb.load_dict( para_state_dict )
+                import paddle
+
+                paddle.disable_static()
+
+                emb = paddle.nn.Embedding([10, 10])
+                emb = fluid.dygraph.DataParallel(emb, strategy)
+
+                state_dict = emb.state_dict()
+                paddle.save(state_dict, "paddle_dy")
+
+                para_state_dict, _ = paddle.load("paddle_dy")
+
+                emb.set_state_dict(para_state_dict)

         '''

-        self._layers.load_dict(
-            stat_dict,
+        self._layers.set_state_dict(
+            state_dict,
             include_sublayers=include_sublayers,
             use_structured_name=use_structured_name)

+    # [aliases] Compatible with old method names
+    set_dict = set_state_dict
+    load_dict = set_state_dict
@@ -36,6 +36,7 @@ from . import core
 from . import unique_name
 import paddle.version as fluid_version
 import warnings
+import functools

 __all__ = [
     'Program',
@@ -238,6 +239,25 @@ def _fake_interface_only_(func):
     return __impl__


+# NOTE(chenweihang): There is an argument name typo (stat_dict; the correct name is state_dict)
+# in the fluid APIs Layer.set_dict and Optimizer.load; in order to correct the argument without
+# introducing compatibility issues, add this decorator
+# NOTE(chenweihang): not using `wrap_decorator` here is because `wrap_decorator` will
+# move kwargs to args, which doesn't work in this decorate case
+def deprecate_stat_dict(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if 'stat_dict' in kwargs:
+            warnings.warn(
+                "The argument `stat_dict` has been deprecated, please change it to `state_dict`.",
+                DeprecationWarning)
+            kwargs['state_dict'] = kwargs['stat_dict']
+            kwargs.pop('stat_dict')
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
 dygraph_not_support = wrap_decorator(_dygraph_not_support_)
 dygraph_only = wrap_decorator(_dygraph_only_)
 fake_interface_only = wrap_decorator(_fake_interface_only_)
...
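A sketch of the compatibility path `deprecate_stat_dict` adds (hypothetical call sites; assumes a "paddle_dy" checkpoint saved with `paddle.save`):

    import paddle

    paddle.disable_static()
    emb = paddle.nn.Embedding([10, 10])
    para_state_dict, _ = paddle.load("paddle_dy")

    # old keyword: the wrapper renames `stat_dict` to `state_dict` and emits a
    # DeprecationWarning before calling the real method
    emb.set_state_dict(stat_dict=para_state_dict)

    # new keyword
    emb.set_state_dict(state_dict=para_state_dict)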
...@@ -170,7 +170,7 @@ class Optimizer(object):
return state_dict
@framework.dygraph_only
def set_state_dict(self, state_dict):
'''
Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay has been used, global_step will be changed.
...@@ -182,20 +182,22 @@ class Optimizer(object):
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
paddle.disable_static()
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, "paddle_dy")
adam = paddle.optimizer.Adam(learning_rate=fluid.layers.noam_decay(100, 10000),
parameter_list=emb.parameters())
state_dict = adam.state_dict()
paddle.save(state_dict, "paddle_dy")
para_state_dict, opti_state_dict = paddle.load("paddle_dy")
adam.set_state_dict(opti_state_dict)
'''
from paddle.optimizer.lr_scheduler import _LRScheduler
...@@ -257,6 +259,9 @@ class Optimizer(object):
tensor.set(load_para_np, framework._current_expected_place())
# [aliases] Compatible with old method names
set_dict = set_state_dict
def get_opti_var_name_list(self):
return self._opti_name_list
...@@ -4595,7 +4600,8 @@ class RecomputeOptimizer(Optimizer):
), "_checkpoints should be a list of Variable or a list of String"
self._checkpoints = checkpoints
@framework.deprecate_stat_dict
def load(self, state_dict):
""" """
:api_attr: Static Graph :api_attr: Static Graph
...@@ -4603,7 +4609,7 @@ class RecomputeOptimizer(Optimizer): ...@@ -4603,7 +4609,7 @@ class RecomputeOptimizer(Optimizer):
:return: None :return: None
Args: Args:
stat_dict: the dict load by load_persistable method state_dict: the dict load by load_persistable method
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer): ...@@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer):
sgd = fluid.optimizer.RecomputeOptimizer(sgd) sgd = fluid.optimizer.RecomputeOptimizer(sgd)
sgd._set_checkpoints([fc_1, pred]) sgd._set_checkpoints([fc_1, pred])
try: try:
stat_dict = {} state_dict = {}
sgd.load(stat_dict) sgd.load(state_dict)
except NotImplementedError as e: except NotImplementedError as e:
print(cpt.get_exception_message(e)) print(cpt.get_exception_message(e))
""" """
......
...@@ -43,7 +43,7 @@ class TestDirectory(unittest.TestCase):
'paddle.distributed.prepare_context', 'paddle.DataParallel',
'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static',
'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer',
'paddle.jit.save', 'paddle.jit.load', 'paddle.SaveLoadConfig',
'paddle.NoamDecay', 'paddle.PiecewiseDecay',
'paddle.NaturalExpDecay', 'paddle.ExponentialDecay',
'paddle.InverseTimeDecay', 'paddle.PolynomialDecay',
......
...@@ -374,8 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
adam._learning_rate.step_num = 0
para_state_dict, opti_state_dict = paddle.load("./test_dy")
adam.set_state_dict(opti_state_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
...@@ -393,7 +392,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_state_dict(stat_dict=para_state_dict)
state_dict = ptb_model.state_dict()
...@@ -483,7 +482,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
adam.set_state_dict(self.opti_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
if isinstance(v, core.VarBase):
...@@ -500,7 +499,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_state_dict(self.state_dict)
state_dict = ptb_model.state_dict()
...@@ -593,7 +592,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
adam.set_state_dict(np_opti_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
...@@ -613,7 +612,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_state_dict(np_state_dict)
state_dict = ptb_model.state_dict()
...@@ -656,8 +655,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_hidden = None
last_cell = None
adam.set_state_dict(self.opti_dict)
ptb_model.set_state_dict(self.state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
...@@ -745,8 +744,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_cell = None
state_dict, opti_dict = fluid.load_dygraph("./test_dy")
adam.set_state_dict(opti_dict)
ptb_model.set_state_dict(state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
...@@ -849,8 +848,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in self.state_dict.items():
np_state_dict[k] = v.numpy()
adam.set_state_dict(np_opti_dict)
ptb_model.set_state_dict(np_state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
...@@ -912,6 +911,22 @@ class TestDygraphPtbRnn(unittest.TestCase):
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy.pdopt'))
def test_load_compatible_with_keep_name_table(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy'), True)
self.assertIsNotNone(para_state_dict)
self.assertIsNone(opti_state_dict)
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy'), keep_name_table=True)
self.assertIsNotNone(para_state_dict)
self.assertIsNone(opti_state_dict)
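Both calls above exercise the deprecated positional and keyword spellings; the replacement routes the flag through `SaveLoadConfig`. A sketch against the same `saved_dy/emb_dy` files:

import os
import paddle

config = paddle.SaveLoadConfig()
config.keep_name_table = True

# Equivalent to the deprecated `keep_name_table=True` argument above.
para_state_dict, opti_state_dict = paddle.load(
    os.path.join('saved_dy', 'emb_dy'), config=config)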
if __name__ == '__main__':
unittest.main()
...@@ -917,6 +917,29 @@ class TestDygraphPtbRnn(unittest.TestCase):
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy.pdopt'))
def test_no_state_in_input_dict(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy'))
para_state_dict.pop('weight')
emb.set_state_dict(para_state_dict)
def test_state_shape_mismatch(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy'))
para_state_dict['weight'] = np.expand_dims(
para_state_dict['weight'], axis=-1)
emb.set_state_dict(para_state_dict)
if __name__ == '__main__':
unittest.main()
...@@ -183,25 +183,6 @@ class TestJitSaveLoad(unittest.TestCase):
with self.assertRaises(ValueError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
def test_load_dygraph_no_var_info(self):
model_path = "model.test_jit_save_load.no_var_info"
self.train_and_save_model(model_path=model_path)
# remove `__variables.info__`
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
os.remove(var_info_path)
new_layer = LinearNet(784, 1)
with self.assertRaises(RuntimeError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
def test_load_dygraph_not_var_file(self):
model_path = "model.test_jit_save_load.no_var_file"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.params_filename = "__params__"
self.train_and_save_model(model_path=model_path, configs=configs)
new_layer = LinearNet(784, 1)
with self.assertRaises(RuntimeError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
class LinearNetMultiInput(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import six
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from test_imperative_base import new_program_scope
def convolutional_neural_network(img):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return prediction
def static_train_net(img, label):
prediction = convolutional_neural_network(img)
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)
return prediction, avg_loss
class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase):
def setUp(self):
self.seed = 90
self.epoch_num = 1
self.batch_size = 128
self.batch_num = 10
def train_and_save_model(self):
with new_program_scope():
startup_program = fluid.default_startup_program()
main_program = fluid.default_main_program()
img = fluid.data(
name='img', shape=[None, 1, 28, 28], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
prediction, avg_loss = static_train_net(img, label)
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
exe.run(startup_program)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=100),
batch_size=self.batch_size)
for _ in range(0, self.epoch_num):
for batch_id, data in enumerate(train_reader()):
exe.run(main_program,
feed=feeder.feed(data),
fetch_list=[avg_loss])
if batch_id > self.batch_num:
break
static_param_dict = {}
for param in fluid.default_main_program().all_parameters():
static_param_dict[param.name] = fluid.executor._fetch_var(
param.name)
fluid.io.save_inference_model(
self.save_dirname, ["img"], [prediction],
exe,
model_filename=self.model_filename,
params_filename=self.params_filename)
return static_param_dict
def check_load_state_dict(self, orig_dict, load_dict):
for var_name, value in six.iteritems(orig_dict):
self.assertTrue(np.array_equal(value, load_dict[var_name]))
def test_load_default(self):
self.save_dirname = "static_mnist.load_state_dict.default"
self.model_filename = None
self.params_filename = None
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.separate_params = True
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
def test_load_with_model_filename(self):
self.save_dirname = "static_mnist.load_state_dict.model_filename"
self.model_filename = "static_mnist.model"
self.params_filename = None
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.separate_params = True
configs.model_filename = self.model_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
def test_load_with_param_filename(self):
self.save_dirname = "static_mnist.load_state_dict.param_filename"
self.model_filename = None
self.params_filename = "static_mnist.params"
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.params_filename = self.params_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
def test_load_with_model_and_param_filename(self):
self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename"
self.model_filename = "static_mnist.model"
self.params_filename = "static_mnist.params"
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.params_filename = self.params_filename
configs.model_filename = self.model_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
if __name__ == '__main__':
unittest.main()
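Condensed from the cases above, loading a state dict from an `inference model` format save result comes down to pointing `SaveLoadConfig` at the saved filenames; the path and names below mirror the test fixtures and are illustrative:

import paddle

# Model previously saved via fluid.io.save_inference_model with a combined
# model file and a combined params file.
config = paddle.SaveLoadConfig()
config.model_filename = "static_mnist.model"
config.params_filename = "static_mnist.params"

load_param_dict, _ = paddle.load(
    "static_mnist.load_state_dict.model_and_param_filename", config)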
...@@ -832,8 +832,8 @@ class TestRecomputeOptimizer(unittest.TestCase):
recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
recompute_optimizer._set_checkpoints([b1_out])
try:
state_dict = {}
recompute_optimizer.load(state_dict)
except NotImplementedError as e:
self.assertEqual(
"load function is not supported by Recompute Optimizer for now",
......
...@@ -20,8 +20,8 @@ __all__ = [
]
__all__ += [
'grad', 'LayerList', 'load', 'save', 'SaveLoadConfig', 'to_variable',
'no_grad', 'DataParallel'
]
__all__ += [
...@@ -50,6 +50,7 @@ from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS
from ..fluid.dygraph.base import grad #DEFINE_ALIAS
from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS
from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS
from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS
from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS
from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS
......
...@@ -19,10 +19,7 @@ from . import model_summary
from . import model
from .model import *
from .model_summary import summary
from .dygraph_layer_patch import monkey_patch_layer
logger.setup_logger()
__all__ = ['callbacks'] + model.__all__ + ['summary']
monkey_patch_layer()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import paddle.fluid as fluid
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.framework import _current_expected_place as _get_device
def monkey_patch_layer():
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters from stat_dict. All the parameters will be reset by the
tensors in the stat_dict.
This API will be deprecated. Please use set_state_dict instead.
Parameters:
stat_dict(dict) : Dict contains all the parameters
include_sublayers(bool, optional) : If true, also include the
parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name
as key, otherwise, use parameter name as key. Default: True
Returns:
None
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph("paddle_dy")
emb.load_dict(para_state_dict)
'''
def _check_match(key, param):
state = stat_dict.get(key, None)
if state is None:
raise ValueError(
"{} is not found in the providing file.".format(key))
if list(state.shape) != list(param.shape):
raise ValueError(
"{} receives a shape {}, but the expected shape is {}.".
format(key, list(state.shape), list(param.shape)))
return param, state
matched_param_state = []
for key, param in self.state_dict().items():
key_name = key if use_structured_name else param.name
try:
match_res = _check_match(key_name, param)
matched_param_state.append(match_res)
except ValueError as err:
warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
if in_dygraph_mode():
for param, state in matched_param_state:
param.set_value(state)
else:
def _set_var(var, ndarray):
t = fluid.global_scope().find_var(var.name).get_tensor()
p = t._place()
if p.is_cpu_place():
place = fluid.CPUPlace()
elif p.is_cuda_pinned_place():
place = fluid.CUDAPinnedPlace()
else:
p = fluid.core.Place()
p.set_place(t._place())
place = fluid.CUDAPlace(p.gpu_device_id())
t.set(ndarray, place)
executor = fluid.Executor(_get_device())._default_executor
# restore parameter states
fluid.core._create_loaded_parameter(
[param for param, state in matched_param_state],
fluid.global_scope(), executor)
for param, state in matched_param_state:
_set_var(param, state)
setattr(fluid.dygraph.Layer, 'load_dict', load_dict)
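The `_check_match` helper above is what turns missing or mis-shaped entries into skip-with-warning behavior instead of hard failures. A sketch, assuming the patched `load_dict` is in effect (paddle.incubate.hapi installs it at import time):

import warnings
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])
    state_dict = emb.state_dict()
    # Corrupt one entry's shape; load_dict should skip it and warn
    # instead of raising.
    state_dict['weight'] = np.zeros([10, 10, 1], dtype='float32')
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        emb.load_dict(state_dict)
    assert any("Skip loading" in str(w.message) for w in caught)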
...@@ -14,7 +14,6 @@
from ..fluid.dygraph.jit import save #DEFINE_ALIAS
from ..fluid.dygraph.jit import load #DEFINE_ALIAS
from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS
from ..fluid.dygraph.jit import TracedLayer #DEFINE_ALIAS
from ..fluid.dygraph.jit import set_code_level #DEFINE_ALIAS
from ..fluid.dygraph.jit import set_verbosity #DEFINE_ALIAS
...@@ -23,6 +22,6 @@ from ..fluid.dygraph import ProgramTranslator #DEFINE_ALIAS
from ..fluid.dygraph.io import TranslatedLayer #DEFINE_ALIAS
__all__ = [
'save', 'load', 'TracedLayer', 'to_static', 'ProgramTranslator',
'TranslatedLayer', 'set_code_level', 'set_verbosity'
]
...@@ -109,7 +109,7 @@ class _LRScheduler(object):
"""
self.keys = ['last_epoch', 'last_lr']
def set_state_dict(self, state_dict):
"""
Loads the scheduler's state.
"""
...@@ -126,8 +126,8 @@ class _LRScheduler(object):
"There are some unused values in state_dict. Maybe the optimizer has a different 'LearningRateDecay' when invoking state_dict and set_dict"
)
# alias for set_state_dict
set_dict = set_state_dict
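Both spellings now land on the same method, so scheduler state written against either name keeps working. A round-trip sketch; `NoamLR` stands in for any concrete `_LRScheduler` subclass and its arguments are illustrative:

import paddle

scheduler = paddle.optimizer.lr_scheduler.NoamLR(d_model=512, warmup_steps=100)

state = scheduler.state_dict()   # holds 'last_epoch' and 'last_lr'
scheduler.set_state_dict(state)  # new canonical name
scheduler.set_dict(state)        # deprecated alias, same effect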
def get_lr(self):
# calculate by python float
......