Unverified Commit 29861846 authored by Chen Weihang and committed by GitHub

Update 2.0 Save/Load API names/arguments/doc examples (#27138)

* Update set_dict method name & add aliases (#26700)

* update set_dict method name & add aliases

* fix var name error

* fix alias formats

* use set_state_dict in unittest

* add decorator to solve compatibility problem

* polish decorator

* replace layer set_state_dict by patched method

* remove monkey patch layer import

* fix import function error

* add unittest for coverage

* Support loading state dict from `inference model` format save result (#26718)

* support load infer model format state dict

* add unittests

* remove keep name table

* resolve circular import

* fix compatibility problem

* recover unittest

* polish doc and comment

* Change jit.save/load configs to config & update code examples (#27056)

* change configs to config & update examples

* fix deprecate decorator conflict
Parent 0072490f
@@ -232,6 +232,7 @@ from .framework import grad #DEFINE_ALIAS
from .framework import no_grad #DEFINE_ALIAS
from .framework import save #DEFINE_ALIAS
from .framework import load #DEFINE_ALIAS
from .framework import SaveLoadConfig #DEFINE_ALIAS
from .framework import DataParallel #DEFINE_ALIAS
from .framework import NoamDecay #DEFINE_ALIAS
......
@@ -16,13 +16,16 @@ from __future__ import print_function
import os
import collections
import functools
from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase, _varbase_creator, _dygraph_tracer
import pickle
import six
from . import learning_rate_scheduler
import warnings
from .. import core
from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME, _load_persistable_vars
from .base import guard
from paddle.fluid.dygraph.jit import SaveLoadConfig, deprecate_save_load_configs
from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers
__all__ = [
'save_dygraph',
@@ -30,6 +33,37 @@ __all__ = [
]
# NOTE(chenweihang): deprecate load_dygraph's argument keep_name_table,
# ensure compatibility when users still pass the keep_name_table argument
def deprecate_keep_name_table(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
def __warn_and_build_configs__(keep_name_table):
warnings.warn(
"The argument `keep_name_table` has deprecated, please use `SaveLoadConfig.keep_name_table`.",
DeprecationWarning)
config = SaveLoadConfig()
config.keep_name_table = keep_name_table
return config
# deal with arg `keep_name_table`
if len(args) > 1 and isinstance(args[1], bool):
args = list(args)
args[1] = __warn_and_build_configs__(args[1])
# deal with kwargs
elif 'keep_name_table' in kwargs:
kwargs['config'] = __warn_and_build_configs__(kwargs[
'keep_name_table'])
kwargs.pop('keep_name_table')
else:
# do nothing
pass
return func(*args, **kwargs)
return wrapper
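# Editor's sketch (not part of this change, saved prefix "paddle_dy" is assumed):
# both legacy forms below still work and emit a DeprecationWarning,
#
#   para_dict, opti_dict = load_dygraph("paddle_dy", True)
#   para_dict, opti_dict = load_dygraph("paddle_dy", keep_name_table=True)
#
# because the wrapper above rewrites either one into the new form:
#
#   load_dygraph("paddle_dy", config=<SaveLoadConfig with keep_name_table=True>)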
@dygraph_only
def save_dygraph(state_dict, model_path):
'''
@@ -100,17 +134,28 @@ def save_dygraph(state_dict, model_path):
# TODO(qingqing01): remove dygraph_only to support loading static model.
# maybe need to unify the loading interface after 2.0 API is ready.
#@dygraph_only
def load_dygraph(model_path, keep_name_table=False):
# @dygraph_only
@deprecate_save_load_configs
@deprecate_keep_name_table
def load_dygraph(model_path, config=None):
'''
:api_attr: imperative
Load parameter state_dict from disk.
Load parameter state dict from disk.
.. note::
Due to some historical reasons, if you load a ``state_dict`` from the saved
result of `paddle.io.save_inference_model`, the structured variable names
cannot be restored. You need to set the argument `use_structured_name=False`
when using `Layer.set_state_dict` later.
Args:
model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams')
keep_name_table(bool, optional) : Whether keep structed name to parameter name conversion table in output dict.
Default : False
model_path(str) : The file prefix that stores the state_dict.
(The path should not contain the suffix '.pdparams')
config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig`
object that specifies additional configuration options; these options
are for compatibility with ``jit.save/io.save_inference_model`` formats.
Default None.
Returns:
state_dict(dict) : the dict store the state_dict
@@ -118,23 +163,27 @@ def load_dygraph(model_path, keep_name_table=False):
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
paddle.disable_static()
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
parameter_list = emb.parameters() )
scheduler = paddle.optimizer.lr_scheduler.NoamLR(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,
parameters=emb.parameters())
state_dict = adam.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
para_state_dict, opti_state_dict = paddle.load("paddle_dy")
'''
# deal with argument `model_path`
model_prefix = model_path
if model_prefix.endswith(".pdparams"):
model_prefix = model_prefix[:-9]
@@ -145,66 +194,45 @@ def load_dygraph(model_path, keep_name_table=False):
opti_dict = None
params_file_path = model_prefix + ".pdparams"
opti_file_path = model_prefix + ".pdopt"
# deal with argument `configs`
configs = config
if configs is None:
configs = SaveLoadConfig()
if not os.path.exists(params_file_path) and not os.path.exists(
opti_file_path):
# Load state dict by `jit.save` save format
# TODO(chenweihang): [Why not support `io.save_infernece_model` save format here]
# Load state dict by `jit.save/io.save_inference_model` save format
# NOTE(chenweihang): [ Compatibility of save_inference_model save format ]
# The model saved by `save_inference_model` does not completely correspond to
# the information required by the `state_dict` under the dygraph.
# Although we reluctantly restore the `state_dict` in some scenarios,
# this may not be complete and there are some limitations, so this function
# will be considered later. The limitations include:
# 1. `save_inference_model` not save structured name, we need to remind
# the user to configure the `use_structured_name` argument when `set_dict`,
# but this argument is currently not public
# 2. if `save_inference_model` save all persistable variables in a single file,
# user need to give the variable name list to load `state_dict`
# `save_inference_model` does not save structured names, so we need to remind
# the user to configure the `use_structured_name` argument when calling `set_state_dict`
# NOTE(chenweihang): `jit.save` doesn't save optimizer state
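# Editor's sketch (hypothetical user code and path) of the follow-up call
# mentioned above:
#
#   para_dict, _ = paddle.load("inference.model.dir")
#   layer.set_state_dict(para_dict, use_structured_name=False)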
# 1. check model path
if not os.path.isdir(model_prefix):
raise ValueError("Model saved directory '%s' is not exists." %
model_prefix)
# 2. load `__variables.info__`
var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME)
if not os.path.exists(var_info_path):
raise RuntimeError(
"No target can be loaded. Now only supports loading `state_dict` from "
"the result saved by `imperative.save` and `imperative.jit.save`."
)
with open(var_info_path, 'rb') as f:
extra_var_info = pickle.load(f)
# 3. load `__variables__`
# TODO(chenweihang): now only supports loading from default save format:
# - all persistable vars saved in one file named `__variables__`
# for other case, we may need to modify the arguments of this API
var_file_path = os.path.join(model_prefix, VARIABLE_FILENAME)
if not os.path.exists(var_file_path):
raise RuntimeError(
"The parameter file to be loaded was not found. "
"Now only supports loading from the default save format, "
"and does not support custom params_filename and "
"save parameters separately.")
# 4. load all persistable vars
load_var_list = []
for name in sorted(extra_var_info):
var = _varbase_creator(name=name, persistable=True)
load_var_list.append(var)
_dygraph_tracer().trace_op(
type='load_combine',
inputs={},
outputs={'Out': load_var_list},
attrs={'file_path': var_file_path})
# 5. construct state_dict
# 2. load program desc & construct _ProgramHolder
programs = _construct_program_holders(model_path,
configs.model_filename)
# 3. load layer parameters & buffers
# NOTE: using fluid.dygraph.guard() here will cause import error in py2
with guard():
persistable_var_dict = _construct_params_and_buffers(
model_prefix,
programs,
configs.separate_params,
configs.params_filename,
append_suffix=False)
# 4. construct state_dict
para_dict = dict()
for var in load_var_list:
structured_name = extra_var_info[var.name].get('structured_name',
None)
if structured_name is None:
raise RuntimeError(
"Cannot find saved variable (%s)'s structured name in saved model.",
var.name)
para_dict[structured_name] = var.numpy()
# NOTE: `jit.save` doesn't save optimizer state
for var_name in persistable_var_dict:
para_dict[var_name] = persistable_var_dict[var_name].numpy()
else:
# Load state dict by `save_dygraph` save format
para_dict = {}
@@ -213,7 +241,7 @@ def load_dygraph(model_path, keep_name_table=False):
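# (editor's note) encoding='latin1' lets Python 3 unpickle byte payloads
# written by Python 2 (e.g. numpy array buffers) without decode errors.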
para_dict = pickle.load(f) if six.PY2 else pickle.load(
f, encoding='latin1')
if not keep_name_table and "StructuredToParameterName@@" in para_dict:
if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict:
del para_dict["StructuredToParameterName@@"]
if os.path.exists(opti_file_path):
......
@@ -488,6 +488,15 @@ def _load_persistable_vars(model_path,
return load_var_dict
# NOTE(chenweihang): to adapt paddle.load to get state_dict
def _remove_varname_suffix(var_dict, program_holder):
no_suffix_var_dict = dict()
for var_name in var_dict:
no_suffix_name = program_holder._suffix_varname_dict[var_name]
no_suffix_var_dict[no_suffix_name] = var_dict[var_name]
return no_suffix_var_dict
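# Editor's sketch of the mapping above (all names are hypothetical): given
#
#   var_dict = {'linear_0.w_0.suffix_0': w, 'linear_0.b_0.suffix_1': b}
#   program_holder._suffix_varname_dict = {
#       'linear_0.w_0.suffix_0': 'linear_0.w_0',
#       'linear_0.b_0.suffix_1': 'linear_0.b_0'}
#
# the function returns {'linear_0.w_0': w, 'linear_0.b_0': b}, i.e. a
# state_dict keyed by the original, suffix-free variable names.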
def _construct_program_holders(model_path, model_filename=None):
# make sure the path has been checked
program_holder_dict = dict()
@@ -517,7 +526,8 @@ def _construct_program_holders(model_path, model_filename=None):
def _construct_params_and_buffers(model_path,
programs,
separate_params=False,
params_filename=None):
params_filename=None,
append_suffix=True):
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
if os.path.exists(var_info_path):
var_dict = _load_persistable_vars(model_path, var_info_path,
@@ -526,6 +536,10 @@ def _construct_params_and_buffers(model_path,
else:
var_dict = _load_persistable_vars_by_program(
model_path, programs['forward'], params_filename)
if not append_suffix:
var_dict = _remove_varname_suffix(var_dict, programs['forward'])
return var_dict
@@ -685,7 +699,7 @@ class TranslatedLayer(layers.Layer):
# 1. load program desc & construct _ProgramHolder
programs = _construct_program_holders(model_path, model_filename)
# 2. load layer parameters & parameter attributes
# 2. load layer parameters & buffers
persistable_vars = _construct_params_and_buffers(
model_path, programs, separate_params, params_filename)
......
@@ -17,6 +17,7 @@ from __future__ import print_function
import os
import pickle
import warnings
import functools
import six
import paddle
@@ -228,63 +229,60 @@ class SaveLoadConfig(object):
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
class SimpleNet(fluid.dygraph.Layer):
class SimpleNet(nn.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(in_size, out_size)
@declarative
@paddle.jit.to_static
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
x = paddle.randn([4, 8], 'float32')
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss = paddle.tensor.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
adam.step()
adam.clear_grad()
# use SaveLoadConfig when saving model
model_path = "simplenet.example.model"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
fluid.dygraph.jit.save(
config = paddle.SaveLoadConfig()
config.model_filename = "__simplenet__"
paddle.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
config=config)
2. Using ``SaveLoadConfig`` when loading model
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
import paddle
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static()
# use SaveLoadConfig when loading model
model_path = "simplenet.example.model"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
config = paddle.SaveLoadConfig()
config.model_filename = "__simplenet__"
infer_net = paddle.jit.load(model_path, config=config)
# inference
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
x = paddle.randn([4, 8], 'float32')
pred = infer_net(x)
"""
@@ -293,6 +291,8 @@ class SaveLoadConfig(object):
self._model_filename = None
self._params_filename = None
self._separate_params = False
# used for `paddle.load`
self._keep_name_table = False
# NOTE: Users rarely use following configs, so these configs are not open to users,
# reducing user learning costs, but we retain the configuration capabilities
@@ -322,51 +322,46 @@ class SaveLoadConfig(object):
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
class SimpleNet(fluid.dygraph.Layer):
class SimpleNet(nn.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(in_size, out_size)
@declarative
@paddle.jit.to_static
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
loss = fluid.layers.mean(z)
loss = paddle.tensor.mean(z)
return z, loss
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
x = paddle.randn([4, 8], 'float32')
for i in range(10):
out, loss = net(x)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
adam.step()
adam.clear_grad()
# use SaveLoadConfig.output_spec
model_path = "simplenet.example.model.output_spec"
configs = fluid.dygraph.jit.SaveLoadConfig()
# only keep the predicted output in saved model, discard loss
configs.output_spec = [out]
fluid.dygraph.jit.save(
config = paddle.SaveLoadConfig()
config.output_spec = [out]
paddle.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
config=config)
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
# only have the predicted output
infer_net = paddle.jit.load(model_path)
x = paddle.randn([4, 8], 'float32')
pred = infer_net(x)
"""
return self._output_spec
@@ -393,52 +388,47 @@ class SaveLoadConfig(object):
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
class SimpleNet(fluid.dygraph.Layer):
class SimpleNet(nn.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(in_size, out_size)
@declarative
@paddle.jit.to_static
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
x = paddle.randn([4, 8], 'float32')
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss = paddle.tensor.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
model_path = "simplenet.example.model.model_filename"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.model_filename = "__simplenet__"
adam.step()
adam.clear_grad()
# saving with config.model_filename
fluid.dygraph.jit.save(
model_path = "simplenet.example.model.model_filename"
config = paddle.SaveLoadConfig()
config.model_filename = "__simplenet__"
paddle.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
# [result] the saved model directory contains:
# __simplenet__ __variables__ __variables.info__
config=config)
# loading with config.model_filename
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
infer_net = paddle.jit.load(model_path, config=config)
x = paddle.randn([4, 8], 'float32')
pred = infer_net(x)
"""
return self._model_filename
@@ -463,52 +453,48 @@ class SaveLoadConfig(object):
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
class SimpleNet(fluid.dygraph.Layer):
class SimpleNet(nn.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(in_size, out_size)
@declarative
@paddle.jit.to_static
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
x = paddle.randn([4, 8], 'float32')
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss = paddle.tensor.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
adam.step()
adam.clear_grad()
model_path = "simplenet.example.model.params_filename"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.params_filename = "__params__"
config = paddle.SaveLoadConfig()
config.params_filename = "__params__"
# saving with config.params_filename
fluid.dygraph.jit.save(
paddle.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
# [result] the saved model directory contains:
# __model__ __params__ __variables.info__
config=config)
# loading with config.params_filename
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
infer_net = paddle.jit.load(model_path, config=config)
x = paddle.randn([4, 8], 'float32')
pred = infer_net(x)
"""
return self._params_filename
@@ -542,52 +528,50 @@ class SaveLoadConfig(object):
Examples:
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
class SimpleNet(fluid.dygraph.Layer):
class SimpleNet(nn.Layer):
def __init__(self, in_size, out_size):
super(SimpleNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(in_size, out_size)
@declarative
@paddle.jit.to_static
def forward(self, x):
y = self._linear(x)
z = self._linear(y)
return z
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static()
# train model
net = SimpleNet(8, 8)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
x = paddle.randn([4, 8], 'float32')
for i in range(10):
out = net(x)
loss = fluid.layers.mean(out)
loss = paddle.tensor.mean(out)
loss.backward()
adam.minimize(loss)
net.clear_gradients()
adam.step()
adam.clear_grad()
model_path = "simplenet.example.model.separate_params"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.separate_params = True
config = paddle.jit.SaveLoadConfig()
config.separate_params = True
# saving with config.separate_params
fluid.dygraph.jit.save(
paddle.jit.save(
layer=net,
model_path=model_path,
input_spec=[x],
configs=configs)
config=config)
# [result] the saved model directory contains:
# linear_0.b_0 linear_0.w_0 __model__ __variables.info__
# loading with config.separate_params
infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
infer_net = paddle.jit.load(model_path, config=config)
x = paddle.randn([4, 8], 'float32')
pred = infer_net(x)
"""
return self._separate_params
@@ -600,9 +584,70 @@ class SaveLoadConfig(object):
% type(value))
self._separate_params = value
@property
def keep_name_table(self):
"""
Configures whether to keep the ``structured_name -> parameter_name`` dict in the loaded state dict.
This dict is debugging information saved when calling `paddle.save`.
It is generally only used for debugging and does not affect the actual training or inference.
By default, it will not be retained in the `paddle.load` result. Default: False.
.. note::
Only used for ``paddle.load``.
Examples:
.. code-block:: python
import paddle
paddle.disable_static()
linear = paddle.nn.Linear(5, 1)
state_dict = linear.state_dict()
paddle.save(state_dict, "paddle_dy")
configs = paddle.SaveLoadConfig()
configs.keep_name_table = True
para_state_dict, _ = paddle.load("paddle_dy", configs)
print(para_state_dict)
# the name_table is 'StructuredToParameterName@@'
# {'bias': array([0.], dtype=float32),
# 'StructuredToParameterName@@':
# {'bias': u'linear_0.b_0', 'weight': u'linear_0.w_0'},
# 'weight': array([[ 0.04230034],
# [-0.1222527 ],
# [ 0.7392676 ],
# [-0.8136974 ],
# [ 0.01211023]], dtype=float32)}
"""
return self._keep_name_table
@keep_name_table.setter
def keep_name_table(self, value):
if not isinstance(value, bool):
raise TypeError(
"The SaveLoadConfig.keep_name_table should be bool value, but received input's type is %s."
% type(value))
self._keep_name_table = value
# NOTE(chenweihang): change jit.save/load argument `configs` to `config`
def deprecate_save_load_configs(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if 'configs' in kwargs:
kwargs['config'] = kwargs['configs']
kwargs.pop('configs')
return func(*args, **kwargs)
return wrapper
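# Editor's sketch (hypothetical user code): a legacy keyword call such as
#
#   paddle.jit.load(model_path, configs=my_config)
#
# is silently forwarded by the wrapper above as
#
#   paddle.jit.load(model_path, config=my_config)
#
# so code written against the old `configs` argument name keeps working.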
@deprecate_save_load_configs
@switch_to_static_graph
def save(layer, model_path, input_spec=None, configs=None):
def save(layer, model_path, input_spec=None, config=None):
"""
Saves input declarative Layer as :ref:`api_imperative_TranslatedLayer`
format model, which can be used for inference or fine-tuning after loading.
@@ -627,7 +672,7 @@ def save(layer, model_path, input_spec=None, configs=None):
It is the example inputs that will be passed to saved TranslatedLayer's forward
function. If None, all input variables of the original Layer's forward function
would be the inputs of the saved model. Default None.
configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object
config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object
that specifies additional configuration options. Default None.
Returns:
None
@@ -636,65 +681,76 @@ def save(layer, model_path, input_spec=None, configs=None):
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
BATCH_SIZE = 32
BATCH_NUM = 20
BATCH_SIZE = 16
BATCH_NUM = 4
EPOCH_NUM = 4
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
IMAGE_SIZE = 784
CLASS_NUM = 10
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
# define a random dataset
class RandomDataset(paddle.io.Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
return __reader__
def __getitem__(self, idx):
image = np.random.random([IMAGE_SIZE]).astype('float32')
label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
return image, label
class LinearNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
def __len__(self):
return self.num_samples
class LinearNet(nn.Layer):
def __init__(self):
super(LinearNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
@declarative
@paddle.jit.to_static
def forward(self, x):
return self._linear(x)
def train(layer, loader, loss_fn, opt):
for epoch_id in range(EPOCH_NUM):
for batch_id, (image, label) in enumerate(loader()):
out = layer(image)
loss = loss_fn(out, label)
loss.backward()
opt.step()
opt.clear_grad()
print("Epoch {} batch {}: loss = {}".format(
epoch_id, batch_id, np.mean(loss.numpy())))
# enable dygraph mode
fluid.enable_dygraph()
place = paddle.CPUPlace()
paddle.disable_static(place)
# create network
net = LinearNet(784, 1)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# train
for data in train_loader():
img, label = data
label.stop_gradient = True
# 1. train & save model.
cost = net(img)
# create network
layer = LinearNet()
loss_fn = nn.CrossEntropyLoss()
adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
# create data loader
dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
loader = paddle.io.DataLoader(dataset,
places=place,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True,
num_workers=2)
avg_loss.backward()
adam.minimize(avg_loss)
net.clear_gradients()
# train
train(layer, loader, loss_fn, adam)
# save model
# save
model_path = "linear.example.model"
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[img])
paddle.jit.save(layer, model_path)
"""
def get_inout_spec(all_vars, target_vars, return_name=False):
@@ -728,6 +784,7 @@ def save(layer, model_path, input_spec=None, configs=None):
"The input layer of paddle.jit.save should be 'Layer', but received layer type is %s."
% type(layer))
configs = config
if configs is None:
configs = SaveLoadConfig()
@@ -819,8 +876,9 @@ def save(layer, model_path, input_spec=None, configs=None):
pickle.dump(extra_var_info, f, protocol=2)
@deprecate_save_load_configs
@dygraph_only
def load(model_path, configs=None):
def load(model_path, config=None):
"""
:api_attr: imperative
@@ -837,7 +895,7 @@ def load(model_path, configs=None):
Args:
model_path (str): The directory path where the model is saved.
configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies
config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies
additional configuration options. Default None.
Returns:
@@ -849,122 +907,126 @@ def load(model_path, configs=None):
.. code-block:: python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import declarative
import paddle
import paddle.nn as nn
import paddle.optimizer as opt
BATCH_SIZE = 32
BATCH_NUM = 20
BATCH_SIZE = 16
BATCH_NUM = 4
EPOCH_NUM = 4
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
IMAGE_SIZE = 784
CLASS_NUM = 10
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
# define a random dataset
class RandomDataset(paddle.io.Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
return __reader__
def __getitem__(self, idx):
image = np.random.random([IMAGE_SIZE]).astype('float32')
label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
return image, label
class LinearNet(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
def __len__(self):
return self.num_samples
class LinearNet(nn.Layer):
def __init__(self):
super(LinearNet, self).__init__()
self._linear = Linear(in_size, out_size)
self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)
@declarative
@paddle.jit.to_static
def forward(self, x):
return self._linear(x)
def train(layer, loader, loss_fn, opt):
for epoch_id in range(EPOCH_NUM):
for batch_id, (image, label) in enumerate(loader()):
out = layer(image)
loss = loss_fn(out, label)
loss.backward()
opt.step()
opt.clear_grad()
print("Epoch {} batch {}: loss = {}".format(
epoch_id, batch_id, np.mean(loss.numpy())))
# enable dygraph mode
fluid.enable_dygraph()
place = paddle.CPUPlace()
paddle.disable_static(place)
# 1. train & save model.
# create network
net = LinearNet(784, 1)
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
layer = LinearNet()
loss_fn = nn.CrossEntropyLoss()
adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# train
for data in train_loader():
img, label = data
label.stop_gradient = True
dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
loader = paddle.io.DataLoader(dataset,
places=place,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True,
num_workers=2)
cost = net(img)
# train
train(layer, loader, loss_fn, adam)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
# save
model_path = "linear.example.model"
paddle.jit.save(layer, model_path)
avg_loss.backward()
adam.minimize(avg_loss)
net.clear_gradients()
# 2. load model
model_path = "linear.example.model"
fluid.dygraph.jit.save(
layer=net,
model_path=model_path,
input_spec=[img])
# load
loaded_layer = paddle.jit.load(model_path)
# 2. load model & inference
# load model
infer_net = fluid.dygraph.jit.load(model_path)
# inference
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
pred = infer_net(x)
loaded_layer.eval()
x = paddle.randn([1, IMAGE_SIZE], 'float32')
pred = loaded_layer(x)
# 3. load model & fine-tune
# load model
train_net = fluid.dygraph.jit.load(model_path)
train_net.train()
adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=train_net.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
# fine-tune
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = train_net(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
loaded_layer.train()
adam = opt.Adam(learning_rate=0.001, parameters=loaded_layer.parameters())
train(loaded_layer, loader, loss_fn, adam)
avg_loss.backward()
adam.minimize(avg_loss)
train_net.clear_gradients()
2. Load a model saved by :ref:`api_fluid_io_save_inference_model`, then perform inference and fine-tune training.
.. code-block:: python
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.nn as nn
import paddle.optimizer as opt
BATCH_SIZE = 32
BATCH_NUM = 20
BATCH_SIZE = 16
BATCH_NUM = 4
EPOCH_NUM = 4
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
IMAGE_SIZE = 784
CLASS_NUM = 10
# define a random dataset
class RandomDataset(paddle.io.Dataset):
def __init__(self, num_samples):
self.num_samples = num_samples
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
def __getitem__(self, idx):
image = np.random.random([IMAGE_SIZE]).astype('float32')
label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
return image, label
return __reader__
def __len__(self):
return self.num_samples
img = fluid.data(name='img', shape=[None, 784], dtype='float32')
image = fluid.data(name='image', shape=[None, 784], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
pred = fluid.layers.fc(input=img, size=10, act='softmax')
pred = fluid.layers.fc(input=image, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=pred, label=label)
avg_loss = fluid.layers.mean(loss)
@@ -975,9 +1037,15 @@ def load(model_path, configs=None):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
loader = fluid.io.DataLoader.from_generator(
feed_list=[img, label], capacity=5, iterable=True)
loader.set_batch_generator(random_batch_reader(), places=place)
# create data loader
dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
loader = paddle.io.DataLoader(dataset,
feed_list=[image, label],
places=place,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True,
num_workers=2)
# 1. train and save inference model
for data in loader():
@@ -988,39 +1056,42 @@ def load(model_path, configs=None):
model_path = "fc.example.model"
fluid.io.save_inference_model(
model_path, ["img"], [pred], exe)
model_path, ["image"], [pred], exe)
# 2. load model
# enable dygraph mode
fluid.enable_dygraph()
paddle.disable_static(place)
# load
fc = paddle.jit.load(model_path)
# 2. load model & inference
fc = fluid.dygraph.jit.load(model_path)
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
# inference
fc.eval()
x = paddle.randn([1, IMAGE_SIZE], 'float32')
pred = fc(x)
# 3. load model & fine-tune
fc = fluid.dygraph.jit.load(model_path)
# fine-tune
fc.train()
sgd = fluid.optimizer.SGD(learning_rate=0.001,
parameter_list=fc.parameters())
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(
random_batch_reader(), places=place)
for data in train_loader():
img, label = data
label.stop_gradient = True
cost = fc(img)
loss = fluid.layers.cross_entropy(cost, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
sgd.minimize(avg_loss)
loss_fn = nn.CrossEntropyLoss()
adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters())
loader = paddle.io.DataLoader(dataset,
places=place,
batch_size=BATCH_SIZE,
shuffle=True,
drop_last=True,
num_workers=2)
for epoch_id in range(EPOCH_NUM):
for batch_id, (image, label) in enumerate(loader()):
out = fc(image)
loss = loss_fn(out, label)
loss.backward()
adam.step()
adam.clear_grad()
print("Epoch {} batch {}: loss = {}".format(
epoch_id, batch_id, np.mean(loss.numpy())))
"""
return TranslatedLayer._construct(model_path, configs)
return TranslatedLayer._construct(model_path, config)
@dygraph_only
......
@@ -29,6 +29,9 @@ from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard, param_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.framework import _current_expected_place as _get_device
__all__ = ['Layer']
@@ -797,7 +800,7 @@ class Layer(core.Layer):
raise ValueError(
"super(YourLayer, self).__init__() should be called first")
if len(self._loaddict_holder) > 0:
assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
assert value.name in self._loaddict_holder, "Parameter not found, cannot find [ {} ] in state_dict".format(
value.name)
value.set_value(self._loaddict_holder[value.name])
@@ -943,12 +946,13 @@ class Layer(core.Layer):
destination = destination_temp
return destination
def set_dict(self,
stat_dict,
@framework.deprecate_stat_dict
def set_state_dict(self,
state_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters and persistable buffers from stat_dict. All the parameters and buffers will be reset by the tensor in the stat_dict
Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensors in the state_dict
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
@@ -961,72 +965,67 @@ class Layer(core.Layer):
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
emb.set_dict( para_state_dict )
'''
self.load_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters and persistable buffers from stat_dict. All the parameters and persistabl buffers will be reset by the tensor in the stat_dict
This api will be Deprecated. Please use set_dict
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
Default: True
Returns:
None
import paddle
Examples:
.. code-block:: python
paddle.disable_static()
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
para_state_dict, _ = paddle.load("paddle_dy")
emb.load_dict( para_state_dict )
emb.set_state_dict(para_state_dict)
'''
inner_state_dict = self.state_dict()
def _check_match(key, param):
state = state_dict.get(key, None)
if state is None:
raise ValueError("{} is not found in the provided dict.".format(
key))
if list(state.shape) != list(param.shape):
raise ValueError(
"{} receives a shape {}, but the expected shape is {}.".
format(key, list(state.shape), list(param.shape)))
return param, state
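# (editor's note) two-pass design: validate and collect all matches first,
# then assign; a missing or shape-mismatched entry is skipped with a warning
# instead of aborting the whole load.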
matched_param_state = []
for key, param in self.state_dict().items():
key_name = key if use_structured_name else param.name
try:
match_res = _check_match(key_name, param)
matched_param_state.append(match_res)
except ValueError as err:
warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
if in_dygraph_mode():
for param, state in matched_param_state:
param.set_value(state)
else:
for name, param_or_buffer in inner_state_dict.items():
key_name = name if use_structured_name else param_or_buffer.name
if key_name in stat_dict:
param_or_buffer.set_value(stat_dict[key_name])
def _set_var(var, ndarray):
t = global_scope().find_var(var.name).get_tensor()
p = t._place()
if p.is_cpu_place():
place = core.CPUPlace()
elif p.is_cuda_pinned_place():
place = core.CUDAPinnedPlace()
else:
raise RuntimeError(
"Parameter or persistable buffer not found, Can't find [ {} ] in stat_dict"
"use_structured_name is set to [{}]".format(
key_name, use_structured_name))
unused_para_list = []
for k, v in stat_dict.items():
if k not in inner_state_dict:
unused_para_list.append(k)
if len(unused_para_list) > 0:
warnings.warn(
"Variables [ {} ] are not used, because not included in layers state_dict".
format(" ".join(unused_para_list)))
p = core.Place()
p.set_place(t._place())
place = core.CUDAPlace(p.gpu_device_id())
t.set(ndarray, place)
executor = Executor(_get_device())._default_executor
# restore parameter states
core._create_loaded_parameter(
[param for param, state in matched_param_state],
global_scope(), executor)
for param, state in matched_param_state:
_set_var(param, state)
# [aliases] Compatible with old method names
set_dict = set_state_dict
load_dict = set_state_dict
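# (editor's note) with these aliases, legacy calls like layer.set_dict(sd)
# or layer.load_dict(sd) dispatch to set_state_dict unchanged.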
@@ -97,7 +97,7 @@ class LearningRateDecay(object):
"""
self.keys = ['step_num']
def set_dict(self, state_dict):
def set_state_dict(self, state_dict):
"""
Loads the schedulers state.
"""
@@ -114,6 +114,9 @@ class LearningRateDecay(object):
"There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict"
)
# [aliases] Compatible with old method names
set_dict = set_state_dict
def step(self):
raise NotImplementedError()
......
@@ -587,12 +587,13 @@ class DataParallel(layers.Layer):
include_sublayers=include_sublayers,
structured_name_prefix=structured_name_prefix)
def set_dict(self,
stat_dict,
@framework.deprecate_stat_dict
def set_state_dict(self,
state_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
Set parameters of self._layers from state_dict. All the parameters of self._layers will be reset by the tensors in the state_dict
Parameters:
state_dict(dict) : Dict contains all the parameters
@@ -605,62 +606,27 @@ class DataParallel(layers.Layer):
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=fluid.dygraph.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = fluid.dygraph.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
emb.set_dict( para_state_dict )
'''
self._layers.set_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict
This api will be Deprecated. Please use set_dict
Parameters:
state_dict(dict) : Dict contains all the parameters
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
Default: True
Returns:
None
import paddle
Examples:
.. code-block:: python
paddle.disable_static()
import paddle.fluid as fluid
with fluid.dygraph.guard():
strategy=fluid.dygraph.prepare_context()
emb = fluid.dygraph.Embedding([10, 10])
emb = paddle.nn.Embedding([10, 10])
emb = fluid.dygraph.DataParallel(emb, strategy)
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
para_state_dict, _ = paddle.load("paddle_dy")
emb.load_dict( para_state_dict )
emb.set_state_dict(para_state_dict)
'''
self._layers.load_dict(
stat_dict,
self._layers.set_state_dict(
state_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
# [aliases] Compatible with old method names
set_dict = set_state_dict
load_dict = set_state_dict
@@ -36,6 +36,7 @@ from . import core
from . import unique_name
import paddle.version as fluid_version
import warnings
import functools
__all__ = [
'Program',
@@ -238,6 +239,25 @@ def _fake_interface_only_(func):
return __impl__
# NOTE(chenweihang): There is an argument name typo (stat_dict, the correct name is state_dict)
# in the fluid APIs Layer.set_dict and Optimizer.load; in order to correct the argument without
# introducing compatibility issues, add this decorator
# NOTE(chenweihang): we do not use `wrap_decorator` here because `wrap_decorator` will
# move kwargs to args, which doesn't work in this decorator case
def deprecate_stat_dict(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if 'stat_dict' in kwargs:
warnings.warn(
"The argument `stat_dict` has deprecated, please change it to `state_dict`.",
DeprecationWarning)
kwargs['state_dict'] = kwargs['stat_dict']
kwargs.pop('stat_dict')
return func(*args, **kwargs)
return wrapper
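# Editor's sketch (hypothetical user code): a legacy call such as
#
#   layer.set_state_dict(stat_dict=para_state_dict)
#
# emits a DeprecationWarning and reaches the wrapped method as
#
#   layer.set_state_dict(state_dict=para_state_dict)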
dygraph_not_support = wrap_decorator(_dygraph_not_support_)
dygraph_only = wrap_decorator(_dygraph_only_)
fake_interface_only = wrap_decorator(_fake_interface_only_)
......
@@ -170,7 +170,7 @@ class Optimizer(object):
return state_dict
@framework.dygraph_only
def set_dict(self, state_dict):
def set_state_dict(self, state_dict):
'''
Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay has been used, global_step will be changed.
@@ -182,20 +182,22 @@ class Optimizer(object):
Examples:
.. code-block:: python
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
import paddle
paddle.disable_static()
emb = paddle.nn.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
paddle.save(state_dict, "paddle_dy")
adam = fluid.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000),
scheduler = paddle.optimizer.lr_scheduler.NoamLR(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,
parameters=emb.parameters())
state_dict = adam.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
para_state_dict, opti_state_dict = paddle.load("paddle_dy")
adam.set_dict(opti_state_dict)
adam.set_state_dict(opti_state_dict)
'''
from paddle.optimizer.lr_scheduler import _LRScheduler
@@ -257,6 +259,9 @@ class Optimizer(object):
tensor.set(load_para_np, framework._current_expected_place())
# [aliases] Compatible with old method names
set_dict = set_state_dict
def get_opti_var_name_list(self):
return self._opti_name_list
@@ -4595,7 +4600,8 @@ class RecomputeOptimizer(Optimizer):
), "_checkpoints should be a list of Variable or a list of String"
self._checkpoints = checkpoints
def load(self, stat_dict):
@framework.deprecate_stat_dict
def load(self, state_dict):
"""
:api_attr: Static Graph
@@ -4603,7 +4609,7 @@ class RecomputeOptimizer(Optimizer):
:return: None
Args:
stat_dict: the dict load by load_persistable method
state_dict: the dict load by load_persistable method
Examples:
.. code-block:: python
@@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer):
sgd = fluid.optimizer.RecomputeOptimizer(sgd)
sgd._set_checkpoints([fc_1, pred])
try:
stat_dict = {}
sgd.load(stat_dict)
state_dict = {}
sgd.load(state_dict)
except NotImplementedError as e:
print(cpt.get_exception_message(e))
"""
......
@@ -43,7 +43,7 @@ class TestDirectory(unittest.TestCase):
'paddle.distributed.prepare_context', 'paddle.DataParallel',
'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static',
'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer',
'paddle.jit.save', 'paddle.jit.load', 'paddle.jit.SaveLoadConfig',
'paddle.jit.save', 'paddle.jit.load', 'paddle.SaveLoadConfig',
'paddle.NoamDecay', 'paddle.PiecewiseDecay',
'paddle.NaturalExpDecay', 'paddle.ExponentialDecay',
'paddle.InverseTimeDecay', 'paddle.PolynomialDecay',
......
@@ -374,8 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
adam._learning_rate.step_num = 0
para_state_dict, opti_state_dict = paddle.load("./test_dy")
print(opti_state_dict['LR_Scheduler'])
adam.set_dict(opti_state_dict)
adam.set_state_dict(opti_state_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
@@ -393,7 +392,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_dict(para_state_dict)
ptb_model.set_state_dict(stat_dict=para_state_dict)
state_dict = ptb_model.state_dict()
@@ -483,7 +482,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
adam.set_dict(self.opti_dict)
adam.set_state_dict(self.opti_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
if isinstance(v, core.VarBase):
@@ -500,7 +499,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_dict(self.state_dict)
ptb_model.set_state_dict(self.state_dict)
state_dict = ptb_model.state_dict()
@@ -593,7 +592,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
adam.set_dict(np_opti_dict)
adam.set_state_dict(np_opti_dict)
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
@@ -613,7 +612,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
var.set(np.zeros_like(np_t), place)
ptb_model.set_dict(np_state_dict)
ptb_model.set_state_dict(np_state_dict)
state_dict = ptb_model.state_dict()
@@ -656,8 +655,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_hidden = None
last_cell = None
adam.set_dict(self.opti_dict)
ptb_model.set_dict(self.state_dict)
adam.set_state_dict(self.opti_dict)
ptb_model.set_state_dict(self.state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
@@ -745,8 +744,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_cell = None
state_dict, opti_dict = fluid.load_dygraph("./test_dy")
adam.set_dict(opti_dict)
ptb_model.set_dict(state_dict)
adam.set_state_dict(opti_dict)
ptb_model.set_state_dict(state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
@@ -849,8 +848,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in self.state_dict.items():
np_state_dict[k] = v.numpy()
adam.set_dict(np_opti_dict)
ptb_model.set_dict(np_state_dict)
adam.set_state_dict(np_opti_dict)
ptb_model.set_state_dict(np_state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
@@ -912,6 +911,22 @@ class TestDygraphPtbRnn(unittest.TestCase):
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy.pdopt'))
def test_load_compatible_with_keep_name_table(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy'), True)
self.assertTrue(para_state_dict != None)
self.assertTrue(opti_state_dict == None)
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy'), keep_name_table=True)
self.assertTrue(para_state_dict != None)
self.assertTrue(opti_state_dict == None)
if __name__ == '__main__':
unittest.main()
@@ -917,6 +917,29 @@ class TestDygraphPtbRnn(unittest.TestCase):
para_state_dict, opti_state_dict = paddle.load(
os.path.join('saved_dy', 'emb_dy.pdopt'))
def test_no_state_in_input_dict(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy'))
para_state_dict.pop('weight')
emb.set_state_dict(para_state_dict)
def test_state_shape_mismatch(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy'))
para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy'))
para_state_dict['weight'] = np.expand_dims(
para_state_dict['weight'], axis=-1)
emb.set_state_dict(para_state_dict)
if __name__ == '__main__':
unittest.main()
@@ -183,25 +183,6 @@ class TestJitSaveLoad(unittest.TestCase):
with self.assertRaises(ValueError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
def test_load_dygraph_no_var_info(self):
model_path = "model.test_jit_save_load.no_var_info"
self.train_and_save_model(model_path=model_path)
# remove `__variables.info__`
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
os.remove(var_info_path)
new_layer = LinearNet(784, 1)
with self.assertRaises(RuntimeError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
def test_load_dygraph_not_var_file(self):
model_path = "model.test_jit_save_load.no_var_file"
configs = fluid.dygraph.jit.SaveLoadConfig()
configs.params_filename = "__params__"
self.train_and_save_model(model_path=model_path, configs=configs)
new_layer = LinearNet(784, 1)
with self.assertRaises(RuntimeError):
model_dict, _ = fluid.dygraph.load_dygraph(model_path)
class LinearNetMultiInput(fluid.dygraph.Layer):
def __init__(self, in_size, out_size):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import six
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from test_imperative_base import new_program_scope
def convolutional_neural_network(img):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu")
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return prediction
def static_train_net(img, label):
prediction = convolutional_neural_network(img)
loss = fluid.layers.cross_entropy(input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
optimizer = fluid.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)
return prediction, avg_loss
class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase):
def setUp(self):
self.seed = 90
self.epoch_num = 1
self.batch_size = 128
self.batch_num = 10
def train_and_save_model(self):
with new_program_scope():
startup_program = fluid.default_startup_program()
main_program = fluid.default_main_program()
img = fluid.data(
name='img', shape=[None, 1, 28, 28], dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
prediction, avg_loss = static_train_net(img, label)
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
) else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
exe.run(startup_program)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=100),
batch_size=self.batch_size)
for _ in range(0, self.epoch_num):
for batch_id, data in enumerate(train_reader()):
exe.run(main_program,
feed=feeder.feed(data),
fetch_list=[avg_loss])
if batch_id > self.batch_num:
break
static_param_dict = {}
for param in fluid.default_main_program().all_parameters():
static_param_dict[param.name] = fluid.executor._fetch_var(
param.name)
fluid.io.save_inference_model(
self.save_dirname, ["img"], [prediction],
exe,
model_filename=self.model_filename,
params_filename=self.params_filename)
return static_param_dict
def check_load_state_dict(self, orig_dict, load_dict):
for var_name, value in six.iteritems(orig_dict):
self.assertTrue(np.array_equal(value, load_dict[var_name]))
def test_load_default(self):
self.save_dirname = "static_mnist.load_state_dict.default"
self.model_filename = None
self.params_filename = None
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.separate_params = True
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
def test_load_with_model_filename(self):
self.save_dirname = "static_mnist.load_state_dict.model_filename"
self.model_filename = "static_mnist.model"
self.params_filename = None
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.separate_params = True
configs.model_filename = self.model_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
def test_load_with_param_filename(self):
self.save_dirname = "static_mnist.load_state_dict.param_filename"
self.model_filename = None
self.params_filename = "static_mnist.params"
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.params_filename = self.params_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
def test_load_with_model_and_param_filename(self):
self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename"
self.model_filename = "static_mnist.model"
self.params_filename = "static_mnist.params"
orig_param_dict = self.train_and_save_model()
configs = paddle.SaveLoadConfig()
configs.params_filename = self.params_filename
configs.model_filename = self.model_filename
load_param_dict, _ = paddle.load(self.save_dirname, configs)
self.check_load_state_dict(orig_param_dict, load_param_dict)
if __name__ == '__main__':
unittest.main()
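For reference, the load path these tests exercise reduces to a few lines. A minimal sketch, assuming a model was already saved with fluid.io.save_inference_model; the directory and file names below are illustrative:

import paddle

# Assumed layout: "my_inference_model" was produced by fluid.io.save_inference_model
# with model_filename="static_mnist.model" and params_filename="static_mnist.params"
# (all parameters combined into a single file).
config = paddle.SaveLoadConfig()
config.model_filename = "static_mnist.model"
config.params_filename = "static_mnist.params"

# paddle.load returns (parameter state dict, optimizer state dict); the
# `inference model` format stores no optimizer state, so the second item
# is discarded here.
load_param_dict, _ = paddle.load("my_inference_model", config)
for name, value in load_param_dict.items():
    print(name, value.shape)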
......@@ -832,8 +832,8 @@ class TestRecomputeOptimizer(unittest.TestCase):
recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)
recompute_optimizer._set_checkpoints([b1_out])
try:
stat_dict = {}
recompute_optimizer.load(stat_dict)
state_dict = {}
recompute_optimizer.load(state_dict)
except NotImplementedError as e:
self.assertEqual(
"load function is not supported by Recompute Optimizer for now",
......
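As the assertion above documents, RecomputeOptimizer does not support restoring state through load(). A minimal sketch of the guard pattern, with illustrative optimizer settings:

import paddle.fluid.optimizer as optimizer

sgd_optimizer = optimizer.SGD(learning_rate=0.01)
recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer)

# Per the test above, load() raises NotImplementedError for now, so callers
# must not rely on it to restore optimizer state.
try:
    recompute_optimizer.load({})
except NotImplementedError as e:
    print(e)  # "load function is not supported by Recompute Optimizer for now"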
......@@ -20,8 +20,8 @@ __all__ = [
]
__all__ += [
'grad', 'LayerList', 'load', 'save', 'to_variable', 'no_grad',
'DataParallel'
'grad', 'LayerList', 'load', 'save', 'SaveLoadConfig', 'to_variable',
'no_grad', 'DataParallel'
]
__all__ += [
......@@ -50,6 +50,7 @@ from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS
from ..fluid.dygraph.base import grad #DEFINE_ALIAS
from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS
from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS
from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS
from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS
from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS
......
......@@ -19,10 +19,7 @@ from . import model_summary
from . import model
from .model import *
from .model_summary import summary
from .dygraph_layer_patch import monkey_patch_layer
logger.setup_logger()
__all__ = ['callbacks'] + model.__all__ + ['summary']
monkey_patch_layer()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
import paddle.fluid as fluid
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.framework import _current_expected_place as _get_device
def monkey_patch_layer():
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
        '''
        Set parameters from ``stat_dict``. All the parameters will be reset
        by the tensors in ``stat_dict``.

        This API will be deprecated. Please use ``set_state_dict`` instead.

        Parameters:
            stat_dict(dict) : Dict containing all the parameters.
            include_sublayers(bool, optional) : If True, also include the
                parameters from sublayers. Default: True.
            use_structured_name(bool, optional) : If True, use the structured
                name as the key; otherwise, use the parameter name as the key.
                Default: True.
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid

                with fluid.dygraph.guard():
                    emb = fluid.dygraph.Embedding([10, 10])

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                    emb.load_dict(para_state_dict)
        '''
def _check_match(key, param):
state = stat_dict.get(key, None)
if state is None:
                raise ValueError(
                    "{} is not found in the provided file.".format(key))
if list(state.shape) != list(param.shape):
raise ValueError(
"{} receives a shape {}, but the expected shape is {}.".
format(key, list(state.shape), list(param.shape)))
return param, state
matched_param_state = []
for key, param in self.state_dict().items():
key_name = key if use_structured_name else param.name
try:
match_res = _check_match(key_name, param)
matched_param_state.append(match_res)
except ValueError as err:
warnings.warn(("Skip loading for {}. ".format(key) + str(err)))
if in_dygraph_mode():
for param, state in matched_param_state:
param.set_value(state)
else:
def _set_var(var, ndarray):
t = fluid.global_scope().find_var(var.name).get_tensor()
p = t._place()
if p.is_cpu_place():
place = fluid.CPUPlace()
elif p.is_cuda_pinned_place():
place = fluid.CUDAPinnedPlace()
else:
p = fluid.core.Place()
p.set_place(t._place())
place = fluid.CUDAPlace(p.gpu_device_id())
t.set(ndarray, place)
executor = fluid.Executor(_get_device())._default_executor
# restore parameter states
fluid.core._create_loaded_parameter(
[param for param, state in matched_param_state],
fluid.global_scope(), executor)
for param, state in matched_param_state:
_set_var(param, state)
setattr(fluid.dygraph.Layer, 'load_dict', load_dict)
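A short sketch of the mismatch handling implemented above: when an entry in stat_dict has a shape different from the target parameter, _check_match raises ValueError and load_dict downgrades it to a warning and skips the entry. The layer and the corrupted shape are illustrative:

import warnings
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])
    state_dict = emb.state_dict()

    # Replace the embedding weight with a wrongly shaped array to hit the skip path.
    key = list(state_dict.keys())[0]
    state_dict[key] = np.zeros([3, 3], dtype='float32')

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        emb.load_dict(state_dict)

    # One warning of the form "Skip loading for <key>. <key> receives a shape
    # [3, 3], but the expected shape is [10, 10]."
    print(len(caught), str(caught[0].message))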
......@@ -14,7 +14,6 @@
from ..fluid.dygraph.jit import save #DEFINE_ALIAS
from ..fluid.dygraph.jit import load #DEFINE_ALIAS
from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS
from ..fluid.dygraph.jit import TracedLayer #DEFINE_ALIAS
from ..fluid.dygraph.jit import set_code_level #DEFINE_ALIAS
from ..fluid.dygraph.jit import set_verbosity #DEFINE_ALIAS
......@@ -23,6 +22,6 @@ from ..fluid.dygraph import ProgramTranslator #DEFINE_ALIAS
from ..fluid.dygraph.io import TranslatedLayer #DEFINE_ALIAS
__all__ = [
'save', 'load', 'SaveLoadConfig', 'TracedLayer', 'to_static',
'ProgramTranslator', 'TranslatedLayer', 'set_code_level', 'set_verbosity'
'save', 'load', 'TracedLayer', 'to_static', 'ProgramTranslator',
'TranslatedLayer', 'set_code_level', 'set_verbosity'
]
......@@ -109,7 +109,7 @@ class _LRScheduler(object):
"""
self.keys = ['last_epoch', 'last_lr']
def set_dict(self, state_dict):
def set_state_dict(self, state_dict):
"""
Loads the schedulers state.
"""
......@@ -126,8 +126,8 @@ class _LRScheduler(object):
"There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict"
)
# alias for set_dict
set_state_dict = set_dict
# alias for set_state_dict
set_dict = set_state_dict
def get_lr(self):
# calculate by python float
......
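The rename keeps state round-tripping unchanged, and set_dict survives as an alias. A hedged sketch; StepLR is assumed to be one of the concrete _LRScheduler subclasses in this module, and the hyperparameters are illustrative:

import paddle

scheduler = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.1, step_size=3)
for _ in range(5):
    scheduler.step()

state = scheduler.state_dict()  # holds the keys set above: 'last_epoch', 'last_lr'

restored = paddle.optimizer.lr_scheduler.StepLR(learning_rate=0.1, step_size=3)
restored.set_state_dict(state)  # primary name after this change
# restored.set_dict(state)      # equivalent, via the alias kept above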