diff --git a/paddle/http.log b/paddle/http.log
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index a7c7041b59610a6fc7a70bb83c9f5e0c776938c0..d5793eb424ab794e3e8af8ef2312aac927c272e5 100755
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -232,6 +232,7 @@ from .framework import grad  #DEFINE_ALIAS
from .framework import no_grad  #DEFINE_ALIAS
from .framework import save  #DEFINE_ALIAS
from .framework import load  #DEFINE_ALIAS
+from .framework import SaveLoadConfig  #DEFINE_ALIAS
from .framework import DataParallel  #DEFINE_ALIAS
from .framework import NoamDecay  #DEFINE_ALIAS
diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py
index f4d68a798efa26d43702aa1c555f6046f0e6a6a5..30ded1f7eda295bab5567a082ba1fa3989b55fa2 100644
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -16,13 +16,16 @@ from __future__ import print_function

import os
import collections
+import functools
from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase, _varbase_creator, _dygraph_tracer
import pickle
import six
from . import learning_rate_scheduler
import warnings
from .. import core
-from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME, _load_persistable_vars
+from .base import guard
+from paddle.fluid.dygraph.jit import SaveLoadConfig, deprecate_save_load_configs
+from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers

__all__ = [
    'save_dygraph',
@@ -30,6 +33,37 @@ __all__ = [
]


+# NOTE(chenweihang): deprecate load_dygraph's argument keep_name_table,
+# ensure compatibility when users still pass the keep_name_table argument
+def deprecate_keep_name_table(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        def __warn_and_build_configs__(keep_name_table):
+            warnings.warn(
+                "The argument `keep_name_table` has been deprecated, please use `SaveLoadConfig.keep_name_table`.",
+                DeprecationWarning)
+            config = SaveLoadConfig()
+            config.keep_name_table = keep_name_table
+            return config
+
+        # deal with arg `keep_name_table`
+        if len(args) > 1 and isinstance(args[1], bool):
+            args = list(args)
+            args[1] = __warn_and_build_configs__(args[1])
+        # deal with kwargs
+        elif 'keep_name_table' in kwargs:
+            kwargs['config'] = __warn_and_build_configs__(kwargs[
+                'keep_name_table'])
+            kwargs.pop('keep_name_table')
+        else:
+            # do nothing
+            pass
+
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
@dygraph_only
def save_dygraph(state_dict, model_path):
    '''
@@ -100,17 +134,28 @@ def save_dygraph(state_dict, model_path):

# TODO(qingqing01): remove dygraph_only to support loading static model.
# maybe need to unify the loading interface after 2.0 API is ready.
-#@dygraph_only
-def load_dygraph(model_path, keep_name_table=False):
+# @dygraph_only
+@deprecate_save_load_configs
+@deprecate_keep_name_table
+def load_dygraph(model_path, config=None):
    '''
    :api_attr: imperative

-    Load parameter state_dict from disk.
+    Load parameter state dict from disk.
+
+    .. note::
+        Due to some historical reasons, if you load ``state_dict`` from the saved
+        result of `paddle.io.save_inference_model`, the structured variable names
+        cannot be restored. You need to set the argument `use_structured_name=False`
+        when using `Layer.set_state_dict` later.
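+
+    For example, a minimal sketch of that workflow (``fc.example.model`` is a
+    hypothetical directory produced earlier by ``fluid.io.save_inference_model``;
+    ``separate_params`` matches that format's default layout):
+
+    .. code-block:: python
+
+        import paddle
+        import paddle.nn as nn
+
+        paddle.disable_static()
+
+        # build a layer matching the saved network
+        layer = nn.Linear(784, 10)
+
+        config = paddle.SaveLoadConfig()
+        config.separate_params = True  # save_inference_model's default layout
+        para_state_dict, _ = paddle.load("fc.example.model", config)
+
+        # the old format records no structured names, so match by parameter name
+        layer.set_state_dict(para_state_dict, use_structured_name=False)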
    Args:
-        model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams')
-        keep_name_table(bool, optional) : Whether keep structed name to parameter name conversion table in output dict.
-                                          Default : False
+        model_path(str) : The file prefix that stores the state_dict.
+            (The path should not contain the suffix '.pdparams')
+        config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig`
+            object that specifies additional configuration options. These options
+            are provided for compatibility with the ``jit.save/io.save_inference_model``
+            formats. Default None.

    Returns:
        state_dict(dict) : the dict store the state_dict

@@ -118,23 +163,27 @@ def load_dygraph(model_path, keep_name_table=False):
    Examples:
        .. code-block:: python

-            import paddle.fluid as fluid
+            import paddle

-            with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding([10, 10])
+            paddle.disable_static()

-                state_dict = emb.state_dict()
-                fluid.save_dygraph( state_dict, "paddle_dy")
+            emb = paddle.nn.Embedding([10, 10])

-                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
-                                             parameter_list = emb.parameters() )
-                state_dict = adam.state_dict()
-                fluid.save_dygraph( state_dict, "paddle_dy")
+            state_dict = emb.state_dict()
+            paddle.save(state_dict, "paddle_dy")

-                para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
+            scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+                d_model=0.01, warmup_steps=100, verbose=True)
+            adam = paddle.optimizer.Adam(
+                learning_rate=scheduler,
+                parameters=emb.parameters())
+            state_dict = adam.state_dict()
+            paddle.save(state_dict, "paddle_dy")

-    '''
+            para_state_dict, opti_state_dict = paddle.load("paddle_dy")
+    '''
+    # deal with argument `model_path`
    model_prefix = model_path
    if model_prefix.endswith(".pdparams"):
        model_prefix = model_prefix[:-9]
@@ -145,66 +194,45 @@ def load_dygraph(model_path, keep_name_table=False):
    opti_dict = None
    params_file_path = model_prefix + ".pdparams"
    opti_file_path = model_prefix + ".pdopt"
+
+    # deal with argument `configs`
+    configs = config
+    if configs is None:
+        configs = SaveLoadConfig()
+
    if not os.path.exists(params_file_path) and not os.path.exists(
            opti_file_path):
-        # Load state dict by `jit.save` save format
-        # TODO(chenweihang): [Why not support `io.save_infernece_model` save format here]
+        # Load state dict by `jit.save/io.save_inference_model` save format
+        # NOTE(chenweihang): [ Compatibility of save_inference_model save format ]
        # The model saved by `save_inference_model` does not completely correspond to
        # the information required by the `state_dict` under the dygraph.
-        # Although we reluctantly restore the `state_dict` in some scenarios,
-        # this may not be complete and there are some limitations, so this function
-        # will be considered later. The limitations include:
-        # 1. `save_inference_model` not save structured name, we need to remind
-        #    the user to configure the `use_structured_name` argument when `set_dict`,
-        #    but this argument is currently not public
-        # 2. if `save_inference_model` save all persistable variables in a single file,
-        #    user need to give the variable name list to load `state_dict`
+        # `save_inference_model` does not save structured names, so we need to remind
+        # the user to configure the `use_structured_name` argument when calling `set_state_dict`
+        # NOTE(chenweihang): `jit.save` doesn't save optimizer state

        # 1. check model path
        if not os.path.isdir(model_prefix):
            raise ValueError("Model saved directory '%s' is not exists." %
                             model_prefix)
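+        # For illustration, a minimal sketch of the call pattern this branch
+        # serves (the directory name is hypothetical and assumed to have been
+        # produced earlier by `fluid.io.save_inference_model`):
+        #
+        #     config = SaveLoadConfig()
+        #     config.params_filename = "__params__"  # only if a custom name was used
+        #     para_dict, _ = load_dygraph("fc.example.model", config)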
-        # 2. load `__variables.info__`
-        var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME)
-        if not os.path.exists(var_info_path):
-            raise RuntimeError(
-                "No target can be loaded. Now only supports loading `state_dict` from "
-                "the result saved by `imperative.save` and `imperative.jit.save`."
-            )
-        with open(var_info_path, 'rb') as f:
-            extra_var_info = pickle.load(f)
-        # 3. load `__variables__`
-        # TODO(chenweihang): now only supports loading from default save format:
-        # - all persistable vars saved in one file named `__variables__`
-        # for other case, we may need to modify the arguments of this API
-        var_file_path = os.path.join(model_prefix, VARIABLE_FILENAME)
-        if not os.path.exists(var_file_path):
-            raise RuntimeError(
-                "The parameter file to be loaded was not found. "
-                "Now only supports loading from the default save format, "
-                "and does not support custom params_filename and "
-                "save parameters separately.")
-        # 4. load all persistable vars
-        load_var_list = []
-        for name in sorted(extra_var_info):
-            var = _varbase_creator(name=name, persistable=True)
-            load_var_list.append(var)
-        _dygraph_tracer().trace_op(
-            type='load_combine',
-            inputs={},
-            outputs={'Out': load_var_list},
-            attrs={'file_path': var_file_path})
-        # 5. construct state_dict
-        para_dict = dict()
-        for var in load_var_list:
-            structured_name = extra_var_info[var.name].get('structured_name',
-                                                           None)
-            if structured_name is None:
-                raise RuntimeError(
-                    "Cannot find saved variable (%s)'s structured name in saved model.",
-                    var.name)
-            para_dict[structured_name] = var.numpy()
-        # NOTE: `jit.save` doesn't save optimizer state
+
+        # 2. load program desc & construct _ProgramHolder
+        programs = _construct_program_holders(model_path,
+                                              configs.model_filename)
+
+        # 3. load layer parameters & buffers
+        # NOTE: using fluid.dygraph.guard() here will cause import error in py2
+        with guard():
+            persistable_var_dict = _construct_params_and_buffers(
+                model_prefix,
+                programs,
+                configs.separate_params,
+                configs.params_filename,
+                append_suffix=False)
+
+        # 4.
construct state_dict + para_dict = dict() + for var_name in persistable_var_dict: + para_dict[var_name] = persistable_var_dict[var_name].numpy() else: # Load state dict by `save_dygraph` save format para_dict = {} @@ -213,7 +241,7 @@ def load_dygraph(model_path, keep_name_table=False): para_dict = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') - if not keep_name_table and "StructuredToParameterName@@" in para_dict: + if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict: del para_dict["StructuredToParameterName@@"] if os.path.exists(opti_file_path): diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 7f3d450a49c7d3fcc9ca1d3c2d7c5eb732671c6c..7c17bb07c0c24a15bd5faf93ab1cfafef83b0d6e 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -488,6 +488,15 @@ def _load_persistable_vars(model_path, return load_var_dict +# NOTE(chenweihang): to adapt paddle.load to get state_dict +def _remove_varname_suffix(var_dict, program_holder): + no_suffix_var_dict = dict() + for var_name in var_dict: + no_suffix_name = program_holder._suffix_varname_dict[var_name] + no_suffix_var_dict[no_suffix_name] = var_dict[var_name] + return no_suffix_var_dict + + def _construct_program_holders(model_path, model_filename=None): # make sure the path has been checked program_holder_dict = dict() @@ -517,7 +526,8 @@ def _construct_program_holders(model_path, model_filename=None): def _construct_params_and_buffers(model_path, programs, separate_params=False, - params_filename=None): + params_filename=None, + append_suffix=True): var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) if os.path.exists(var_info_path): var_dict = _load_persistable_vars(model_path, var_info_path, @@ -526,6 +536,10 @@ def _construct_params_and_buffers(model_path, else: var_dict = _load_persistable_vars_by_program( model_path, programs['forward'], params_filename) + + if not append_suffix: + var_dict = _remove_varname_suffix(var_dict, programs['forward']) + return var_dict @@ -685,7 +699,7 @@ class TranslatedLayer(layers.Layer): # 1. load program desc & construct _ProgramHolder programs = _construct_program_holders(model_path, model_filename) - # 2. load layer parameters & parameter attributes + # 2. load layer parameters & buffers persistable_vars = _construct_params_and_buffers( model_path, programs, separate_params, params_filename) diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index f67b79b91f7da235697d920cf0dfe376e88ab93e..d520fe61888cf3b11efc61d67ce566a3407dc6ff 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -17,6 +17,7 @@ from __future__ import print_function import os import pickle import warnings +import functools import six import paddle @@ -228,63 +229,60 @@ class SaveLoadConfig(object): .. 
code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() # use SaveLoadconfig when saving model model_path = "simplenet.example.model" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.model_filename = "__simplenet__" - fluid.dygraph.jit.save( + config = paddle.SaveLoadConfig() + config.model_filename = "__simplenet__" + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) + config=config) 2. Using ``SaveLoadConfig`` when loading model .. code-block:: python - import numpy as np - import paddle.fluid as fluid + import paddle # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # use SaveLoadconfig when loading model model_path = "simplenet.example.model" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.model_filename = "__simplenet__" - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + config = paddle.SaveLoadConfig() + config.model_filename = "__simplenet__" + infer_net = paddle.jit.load(model_path, config=config) # inference - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ @@ -293,6 +291,8 @@ class SaveLoadConfig(object): self._model_filename = None self._params_filename = None self._separate_params = False + # used for `paddle.load` + self._keep_name_table = False # NOTE: Users rarely use following configs, so these configs are not open to users, # reducing user learning costs, but we retain the configuration capabilities @@ -322,51 +322,46 @@ class SaveLoadConfig(object): Examples: .. 
code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) - loss = fluid.layers.mean(z) + loss = paddle.tensor.mean(z) return z, loss # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out, loss = net(x) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() # use SaveLoadconfig.output_spec model_path = "simplenet.example.model.output_spec" - configs = fluid.dygraph.jit.SaveLoadConfig() - # only keep the predicted output in saved model, discard loss - configs.output_spec = [out] - - fluid.dygraph.jit.save( + config = paddle.SaveLoadConfig() + config.output_spec = [out] + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) + config=config) - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) - # only have the predicted output + infer_net = paddle.jit.load(model_path) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ return self._output_spec @@ -393,52 +388,47 @@ class SaveLoadConfig(object): Examples: .. 
code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() - - model_path = "simplenet.example.model.model_filename" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.model_filename = "__simplenet__" + adam.step() + adam.clear_grad() # saving with configs.model_filename - fluid.dygraph.jit.save( + model_path = "simplenet.example.model.model_filename" + config = paddle.SaveLoadConfig() + config.model_filename = "__simplenet__" + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) - # [result] the saved model directory contains: - # __simplenet__ __variables__ __variables.info__ + config=config) # loading with configs.model_filename - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + infer_net = paddle.jit.load(model_path, config=config) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ return self._model_filename @@ -463,52 +453,48 @@ class SaveLoadConfig(object): Examples: .. 
code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() model_path = "simplenet.example.model.params_filename" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.params_filename = "__params__" + config = paddle.SaveLoadConfig() + config.params_filename = "__params__" # saving with configs.params_filename - fluid.dygraph.jit.save( + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) - # [result] the saved model directory contains: - # __model__ __params__ __variables.info__ + config=config) # loading with configs.params_filename - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + infer_net = paddle.jit.load(model_path, config=config) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ return self._params_filename @@ -542,52 +528,50 @@ class SaveLoadConfig(object): Examples: .. 
code-block:: python

-                import numpy as np
-                import paddle.fluid as fluid
-                from paddle.fluid.dygraph import Linear
-                from paddle.fluid.dygraph import declarative
+                import paddle
+                import paddle.nn as nn
+                import paddle.optimizer as opt

-                class SimpleNet(fluid.dygraph.Layer):
+                class SimpleNet(nn.Layer):
                    def __init__(self, in_size, out_size):
                        super(SimpleNet, self).__init__()
-                        self._linear = Linear(in_size, out_size)
+                        self._linear = nn.Linear(in_size, out_size)

-                    @declarative
+                    @paddle.jit.to_static
                    def forward(self, x):
                        y = self._linear(x)
                        z = self._linear(y)
                        return z

                # enable dygraph mode
-                fluid.enable_dygraph()
+                paddle.disable_static()

                # train model
                net = SimpleNet(8, 8)
-                adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
+                x = paddle.randn([4, 8], 'float32')
                for i in range(10):
                    out = net(x)
-                    loss = fluid.layers.mean(out)
+                    loss = paddle.tensor.mean(out)
                    loss.backward()
-                    adam.minimize(loss)
-                    net.clear_gradients()
+                    adam.step()
+                    adam.clear_grad()

                model_path = "simplenet.example.model.separate_params"
-                configs = fluid.dygraph.jit.SaveLoadConfig()
-                configs.separate_params = True
+                config = paddle.SaveLoadConfig()
+                config.separate_params = True

                # saving with configs.separate_params
-                fluid.dygraph.jit.save(
+                paddle.jit.save(
                    layer=net,
                    model_path=model_path,
-                    input_spec=[x],
+                    config=config)
                # [result] the saved model directory contains:
                # linear_0.b_0 linear_0.w_0 __model__ __variables.info__

                # loading with configs.params_filename
-                infer_net = fluid.dygraph.jit.load(model_path, configs=configs)
+                infer_net = paddle.jit.load(model_path, config=config)
-                x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
+                x = paddle.randn([4, 8], 'float32')
                pred = infer_net(x)
        """
        return self._separate_params

@@ -600,9 +584,70 @@ class SaveLoadConfig(object):
                % type(value))
        self._separate_params = value

+    @property
+    def keep_name_table(self):
+        """
+        Configures whether to keep the ``structured_name -> parameter_name`` dict in the loaded state dict.
+        This dict is the debugging information saved when calling `paddle.save`.
+        It is generally only used for debugging and does not affect the actual training or inference.
+        By default, it will not be retained in the `paddle.load` result. Default: False.
+
+        .. note::
+            Only used for ``paddle.load``.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+
+                paddle.disable_static()
+
+                linear = paddle.nn.Linear(5, 1)
+
+                state_dict = linear.state_dict()
+                paddle.save(state_dict, "paddle_dy")
+
+                configs = paddle.SaveLoadConfig()
+                configs.keep_name_table = True
+                para_state_dict, _ = paddle.load("paddle_dy", configs)
+
+                print(para_state_dict)
+                # the name_table is 'StructuredToParameterName@@'
+                # {'bias': array([0.], dtype=float32),
+                #  'StructuredToParameterName@@':
+                #     {'bias': u'linear_0.b_0', 'weight': u'linear_0.w_0'},
+                #  'weight': array([[ 0.04230034],
+                #        [-0.1222527 ],
+                #        [ 0.7392676 ],
+                #        [-0.8136974 ],
+                #        [ 0.01211023]], dtype=float32)}
+        """
+        return self._keep_name_table
+
+    @keep_name_table.setter
+    def keep_name_table(self, value):
+        if not isinstance(value, bool):
+            raise TypeError(
+                "The SaveLoadConfig.keep_name_table should be a bool value, but received input's type is %s."
+ % type(value)) + self._keep_name_table = value + +# NOTE(chenweihang): change jit.save/load argument `configs` to `config` +def deprecate_save_load_configs(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if 'configs' in kwargs: + kwargs['config'] = kwargs['configs'] + kwargs.pop('configs') + return func(*args, **kwargs) + + return wrapper + + +@deprecate_save_load_configs @switch_to_static_graph -def save(layer, model_path, input_spec=None, configs=None): +def save(layer, model_path, input_spec=None, config=None): """ Saves input declarative Layer as :ref:`api_imperative_TranslatedLayer` format model, which can be used for inference or fine-tuning after loading. @@ -627,7 +672,7 @@ def save(layer, model_path, input_spec=None, configs=None): It is the example inputs that will be passed to saved TranslatedLayer's forward function. If None, all input variables of the original Layer's forward function would be the inputs of the saved model. Default None. - configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object + config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies additional configuration options. Default None. Returns: None @@ -636,65 +681,76 @@ def save(layer, model_path, input_spec=None, configs=None): .. code-block:: python import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label - class LinearNet(fluid.dygraph.Layer): - def __init__(self, in_size, out_size): + def __len__(self): + return self.num_samples + + class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) - @declarative + @paddle.jit.to_static def forward(self, x): return self._linear(x) + def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) + # enable dygraph mode - fluid.enable_dygraph() + place = paddle.CPUPlace() + paddle.disable_static(place) - # create network - net = LinearNet(784, 1) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) - # train - for 
data in train_loader(): - img, label = data - label.stop_gradient = True + # 1. train & save model. - cost = net(img) + # create network + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) - avg_loss.backward() - adam.minimize(avg_loss) - net.clear_gradients() + # train + train(layer, loader, loss_fn, adam) - # save model + # save model_path = "linear.example.model" - fluid.dygraph.jit.save( - layer=net, - model_path=model_path, - input_spec=[img]) + paddle.jit.save(layer, model_path) """ def get_inout_spec(all_vars, target_vars, return_name=False): @@ -728,6 +784,7 @@ def save(layer, model_path, input_spec=None, configs=None): "The input layer of paddle.jit.save should be 'Layer', but received layer type is %s." % type(layer)) + configs = config if configs is None: configs = SaveLoadConfig() @@ -819,8 +876,9 @@ def save(layer, model_path, input_spec=None, configs=None): pickle.dump(extra_var_info, f, protocol=2) +@deprecate_save_load_configs @dygraph_only -def load(model_path, configs=None): +def load(model_path, config=None): """ :api_attr: imperative @@ -837,7 +895,7 @@ def load(model_path, configs=None): Args: model_path (str): The directory path where the model is saved. - configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies + config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies additional configuration options. Default None. Returns: @@ -849,122 +907,126 @@ def load(model_path, configs=None): .. 
code-block:: python import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label - class LinearNet(fluid.dygraph.Layer): - def __init__(self, in_size, out_size): + def __len__(self): + return self.num_samples + + class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) - @declarative + @paddle.jit.to_static def forward(self, x): return self._linear(x) + def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) + # enable dygraph mode - fluid.enable_dygraph() + place = paddle.CPUPlace() + paddle.disable_static(place) # 1. train & save model. + # create network - net = LinearNet(784, 1) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) + # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + # train - for data in train_loader(): - img, label = data - label.stop_gradient = True + train(layer, loader, loss_fn, adam) - cost = net(img) + # save + model_path = "linear.example.model" + paddle.jit.save(layer, model_path) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) + # 2. load model - avg_loss.backward() - adam.minimize(avg_loss) - net.clear_gradients() + # load + loaded_layer = paddle.jit.load(model_path) - model_path = "linear.example.model" - fluid.dygraph.jit.save( - layer=net, - model_path=model_path, - input_spec=[img]) - - # 2. load model & inference - # load model - infer_net = fluid.dygraph.jit.load(model_path) # inference - x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) - pred = infer_net(x) + loaded_layer.eval() + x = paddle.randn([1, IMAGE_SIZE], 'float32') + pred = loaded_layer(x) - # 3. 
load model & fine-tune - # load model - train_net = fluid.dygraph.jit.load(model_path) - train_net.train() - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=train_net.parameters()) - # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) # fine-tune - for data in train_loader(): - img, label = data - label.stop_gradient = True - - cost = train_net(img) + loaded_layer.train() + adam = opt.Adam(learning_rate=0.001, parameters=loaded_layer.parameters()) + train(loaded_layer, loader, loss_fn, adam) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - - avg_loss.backward() - adam.minimize(avg_loss) - train_net.clear_gradients() 2. Load model saved by :ref:`api_fluid_io_save_inference_model` then performing and fine-tune training. .. code-block:: python import numpy as np + import paddle import paddle.fluid as fluid + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label - img = fluid.data(name='img', shape=[None, 784], dtype='float32') + def __len__(self): + return self.num_samples + + image = fluid.data(name='image', shape=[None, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - pred = fluid.layers.fc(input=img, size=10, act='softmax') + pred = fluid.layers.fc(input=image, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=pred, label=label) avg_loss = fluid.layers.mean(loss) @@ -975,9 +1037,15 @@ def load(model_path, configs=None): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - loader = fluid.io.DataLoader.from_generator( - feed_list=[img, label], capacity=5, iterable=True) - loader.set_batch_generator(random_batch_reader(), places=place) + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + feed_list=[image, label], + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) # 1. train and save inference model for data in loader(): @@ -988,39 +1056,42 @@ def load(model_path, configs=None): model_path = "fc.example.model" fluid.io.save_inference_model( - model_path, ["img"], [pred], exe) + model_path, ["image"], [pred], exe) + + # 2. load model # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static(place) + + # load + fc = paddle.jit.load(model_path) - # 2. load model & inference - fc = fluid.dygraph.jit.load(model_path) - x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) + # inference + fc.eval() + x = paddle.randn([1, IMAGE_SIZE], 'float32') pred = fc(x) - # 3. 
load model & fine-tune
-            fc = fluid.dygraph.jit.load(model_path)
+            # fine-tune
            fc.train()
-            sgd = fluid.optimizer.SGD(learning_rate=0.001,
-                                      parameter_list=fc.parameters())
-
-            train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-            train_loader.set_batch_generator(
-                random_batch_reader(), places=place)
-
-            for data in train_loader():
-                img, label = data
-                label.stop_gradient = True
-
-                cost = fc(img)
-
-                loss = fluid.layers.cross_entropy(cost, label)
-                avg_loss = fluid.layers.mean(loss)
-
-                avg_loss.backward()
-                sgd.minimize(avg_loss)
+            loss_fn = nn.CrossEntropyLoss()
+            adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters())
+            loader = paddle.io.DataLoader(dataset,
+                places=place,
+                batch_size=BATCH_SIZE,
+                shuffle=True,
+                drop_last=True,
+                num_workers=2)
+            for epoch_id in range(EPOCH_NUM):
+                for batch_id, (image, label) in enumerate(loader()):
+                    out = fc(image)
+                    loss = loss_fn(out, label)
+                    loss.backward()
+                    adam.step()
+                    adam.clear_grad()
+                    print("Epoch {} batch {}: loss = {}".format(
+                        epoch_id, batch_id, np.mean(loss.numpy())))
    """
-    return TranslatedLayer._construct(model_path, configs)
+    return TranslatedLayer._construct(model_path, config)


@dygraph_only
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index 1ef719b9da187be659d9c898ec996b5ad0c0d8a6..7075024369f328b59ecac014b0960fc26f447ff2 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -29,6 +29,9 @@ from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard, param_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
+from paddle.fluid.executor import Executor, global_scope
+from paddle.fluid.framework import in_dygraph_mode
+from paddle.fluid.framework import _current_expected_place as _get_device

__all__ = ['Layer']

@@ -797,7 +800,7 @@ class Layer(core.Layer):
            raise ValueError(
                "super(YourLayer, self).__init__() should be called first")
        if len(self._loaddict_holder) > 0:
-            assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format(
+            assert value.name in self._loaddict_holder, "Parameter not found, Cannot find [ {} ] in state_dict".format(
                value.name)

            value.set_value(self._loaddict_holder[value.name])
@@ -943,12 +946,13 @@ class Layer(core.Layer):
            destination = destination_temp
        return destination

-    def set_dict(self,
-                 stat_dict,
-                 include_sublayers=True,
-                 use_structured_name=True):
+    @framework.deprecate_stat_dict
+    def set_state_dict(self,
+                       state_dict,
+                       include_sublayers=True,
+                       use_structured_name=True):
        '''
-        Set parameters and persistable buffers from stat_dict. All the parameters and buffers will be reset by the tensor in the stat_dict
+        Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensors in the state_dict.

        Parameters:
            state_dict(dict) : Dict contains all the parameters and persistable buffers.
@@ -961,72 +965,67 @@ class Layer(core.Layer):
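+        .. note::
+            Unlike the old implementation, entries that are missing from
+            ``state_dict`` or whose shapes do not match the target parameter
+            are skipped with a warning instead of raising an error. A minimal
+            sketch of that behavior (shapes chosen arbitrarily):
+
+            .. code-block:: python
+
+                import paddle
+
+                paddle.disable_static()
+
+                linear = paddle.nn.Linear(5, 1)
+                state = linear.state_dict()
+                state.pop('bias')  # simulate a missing entry
+
+                # warns "Skip loading for bias. ..." and restores only 'weight'
+                linear.set_state_dict(state)
+
        Examples:
            ..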
code-block:: python - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding([10, 10]) - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.set_dict( para_state_dict ) + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") + + para_state_dict, _ = paddle.load("paddle_dy") - ''' - self.load_dict( - stat_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) + emb.set_state_dict(para_state_dict) - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): ''' - Set parameters and persistable buffers from stat_dict. All the parameters and persistabl buffers will be reset by the tensor in the stat_dict - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters and persistable buffers. - include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. - Default: True - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.load_dict( para_state_dict ) - - ''' - - inner_state_dict = self.state_dict() + def _check_match(key, param): + state = state_dict.get(key, None) + if state is None: + raise ValueError("{} is not found in the provided dict.".format( + key)) + if list(state.shape) != list(param.shape): + raise ValueError( + "{} receives a shape {}, but the expected shape is {}.". + format(key, list(state.shape), list(param.shape))) + return param, state + + matched_param_state = [] + for key, param in self.state_dict().items(): + key_name = key if use_structured_name else param.name + try: + match_res = _check_match(key_name, param) + matched_param_state.append(match_res) + except ValueError as err: + warnings.warn(("Skip loading for {}. ".format(key) + str(err))) + + if in_dygraph_mode(): + for param, state in matched_param_state: + param.set_value(state) + else: - for name, param_or_buffer in inner_state_dict.items(): - key_name = name if use_structured_name else param_or_buffer.name - if key_name in stat_dict: - param_or_buffer.set_value(stat_dict[key_name]) - else: - raise RuntimeError( - "Parameter or persistable buffer not found, Can't find [ {} ] in stat_dict" - "use_structured_name is set to [{}]".format( - key_name, use_structured_name)) - unused_para_list = [] - for k, v in stat_dict.items(): - if k not in inner_state_dict: - unused_para_list.append(k) - if len(unused_para_list) > 0: - warnings.warn( - "Variables [ {} ] are not used, because not included in layers state_dict". 
- format(" ".join(unused_para_list))) + def _set_var(var, ndarray): + t = global_scope().find_var(var.name).get_tensor() + p = t._place() + if p.is_cpu_place(): + place = core.CPUPlace() + elif p.is_cuda_pinned_place(): + place = core.CUDAPinnedPlace() + else: + p = core.Place() + p.set_place(t._place()) + place = core.CUDAPlace(p.gpu_device_id()) + t.set(ndarray, place) + + executor = Executor(_get_device())._default_executor + # restore parameter states + core._create_loaded_parameter( + [param for param, state in matched_param_state], + global_scope(), executor) + for param, state in matched_param_state: + _set_var(param, state) + + # [aliases] Compatible with old method names + set_dict = set_state_dict + load_dict = set_state_dict diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index cce383be7e22cd066199f814db80a75367862b82..cd6af6fd5b575e8188088bde9e8944ab94c7e0f8 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -97,7 +97,7 @@ class LearningRateDecay(object): """ self.keys = ['step_num'] - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): """ Loads the schedulers state. """ @@ -114,6 +114,9 @@ class LearningRateDecay(object): "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" ) + # [aliases] Compatible with old method names + set_dict = set_state_dict + def step(self): raise NotImplementedError() diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 5ecc713ddcace7a6bed05ffa4282d9f5c1041a44..472022bced7e3e2dd11d301501ebaec75e5e412a 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -587,12 +587,13 @@ class DataParallel(layers.Layer): include_sublayers=include_sublayers, structured_name_prefix=structured_name_prefix) - def set_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): + @framework.deprecate_stat_dict + def set_state_dict(self, + state_dict, + include_sublayers=True, + use_structured_name=True): ''' - Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict + Set parameters of self._layers from state_dict. All the parameters of self._layers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters @@ -605,62 +606,27 @@ class DataParallel(layers.Layer): Examples: .. code-block:: python - import paddle.fluid as fluid - with fluid.dygraph.guard(): - strategy=fluid.dygraph.prepare_context() - emb = fluid.dygraph.Embedding([10, 10]) - emb = fluid.dygraph.DataParallel(emb, strategy) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.set_dict( para_state_dict ) + import paddle - ''' - - self._layers.set_dict( - stat_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) - - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): - ''' - Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict - - This api will be Deprecated. 
Please use set_dict
-
-        Parameters:
-            state_dict(dict) : Dict contains all the parameters
-            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
-            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
-                                                  Default: True
-        Returns:
-            None
+                paddle.disable_static()

-        Examples:
-            .. code-block:: python
+                strategy = paddle.distributed.prepare_context()
+                emb = paddle.nn.Embedding([10, 10])
+                emb = paddle.DataParallel(emb, strategy)

-                import paddle.fluid as fluid
-                with fluid.dygraph.guard():
-                    strategy=fluid.dygraph.prepare_context()
-                    emb = fluid.dygraph.Embedding([10, 10])
-                    emb = fluid.dygraph.DataParallel(emb, strategy)
+                state_dict = emb.state_dict()
+                paddle.save(state_dict, "paddle_dy")

-                    state_dict = emb.state_dict()
-                    fluid.save_dygraph( state_dict, "paddle_dy")
-
-                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+                para_state_dict, _ = paddle.load("paddle_dy")

-                    emb.load_dict( para_state_dict )
+                emb.set_state_dict(para_state_dict)

        '''
-        self._layers.load_dict(
-            stat_dict,
+        self._layers.set_state_dict(
+            state_dict,
            include_sublayers=include_sublayers,
            use_structured_name=use_structured_name)
+
+    # [aliases] Compatible with old method names
+    set_dict = set_state_dict
+    load_dict = set_state_dict
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index fc4e91aad4fff1db325e17828d26ccd94c164c3d..5281df9ead10acea5ae8656dcc4a0eed14fb3e83 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -36,6 +36,7 @@ from . import core
from . import unique_name
import paddle.version as fluid_version
import warnings
+import functools

__all__ = [
    'Program',
@@ -238,6 +239,25 @@ def _fake_interface_only_(func):
    return __impl__


+# NOTE(chenweihang): There is an argument name typo (stat_dict; the correct name
+# is state_dict) in the fluid APIs Layer.set_dict and Optimizer.load. In order to
+# correct the argument without introducing compatibility issues, we add this decorator
+# NOTE(chenweihang): we do not use `wrap_decorator` here because `wrap_decorator`
+# moves kwargs to args, which does not work in this decorator case
+def deprecate_stat_dict(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if 'stat_dict' in kwargs:
+            warnings.warn(
+                "The argument `stat_dict` has been deprecated, please change it to `state_dict`.",
+                DeprecationWarning)
+            kwargs['state_dict'] = kwargs['stat_dict']
+            kwargs.pop('stat_dict')
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
dygraph_not_support = wrap_decorator(_dygraph_not_support_)
dygraph_only = wrap_decorator(_dygraph_only_)
fake_interface_only = wrap_decorator(_fake_interface_only_)
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 9e2d77df777d761b6904d8916c7a35fb8e6bfaba..8b37cfef3890eace0ff5141eeb91d85e78f1c964 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -170,7 +170,7 @@ class Optimizer(object):
        return state_dict

    @framework.dygraph_only
-    def set_dict(self, state_dict):
+    def set_state_dict(self, state_dict):
        '''
        Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be changed.

        Args:
            state_dict(dict) : Dict contains all the parameters and states

        Returns:
            None
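+        .. note::
+            For compatibility, the old name remains available as an alias
+            (``set_dict = set_state_dict``), so existing call sites keep working.
+            A minimal sketch:
+
+            .. code-block:: python
+
+                import paddle
+
+                paddle.disable_static()
+
+                emb = paddle.nn.Embedding([10, 10])
+                adam = paddle.optimizer.Adam(
+                    learning_rate=0.001, parameters=emb.parameters())
+
+                opti_state_dict = adam.state_dict()
+                # both spellings reach the same method
+                adam.set_state_dict(opti_state_dict)
+                adam.set_dict(opti_state_dict)
+
        Examples:
            .. code-block:: python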
-                with fluid.dygraph.guard():
-                    emb = fluid.dygraph.Embedding([10, 10])
+                import paddle
+
+                paddle.disable_static()
+
+                emb = paddle.nn.Embedding([10, 10])

-                    state_dict = emb.state_dict()
-                    fluid.save_dygraph(state_dict, "paddle_dy")
+                state_dict = emb.state_dict()
+                paddle.save(state_dict, "paddle_dy")

-                    adam = fluid.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000),
-                                                parameter_list=emb.parameters())
-                    state_dict = adam.state_dict()
-                    fluid.save_dygraph(state_dict, "paddle_dy")
+                scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+                    d_model=0.01, warmup_steps=100)
+                adam = paddle.optimizer.Adam(
+                    learning_rate=scheduler,
+                    parameters=emb.parameters())
+                state_dict = adam.state_dict()

-                    para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
+                para_state_dict, opti_state_dict = paddle.load("paddle_dy")

-                    adam.set_dict(opti_state_dict)
+                adam.set_state_dict(opti_state_dict)
        '''
        from paddle.optimizer.lr_scheduler import _LRScheduler
@@ -257,6 +259,9 @@ class Optimizer(object):
                tensor.set(load_para_np, framework._current_expected_place())

+    # [aliases] Compatible with old method names
+    set_dict = set_state_dict
+
    def get_opti_var_name_list(self):
        return self._opti_name_list

@@ -4595,15 +4600,16 @@ class RecomputeOptimizer(Optimizer):
        ), "_checkpoints should be a list of Variable or a list of String"
        self._checkpoints = checkpoints

-    def load(self, stat_dict):
+    @framework.deprecate_stat_dict
+    def load(self, state_dict):
        """
-        :api_attr: Static Graph
+        :api_attr: Static Graph

        load function is not supported by Recompute Optimizer for now.
        :return: None

        Args:
-            stat_dict: the dict load by load_persistable method
+            state_dict: the dict loaded by the load_persistable method

        Examples:
            .. code-block:: python
@@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer):
            sgd = fluid.optimizer.RecomputeOptimizer(sgd)
            sgd._set_checkpoints([fc_1, pred])
            try:
-                stat_dict = {}
-                sgd.load(stat_dict)
+                state_dict = {}
+                sgd.load(state_dict)
            except NotImplementedError as e:
                print(cpt.get_exception_message(e))
        """
diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py
index 2919ec5e9ca97b1d59af46a54b2d702cb6de4a14..529fff158c55fc30248b9f5a88c8c615a8b55c79 100644
--- a/python/paddle/fluid/tests/unittests/test_directory_migration.py
+++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py
@@ -43,7 +43,7 @@ class TestDirectory(unittest.TestCase):
            'paddle.distributed.prepare_context', 'paddle.DataParallel',
            'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static',
            'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer',
-            'paddle.jit.save', 'paddle.jit.load', 'paddle.jit.SaveLoadConfig',
+            'paddle.jit.save', 'paddle.jit.load', 'paddle.SaveLoadConfig',
            'paddle.NoamDecay', 'paddle.PiecewiseDecay',
            'paddle.NaturalExpDecay', 'paddle.ExponentialDecay',
            'paddle.InverseTimeDecay', 'paddle.PolynomialDecay',
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 48aea3a584dd25667704b22d99d1074c481bb76c..22e19efcb58d19c41835565de2c8c01fe253702a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -374,8 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
                adam._learning_rate.step_num = 0

            para_state_dict, opti_state_dict = paddle.load("./test_dy")
-            print(opti_state_dict['LR_Scheduler'])
-            adam.set_state_dict(opti_state_dict)
+            adam.set_state_dict(opti_state_dict)

            opti_dict =
adam.state_dict() for k, v in opti_dict.items(): @@ -393,7 +392,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(para_state_dict) + ptb_model.set_state_dict(stat_dict=para_state_dict) state_dict = ptb_model.state_dict() @@ -483,7 +482,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, LearningRateDecay): adam._learning_rate.step_num = 0 - adam.set_dict(self.opti_dict) + adam.set_state_dict(self.opti_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): if isinstance(v, core.VarBase): @@ -500,7 +499,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(self.state_dict) + ptb_model.set_state_dict(self.state_dict) state_dict = ptb_model.state_dict() @@ -593,7 +592,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, LearningRateDecay): adam._learning_rate.step_num = 0 - adam.set_dict(np_opti_dict) + adam.set_state_dict(np_opti_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): @@ -613,7 +612,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(np_state_dict) + ptb_model.set_state_dict(np_state_dict) state_dict = ptb_model.state_dict() @@ -656,8 +655,8 @@ class TestDygraphPtbRnn(unittest.TestCase): last_hidden = None last_cell = None - adam.set_dict(self.opti_dict) - ptb_model.set_dict(self.state_dict) + adam.set_state_dict(self.opti_dict) + ptb_model.set_state_dict(self.state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -745,8 +744,8 @@ class TestDygraphPtbRnn(unittest.TestCase): last_cell = None state_dict, opti_dict = fluid.load_dygraph("./test_dy") - adam.set_dict(opti_dict) - ptb_model.set_dict(state_dict) + adam.set_state_dict(opti_dict) + ptb_model.set_state_dict(state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -849,8 +848,8 @@ class TestDygraphPtbRnn(unittest.TestCase): for k, v in self.state_dict.items(): np_state_dict[k] = v.numpy() - adam.set_dict(np_opti_dict) - ptb_model.set_dict(np_state_dict) + adam.set_state_dict(np_opti_dict) + ptb_model.set_state_dict(np_state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64') @@ -912,6 +911,22 @@ class TestDygraphPtbRnn(unittest.TestCase): para_state_dict, opti_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdopt')) + def test_load_compatible_with_keep_name_table(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy'), True) + self.assertTrue(para_state_dict != None) + self.assertTrue(opti_state_dict == None) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy'), keep_name_table=True) + self.assertTrue(para_state_dict != None) + self.assertTrue(opti_state_dict == None) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index e81d1c8610f6bebffadf930b67dc14a4a418ef05..2ce6372cd41c3986e8a5157f0d420c206e5a84ee 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ 
b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -917,6 +917,29 @@ class TestDygraphPtbRnn(unittest.TestCase): para_state_dict, opti_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdopt')) + def test_no_state_in_input_dict(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict.pop('weight') + + emb.set_state_dict(para_state_dict) + + def test_state_shape_mismatch(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict['weight'] = np.expand_dims( + para_state_dict['weight'], axis=-1) + + emb.set_state_dict(para_state_dict) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 87b6e76a6d0ab7f5fba7c4526734d81475e1540e..f7fcc1ff561b90dc1b78a67ffbe7c047ed06d0e9 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -183,25 +183,6 @@ class TestJitSaveLoad(unittest.TestCase): with self.assertRaises(ValueError): model_dict, _ = fluid.dygraph.load_dygraph(model_path) - def test_load_dygraph_no_var_info(self): - model_path = "model.test_jit_save_load.no_var_info" - self.train_and_save_model(model_path=model_path) - # remove `__variables.info__` - var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) - os.remove(var_info_path) - new_layer = LinearNet(784, 1) - with self.assertRaises(RuntimeError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) - - def test_load_dygraph_not_var_file(self): - model_path = "model.test_jit_save_load.no_var_file" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.params_filename = "__params__" - self.train_and_save_model(model_path=model_path, configs=configs) - new_layer = LinearNet(784, 1) - with self.assertRaises(RuntimeError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) - class LinearNetMultiInput(fluid.dygraph.Layer): def __init__(self, in_size, out_size): diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1939dbe279f28883d9e33178f1cfa256140e33 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -0,0 +1,165 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import os +import six +import unittest +import numpy as np + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from test_imperative_base import new_program_scope + + +def convolutional_neural_network(img): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") + prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') + return prediction + + +def static_train_net(img, label): + prediction = convolutional_neural_network(img) + + loss = fluid.layers.cross_entropy(input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + + optimizer = fluid.optimizer.SGD(learning_rate=0.001) + optimizer.minimize(avg_loss) + + return prediction, avg_loss + + +class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): + def setUp(self): + self.seed = 90 + self.epoch_num = 1 + self.batch_size = 128 + self.batch_num = 10 + + def train_and_save_model(self): + with new_program_scope(): + startup_program = fluid.default_startup_program() + main_program = fluid.default_main_program() + + img = fluid.data( + name='img', shape=[None, 1, 28, 28], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + + prediction, avg_loss = static_train_net(img, label) + + place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + exe = fluid.Executor(place) + + feeder = fluid.DataFeeder(feed_list=[img, label], place=place) + exe.run(startup_program) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=100), + batch_size=self.batch_size) + + for _ in range(0, self.epoch_num): + for batch_id, data in enumerate(train_reader()): + exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[avg_loss]) + + if batch_id > self.batch_num: + break + + static_param_dict = {} + for param in fluid.default_main_program().all_parameters(): + static_param_dict[param.name] = fluid.executor._fetch_var( + param.name) + + fluid.io.save_inference_model( + self.save_dirname, ["img"], [prediction], + exe, + model_filename=self.model_filename, + params_filename=self.params_filename) + + return static_param_dict + + def check_load_state_dict(self, orig_dict, load_dict): + for var_name, value in six.iteritems(orig_dict): + self.assertTrue(np.array_equal(value, load_dict[var_name])) + + def test_load_default(self): + self.save_dirname = "static_mnist.load_state_dict.default" + self.model_filename = None + self.params_filename = None + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.separate_params = True + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_model_filename(self): + self.save_dirname = "static_mnist.load_state_dict.model_filename" + self.model_filename = "static_mnist.model" + self.params_filename = None + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.separate_params = True + configs.model_filename = self.model_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def 
test_load_with_param_filename(self): + self.save_dirname = "static_mnist.load_state_dict.param_filename" + self.model_filename = None + self.params_filename = "static_mnist.params" + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.params_filename = self.params_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_model_and_param_filename(self): + self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename" + self.model_filename = "static_mnist.model" + self.params_filename = "static_mnist.params" + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.params_filename = self.params_filename + configs.model_filename = self.model_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 2e6e516aa2edde79e6524b4b35507ea95876ec53..91d705223316360b8c05954259724a5f7d246440 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -832,8 +832,8 @@ class TestRecomputeOptimizer(unittest.TestCase): recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer) recompute_optimizer._set_checkpoints([b1_out]) try: - stat_dict = {} - recompute_optimizer.load(stat_dict) + state_dict = {} + recompute_optimizer.load(state_dict) except NotImplementedError as e: self.assertEqual( "load function is not supported by Recompute Optimizer for now", diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index af788874191335ad31d1540bcc0db90cc12383c6..f33e4e0fca8727574bcd1970e26c6eaee2139a05 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -20,8 +20,8 @@ __all__ = [ ] __all__ += [ - 'grad', 'LayerList', 'load', 'save', 'to_variable', 'no_grad', - 'DataParallel' + 'grad', 'LayerList', 'load', 'save', 'SaveLoadConfig', 'to_variable', + 'no_grad', 'DataParallel' ] __all__ += [ @@ -50,6 +50,7 @@ from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS from ..fluid.dygraph.base import grad #DEFINE_ALIAS from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS +from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS diff --git a/python/paddle/hapi/__init__.py b/python/paddle/hapi/__init__.py index fb16b829d5b8e563be9b4e1e5db5d19dded23521..67965de5d97621e188acfa1e0384325b9ec5b7aa 100644 --- a/python/paddle/hapi/__init__.py +++ b/python/paddle/hapi/__init__.py @@ -19,10 +19,7 @@ from . import model_summary from . 
import model from .model import * from .model_summary import summary -from .dygraph_layer_patch import monkey_patch_layer logger.setup_logger() __all__ = ['callbacks'] + model.__all__ + ['summary'] - -monkey_patch_layer() diff --git a/python/paddle/hapi/dygraph_layer_patch.py b/python/paddle/hapi/dygraph_layer_patch.py deleted file mode 100644 index e3a2948b69305fcb08c14c850f5738ac46aea2be..0000000000000000000000000000000000000000 --- a/python/paddle/hapi/dygraph_layer_patch.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -import paddle.fluid as fluid -from paddle.fluid.framework import in_dygraph_mode -from paddle.fluid.framework import _current_expected_place as _get_device - - -def monkey_patch_layer(): - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): - ''' - Set parameters from stat_dict. All the parameters will be reset by the - tensor in the stat_dict - - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters - include_sublayers(bool, optional) : If true, also include the - parameters from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name - as key, otherwise, use parameter name as key. Default: True - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - emb.load_dict( para_state_dict ) - - ''' - - def _check_match(key, param): - state = stat_dict.get(key, None) - if state is None: - raise ValueError( - "{} is not found in the providing file.".format(key)) - if list(state.shape) != list(param.shape): - raise ValueError( - "{} receives a shape {}, but the expected shape is {}.". - format(key, list(state.shape), list(param.shape))) - return param, state - - matched_param_state = [] - for key, param in self.state_dict().items(): - key_name = key if use_structured_name else param.name - try: - match_res = _check_match(key_name, param) - matched_param_state.append(match_res) - except ValueError as err: - warnings.warn(("Skip loading for {}. 
".format(key) + str(err))) - - if in_dygraph_mode(): - for param, state in matched_param_state: - param.set_value(state) - else: - - def _set_var(var, ndarray): - t = fluid.global_scope().find_var(var.name).get_tensor() - p = t._place() - if p.is_cpu_place(): - place = fluid.CPUPlace() - elif p.is_cuda_pinned_place(): - place = fluid.CUDAPinnedPlace() - else: - p = fluid.core.Place() - p.set_place(t._place()) - place = fluid.CUDAPlace(p.gpu_device_id()) - t.set(ndarray, place) - - executor = fluid.Executor(_get_device())._default_executor - # restore parameter states - fluid.core._create_loaded_parameter( - [param for param, state in matched_param_state], - fluid.global_scope(), executor) - for param, state in matched_param_state: - _set_var(param, state) - - setattr(fluid.dygraph.Layer, 'load_dict', load_dict) diff --git a/python/paddle/jit/__init__.py b/python/paddle/jit/__init__.py index 03299a3bb9823d31c40ae4faab601ed89570c71e..d04a65ad6ea99ee2e2e67e47fd9d656f1572a02d 100644 --- a/python/paddle/jit/__init__.py +++ b/python/paddle/jit/__init__.py @@ -14,7 +14,6 @@ from ..fluid.dygraph.jit import save #DEFINE_ALIAS from ..fluid.dygraph.jit import load #DEFINE_ALIAS -from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.jit import TracedLayer #DEFINE_ALIAS from ..fluid.dygraph.jit import set_code_level #DEFINE_ALIAS from ..fluid.dygraph.jit import set_verbosity #DEFINE_ALIAS @@ -23,6 +22,6 @@ from ..fluid.dygraph import ProgramTranslator #DEFINE_ALIAS from ..fluid.dygraph.io import TranslatedLayer #DEFINE_ALIAS __all__ = [ - 'save', 'load', 'SaveLoadConfig', 'TracedLayer', 'to_static', - 'ProgramTranslator', 'TranslatedLayer', 'set_code_level', 'set_verbosity' + 'save', 'load', 'TracedLayer', 'to_static', 'ProgramTranslator', + 'TranslatedLayer', 'set_code_level', 'set_verbosity' ] diff --git a/python/paddle/optimizer/lr_scheduler.py b/python/paddle/optimizer/lr_scheduler.py index 4ecaffb8fa509bdc54067bb25f8d1b5191b7ac1b..61391704061bda7dfbad7252cbc04c0b7d6492a4 100644 --- a/python/paddle/optimizer/lr_scheduler.py +++ b/python/paddle/optimizer/lr_scheduler.py @@ -109,7 +109,7 @@ class _LRScheduler(object): """ self.keys = ['last_epoch', 'last_lr'] - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): """ Loads the schedulers state. """ @@ -126,8 +126,8 @@ class _LRScheduler(object): "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" ) - # alias for set_dict - set_state_dict = set_dict + # alias for set_state_dict + set_dict = set_state_dict def get_lr(self): # calculate by python float