From 29861846c17051a5d910f7c22f47dc336f0ef386 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Tue, 8 Sep 2020 10:35:27 +0800 Subject: [PATCH] Update 2.0 Save/Load API names/arguments/doc examples (#27138) * Update set_dict method name & add aliases (#26700) * update set_dict method name & add aliases * fix var name error * fix alias formats * use set_state_dict in unittest * add decorator solve compatible problem * polish decorator * replace layer set_state_dict by patched method * remove import monkey path layer * fix import function error * add unittest for coverage * Support load state dict form `inference model` format save result (#26718) * support load infer model format state dict * add unittests * remove keep name table * recolve circle inport * fix compatible problem * recover unittest * polish doc and comment * Change jit.save/load configs to config & update code examples (#27056) * change configs to config & update examples * fix deprecate decorator conflict --- paddle/http.log | 0 python/paddle/__init__.py | 1 + python/paddle/fluid/dygraph/checkpoint.py | 168 +++-- python/paddle/fluid/dygraph/io.py | 18 +- python/paddle/fluid/dygraph/jit.py | 607 ++++++++++-------- python/paddle/fluid/dygraph/layers.py | 137 ++-- .../fluid/dygraph/learning_rate_scheduler.py | 5 +- python/paddle/fluid/dygraph/parallel.py | 74 +-- python/paddle/fluid/framework.py | 20 + python/paddle/fluid/optimizer.py | 36 +- .../unittests/test_directory_migration.py | 2 +- .../unittests/test_imperative_save_load.py | 41 +- .../unittests/test_imperative_save_load_v2.py | 23 + .../tests/unittests/test_jit_save_load.py | 19 - .../test_load_state_dict_from_old_format.py | 165 +++++ .../fluid/tests/unittests/test_optimizer.py | 4 +- python/paddle/framework/__init__.py | 5 +- python/paddle/hapi/__init__.py | 3 - python/paddle/hapi/dygraph_layer_patch.py | 103 --- python/paddle/jit/__init__.py | 5 +- python/paddle/optimizer/lr_scheduler.py | 6 +- 21 files changed, 814 insertions(+), 628 
deletions(-) create mode 100644 paddle/http.log create mode 100644 python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py delete mode 100644 python/paddle/hapi/dygraph_layer_patch.py diff --git a/paddle/http.log b/paddle/http.log new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index a7c7041b596..d5793eb424a 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -232,6 +232,7 @@ from .framework import grad #DEFINE_ALIAS from .framework import no_grad #DEFINE_ALIAS from .framework import save #DEFINE_ALIAS from .framework import load #DEFINE_ALIAS +from .framework import SaveLoadConfig #DEFINE_ALIAS from .framework import DataParallel #DEFINE_ALIAS from .framework import NoamDecay #DEFINE_ALIAS diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index f4d68a798ef..30ded1f7eda 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -16,13 +16,16 @@ from __future__ import print_function import os import collections +import functools from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase, _varbase_creator, _dygraph_tracer import pickle import six from . import learning_rate_scheduler import warnings from .. 
import core -from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME, _load_persistable_vars +from .base import guard +from paddle.fluid.dygraph.jit import SaveLoadConfig, deprecate_save_load_configs +from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers __all__ = [ 'save_dygraph', @@ -30,6 +33,37 @@ __all__ = [ ] +# NOTE(chenweihang): deprecate load_dygraph's argument keep_name_table, +# ensure compatibility when user still use keep_name_table argument +def deprecate_keep_name_table(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + def __warn_and_build_configs__(keep_name_table): + warnings.warn( + "The argument `keep_name_table` has deprecated, please use `SaveLoadConfig.keep_name_table`.", + DeprecationWarning) + config = SaveLoadConfig() + config.keep_name_table = keep_name_table + return config + + # deal with arg `keep_name_table` + if len(args) > 1 and isinstance(args[1], bool): + args = list(args) + args[1] = __warn_and_build_configs__(args[1]) + # deal with kwargs + elif 'keep_name_table' in kwargs: + kwargs['config'] = __warn_and_build_configs__(kwargs[ + 'keep_name_table']) + kwargs.pop('keep_name_table') + else: + # do nothing + pass + + return func(*args, **kwargs) + + return wrapper + + @dygraph_only def save_dygraph(state_dict, model_path): ''' @@ -100,17 +134,28 @@ def save_dygraph(state_dict, model_path): # TODO(qingqing01): remove dygraph_only to support loading static model. # maybe need to unify the loading interface after 2.0 API is ready. -#@dygraph_only -def load_dygraph(model_path, keep_name_table=False): +# @dygraph_only +@deprecate_save_load_configs +@deprecate_keep_name_table +def load_dygraph(model_path, config=None): ''' :api_attr: imperative - Load parameter state_dict from disk. + Load parameter state dict from disk. + + .. 
note:: + Due to some historical reasons, if you load ``state_dict`` from the saved + result of `paddle.io.save_inference_model`, the structured variable name + cannot be restored. You need to set the argument `use_structured_name=False` + when using `Layer.set_state_dict` later. Args: - model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams') - keep_name_table(bool, optional) : Whether keep structed name to parameter name conversion table in output dict. - Default : False + model_path(str) : The file prefix store the state_dict. + (The path should Not contain suffix '.pdparams') + config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` + object that specifies additional configuration options, these options + are for compatibility with ``jit.save/io.save_inference_model`` formats. + Default None. Returns: state_dict(dict) : the dict store the state_dict @@ -118,23 +163,27 @@ def load_dygraph(model_path, keep_name_table=False): Examples: .. 
code-block:: python - import paddle.fluid as fluid + import paddle - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + paddle.disable_static() - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") + emb = paddle.nn.Embedding([10, 10]) - adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000), - parameter_list = emb.parameters() ) - state_dict = adam.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") - para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy") + scheduler = paddle.optimizer.lr_scheduler.NoamLR( + d_model=0.01, warmup_steps=100, verbose=True) + adam = paddle.optimizer.Adam( + learning_rate=scheduler, + parameters=emb.parameters()) + state_dict = adam.state_dict() + paddle.save(state_dict, "paddle_dy") - ''' + para_state_dict, opti_state_dict = paddle.load("paddle_dy") + ''' + # deal with argument `model_path` model_prefix = model_path if model_prefix.endswith(".pdparams"): model_prefix = model_prefix[:-9] @@ -145,66 +194,45 @@ def load_dygraph(model_path, keep_name_table=False): opti_dict = None params_file_path = model_prefix + ".pdparams" opti_file_path = model_prefix + ".pdopt" + + # deal with argument `configs` + configs = config + if configs is None: + configs = SaveLoadConfig() + if not os.path.exists(params_file_path) and not os.path.exists( opti_file_path): - # Load state dict by `jit.save` save format - # TODO(chenweihang): [Why not support `io.save_infernece_model` save format here] + # Load state dict by `jit.save/io.save_inference_model` save format + # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] # The model saved by `save_inference_model` does not completely correspond to # the information required by the `state_dict` under the dygraph. 
- # Although we reluctantly restore the `state_dict` in some scenarios, - # this may not be complete and there are some limitations, so this function - # will be considered later. The limitations include: - # 1. `save_inference_model` not save structured name, we need to remind - # the user to configure the `use_structured_name` argument when `set_dict`, - # but this argument is currently not public - # 2. if `save_inference_model` save all persistable variables in a single file, - # user need to give the variable name list to load `state_dict` + # `save_inference_model` not save structured name, we need to remind + # the user to configure the `use_structured_name` argument when `set_state_dict` + # NOTE(chenweihang): `jit.save` doesn't save optimizer state # 1. check model path if not os.path.isdir(model_prefix): raise ValueError("Model saved directory '%s' is not exists." % model_prefix) - # 2. load `__variables.info__` - var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME) - if not os.path.exists(var_info_path): - raise RuntimeError( - "No target can be loaded. Now only supports loading `state_dict` from " - "the result saved by `imperative.save` and `imperative.jit.save`." - ) - with open(var_info_path, 'rb') as f: - extra_var_info = pickle.load(f) - # 3. load `__variables__` - # TODO(chenweihang): now only supports loading from default save format: - # - all persistable vars saved in one file named `__variables__` - # for other case, we may need to modify the arguments of this API - var_file_path = os.path.join(model_prefix, VARIABLE_FILENAME) - if not os.path.exists(var_file_path): - raise RuntimeError( - "The parameter file to be loaded was not found. " - "Now only supports loading from the default save format, " - "and does not support custom params_filename and " - "save parameters separately.") - # 4. 
load all persistable vars - load_var_list = [] - for name in sorted(extra_var_info): - var = _varbase_creator(name=name, persistable=True) - load_var_list.append(var) - _dygraph_tracer().trace_op( - type='load_combine', - inputs={}, - outputs={'Out': load_var_list}, - attrs={'file_path': var_file_path}) - # 5. construct state_dict - para_dict = dict() - for var in load_var_list: - structured_name = extra_var_info[var.name].get('structured_name', - None) - if structured_name is None: - raise RuntimeError( - "Cannot find saved variable (%s)'s structured name in saved model.", - var.name) - para_dict[structured_name] = var.numpy() - # NOTE: `jit.save` doesn't save optimizer state + + # 2. load program desc & construct _ProgramHolder + programs = _construct_program_holders(model_path, + configs.model_filename) + + # 3. load layer parameters & buffers + # NOTE: using fluid.dygraph.guard() here will cause import error in py2 + with guard(): + persistable_var_dict = _construct_params_and_buffers( + model_prefix, + programs, + configs.separate_params, + configs.params_filename, + append_suffix=False) + + # 4. 
construct state_dict + para_dict = dict() + for var_name in persistable_var_dict: + para_dict[var_name] = persistable_var_dict[var_name].numpy() else: # Load state dict by `save_dygraph` save format para_dict = {} @@ -213,7 +241,7 @@ def load_dygraph(model_path, keep_name_table=False): para_dict = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') - if not keep_name_table and "StructuredToParameterName@@" in para_dict: + if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict: del para_dict["StructuredToParameterName@@"] if os.path.exists(opti_file_path): diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 7f3d450a49c..7c17bb07c0c 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -488,6 +488,15 @@ def _load_persistable_vars(model_path, return load_var_dict +# NOTE(chenweihang): to adapt paddle.load to get state_dict +def _remove_varname_suffix(var_dict, program_holder): + no_suffix_var_dict = dict() + for var_name in var_dict: + no_suffix_name = program_holder._suffix_varname_dict[var_name] + no_suffix_var_dict[no_suffix_name] = var_dict[var_name] + return no_suffix_var_dict + + def _construct_program_holders(model_path, model_filename=None): # make sure the path has been checked program_holder_dict = dict() @@ -517,7 +526,8 @@ def _construct_program_holders(model_path, model_filename=None): def _construct_params_and_buffers(model_path, programs, separate_params=False, - params_filename=None): + params_filename=None, + append_suffix=True): var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) if os.path.exists(var_info_path): var_dict = _load_persistable_vars(model_path, var_info_path, @@ -526,6 +536,10 @@ def _construct_params_and_buffers(model_path, else: var_dict = _load_persistable_vars_by_program( model_path, programs['forward'], params_filename) + + if not append_suffix: + var_dict = _remove_varname_suffix(var_dict, 
programs['forward']) + return var_dict @@ -685,7 +699,7 @@ class TranslatedLayer(layers.Layer): # 1. load program desc & construct _ProgramHolder programs = _construct_program_holders(model_path, model_filename) - # 2. load layer parameters & parameter attributes + # 2. load layer parameters & buffers persistable_vars = _construct_params_and_buffers( model_path, programs, separate_params, params_filename) diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index f67b79b91f7..d520fe61888 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -17,6 +17,7 @@ from __future__ import print_function import os import pickle import warnings +import functools import six import paddle @@ -228,63 +229,60 @@ class SaveLoadConfig(object): .. code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() # use SaveLoadconfig when saving model 
model_path = "simplenet.example.model" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.model_filename = "__simplenet__" - fluid.dygraph.jit.save( + config = paddle.SaveLoadConfig() + config.model_filename = "__simplenet__" + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) + config=config) 2. Using ``SaveLoadConfig`` when loading model .. code-block:: python - import numpy as np - import paddle.fluid as fluid + import paddle # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # use SaveLoadconfig when loading model model_path = "simplenet.example.model" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.model_filename = "__simplenet__" - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) + config = paddle.SaveLoadConfig() + config.model_filename = "__simplenet__" + infer_net = paddle.jit.load(model_path, config=config) # inference - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ @@ -293,6 +291,8 @@ class SaveLoadConfig(object): self._model_filename = None self._params_filename = None self._separate_params = False + # used for `paddle.load` + self._keep_name_table = False # NOTE: Users rarely use following configs, so these configs are not open to users, # reducing user learning costs, but we retain the configuration capabilities @@ -322,51 +322,46 @@ class SaveLoadConfig(object): Examples: .. 
code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) - loss = fluid.layers.mean(z) + loss = paddle.tensor.mean(z) return z, loss # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out, loss = net(x) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() # use SaveLoadconfig.output_spec model_path = "simplenet.example.model.output_spec" - configs = fluid.dygraph.jit.SaveLoadConfig() - # only keep the predicted output in saved model, discard loss - configs.output_spec = [out] - - fluid.dygraph.jit.save( + config = paddle.SaveLoadConfig() + config.output_spec = [out] + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) + config=config) - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) - # only have the predicted output + infer_net = paddle.jit.load(model_path) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ return self._output_spec @@ -393,52 +388,47 @@ class SaveLoadConfig(object): Examples: .. 
code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() - - model_path = "simplenet.example.model.model_filename" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.model_filename = "__simplenet__" + adam.step() + adam.clear_grad() # saving with configs.model_filename - fluid.dygraph.jit.save( + model_path = "simplenet.example.model.model_filename" + config = paddle.SaveLoadConfig() + config.model_filename = "__simplenet__" + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) - # [result] the saved model directory contains: - # __simplenet__ __variables__ __variables.info__ + config=config) # loading with configs.model_filename - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + infer_net = paddle.jit.load(model_path, config=config) + x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ 
return self._model_filename @@ -463,52 +453,48 @@ class SaveLoadConfig(object): Examples: .. code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() model_path = "simplenet.example.model.params_filename" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.params_filename = "__params__" + config = paddle.SaveLoadConfig() + config.params_filename = "__params__" # saving with configs.params_filename - fluid.dygraph.jit.save( + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) - # [result] the saved model directory contains: - # __model__ __params__ __variables.info__ + config=config) # loading with configs.params_filename - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + infer_net = paddle.jit.load(model_path, config=config) + x = paddle.randn([4, 8], 
'float32') pred = infer_net(x) """ return self._params_filename @@ -542,52 +528,50 @@ class SaveLoadConfig(object): Examples: .. code-block:: python - import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - class SimpleNet(fluid.dygraph.Layer): + class SimpleNet(nn.Layer): def __init__(self, in_size, out_size): super(SimpleNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(in_size, out_size) - @declarative + @paddle.jit.to_static def forward(self, x): y = self._linear(x) z = self._linear(y) return z # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static() # train model net = SimpleNet(8, 8) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + adam = opt.Adam(learning_rate=0.1, parameters=net.parameters()) + x = paddle.randn([4, 8], 'float32') for i in range(10): out = net(x) - loss = fluid.layers.mean(out) + loss = paddle.tensor.mean(out) loss.backward() - adam.minimize(loss) - net.clear_gradients() + adam.step() + adam.clear_grad() model_path = "simplenet.example.model.separate_params" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.separate_params = True + config = paddle.jit.SaveLoadConfig() + config.separate_params = True # saving with configs.separate_params - fluid.dygraph.jit.save( + paddle.jit.save( layer=net, model_path=model_path, - input_spec=[x], - configs=configs) + config=config) # [result] the saved model directory contains: # linear_0.b_0 linear_0.w_0 __model__ __variables.info__ # loading with configs.params_filename - infer_net = fluid.dygraph.jit.load(model_path, configs=configs) - x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32')) + infer_net = paddle.jit.load(model_path, config=config) + 
x = paddle.randn([4, 8], 'float32') pred = infer_net(x) """ return self._separate_params @@ -600,9 +584,70 @@ class SaveLoadConfig(object): % type(value)) self._separate_params = value + @property + def keep_name_table(self): + """ + Configures whether to keep the ``structured_name -> parameter_name`` dict in the loaded state dict. + This dict is the debugging information saved when calling `paddle.save`. + It is generally only used for debugging and does not affect the actual training or inference. + By default, it will not be retained in the `paddle.load` result. Default: False. + + .. note:: + Only used for ``paddle.load``. + + Examples: + .. code-block:: python + + import paddle + + paddle.disable_static() + + linear = paddle.nn.Linear(5, 1) + + state_dict = linear.state_dict() + paddle.save(state_dict, "paddle_dy") + + configs = paddle.SaveLoadConfig() + configs.keep_name_table = True + para_state_dict, _ = paddle.load("paddle_dy", configs) + + print(para_state_dict) + # the name_table is 'StructuredToParameterName@@' + # {'bias': array([0.], dtype=float32), + # 'StructuredToParameterName@@': + # {'bias': u'linear_0.b_0', 'weight': u'linear_0.w_0'}, + # 'weight': array([[ 0.04230034], + # [-0.1222527 ], + # [ 0.7392676 ], + # [-0.8136974 ], + # [ 0.01211023]], dtype=float32)} + """ + return self._keep_name_table + + @keep_name_table.setter + def keep_name_table(self, value): + if not isinstance(value, bool): + raise TypeError( + "The SaveLoadConfig.keep_name_table should be bool value, but received input's type is %s." 
+ % type(value)) + self._keep_name_table = value + +# NOTE(chenweihang): change jit.save/load argument `configs` to `config` +def deprecate_save_load_configs(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if 'configs' in kwargs: + kwargs['config'] = kwargs['configs'] + kwargs.pop('configs') + return func(*args, **kwargs) + + return wrapper + + +@deprecate_save_load_configs @switch_to_static_graph -def save(layer, model_path, input_spec=None, configs=None): +def save(layer, model_path, input_spec=None, config=None): """ Saves input declarative Layer as :ref:`api_imperative_TranslatedLayer` format model, which can be used for inference or fine-tuning after loading. @@ -627,7 +672,7 @@ def save(layer, model_path, input_spec=None, configs=None): It is the example inputs that will be passed to saved TranslatedLayer's forward function. If None, all input variables of the original Layer's forward function would be the inputs of the saved model. Default None. - configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object + config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies additional configuration options. Default None. Returns: None @@ -636,65 +681,76 @@ def save(layer, model_path, input_spec=None, configs=None): .. 
code-block:: python import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label - class LinearNet(fluid.dygraph.Layer): - def __init__(self, in_size, out_size): + def __len__(self): + return self.num_samples + + class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) - @declarative + @paddle.jit.to_static def forward(self, x): return self._linear(x) + def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) + # enable dygraph mode - fluid.enable_dygraph() + place = paddle.CPUPlace() + paddle.disable_static(place) - # create network - net = LinearNet(784, 1) - adam = 
fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) - # train - for data in train_loader(): - img, label = data - label.stop_gradient = True + # 1. train & save model. - cost = net(img) + # create network + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) - avg_loss.backward() - adam.minimize(avg_loss) - net.clear_gradients() + # train + train(layer, loader, loss_fn, adam) - # save model + # save model_path = "linear.example.model" - fluid.dygraph.jit.save( - layer=net, - model_path=model_path, - input_spec=[img]) + paddle.jit.save(layer, model_path) """ def get_inout_spec(all_vars, target_vars, return_name=False): @@ -728,6 +784,7 @@ def save(layer, model_path, input_spec=None, configs=None): "The input layer of paddle.jit.save should be 'Layer', but received layer type is %s." % type(layer)) + configs = config if configs is None: configs = SaveLoadConfig() @@ -819,8 +876,9 @@ def save(layer, model_path, input_spec=None, configs=None): pickle.dump(extra_var_info, f, protocol=2) +@deprecate_save_load_configs @dygraph_only -def load(model_path, configs=None): +def load(model_path, config=None): """ :api_attr: imperative @@ -837,7 +895,7 @@ def load(model_path, configs=None): Args: model_path (str): The directory path where the model is saved. 
- configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies + config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` object that specifies additional configuration options. Default None. Returns: @@ -849,122 +907,126 @@ def load(model_path, configs=None): .. code-block:: python import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label - class LinearNet(fluid.dygraph.Layer): - def __init__(self, in_size, out_size): + def __len__(self): + return self.num_samples + + class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) - @declarative + @paddle.jit.to_static def forward(self, x): return self._linear(x) + def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, 
label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) + # enable dygraph mode - fluid.enable_dygraph() + place = paddle.CPUPlace() + paddle.disable_static(place) # 1. train & save model. + # create network - net = LinearNet(784, 1) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) + # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + # train - for data in train_loader(): - img, label = data - label.stop_gradient = True + train(layer, loader, loss_fn, adam) - cost = net(img) + # save + model_path = "linear.example.model" + paddle.jit.save(layer, model_path) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) + # 2. load model - avg_loss.backward() - adam.minimize(avg_loss) - net.clear_gradients() + # load + loaded_layer = paddle.jit.load(model_path) - model_path = "linear.example.model" - fluid.dygraph.jit.save( - layer=net, - model_path=model_path, - input_spec=[img]) - - # 2. load model & inference - # load model - infer_net = fluid.dygraph.jit.load(model_path) # inference - x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) - pred = infer_net(x) + loaded_layer.eval() + x = paddle.randn([1, IMAGE_SIZE], 'float32') + pred = loaded_layer(x) - # 3. 
load model & fine-tune - # load model - train_net = fluid.dygraph.jit.load(model_path) - train_net.train() - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=train_net.parameters()) - # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) # fine-tune - for data in train_loader(): - img, label = data - label.stop_gradient = True - - cost = train_net(img) + loaded_layer.train() + adam = opt.Adam(learning_rate=0.001, parameters=loaded_layer.parameters()) + train(loaded_layer, loader, loss_fn, adam) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - - avg_loss.backward() - adam.minimize(avg_loss) - train_net.clear_gradients() 2. Load model saved by :ref:`api_fluid_io_save_inference_model` then performing and fine-tune training. .. code-block:: python import numpy as np + import paddle import paddle.fluid as fluid + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label - img = fluid.data(name='img', shape=[None, 784], dtype='float32') + def __len__(self): + return 
self.num_samples + + image = fluid.data(name='image', shape=[None, 784], dtype='float32') label = fluid.data(name='label', shape=[None, 1], dtype='int64') - pred = fluid.layers.fc(input=img, size=10, act='softmax') + pred = fluid.layers.fc(input=image, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=pred, label=label) avg_loss = fluid.layers.mean(loss) @@ -975,9 +1037,15 @@ def load(model_path, configs=None): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - loader = fluid.io.DataLoader.from_generator( - feed_list=[img, label], capacity=5, iterable=True) - loader.set_batch_generator(random_batch_reader(), places=place) + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + feed_list=[image, label], + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) # 1. train and save inference model for data in loader(): @@ -988,39 +1056,42 @@ def load(model_path, configs=None): model_path = "fc.example.model" fluid.io.save_inference_model( - model_path, ["img"], [pred], exe) + model_path, ["image"], [pred], exe) + + # 2. load model # enable dygraph mode - fluid.enable_dygraph() + paddle.disable_static(place) + + # load + fc = paddle.jit.load(model_path) - # 2. load model & inference - fc = fluid.dygraph.jit.load(model_path) - x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) + # inference + fc.eval() + x = paddle.randn([1, IMAGE_SIZE], 'float32') pred = fc(x) - # 3. 
load model & fine-tune - fc = fluid.dygraph.jit.load(model_path) + # fine-tune fc.train() - sgd = fluid.optimizer.SGD(learning_rate=0.001, - parameter_list=fc.parameters()) - - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator( - random_batch_reader(), places=place) - - for data in train_loader(): - img, label = data - label.stop_gradient = True - - cost = fc(img) - - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - - avg_loss.backward() - sgd.minimize(avg_loss) + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters()) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = fc(image) + loss = loss_fn(out, label) + loss.backward() + adam.step() + adam.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) """ - return TranslatedLayer._construct(model_path, configs) + return TranslatedLayer._construct(model_path, config) @dygraph_only diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 1ef719b9da1..7075024369f 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -29,6 +29,9 @@ from .layer_object_helper import LayerObjectHelper from .base import program_desc_tracing_guard, param_guard from paddle.fluid import framework from ..param_attr import ParamAttr +from paddle.fluid.executor import Executor, global_scope +from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.framework import _current_expected_place as _get_device __all__ = ['Layer'] @@ -797,7 +800,7 @@ class Layer(core.Layer): raise ValueError( "super(YourLayer, self).__init__() should be called first") if len(self._loaddict_holder) > 0: - assert value.name in 
self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format( + assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format( value.name) value.set_value(self._loaddict_holder[value.name]) @@ -943,12 +946,13 @@ class Layer(core.Layer): destination = destination_temp return destination - def set_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): + @framework.deprecate_stat_dict + def set_state_dict(self, + state_dict, + include_sublayers=True, + use_structured_name=True): ''' - Set parameters and persistable buffers from stat_dict. All the parameters and buffers will be reset by the tensor in the stat_dict + Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters and persistable buffers. @@ -961,72 +965,67 @@ class Layer(core.Layer): Examples: .. code-block:: python - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding([10, 10]) - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.set_dict( para_state_dict ) + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") + + para_state_dict, _ = paddle.load("paddle_dy") - ''' - self.load_dict( - stat_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) + emb.set_state_dict(para_state_dict) - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): ''' - Set parameters and persistable buffers from stat_dict. All the parameters and persistabl buffers will be reset by the tensor in the stat_dict - This api will be Deprecated. 
Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters and persistable buffers. - include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. - Default: True - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.load_dict( para_state_dict ) - - ''' - - inner_state_dict = self.state_dict() + def _check_match(key, param): + state = state_dict.get(key, None) + if state is None: + raise ValueError("{} is not found in the provided dict.".format( + key)) + if list(state.shape) != list(param.shape): + raise ValueError( + "{} receives a shape {}, but the expected shape is {}.". + format(key, list(state.shape), list(param.shape))) + return param, state + + matched_param_state = [] + for key, param in self.state_dict().items(): + key_name = key if use_structured_name else param.name + try: + match_res = _check_match(key_name, param) + matched_param_state.append(match_res) + except ValueError as err: + warnings.warn(("Skip loading for {}. 
".format(key) + str(err))) + + if in_dygraph_mode(): + for param, state in matched_param_state: + param.set_value(state) + else: - for name, param_or_buffer in inner_state_dict.items(): - key_name = name if use_structured_name else param_or_buffer.name - if key_name in stat_dict: - param_or_buffer.set_value(stat_dict[key_name]) - else: - raise RuntimeError( - "Parameter or persistable buffer not found, Can't find [ {} ] in stat_dict" - "use_structured_name is set to [{}]".format( - key_name, use_structured_name)) - unused_para_list = [] - for k, v in stat_dict.items(): - if k not in inner_state_dict: - unused_para_list.append(k) - if len(unused_para_list) > 0: - warnings.warn( - "Variables [ {} ] are not used, because not included in layers state_dict". - format(" ".join(unused_para_list))) + def _set_var(var, ndarray): + t = global_scope().find_var(var.name).get_tensor() + p = t._place() + if p.is_cpu_place(): + place = core.CPUPlace() + elif p.is_cuda_pinned_place(): + place = core.CUDAPinnedPlace() + else: + p = core.Place() + p.set_place(t._place()) + place = core.CUDAPlace(p.gpu_device_id()) + t.set(ndarray, place) + + executor = Executor(_get_device())._default_executor + # restore parameter states + core._create_loaded_parameter( + [param for param, state in matched_param_state], + global_scope(), executor) + for param, state in matched_param_state: + _set_var(param, state) + + # [aliases] Compatible with old method names + set_dict = set_state_dict + load_dict = set_state_dict diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index cce383be7e2..cd6af6fd5b5 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -97,7 +97,7 @@ class LearningRateDecay(object): """ self.keys = ['step_num'] - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): """ Loads the schedulers state. 
""" @@ -114,6 +114,9 @@ class LearningRateDecay(object): "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" ) + # [aliases] Compatible with old method names + set_dict = set_state_dict + def step(self): raise NotImplementedError() diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 5ecc713ddca..472022bced7 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -587,12 +587,13 @@ class DataParallel(layers.Layer): include_sublayers=include_sublayers, structured_name_prefix=structured_name_prefix) - def set_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): + @framework.deprecate_stat_dict + def set_state_dict(self, + state_dict, + include_sublayers=True, + use_structured_name=True): ''' - Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict + Set parameters of self._layers from state_dict. All the parameters of self._layers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters @@ -605,62 +606,27 @@ class DataParallel(layers.Layer): Examples: .. code-block:: python - import paddle.fluid as fluid - with fluid.dygraph.guard(): - strategy=fluid.dygraph.prepare_context() - emb = fluid.dygraph.Embedding([10, 10]) - emb = fluid.dygraph.DataParallel(emb, strategy) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.set_dict( para_state_dict ) + import paddle - ''' - - self._layers.set_dict( - stat_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) - - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): - ''' - Set parameters of self._layers from stat_dict. 
All the parameters of self._layers will be reset by the tensor in the stat_dict - - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters - include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. - Default: True - Returns: - None + paddle.disable_static() - Examples: - .. code-block:: python + emb = paddle.nn.Embedding([10, 10]) + emb = fluid.dygraph.DataParallel(emb, strategy) - import paddle.fluid as fluid - with fluid.dygraph.guard(): - strategy=fluid.dygraph.prepare_context() - emb = fluid.dygraph.Embedding([10, 10]) - emb = fluid.dygraph.DataParallel(emb, strategy) + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") + para_state_dict, _ = paddle.load("paddle_dy") - emb.load_dict( para_state_dict ) + emb.set_state_dict(para_state_dict) ''' - self._layers.load_dict( - stat_dict, + self._layers.set_state_dict( + state_dict, include_sublayers=include_sublayers, use_structured_name=use_structured_name) + + # [aliases] Compatible with old method names + set_dict = set_state_dict + load_dict = set_state_dict diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index fc4e91aad4f..5281df9ead1 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -36,6 +36,7 @@ from . import core from . 
import unique_name import paddle.version as fluid_version import warnings +import functools __all__ = [ 'Program', @@ -238,6 +239,25 @@ def _fake_interface_only_(func): return __impl__ +# NOTE(chenweihang): There is argument name typo (stat_dict, correct name is state_dict) +# in fluid api Layer.set_dict, Optimizer.load, in order to correct the argument without +# introducing compatibility issues, add this decorator +# NOTE(chenweihang): not using `wrap_decorator` here is because `wrap_decorator` will +# move kwargs to args, which doesn't work in this decorate case +def deprecate_stat_dict(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if 'stat_dict' in kwargs: + warnings.warn( + "The argument `stat_dict` has deprecated, please change it to `state_dict`.", + DeprecationWarning) + kwargs['state_dict'] = kwargs['stat_dict'] + kwargs.pop('stat_dict') + return func(*args, **kwargs) + + return wrapper + + dygraph_not_support = wrap_decorator(_dygraph_not_support_) dygraph_only = wrap_decorator(_dygraph_only_) fake_interface_only = wrap_decorator(_fake_interface_only_) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 9e2d77df777..8b37cfef389 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -170,7 +170,7 @@ class Optimizer(object): return state_dict @framework.dygraph_only - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): ''' Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be changed. @@ -182,20 +182,22 @@ class Optimizer(object): Examples: .. 
code-block:: python - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding([10, 10]) - state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") - adam = fluid.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000), + adam = paddle.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000), parameter_list=emb.parameters()) - state_dict = adam.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + state_dict = adam.state_dict() - para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy") + para_state_dict, opti_state_dict = paddle.load("paddle_dy") - adam.set_dict(opti_state_dict) + adam.set_state_dict(opti_state_dict) ''' from paddle.optimizer.lr_scheduler import _LRScheduler @@ -257,6 +259,9 @@ class Optimizer(object): tensor.set(load_para_np, framework._current_expected_place()) + # [aliases] Compatible with old method names + set_dict = set_state_dict + def get_opti_var_name_list(self): return self._opti_name_list @@ -4595,15 +4600,16 @@ class RecomputeOptimizer(Optimizer): ), "_checkpoints should be a list of Variable or a list of String" self._checkpoints = checkpoints - def load(self, stat_dict): + @framework.deprecate_stat_dict + def load(self, state_dict): """ - :api_attr: Static Graph + :api_attr: Static Graph load function is not supported by Recompute Optimizer for now. :return: None Args: - stat_dict: the dict load by load_persistable method + state_dict: the dict load by load_persistable method Examples: .. 
code-block:: python @@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer): sgd = fluid.optimizer.RecomputeOptimizer(sgd) sgd._set_checkpoints([fc_1, pred]) try: - stat_dict = {} - sgd.load(stat_dict) + state_dict = {} + sgd.load(state_dict) except NotImplementedError as e: print(cpt.get_exception_message(e)) """ diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 2919ec5e9ca..529fff158c5 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -43,7 +43,7 @@ class TestDirectory(unittest.TestCase): 'paddle.distributed.prepare_context', 'paddle.DataParallel', 'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static', 'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer', - 'paddle.jit.save', 'paddle.jit.load', 'paddle.jit.SaveLoadConfig', + 'paddle.jit.save', 'paddle.jit.load', 'paddle.SaveLoadConfig', 'paddle.NoamDecay', 'paddle.PiecewiseDecay', 'paddle.NaturalExpDecay', 'paddle.ExponentialDecay', 'paddle.InverseTimeDecay', 'paddle.PolynomialDecay', diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 48aea3a584d..22e19efcb58 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -374,8 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase): adam._learning_rate.step_num = 0 para_state_dict, opti_state_dict = paddle.load("./test_dy") - print(opti_state_dict['LR_Scheduler']) - adam.set_dict(opti_state_dict) + adam.set_state_dict(opti_state_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): @@ -393,7 +392,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(para_state_dict) + 
ptb_model.set_state_dict(stat_dict=para_state_dict) state_dict = ptb_model.state_dict() @@ -483,7 +482,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, LearningRateDecay): adam._learning_rate.step_num = 0 - adam.set_dict(self.opti_dict) + adam.set_state_dict(self.opti_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): if isinstance(v, core.VarBase): @@ -500,7 +499,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(self.state_dict) + ptb_model.set_state_dict(self.state_dict) state_dict = ptb_model.state_dict() @@ -593,7 +592,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, LearningRateDecay): adam._learning_rate.step_num = 0 - adam.set_dict(np_opti_dict) + adam.set_state_dict(np_opti_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): @@ -613,7 +612,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(np_state_dict) + ptb_model.set_state_dict(np_state_dict) state_dict = ptb_model.state_dict() @@ -656,8 +655,8 @@ class TestDygraphPtbRnn(unittest.TestCase): last_hidden = None last_cell = None - adam.set_dict(self.opti_dict) - ptb_model.set_dict(self.state_dict) + adam.set_state_dict(self.opti_dict) + ptb_model.set_state_dict(self.state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -745,8 +744,8 @@ class TestDygraphPtbRnn(unittest.TestCase): last_cell = None state_dict, opti_dict = fluid.load_dygraph("./test_dy") - adam.set_dict(opti_dict) - ptb_model.set_dict(state_dict) + adam.set_state_dict(opti_dict) + ptb_model.set_state_dict(state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -849,8 +848,8 @@ class TestDygraphPtbRnn(unittest.TestCase): for k, v in self.state_dict.items(): np_state_dict[k] = v.numpy() - adam.set_dict(np_opti_dict) - ptb_model.set_dict(np_state_dict) + 
adam.set_state_dict(np_opti_dict) + ptb_model.set_state_dict(np_state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64') @@ -912,6 +911,22 @@ class TestDygraphPtbRnn(unittest.TestCase): para_state_dict, opti_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdopt')) + def test_load_compatible_with_keep_name_table(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy'), True) + self.assertTrue(para_state_dict != None) + self.assertTrue(opti_state_dict == None) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy'), keep_name_table=True) + self.assertTrue(para_state_dict != None) + self.assertTrue(opti_state_dict == None) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index e81d1c8610f..2ce6372cd41 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -917,6 +917,29 @@ class TestDygraphPtbRnn(unittest.TestCase): para_state_dict, opti_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdopt')) + def test_no_state_in_input_dict(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict.pop('weight') + + emb.set_state_dict(para_state_dict) + + def test_state_shape_mismatch(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + 
paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict['weight'] = np.expand_dims( + para_state_dict['weight'], axis=-1) + + emb.set_state_dict(para_state_dict) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 87b6e76a6d0..f7fcc1ff561 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -183,25 +183,6 @@ class TestJitSaveLoad(unittest.TestCase): with self.assertRaises(ValueError): model_dict, _ = fluid.dygraph.load_dygraph(model_path) - def test_load_dygraph_no_var_info(self): - model_path = "model.test_jit_save_load.no_var_info" - self.train_and_save_model(model_path=model_path) - # remove `__variables.info__` - var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) - os.remove(var_info_path) - new_layer = LinearNet(784, 1) - with self.assertRaises(RuntimeError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) - - def test_load_dygraph_not_var_file(self): - model_path = "model.test_jit_save_load.no_var_file" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.params_filename = "__params__" - self.train_and_save_model(model_path=model_path, configs=configs) - new_layer = LinearNet(784, 1) - with self.assertRaises(RuntimeError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) - class LinearNetMultiInput(fluid.dygraph.Layer): def __init__(self, in_size, out_size): diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py new file mode 100644 index 00000000000..ed1939dbe27 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -0,0 +1,165 @@ +# Copyright (c) 2020 PaddlePaddle 
Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import six +import unittest +import numpy as np + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from test_imperative_base import new_program_scope + + +def convolutional_neural_network(img): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") + prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') + return prediction + + +def static_train_net(img, label): + prediction = convolutional_neural_network(img) + + loss = fluid.layers.cross_entropy(input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + + optimizer = fluid.optimizer.SGD(learning_rate=0.001) + optimizer.minimize(avg_loss) + + return prediction, avg_loss + + +class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): + def setUp(self): + self.seed = 90 + self.epoch_num = 1 + self.batch_size = 128 + self.batch_num = 10 + + def train_and_save_model(self): + with new_program_scope(): + startup_program = fluid.default_startup_program() + main_program = fluid.default_main_program() + + img = fluid.data( + name='img', 
shape=[None, 1, 28, 28], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + + prediction, avg_loss = static_train_net(img, label) + + place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + exe = fluid.Executor(place) + + feeder = fluid.DataFeeder(feed_list=[img, label], place=place) + exe.run(startup_program) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=100), + batch_size=self.batch_size) + + for _ in range(0, self.epoch_num): + for batch_id, data in enumerate(train_reader()): + exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[avg_loss]) + + if batch_id > self.batch_num: + break + + static_param_dict = {} + for param in fluid.default_main_program().all_parameters(): + static_param_dict[param.name] = fluid.executor._fetch_var( + param.name) + + fluid.io.save_inference_model( + self.save_dirname, ["img"], [prediction], + exe, + model_filename=self.model_filename, + params_filename=self.params_filename) + + return static_param_dict + + def check_load_state_dict(self, orig_dict, load_dict): + for var_name, value in six.iteritems(orig_dict): + self.assertTrue(np.array_equal(value, load_dict[var_name])) + + def test_load_default(self): + self.save_dirname = "static_mnist.load_state_dict.default" + self.model_filename = None + self.params_filename = None + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.separate_params = True + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_model_filename(self): + self.save_dirname = "static_mnist.load_state_dict.model_filename" + self.model_filename = "static_mnist.model" + self.params_filename = None + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.separate_params = True + configs.model_filename = 
self.model_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_param_filename(self): + self.save_dirname = "static_mnist.load_state_dict.param_filename" + self.model_filename = None + self.params_filename = "static_mnist.params" + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.params_filename = self.params_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_model_and_param_filename(self): + self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename" + self.model_filename = "static_mnist.model" + self.params_filename = "static_mnist.params" + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.params_filename = self.params_filename + configs.model_filename = self.model_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 2e6e516aa2e..91d70522331 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -832,8 +832,8 @@ class TestRecomputeOptimizer(unittest.TestCase): recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer) recompute_optimizer._set_checkpoints([b1_out]) try: - stat_dict = {} - recompute_optimizer.load(stat_dict) + state_dict = {} + recompute_optimizer.load(state_dict) except NotImplementedError as e: self.assertEqual( "load function is not supported by Recompute Optimizer for now", diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index af788874191..f33e4e0fca8 100644 
--- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -20,8 +20,8 @@ __all__ = [ ] __all__ += [ - 'grad', 'LayerList', 'load', 'save', 'to_variable', 'no_grad', - 'DataParallel' + 'grad', 'LayerList', 'load', 'save', 'SaveLoadConfig', 'to_variable', + 'no_grad', 'DataParallel' ] __all__ += [ @@ -50,6 +50,7 @@ from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS from ..fluid.dygraph.base import grad #DEFINE_ALIAS from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS +from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS diff --git a/python/paddle/hapi/__init__.py b/python/paddle/hapi/__init__.py index fb16b829d5b..67965de5d97 100644 --- a/python/paddle/hapi/__init__.py +++ b/python/paddle/hapi/__init__.py @@ -19,10 +19,7 @@ from . import model_summary from . import model from .model import * from .model_summary import summary -from .dygraph_layer_patch import monkey_patch_layer logger.setup_logger() __all__ = ['callbacks'] + model.__all__ + ['summary'] - -monkey_patch_layer() diff --git a/python/paddle/hapi/dygraph_layer_patch.py b/python/paddle/hapi/dygraph_layer_patch.py deleted file mode 100644 index e3a2948b693..00000000000 --- a/python/paddle/hapi/dygraph_layer_patch.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -import paddle.fluid as fluid -from paddle.fluid.framework import in_dygraph_mode -from paddle.fluid.framework import _current_expected_place as _get_device - - -def monkey_patch_layer(): - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): - ''' - Set parameters from stat_dict. All the parameters will be reset by the - tensor in the stat_dict - - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters - include_sublayers(bool, optional) : If true, also include the - parameters from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name - as key, otherwise, use parameter name as key. Default: True - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - emb.load_dict( para_state_dict ) - - ''' - - def _check_match(key, param): - state = stat_dict.get(key, None) - if state is None: - raise ValueError( - "{} is not found in the providing file.".format(key)) - if list(state.shape) != list(param.shape): - raise ValueError( - "{} receives a shape {}, but the expected shape is {}.". 
- format(key, list(state.shape), list(param.shape))) - return param, state - - matched_param_state = [] - for key, param in self.state_dict().items(): - key_name = key if use_structured_name else param.name - try: - match_res = _check_match(key_name, param) - matched_param_state.append(match_res) - except ValueError as err: - warnings.warn(("Skip loading for {}. ".format(key) + str(err))) - - if in_dygraph_mode(): - for param, state in matched_param_state: - param.set_value(state) - else: - - def _set_var(var, ndarray): - t = fluid.global_scope().find_var(var.name).get_tensor() - p = t._place() - if p.is_cpu_place(): - place = fluid.CPUPlace() - elif p.is_cuda_pinned_place(): - place = fluid.CUDAPinnedPlace() - else: - p = fluid.core.Place() - p.set_place(t._place()) - place = fluid.CUDAPlace(p.gpu_device_id()) - t.set(ndarray, place) - - executor = fluid.Executor(_get_device())._default_executor - # restore parameter states - fluid.core._create_loaded_parameter( - [param for param, state in matched_param_state], - fluid.global_scope(), executor) - for param, state in matched_param_state: - _set_var(param, state) - - setattr(fluid.dygraph.Layer, 'load_dict', load_dict) diff --git a/python/paddle/jit/__init__.py b/python/paddle/jit/__init__.py index 03299a3bb98..d04a65ad6ea 100644 --- a/python/paddle/jit/__init__.py +++ b/python/paddle/jit/__init__.py @@ -14,7 +14,6 @@ from ..fluid.dygraph.jit import save #DEFINE_ALIAS from ..fluid.dygraph.jit import load #DEFINE_ALIAS -from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.jit import TracedLayer #DEFINE_ALIAS from ..fluid.dygraph.jit import set_code_level #DEFINE_ALIAS from ..fluid.dygraph.jit import set_verbosity #DEFINE_ALIAS @@ -23,6 +22,6 @@ from ..fluid.dygraph import ProgramTranslator #DEFINE_ALIAS from ..fluid.dygraph.io import TranslatedLayer #DEFINE_ALIAS __all__ = [ - 'save', 'load', 'SaveLoadConfig', 'TracedLayer', 'to_static', - 'ProgramTranslator', 'TranslatedLayer', 
'set_code_level', 'set_verbosity' + 'save', 'load', 'TracedLayer', 'to_static', 'ProgramTranslator', + 'TranslatedLayer', 'set_code_level', 'set_verbosity' ] diff --git a/python/paddle/optimizer/lr_scheduler.py b/python/paddle/optimizer/lr_scheduler.py index 4ecaffb8fa5..61391704061 100644 --- a/python/paddle/optimizer/lr_scheduler.py +++ b/python/paddle/optimizer/lr_scheduler.py @@ -109,7 +109,7 @@ class _LRScheduler(object): """ self.keys = ['last_epoch', 'last_lr'] - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): """ Loads the schedulers state. """ @@ -126,8 +126,8 @@ class _LRScheduler(object): "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" ) - # alias for set_dict - set_state_dict = set_dict + # alias for set_state_dict + set_dict = set_state_dict def get_lr(self): # calculate by python float -- GitLab