From 7ff669735df5af36066c9e5b18421e72efd0ee59 Mon Sep 17 00:00:00 2001
From: GGBond8488 <33050871+GGBond8488@users.noreply.github.com>
Date: Tue, 3 Jan 2023 16:58:11 +0800
Subject: [PATCH] Move out Sequential and replace save_dygraph and load_dygraph (#48709)

* remove fluid.save_dygraph and fluid.load_dygraph; use paddle.save and paddle.load instead
* move Sequential to paddle.nn
* modify convert_call_func.py Sequential reference
* remove related unittests
* remove fluid.dygraph.Sequential
* test remove convert_call_func
* fix conflicts
* fix typo
* fix unittests
* fix sample_code
* fix unittest
* fix __init__
---
 python/paddle/fluid/__init__.py | 1 -
 python/paddle/fluid/dygraph/__init__.py | 7 -
 python/paddle/fluid/dygraph/checkpoint.py | 320 -----
 python/paddle/fluid/dygraph/container.py | 94 --
 python/paddle/fluid/optimizer.py | 8 +-
 .../fluid/tests/unittests/CMakeLists.txt | 1 -
 .../unittests/dygraph_to_static/test_bert.py | 9 +-
 .../unittests/dygraph_to_static/test_bmn.py | 7 +-
 .../unittests/dygraph_to_static/test_lac.py | 6 +-
 .../dygraph_to_static/test_mobile_net.py | 7 +-
 .../dygraph_to_static/test_resnet.py | 8 +-
 .../dygraph_to_static/test_resnet_v2.py | 9 +-
 .../dygraph_to_static/test_save_load.py | 5 +-
 .../dygraph_to_static/test_se_resnet.py | 8 +-
 .../dygraph_to_static/test_seq2seq.py | 5 +-
 .../dygraph_to_static/test_transformer.py | 10 +-
 .../dygraph_to_static/transformer_util.py | 14 +-
 .../fluid/tests/unittests/test_adam_op.py | 14 +-
 .../test_imperative_container_sequential.py | 8 +-
 .../unittests/test_imperative_save_load.py | 1036 -----------------
 .../unittests/test_imperative_save_load_v2.py | 6 +-
 .../tests/unittests/test_jit_save_load.py | 2 +-
 .../unittests/test_learning_rate_scheduler.py | 14 +-
 .../test_load_state_dict_from_old_format.py | 23 -
 python/paddle/fluid/tests/unittests/utils.py | 5 +-
 python/paddle/framework/io.py | 2 -
 python/paddle/hapi/model.py | 4 +-
 .../paddle/jit/dy2static/convert_call_func.py | 10 +-
 python/paddle/nn/__init__.py | 2 +-
 python/paddle/nn/layer/container.py | 73 ++
 python/paddle/tests/test_model.py | 2 +-
 tools/parallel_UT_rule.py | 1 -
 32 files changed, 159 insertions(+), 1562 deletions(-)
 delete mode 100644 python/paddle/fluid/dygraph/checkpoint.py
 delete mode 100644 python/paddle/fluid/dygraph/container.py
 delete mode 100644 python/paddle/fluid/tests/unittests/test_imperative_save_load.py

diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index eaf64e6dc6..f1e991127b 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -101,7 +101,6 @@ from . import install_check from .dygraph.layers import * from .dygraph.base import enable_dygraph, disable_dygraph from .io import save, load, load_program_state, set_program_state -from .dygraph.checkpoint import save_dygraph, load_dygraph from .dygraph.varbase_patch_methods import monkey_patch_varbase from . import generator from .core import _cuda_synchronize diff --git a/python/paddle/fluid/dygraph/__init__.py b/python/paddle/fluid/dygraph/__init__.py index b98c188ae4..d9f6034b73 100644 --- a/python/paddle/fluid/dygraph/__init__.py +++ b/python/paddle/fluid/dygraph/__init__.py @@ -18,17 +18,12 @@ from .base import * from . import layers from .layers import * -from . import container -from .container import * - from . import tracer from .tracer import * from . import parallel from .parallel import * -from . import checkpoint -from .checkpoint import * from . 
import learning_rate_scheduler from .learning_rate_scheduler import * @@ -41,8 +36,6 @@ from .math_op_patch import monkey_patch_math_varbase __all__ = [] __all__ += layers.__all__ __all__ += base.__all__ -__all__ += container.__all__ __all__ += parallel.__all__ -__all__ += checkpoint.__all__ __all__ += learning_rate_scheduler.__all__ __all__ += amp.__all__ diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py deleted file mode 100644 index ba34cb1977..0000000000 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ /dev/null @@ -1,320 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import collections -import functools -from ..framework import ( - Variable, - default_main_program, - dygraph_only, - Parameter, - ParamBase, - _varbase_creator, - _dygraph_tracer, - EagerParamBase, -) -import pickle -from . import learning_rate_scheduler -import warnings -from .. import core -from .base import guard -from paddle.jit.api import _SaveLoadConfig -from paddle.jit.translated_layer import ( - _construct_program_holders, - _construct_params_and_buffers, -) - -__all__ = [ - 'save_dygraph', - 'load_dygraph', -] - - -def _parse_load_config(configs): - supported_configs = ['model_filename', 'params_filename', 'keep_name_table'] - - # input check - for key in configs: - if key not in supported_configs: - raise ValueError( - "The additional config (%s) of `paddle.fluid.load_dygraph` is not supported." - % (key) - ) - - # construct inner config - inner_config = _SaveLoadConfig() - inner_config.model_filename = configs.get('model_filename', None) - inner_config.params_filename = configs.get('params_filename', None) - inner_config.keep_name_table = configs.get('keep_name_table', None) - - return inner_config - - -@dygraph_only -def save_dygraph(state_dict, model_path): - ''' - :api_attr: imperative - - Save Layer's state_dict to disk. This will generate a file with suffix ".pdparams" - - The state_dict is get from Layers.state_dict function - - Args: - state_dict(dict) : The state dict to be saved. - model_path(str) : the file prefix to save the state_dict. The format is "dirname/file_prefix". If file_prefix is empty str. A exception will be raised - - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import paddle - - with fluid.dygraph.guard(): - emb = paddle.nn.Embedding(10, 10) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000), - parameter_list = emb.parameters() ) - - state_dict = adam.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - ''' - - base_name = os.path.basename(model_path) - assert ( - base_name != "" - ), "The input model_path MUST be format of dirname/filename [dirname\\filename in Windows system], but received filename is empty string." 
- - suffix = ".pdparams" - assert len(state_dict) > 0, "state_dict is empty, no need to save" - - param_num = 0 - for k, v in state_dict.items(): - if isinstance(v, (ParamBase, EagerParamBase)): - param_num += 1 - - if param_num == 0: - suffix = ".pdopt" - - model_dict = {} - name_table = {} - for k, v in state_dict.items(): - if isinstance(v, (Variable, core.VarBase, core.eager.Tensor)): - model_dict[k] = v.numpy() - name_table[k] = v.name - else: - model_dict[k] = v - model_dict["StructuredToParameterName@@"] = name_table - - file_name = model_path + suffix - dir_name = os.path.dirname(file_name) - if dir_name and not os.path.exists(dir_name): - os.makedirs(dir_name) - - with open(file_name, 'wb') as f: - pickle.dump(model_dict, f, protocol=2) - - -# NOTE(chenweihang): load_dygraph will deprecated in future, we don't -# support new loading features for it -# TODO(qingqing01): remove dygraph_only to support loading static model. -# maybe need to unify the loading interface after 2.0 API is ready. -# @dygraph_only -def load_dygraph(model_path, **configs): - ''' - :api_attr: imperative - - Load parameter state dict from disk. - - .. note:: - Due to some historical reasons, if you load ``state_dict`` from the saved - result of `paddle.static.save_inference_model`, the structured variable name - will cannot be restored. You need to set the argument `use_structured_name=False` - when using `Layer.set_state_dict` later. - - Args: - model_path(str) : The file prefix store the state_dict. - (The path should Not contain suffix '.pdparams') - **configs (dict, optional): Other load configuration options for compatibility. We do not - recommend using these configurations, if not necessary, DO NOT use them. Default None. - The following options are currently supported: - (1) model_filename (str): The inference model file name of the paddle 1.x ``save_inference_model`` - save format. Default file name is :code:`__model__` . - (2) params_filename (str): The persistable variables file name of the paddle 1.x ``save_inference_model`` - save format. No default file name, save variables separately by default. - - Returns: - state_dict(dict) : the dict store the state_dict - - Examples: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - - paddle.disable_static() - - emb = paddle.nn.Embedding(10, 10) - - state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") - - scheduler = paddle.optimizer.lr.NoamDecay( - d_model=0.01, warmup_steps=100, verbose=True) - adam = paddle.optimizer.Adam( - learning_rate=scheduler, - parameters=emb.parameters()) - state_dict = adam.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") - - para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy") - ''' - # deal with argument `model_path` - model_prefix = model_path - if model_prefix.endswith(".pdparams"): - model_prefix = model_prefix[:-9] - elif model_prefix.endswith(".pdopt"): - model_prefix = model_prefix[:-6] - - para_dict = None - opti_dict = None - params_file_path = model_prefix + ".pdparams" - opti_file_path = model_prefix + ".pdopt" - - # deal with argument `config` - config = _parse_load_config(configs) - - if os.path.exists(params_file_path) or os.path.exists(opti_file_path): - # Load state dict by `save_dygraph` save format - para_dict = {} - if os.path.exists(params_file_path): - with open(params_file_path, 'rb') as f: - para_dict = pickle.load(f, encoding='latin1') - - if ( - not config.keep_name_table - and "StructuredToParameterName@@" in para_dict - ): - del para_dict["StructuredToParameterName@@"] - - if os.path.exists(opti_file_path): - with open(opti_file_path, 'rb') as f: - opti_dict = pickle.load(f, encoding='latin1') - else: - # check model path - if not os.path.isdir(model_prefix): - raise ValueError( - "Model saved directory '%s' is not exists." % model_prefix - ) - - # check whether model file exists - if config.model_filename is None: - model_filename = '__model__' - else: - model_filename = config.model_filename - model_file_path = os.path.join(model_path, model_filename) - - if os.path.exists(model_file_path): - # Load state dict by `jit.save/io.save_inference_model` save format - # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] - # The model saved by `save_inference_model` does not completely correspond to - # the information required by the `state_dict` under the dygraph. - # `save_inference_model` not save structured name, we need to remind - # the user to configure the `use_structured_name` argument when `set_state_dict` - # NOTE(chenweihang): `jit.save` doesn't save optimizer state - - # 1. load program desc & construct _ProgramHolder - programs = _construct_program_holders( - model_path, config.model_filename - ) - - # 2. load layer parameters & buffers - with guard(): - persistable_var_dict = _construct_params_and_buffers( - model_prefix, - programs, - config.params_filename, - append_suffix=False, - ) - - # 3. construct state_dict - para_dict = dict() - for var_name in persistable_var_dict: - para_dict[var_name] = persistable_var_dict[var_name].numpy() - - # if *.info exists, we can recover structured_name - var_info_filename = str(config.params_filename) + ".info" - var_info_path = os.path.join(model_prefix, var_info_filename) - if os.path.exists(var_info_path): - with open(var_info_path, 'rb') as f: - extra_var_info = pickle.load(f) - structured_para_dict = dict() - for var_name in para_dict: - structured_name = extra_var_info[var_name].get( - 'structured_name', None - ) - assert structured_name is not None, ( - "Cannot find saved variable (%s)'s structured name in saved model." 
- % var_name - ) - structured_para_dict[structured_name] = para_dict[ - var_name - ] - para_dict = structured_para_dict - else: - # load state dict by `io.save_params/persistables` save format - # TODO(chenweihang): [ Now only supports loading parameters separately ] - # If users save all parameters as one file, the [ variable.name -> variable ] - # mapping info will lost, so users need to give variable list, but users build - # variable list in dygraph mode is difficult, we recommend users to use - # paddle.static.load_program_state in this case - - # Try to load all the files in the directory in VarBase format, - # the file name is used as the name of VarBase - load_var_list = [] - - # 1. load file names - var_name_list = [] - for root, _, files in os.walk(model_path): - for filename in files: - file_path = os.path.join(root, filename) - tmp_var_name = os.path.relpath(file_path, model_path) - var_name = tmp_var_name.replace("\\", "/") - var_name_list.append(var_name) - - # 2. create and load VarBase - with guard(): - for name in var_name_list: - new_var = _varbase_creator(name=name, persistable=True) - _dygraph_tracer().trace_op( - type='load', - inputs={}, - outputs={'Out': new_var}, - attrs={'file_path': os.path.join(model_path, name)}, - ) - load_var_list.append(new_var) - - # 3. construct state_dict - para_dict = dict() - for var in load_var_list: - para_dict[var.name] = var.numpy() - - return para_dict, opti_dict diff --git a/python/paddle/fluid/dygraph/container.py b/python/paddle/fluid/dygraph/container.py deleted file mode 100644 index af0f043495..0000000000 --- a/python/paddle/fluid/dygraph/container.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .layers import Layer - -__all__ = [ - 'Sequential', -] - - -class Sequential(Layer): - """Sequential container. - Sub layers will be added to this container in the order of argument in the constructor. - The argument passed to the constructor can be iterable Layers or iterable name Layer pairs. - - Parameters: - layers(Layer|list|tuple): Layer or list/tuple of iterable name Layer pair. - - Examples: - .. 
code-block:: python - - import paddle - import numpy as np - - data = np.random.uniform(-1, 1, [30, 10]).astype('float32') - data = paddle.to_tensor(data) - # create Sequential with iterable Layers - model1 = paddle.nn.Sequential( - paddle.nn.Linear(10, 1), paddle.nn.Linear(1, 2) - ) - model1[0] # access the first layer - res1 = model1(data) # sequential execution - - # create Sequential with name Layer pairs - model2 = paddle.nn.Sequential( - ('l1', paddle.nn.Linear(10, 2)), - ('l2', paddle.nn.Linear(2, 3)) - ) - model2['l1'] # access l1 layer - model2.add_sublayer('l3', paddle.nn.Linear(3, 3)) # add sublayer - res2 = model2(data) # sequential execution - - """ - - def __init__(self, *layers): - super().__init__() - if len(layers) > 0 and isinstance(layers[0], (list, tuple)): - for name, layer in layers: - self.add_sublayer(name, layer) - else: - for idx, layer in enumerate(layers): - self.add_sublayer(str(idx), layer) - - def __getitem__(self, name): - if isinstance(name, slice): - return self.__class__(*(list(self._sub_layers.values())[name])) - elif isinstance(name, str): - return self._sub_layers[name] - else: - if name >= len(self._sub_layers): - raise IndexError('index {} is out of range'.format(name)) - elif name < 0 and name >= -len(self._sub_layers): - name += len(self._sub_layers) - elif name < -len(self._sub_layers): - raise IndexError('index {} is out of range'.format(name)) - return list(self._sub_layers.values())[name] - - def __setitem__(self, name, layer): - assert isinstance(layer, Layer) - setattr(self, str(name), layer) - - def __delitem__(self, name): - name = str(name) - assert name in self._sub_layers - del self._sub_layers[name] - - def __len__(self): - return len(self._sub_layers) - - def forward(self, input): - for layer in self._sub_layers.values(): - input = layer(input) - return input diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index cbbe8dbade..d130e0c961 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -257,14 +257,13 @@ class Optimizer: .. 
code-block:: python import paddle - import paddle.fluid as fluid paddle.disable_static() emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdparams") scheduler = paddle.optimizer.lr.NoamDecay( d_model=0.01, warmup_steps=100, verbose=True) @@ -272,9 +271,10 @@ class Optimizer: learning_rate=scheduler, parameters=emb.parameters()) state_dict = adam.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdopt") - para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy") + para_state_dict = paddle.load("paddle_dy.pdparams") + opti_state_dict = paddle.load("paddle_dy.pdopt") ''' from paddle.optimizer.lr import LRScheduler diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index e4e8954abc..b9904cb6f2 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1037,7 +1037,6 @@ set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_ssa_graph_inference_feed_partial_data PROPERTIES TIMEOUT 120) set_tests_properties(test_tensordot PROPERTIES TIMEOUT 200) -set_tests_properties(test_imperative_save_load PROPERTIES TIMEOUT 120) set_tests_properties(test_partial_eager_deletion_transformer PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_executor_seresnext_with_reduce_gpu diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py index 1fccfb70a2..863ed57b86 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bert.py @@ -119,8 +119,9 @@ class TestBert(unittest.TestCase): if to_static: paddle.jit.save(bert, self.model_save_prefix) else: - fluid.dygraph.save_dygraph( - bert.state_dict(), self.dy_state_dict_save_path + paddle.save( + bert.state_dict(), + self.dy_state_dict_save_path + '.pdparams', ) break return loss, ppl @@ -161,9 +162,7 @@ class TestBert(unittest.TestCase): bert = PretrainModelLayer( config=bert_config, weight_sharing=False, use_fp16=False ) - model_dict, _ = fluid.dygraph.load_dygraph( - self.dy_state_dict_save_path - ) + model_dict = paddle.load(self.dy_state_dict_save_path + '.pdparams') bert.set_dict(model_dict) bert.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index 56f0f6e9c2..ae1c1327cc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -747,8 +747,9 @@ class TestTrain(unittest.TestCase): if to_static: paddle.jit.save(bmn, self.model_save_prefix) else: - fluid.dygraph.save_dygraph( - bmn.state_dict(), self.dy_param_path + paddle.save( + bmn.state_dict(), + self.dy_param_path + '.pdparams', ) break return np.array(loss_data) @@ -825,7 +826,7 @@ class TestTrain(unittest.TestCase): with fluid.dygraph.guard(self.place): bmn = BMN(self.args) # load dygraph trained parameters - model_dict, _ = fluid.load_dygraph(self.dy_param_path + ".pdparams") + model_dict = paddle.load(self.dy_param_path + ".pdparams") bmn.set_dict(model_dict) bmn.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 867fe08090..8e877f39bf 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -622,8 +622,8 @@ class TestLACModel(unittest.TestCase): output_spec=[crf_decode], ) else: - fluid.dygraph.save_dygraph( - model.state_dict(), self.dy_param_path + paddle.save( + model.state_dict(), self.dy_param_path + '.pdparams' ) return np.array(loss_data) @@ -660,7 +660,7 @@ class TestLACModel(unittest.TestCase): with fluid.dygraph.guard(self.place): model = LexNet(self.args) # load dygraph trained parameters - model_dict, _ = fluid.load_dygraph(self.dy_param_path + ".pdparams") + model_dict = paddle.load(self.dy_param_path + ".pdparams") model.set_dict(model_dict) model.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index d6248dc922..6521dc31fc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -571,8 +571,9 @@ def train_mobilenet(args, to_static): if to_static: paddle.jit.save(net, args.model_save_prefix) else: - fluid.dygraph.save_dygraph( - net.state_dict(), args.dy_state_dict_save_path + paddle.save( + net.state_dict(), + args.dy_state_dict_save_path + '.pdparams', ) break @@ -611,7 +612,7 @@ def predict_dygraph(args, data): elif args.model == "MobileNetV2": model = MobileNetV2(class_dim=args.class_dim, scale=1.0) # load dygraph trained parameters - model_dict, _ = fluid.load_dygraph(args.dy_state_dict_save_path) + model_dict = paddle.load(args.dy_state_dict_save_path + '.pdparams') model.set_dict(model_dict) model.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index bee4112408..9e8a854b5b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -312,9 +312,9 @@ class ResNetHelper: if to_static: paddle.jit.save(resnet, self.model_save_prefix) else: - fluid.dygraph.save_dygraph( + paddle.save( resnet.state_dict(), - self.dy_state_dict_save_path, + self.dy_state_dict_save_path + '.pdparams', ) # avoid dataloader throw abort signaal data_loader._reset() @@ -327,9 +327,7 @@ class ResNetHelper: with fluid.dygraph.guard(place): resnet = ResNet() - model_dict, _ = fluid.dygraph.load_dygraph( - self.dy_state_dict_save_path - ) + model_dict = paddle.load(self.dy_state_dict_save_path + '.pdparams') resnet.set_dict(model_dict) resnet.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index 87ed10ef46..13a267cb3b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -307,8 +307,9 @@ class TestResnet(unittest.TestCase): if to_static: paddle.jit.save(resnet, self.model_save_prefix) else: - paddle.fluid.dygraph.save_dygraph( - resnet.state_dict(), self.dy_state_dict_save_path + paddle.save( + resnet.state_dict(), + self.dy_state_dict_save_path + '.pdparams', ) # avoid dataloader throw abort signaal data_loader._reset() @@ -322,9 +323,7 @@ class TestResnet(unittest.TestCase): 
paddle.disable_static(place) resnet = ResNet() - model_dict, _ = paddle.fluid.dygraph.load_dygraph( - self.dy_state_dict_save_path - ) + model_dict = paddle.load(self.dy_state_dict_save_path + '.pdparams') resnet.set_dict(model_dict) resnet.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py index d9b0feeaee..c2d67514e3 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_save_load.py @@ -19,6 +19,7 @@ import unittest import numpy as np from test_fetch_feed import Linear +import paddle import paddle.fluid as fluid from paddle.fluid.optimizer import AdamOptimizer from paddle.jit import ProgramTranslator @@ -62,7 +63,7 @@ class TestDyToStaticSaveLoad(unittest.TestCase): net.clear_gradients() # Save parameters - fluid.save_dygraph(net.state_dict(), self.model_path) + paddle.save(net.state_dict(), self.model_path + '.pdparams') # minimize() will update parameter, call net() to get output and avg_loss. # Switch into eval mode. net.eval() @@ -73,7 +74,7 @@ class TestDyToStaticSaveLoad(unittest.TestCase): dygraph_net = Linear(32, 64) # Load parameters - model_dict, _ = fluid.load_dygraph(self.model_path) + model_dict = paddle.load(self.model_path + '.pdparams') dygraph_net.set_dict(model_dict) # Switch into eval mode. dygraph_net.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index 27922a0f26..51e6b70686 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -460,9 +460,9 @@ class TestSeResnet(unittest.TestCase): output_spec=[pred], ) else: - fluid.dygraph.save_dygraph( + paddle.save( se_resnext.state_dict(), - self.dy_state_dict_save_path, + self.dy_state_dict_save_path + '.pdparams', ) break return ( @@ -478,9 +478,7 @@ class TestSeResnet(unittest.TestCase): with fluid.dygraph.guard(place): se_resnext = SeResNeXt() - model_dict, _ = fluid.dygraph.load_dygraph( - self.dy_state_dict_save_path - ) + model_dict = paddle.load(self.dy_state_dict_save_path + '.pdparams') se_resnext.set_dict(model_dict) se_resnext.eval() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py index 64d0d816ba..060909dbc1 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_seq2seq.py @@ -21,6 +21,7 @@ import numpy as np from seq2seq_dygraph_model import AttentionModel, BaseModel from seq2seq_utils import Seq2SeqModelHyperParams, get_data_iter +import paddle import paddle.fluid as fluid from paddle.jit import ProgramTranslator from paddle.nn import ClipGradByGlobalNorm @@ -128,7 +129,7 @@ def train(args, attn_model=False): if not os.path.exists(model_dir): os.makedirs(model_dir) - fluid.save_dygraph(model.state_dict(), model_dir) + paddle.save(model.state_dict(), model_dir + '.pdparams') return loss.numpy() @@ -163,7 +164,7 @@ def infer(args, attn_model=False): model_path = ( args.attn_model_path if attn_model else args.base_model_path ) - state_dict, _ = fluid.dygraph.load_dygraph(model_path) + state_dict = paddle.load(model_path + '.pdparams') model.set_dict(state_dict) model.eval() 
train_data_iter = get_data_iter(args.batch_size, mode='infer') diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py index 11f22686e3..0efc5445b6 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_transformer.py @@ -301,13 +301,15 @@ def train_dygraph(args, batch_generator): model_dir = os.path.join(args.save_dygraph_model_path) if not os.path.exists(model_dir): os.makedirs(model_dir) - fluid.save_dygraph( + paddle.save( transformer.state_dict(), - os.path.join(model_dir, "transformer"), + os.path.join(model_dir, "transformer") + + '.pdparams', ) - fluid.save_dygraph( + paddle.save( optimizer.state_dict(), - os.path.join(model_dir, "transformer"), + os.path.join(model_dir, "transformer") + + '.pdparams', ) break time_consumed = time.time() - pass_start_time diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py index 5922f492a8..92bdde2d94 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_util.py @@ -329,8 +329,11 @@ def load_dygraph(model_path, keep_name_table=False): To load python2 saved models in python3. """ try: - para_dict, opti_dict = fluid.load_dygraph( - model_path, keep_name_table=keep_name_table + para_dict = paddle.load( + model_path + '.pdparams', keep_name_table=keep_name_table + ) + opti_dict = paddle.load( + model_path + '.pdopt', keep_name_table=keep_name_table ) return para_dict, opti_dict except UnicodeDecodeError: @@ -341,8 +344,11 @@ def load_dygraph(model_path, keep_name_table=False): ) load_bak = pickle.load pickle.load = partial(load_bak, encoding="latin1") - para_dict, opti_dict = fluid.load_dygraph( - model_path, keep_name_table=keep_name_table + para_dict = paddle.load( + model_path + '.pdparams', keep_name_table=keep_name_table + ) + opti_dict = paddle.load( + model_path + '.pdopt', keep_name_table=keep_name_table ) pickle.load = load_bak return para_dict, opti_dict diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index d0e6c98e25..1efea016fa 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -983,7 +983,7 @@ class TestAdamOptimizer(unittest.TestCase): linear = paddle.nn.Linear(10, 10) b = linear(a) state_dict = linear.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdparams") scheduler = paddle.optimizer.lr.NoamDecay( d_model=0.01, warmup_steps=100, verbose=True @@ -995,8 +995,9 @@ class TestAdamOptimizer(unittest.TestCase): ) adam.minimize(b) state_dict = adam.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") - para_state_dict, opt_state_dict = fluid.load_dygraph("paddle_dy") + paddle.save(state_dict, "paddle_dy.pdopt") + para_state_dict = paddle.load("paddle_dy.pdparams") + opt_state_dict = paddle.load("paddle_dy.pdopt") adam.set_state_dict(opt_state_dict) paddle.enable_static() @@ -1011,7 +1012,7 @@ class TestAdamOptimizer(unittest.TestCase): linear = paddle.nn.Linear(10, 10) b = linear(a) state_dict = linear.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdparams") scheduler = 
paddle.optimizer.lr.NoamDecay( d_model=0.01, warmup_steps=100, verbose=True @@ -1027,8 +1028,9 @@ class TestAdamOptimizer(unittest.TestCase): adam = get_opt('float32', [10, 10]) state_dict = adam.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") - para_state_dict, opt_state_dict = fluid.load_dygraph("paddle_dy") + paddle.save(state_dict, "paddle_dy.pdopt") + para_state_dict = paddle.load("paddle_dy.pdparams") + opt_state_dict = paddle.load("paddle_dy.pdopt") adam.set_state_dict(opt_state_dict) adam2 = get_opt('float64', [10, 10]) # dtype not match diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index 32bdc0d099..3ed766e249 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -26,7 +26,7 @@ class TestImperativeContainerSequential(unittest.TestCase): data = np.random.uniform(-1, 1, [5, 10]).astype('float32') with fluid.dygraph.guard(): data = fluid.dygraph.to_variable(data) - model1 = fluid.dygraph.Sequential(Linear(10, 1), Linear(1, 2)) + model1 = paddle.nn.Sequential(Linear(10, 1), Linear(1, 2)) res1 = model1(data) self.assertListEqual(res1.shape, [5, 2]) model1[1] = Linear(1, 3) @@ -37,7 +37,7 @@ class TestImperativeContainerSequential(unittest.TestCase): l1 = Linear(10, 1) l2 = Linear(1, 3) - model2 = fluid.dygraph.Sequential(('l1', l1), ('l2', l2)) + model2 = paddle.nn.Sequential(('l1', l1), ('l2', l2)) self.assertEqual(len(model2), 2) res2 = model2(data) self.assertTrue(l1 is model2.l1) @@ -60,7 +60,7 @@ class TestImperativeContainerSequential(unittest.TestCase): data = np.random.uniform(-1, 1, [5, 10]).astype('float32') with fluid.dygraph.guard(): data = fluid.dygraph.to_variable(data) - model1 = fluid.dygraph.Sequential(Linear(10, 1), Linear(1, 2)) + model1 = paddle.nn.Sequential(Linear(10, 1), Linear(1, 2)) res1 = model1(data) self.assertListEqual(res1.shape, [5, 2]) model1[1] = Linear(1, 3) @@ -71,7 +71,7 @@ class TestImperativeContainerSequential(unittest.TestCase): l1 = Linear(10, 1) l2 = Linear(1, 3) - model2 = fluid.dygraph.Sequential(['l1', l1], ['l2', l2]) + model2 = paddle.nn.Sequential(['l1', l1], ['l2', l2]) self.assertEqual(len(model2), 2) res2 = model2(data) self.assertTrue(l1 is model2.l1) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py deleted file mode 100644 index 327cbce7ea..0000000000 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ /dev/null @@ -1,1036 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import unittest - -import numpy as np - -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid.dygraph.base import to_variable -from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay -from paddle.fluid.optimizer import Adam -from paddle.nn import Embedding - - -class SimpleLSTMRNN(fluid.Layer): - def __init__( - self, hidden_size, num_steps, num_layers=2, init_scale=0.1, dropout=None - ): - super().__init__() - self._hidden_size = hidden_size - self._num_layers = num_layers - self._init_scale = init_scale - self._dropout = dropout - self._input = None - self._num_steps = num_steps - self.cell_array = [] - self.hidden_array = [] - self.weight_1_arr = [] - self.weight_2_arr = [] - self.bias_arr = [] - self.mask_array = [] - - for i in range(self._num_layers): - weight_1 = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale - ) - ), - shape=[self._hidden_size * 2, self._hidden_size * 4], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale - ), - ) - self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) - bias_1 = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale - ) - ), - shape=[self._hidden_size * 4], - dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), - ) - self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) - - def forward(self, input_embedding, init_hidden=None, init_cell=None): - self.cell_array = [] - self.hidden_array = [] - - for i in range(self._num_layers): - pre_hidden = paddle.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1] - ) - pre_cell = paddle.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1] - ) - pre_hidden = paddle.reshape( - pre_hidden, shape=[-1, self._hidden_size] - ) - pre_cell = paddle.reshape(pre_cell, shape=[-1, self._hidden_size]) - self.hidden_array.append(pre_hidden) - self.cell_array.append(pre_cell) - - res = [] - for index in range(self._num_steps): - self._input = paddle.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1] - ) - self._input = paddle.reshape( - self._input, shape=[-1, self._hidden_size] - ) - for k in range(self._num_layers): - pre_hidden = self.hidden_array[k] - pre_cell = self.cell_array[k] - weight_1 = self.weight_1_arr[k] - bias = self.bias_arr[k] - - nn = fluid.layers.concat([self._input, pre_hidden], 1) - gate_input = paddle.matmul(x=nn, y=weight_1) - - gate_input = paddle.add(gate_input, bias) - i, j, f, o = paddle.split( - gate_input, num_or_sections=4, axis=-1 - ) - c = pre_cell * paddle.nn.functional.sigmoid( - f - ) + paddle.nn.functional.sigmoid(i) * paddle.tanh(j) - m = paddle.tanh(c) * paddle.nn.functional.sigmoid(o) - self.hidden_array[k] = m - self.cell_array[k] = c - self._input = m - - if self._dropout is not None and self._dropout > 0.0: - self._input = paddle.nn.functional.dropout( - self._input, - p=self._dropout, - mode='upscale_in_train', - ) - res.append( - paddle.reshape(self._input, shape=[1, -1, self._hidden_size]) - ) - real_res = fluid.layers.concat(res, 0) - real_res = paddle.transpose(x=real_res, perm=[1, 0, 2]) - last_hidden = fluid.layers.concat(self.hidden_array, 1) - last_hidden = paddle.reshape( - last_hidden, shape=[-1, self._num_layers, self._hidden_size] - ) - last_hidden = 
paddle.transpose(x=last_hidden, perm=[1, 0, 2]) - last_cell = fluid.layers.concat(self.cell_array, 1) - last_cell = paddle.reshape( - last_cell, shape=[-1, self._num_layers, self._hidden_size] - ) - last_cell = paddle.transpose(x=last_cell, perm=[1, 0, 2]) - return real_res, last_hidden, last_cell - - -class PtbModel(fluid.Layer): - def __init__( - self, - hidden_size, - vocab_size, - num_layers=2, - num_steps=20, - init_scale=0.1, - dropout=None, - ): - super().__init__() - self.hidden_size = hidden_size - self.vocab_size = vocab_size - self.init_scale = init_scale - self.num_layers = num_layers - self.num_steps = num_steps - self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout, - ) - self.embedding = Embedding( - vocab_size, - hidden_size, - sparse=False, - weight_attr=fluid.ParamAttr( - name='embedding_para', - initializer=fluid.initializer.UniformInitializer( - low=-init_scale, high=init_scale - ), - ), - ) - - self.softmax_weight = self.create_parameter( - attr=fluid.ParamAttr(), - shape=[self.hidden_size, self.vocab_size], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self.init_scale, high=self.init_scale - ), - ) - self.softmax_bias = self.create_parameter( - attr=fluid.ParamAttr(), - shape=[self.vocab_size], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self.init_scale, high=self.init_scale - ), - ) - - def forward(self, input, label, init_hidden, init_cell): - init_h = paddle.reshape( - init_hidden, shape=[self.num_layers, -1, self.hidden_size] - ) - - init_c = paddle.reshape( - init_cell, shape=[self.num_layers, -1, self.hidden_size] - ) - - x_emb = self.embedding(input) - x_emb = paddle.reshape( - x_emb, shape=[-1, self.num_steps, self.hidden_size] - ) - if self.dropout is not None and self.dropout > 0.0: - x_emb = paddle.nn.functional.dropout( - x_emb, - p=self.drop_out, - mode='upscale_in_train', - ) - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn( - x_emb, init_h, init_c - ) - rnn_out = paddle.reshape( - rnn_out, shape=[-1, self.num_steps, self.hidden_size] - ) - - projection = paddle.matmul(rnn_out, self.softmax_weight) - projection = paddle.add(projection, self.softmax_bias) - projection = paddle.reshape(projection, shape=[-1, self.vocab_size]) - loss = paddle.nn.functional.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False - ) - loss = paddle.reshape(loss, shape=[-1, self.num_steps]) - loss = paddle.mean(loss, axis=[0]) - loss = paddle.sum(loss) - - return loss, last_hidden, last_cell - - -class TestDygraphPtbRnn(unittest.TestCase): - def func_setUp(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - paddle.seed(seed) - paddle.framework.random._manual_program_seed(seed) - # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - bd = [] - lr_arr = [1.0] - # this a fake lr decay strategy - for i in range(1, 10): - bd.append(100 * i) - new_lr = 1.0 - lr_arr.append(new_lr) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr - ), - 
parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - if i == 0: - for param in ptb_model.parameters(): - dy_param_init[param.name] = param.numpy() - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - if i == batch_num - 1: - for param in ptb_model.parameters(): - dy_param_updated[param.name] = param.numpy() - - # check optimizer - self.opti_dict = adam.state_dict() - self.base_opti = {} - for k, v in self.opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - self.base_opti[v.name] = v.numpy() - self.assertTrue(np.sum(np.abs(v.numpy())) != 0) - else: - self.base_opti[k] = v - - fluid.save_dygraph(self.opti_dict, "./test_dy") - - self.state_dict = ptb_model.state_dict() - - self.model_base = {} - for k, v in self.state_dict.items(): - np_t = v.numpy() - self.model_base[k] = np_t - - fluid.save_dygraph(self.state_dict, "./test_dy") - - def func_testLoadAndSetVarBase(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - paddle.seed(seed) - paddle.framework.random._manual_program_seed(seed) - # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - bd = [] - lr_arr = [1.0] - # this a fake lr decay strategy - for i in range(1, 10): - bd.append(100 * i) - new_lr = 1.0 - lr_arr.append(new_lr) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr - ), - parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - if i == 0: - for param in ptb_model.parameters(): - dy_param_init[param.name] = param.numpy() - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - if i == batch_num - 1: - for param in ptb_model.parameters(): - dy_param_updated[param.name] = param.numpy() - - # check optimizer - opti_dict = adam.state_dict() - # set to zero - for k, v in opti_dict.items(): - if isinstance(v, (core.VarBase, 
core.eager.Tensor)): - np_t = v.numpy() - var = v.value().get_tensor() - var.set(np.zeros_like(np_t), place) - - self.assertTrue(np.sum(np.abs(v.numpy())) == 0) - - if isinstance(adam._learning_rate, LearningRateDecay): - adam._learning_rate.step_num = 0 - - para_state_dict, opti_state_dict = fluid.load_dygraph("./test_dy") - print(opti_state_dict.keys()) - adam.set_state_dict(opti_state_dict) - - opti_dict = adam.state_dict() - for k, v in opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] - ) - else: - self.assertEqual(v, self.base_opti[k]) - - # check parameter - state_dict = ptb_model.state_dict() - for k, v in state_dict.items(): - np_t = v.numpy() - var = v.value().get_tensor() - - var.set(np.zeros_like(np_t), place) - - ptb_model.set_state_dict(stat_dict=para_state_dict) - - state_dict = ptb_model.state_dict() - - for k, v in state_dict.items(): - new_t = v.numpy() - - base_t = self.model_base[k] - - np.testing.assert_array_equal(new_t, base_t) - - def func_testSetVariable(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - paddle.seed(seed) - paddle.framework.random._manual_program_seed(seed) - # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - bd = [] - lr_arr = [1.0] - # this a fake lr decay strategy - for i in range(1, 10): - bd.append(100 * i) - new_lr = 1.0 - lr_arr.append(new_lr) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr - ), - parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - if i == 0: - for param in ptb_model.parameters(): - dy_param_init[param.name] = param.numpy() - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - if i == batch_num - 1: - for param in ptb_model.parameters(): - dy_param_updated[param.name] = param.numpy() - - # check optimizer - opti_dict = adam.state_dict() - # set to zero - for k, v in opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - np_t = v.numpy() - var = v.value().get_tensor() - var.set(np.zeros_like(np_t), place) - - self.assertTrue(np.sum(np.abs(v.numpy())) == 0) - - if isinstance(adam._learning_rate, LearningRateDecay): - adam._learning_rate.step_num = 0 - - adam.set_state_dict(self.opti_dict) - opti_dict = adam.state_dict() - for k, v in opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] - ) - else: - self.assertEqual(v, 
self.base_opti[k]) - - # check parameter - state_dict = ptb_model.state_dict() - for k, v in state_dict.items(): - np_t = v.numpy() - var = v.value().get_tensor() - - var.set(np.zeros_like(np_t), place) - - ptb_model.set_state_dict(self.state_dict) - - state_dict = ptb_model.state_dict() - - for k, v in state_dict.items(): - new_t = v.numpy() - - base_t = self.model_base[k] - - np.testing.assert_array_equal(new_t, base_t) - - def func_testSetNumpy(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - paddle.seed(seed) - paddle.framework.random._manual_program_seed(seed) - # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - bd = [] - lr_arr = [1.0] - # this a fake lr decay strategy - for i in range(1, 10): - bd.append(100 * i) - new_lr = 1.0 - lr_arr.append(new_lr) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr - ), - parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - if i == 0: - for param in ptb_model.parameters(): - dy_param_init[param.name] = param.numpy() - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - if i == batch_num - 1: - for param in ptb_model.parameters(): - dy_param_updated[param.name] = param.numpy() - - # check optimizer - opti_dict = adam.state_dict() - np_opti_dict = {} - # set to zero - for k, v in opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - np_t = v.numpy() - np_opti_dict[v.name] = np_t - var = v.value().get_tensor() - var.set(np.zeros_like(np_t), place) - self.assertTrue(np.sum(np.abs(v.numpy())) == 0) - else: - np_opti_dict[k] = v - - if isinstance(adam._learning_rate, LearningRateDecay): - adam._learning_rate.step_num = 0 - - adam.set_state_dict(np_opti_dict) - - opti_dict = adam.state_dict() - for k, v in opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] - ) - else: - self.assertEqual(v, self.base_opti[k]) - - # check parameter - state_dict = ptb_model.state_dict() - np_state_dict = {} - for k, v in state_dict.items(): - np_t = v.numpy() - np_state_dict[k] = np_t - var = v.value().get_tensor() - - var.set(np.zeros_like(np_t), place) - - ptb_model.set_state_dict(np_state_dict) - - state_dict = ptb_model.state_dict() - - for k, v in state_dict.items(): - new_t = v.numpy() - - base_t = self.model_base[k] - - np.testing.assert_array_equal(new_t, base_t) - - def func_testSetVariableBeforeTrain(self): - seed = 90 - hidden_size = 10 - vocab_size 
= 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=0.0, - beta1=0.8, - beta2=0.6, - parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - adam.set_state_dict(self.opti_dict) - ptb_model.set_state_dict(self.state_dict) - - for i in range(1): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - - opti_dict = adam.state_dict() - for k, v in opti_dict.items(): - if k == "global_step": - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] + 1 - ) - - if k.find("beta1_pow_acc_0") > 0: - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] * adam._beta1 - ) - if k.find("beta2_pow_acc_0") > 0: - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] * adam._beta2 - ) - - state_dict = ptb_model.state_dict() - - for k, v in state_dict.items(): - new_t = v.numpy() - - base_t = self.model_base[k] - np.testing.assert_array_equal(new_t, base_t) - - def func_testLoadAndSetVarBaseBeforeTrain(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - paddle.seed(seed) - paddle.framework.random._manual_program_seed(seed) - # TODO: marsyang1993 Change seed to - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - bd = [] - lr_arr = [0.0] - # this a fake lr decay strategy - for i in range(1, 10): - bd.append(100 * i) - # set lr to zero not update parameter - new_lr = 0.0 - lr_arr.append(new_lr) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=0.0, - beta1=0.8, - beta2=0.6, - parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - state_dict, opti_dict = fluid.load_dygraph("./test_dy") - adam.set_state_dict(opti_dict) - ptb_model.set_state_dict(state_dict) - - for i in range(1): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = 
to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - - opti_dict = adam.state_dict() - for k, v in opti_dict.items(): - if k == "global_step": - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] + 1 - ) - - if k.find("beta1_pow_acc_0") > 0: - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] * adam._beta1 - ) - if k.find("beta2_pow_acc_0") > 0: - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] * adam._beta2 - ) - - # check parameter - - state_dict = ptb_model.state_dict() - - for k, v in state_dict.items(): - new_t = v.numpy() - - base_t = self.model_base[k] - np.testing.assert_array_equal(new_t, base_t) - - def func_testSetNumpyBeforeTrain(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with fluid.dygraph.guard(): - paddle.seed(seed) - paddle.framework.random._manual_program_seed(seed) - # TODO: marsyang1993 Change seed to - - ptb_model = PtbModel( - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale, - ) - - bd = [] - lr_arr = [0.0] - # this a fake lr decay strategy - for i in range(1, 10): - bd.append(100 * i) - # set lr to 0.0, not update parameter - new_lr = 0.0 - lr_arr.append(new_lr) - - place = ( - fluid.CPUPlace() - if not core.is_compiled_with_cuda() - else fluid.CUDAPlace(0) - ) - adam = Adam( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr_arr - ), - beta1=0.8, - beta2=0.6, - parameter_list=ptb_model.parameters(), - ) - dy_param_updated = dict() - dy_param_init = dict() - dy_loss = None - last_hidden = None - last_cell = None - - np_opti_dict = {} - np_state_dict = {} - - for k, v in self.opti_dict.items(): - if isinstance(v, (core.VarBase, core.eager.Tensor)): - np_opti_dict[v.name] = v.numpy() - else: - np_opti_dict[k] = v - - for k, v in self.state_dict.items(): - np_state_dict[k] = v.numpy() - - adam.set_state_dict(np_opti_dict) - ptb_model.set_state_dict(np_state_dict) - for i in range(1): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32' - ) - x = to_variable(x_data) - y = to_variable(y_data) - init_hidden = to_variable(init_hidden_data) - init_cell = to_variable(init_cell_data) - dy_loss, last_hidden, last_cell = ptb_model( - x, y, init_hidden, init_cell - ) - - dy_loss.backward() - adam.minimize(dy_loss) - ptb_model.clear_gradients() - - opti_dict = adam.state_dict() - for k, v in opti_dict.items(): - if k == "global_step": - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] + 1 - ) - - if k.find("beta1_pow_acc_0") > 0: - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] * adam._beta1 - ) - if k.find("beta2_pow_acc_0") > 0: - np.testing.assert_array_equal( - v.numpy(), self.base_opti[v.name] * adam._beta2 - ) - - # check parameter - - state_dict = ptb_model.state_dict() - - for k, v in state_dict.items(): - new_t = v.numpy() - - base_t = self.model_base[k] - np.testing.assert_array_equal(new_t, base_t) - - def func_testOnlyLoadParams(self): - with fluid.dygraph.guard(): - emb = 
paddle.nn.Embedding(10, 10) - state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) - - para_state_dict, opti_state_dict = fluid.load_dygraph( - os.path.join('saved_dy', 'emb_dy') - ) - - self.assertIsNone(opti_state_dict) - - para_state_dict, opti_state_dict = fluid.load_dygraph( - os.path.join('saved_dy', 'emb_dy.pdparams') - ) - - para_state_dict, opti_state_dict = fluid.load_dygraph( - os.path.join('saved_dy', 'emb_dy.pdopt') - ) - - def func_test_load_compatible_with_keep_name_table(self): - with fluid.dygraph.guard(): - emb = paddle.nn.Embedding(10, 10) - state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) - - para_state_dict, opti_state_dict = fluid.load_dygraph( - os.path.join('saved_dy', 'emb_dy'), keep_name_table=True - ) - self.assertIsNotNone(para_state_dict) - self.assertIsNone(opti_state_dict) - - def test_main(self): - self.func_setUp() - self.func_testLoadAndSetVarBase() - self.func_testSetVariable() - self.func_testSetNumpy() - self.func_testSetVariableBeforeTrain() - self.func_testLoadAndSetVarBaseBeforeTrain() - self.func_testSetNumpyBeforeTrain() - self.func_testOnlyLoadParams() - self.func_test_load_compatible_with_keep_name_table() - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index c39573e520..7fd322d358 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -844,9 +844,9 @@ class TestDygraphPtbRnn(unittest.TestCase): last_hidden = None last_cell = None - state_dict, opti_dict = fluid.load_dygraph( - os.path.join(self.temp_dir.name, "test_dy_v2") - ) + model_prefix = os.path.join(self.temp_dir.name, "test_dy_v2") + state_dict = paddle.load(model_prefix + '.pdparams') + opti_dict = paddle.load(model_prefix + '.pdopt') adam.set_state_dict(opti_dict) ptb_model.set_dict(state_dict) diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 0ae577bc43..3a25c6af5a 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -430,7 +430,7 @@ class TestJitSaveLoad(unittest.TestCase): self.temp_dir.name, "test_jit_save_load.no_path/model_path" ) with self.assertRaises(ValueError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) + model_dict = paddle.load(model_path) def test_jit_load_no_path(self): path = os.path.join( diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index ff2dc85126..f8256e0e89 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -161,7 +161,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): Step_scheduler.epoch() Reducelr_scheduler.step(loss) - fluid.dygraph.save_dygraph(linear.state_dict(), "save_path") + paddle.save(linear.state_dict(), "save_path.pdparams") Exponential_scheduler_test = fluid.dygraph.ExponentialDecay( learning_rate=0.1, @@ -174,8 +174,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase): learning_rate=1.0, decay_rate=0.5, patience=5, cooldown=3 ) - fluid.dygraph.save_dygraph(adam1.state_dict(), "save_path") - _, 
opt_state = fluid.dygraph.load_dygraph("save_path") + paddle.save(adam1.state_dict(), "save_path.pdopt") + opt_state = paddle.load("save_path.pdopt") adam_test = fluid.optimizer.Adam( learning_rate=Exponential_scheduler_test, parameter_list=linear.parameters(), @@ -187,8 +187,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase): "epoch_num is different before and after set_dict", ) - fluid.dygraph.save_dygraph(adam2.state_dict(), "save_path") - _, opt_state = fluid.dygraph.load_dygraph("save_path") + paddle.save(adam2.state_dict(), "save_path.pdopt") + opt_state = paddle.load("save_path.pdopt") adam_test = fluid.optimizer.Adam( learning_rate=Step_scheduler_test, parameter_list=linear.parameters(), @@ -205,8 +205,8 @@ class TestLearningRateDecayDygraph(unittest.TestCase): "current learning rate is different before and after set_dict", ) - fluid.dygraph.save_dygraph(adam3.state_dict(), "save_path") - _, opt_state = fluid.dygraph.load_dygraph("save_path") + paddle.save(adam3.state_dict(), "save_path.pdopt") + opt_state = paddle.load("save_path.pdopt") adam_test = fluid.optimizer.Adam( learning_rate=Reducelr_scheduler_test, parameter_list=linear.parameters(), diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py index f8c5751c8c..6a00320322 100644 --- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -148,9 +148,6 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = None orig_param_dict = self.train_and_save_model() - load_param_dict, _ = fluid.load_dygraph(self.save_dirname) - self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load(self.save_dirname) self.check_load_state_dict(orig_param_dict, new_load_param_dict) @@ -162,11 +159,6 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = None orig_param_dict = self.train_and_save_model() - load_param_dict, _ = fluid.load_dygraph( - self.save_dirname, model_filename=self.model_filename - ) - self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load( self.save_dirname, model_filename=self.model_filename ) @@ -180,11 +172,6 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = "static_mnist.params" orig_param_dict = self.train_and_save_model() - load_param_dict, _ = fluid.load_dygraph( - self.save_dirname, params_filename=self.params_filename - ) - self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load( self.save_dirname, params_filename=self.params_filename ) @@ -199,13 +186,6 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = "static_mnist.params" orig_param_dict = self.train_and_save_model() - load_param_dict, _ = fluid.load_dygraph( - self.save_dirname, - params_filename=self.params_filename, - model_filename=self.model_filename, - ) - self.check_load_state_dict(orig_param_dict, load_param_dict) - new_load_param_dict = paddle.load( self.save_dirname, params_filename=self.params_filename, @@ -220,9 +200,6 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = None orig_param_dict = self.train_and_save_model(True) - load_param_dict, _ = fluid.load_dygraph(self.save_dirname) - 
self.check_load_state_dict(orig_param_dict, load_param_dict)
-
         new_load_param_dict = paddle.load(self.save_dirname)
         self.check_load_state_dict(orig_param_dict, new_load_param_dict)
 
diff --git a/python/paddle/fluid/tests/unittests/utils.py b/python/paddle/fluid/tests/unittests/utils.py
index a9cbc845c2..3643704732 100644
--- a/python/paddle/fluid/tests/unittests/utils.py
+++ b/python/paddle/fluid/tests/unittests/utils.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import numpy as np
+import paddle
 import paddle.fluid as fluid
 from paddle.fluid.framework import _dygraph_guard
@@ -94,7 +94,8 @@ def load_dygraph_vars_to_scope(model_path, scope, place):
             dst_t.set(np.array(src_t), place)
             dst_t.set_lod(src_t.lod())
 
-    param_dict, opti_dict = fluid.load_dygraph(model_path)
+    param_dict = paddle.load(model_path + '.pdparams')
+    opti_dict = paddle.load(model_path + '.pdopt')
 
     if param_dict:
         load_dict_to_scope(scope, param_dict)
diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py
index ccc5310853..5b84379264 100644
--- a/python/paddle/framework/io.py
+++ b/python/paddle/framework/io.py
@@ -155,7 +155,6 @@ def _load_state_dict_from_save_params(model_path):
 # - need [full filename] when loading
 #   - paddle.save
 #   - paddle.static.save
-#   - paddle.fluid.save_dygraph
 # - need [prefix] when loading [compatible for paddle 2.x]
 #   - paddle.jit.save
 #   - paddle.static.save_inference_model
@@ -185,7 +184,6 @@ def _build_load_path_and_config(path, config):
         opti_file_path = path + ".pdopt"
         if os.path.exists(params_file_path) or os.path.exists(opti_file_path):
             error_msg += (
-                " If you want to load the results saved by `fluid.save_dygraph`, "
-                "please specify the full file name, not just the file name prefix. For "
+                " Please specify the full file name, not just the file name prefix. For "
                 "example, it should be written as `paddle.load('model.pdparams')` instead of "
                 "`paddle.load('model')`."
diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py
index 52a0f8b4b3..9222090d1a 100644
--- a/python/paddle/hapi/model.py
+++ b/python/paddle/hapi/model.py
@@ -899,11 +899,11 @@ class DynamicGraphAdapter:
 
     def save(self, path):
         params = self.model.network.state_dict()
-        fluid.save_dygraph(params, path)
+        paddle.save(params, path + '.pdparams')
         if self.model._optimizer is not None:
             if self.model._optimizer.state_dict():
                 optim = self.model._optimizer.state_dict()
-                fluid.save_dygraph(optim, path)
+                paddle.save(optim, path + '.pdopt')
         if hasattr(self.model, '_scaler') and self.model._scaler is not None:
             if self.model._scaler.state_dict():
                 scaler = self.model._scaler.state_dict()
diff --git a/python/paddle/jit/dy2static/convert_call_func.py b/python/paddle/jit/dy2static/convert_call_func.py
index 26264840f2..dbcbbd260f 100644
--- a/python/paddle/jit/dy2static/convert_call_func.py
+++ b/python/paddle/jit/dy2static/convert_call_func.py
@@ -24,7 +24,6 @@ import types
 
 import numpy
 
-from paddle.fluid.dygraph.container import Sequential
 from paddle.fluid.dygraph.layers import Layer
 from paddle.jit.dy2static.logging_utils import TranslatorLogger
 from paddle.jit.dy2static.utils import is_paddle_func, unwrap
@@ -40,10 +39,6 @@ from .convert_operators import (
 
 __all__ = []
 
-# The api(s) should be considered as plain function and convert
-# them into static layer code.
-PADDLE_NEED_CONVERT_APIS = [Sequential]
-
 translator_logger = TranslatorLogger()
 
 CONVERSION_OPTIONS = "An attribute for a function that indicates conversion flags of the function in dynamic-to-static."
@@ -125,6 +120,11 @@ def is_unsupported(func):
             return True
 
     # NOTE: should be placed before `is_paddle_func`
+    # These APIs should be treated as plain functions and converted
+    # into static layer code.
+    from paddle.nn import Sequential
+
+    PADDLE_NEED_CONVERT_APIS = [Sequential]
     if type(func) in PADDLE_NEED_CONVERT_APIS:
         return False
 
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 490647e55f..db31f1878e 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -17,7 +17,7 @@ from ..fluid.dygraph.layers import Layer  # noqa: F401
 
 from .layer.container import LayerList  # noqa: F401
 from .layer.container import ParameterList  # noqa: F401
-from ..fluid.dygraph.container import Sequential  # noqa: F401
+from .layer.container import Sequential  # noqa: F401
 
 from .clip import ClipGradByGlobalNorm  # noqa: F401
 from .clip import ClipGradByNorm  # noqa: F401
diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py
index 542c1266e9..f2dded131e 100644
--- a/python/paddle/nn/layer/container.py
+++ b/python/paddle/nn/layer/container.py
@@ -532,3 +532,76 @@ class LayerList(Layer):
             idx = str(offset + i)
             self.add_sublayer(idx, sublayer)
         return self
+
+
+class Sequential(Layer):
+    """Sequential container.
+    Sublayers are added to the container in the order they are passed to the constructor.
+    The constructor accepts either individual Layers or iterables of (name, Layer) pairs.
+
+    Parameters:
+        layers(Layer|list|tuple): Layers to hold, or a list/tuple of (name, Layer) pairs.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            data = paddle.uniform(shape=[30, 10], dtype='float32')
+            # create Sequential with iterable Layers
+            model1 = paddle.nn.Sequential(
+                paddle.nn.Linear(10, 1), paddle.nn.Linear(1, 2)
+            )
+            model1[0]  # access the first layer
+            res1 = model1(data)  # sequential execution
+
+            # create Sequential with (name, Layer) pairs
+            model2 = paddle.nn.Sequential(
+                ('l1', paddle.nn.Linear(10, 2)),
+                ('l2', paddle.nn.Linear(2, 3))
+            )
+            model2['l1']  # access the layer named l1
+            model2.add_sublayer('l3', paddle.nn.Linear(3, 3))  # add a sublayer
+            res2 = model2(data)  # sequential execution
+
+    """
+
+    def __init__(self, *layers):
+        super().__init__()
+        if len(layers) > 0 and isinstance(layers[0], (list, tuple)):
+            for name, layer in layers:
+                self.add_sublayer(name, layer)
+        else:
+            for idx, layer in enumerate(layers):
+                self.add_sublayer(str(idx), layer)
+
+    def __getitem__(self, name):
+        if isinstance(name, slice):
+            return self.__class__(*(list(self._sub_layers.values())[name]))
+        elif isinstance(name, str):
+            return self._sub_layers[name]
+        else:
+            if name >= len(self._sub_layers):
+                raise IndexError('index {} is out of range'.format(name))
+            elif name < 0 and name >= -len(self._sub_layers):
+                name += len(self._sub_layers)
+            elif name < -len(self._sub_layers):
+                raise IndexError('index {} is out of range'.format(name))
+            return list(self._sub_layers.values())[name]
+
+    def __setitem__(self, name, layer):
+        assert isinstance(layer, Layer)
+        setattr(self, str(name), layer)
+
+    def __delitem__(self, name):
+        name = str(name)
+        assert name in self._sub_layers
+        del self._sub_layers[name]
+
+    def __len__(self):
+        return len(self._sub_layers)
+
+    def forward(self, input):
+        for layer in self._sub_layers.values():
+            input = layer(input)
+        return input
diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py
index 34973bafbd..7094ea5d59 100644
--- a/python/paddle/tests/test_model.py
+++ b/python/paddle/tests/test_model.py
@@ -228,7 +228,7 @@ class TestModel(unittest.TestCase):
         if not os.path.exists(cls.save_dir):
             os.makedirs(cls.save_dir)
         cls.weight_path = os.path.join(cls.save_dir, 'lenet')
-        fluid.dygraph.save_dygraph(dy_lenet.state_dict(), cls.weight_path)
+        paddle.save(dy_lenet.state_dict(), cls.weight_path + '.pdparams')
 
         fluid.disable_dygraph()
diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py
index 062d923dfc..0a898caa3f 100755
--- a/tools/parallel_UT_rule.py
+++ b/tools/parallel_UT_rule.py
@@ -1542,7 +1542,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_gru_unit_op',
     'test_amp_check_finite_and_scale_op',
     'test_imperative_selected_rows_to_lod_tensor',
-    'test_imperative_save_load',
     'test_add_reader_dependency',
     'test_imperative_transformer_sorted_gradient',
     'test_bicubic_interp_v2_op',
-- 
GitLab
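
For downstream code that still calls the removed APIs, the sketch below shows the replacement pattern this patch applies throughout: `paddle.save`/`paddle.load` operate on one state dict at a time and take the full file name with an explicit `.pdparams` or `.pdopt` suffix, and `Sequential` is now imported from `paddle.nn`. The save prefix, layer sizes, and optimizer settings here are illustrative assumptions, not values taken from the patch.

.. code-block:: python

    import os

    import paddle

    # Sequential now lives in paddle.nn (moved from fluid.dygraph.container).
    model = paddle.nn.Sequential(
        ('l1', paddle.nn.Linear(10, 2)),
        ('l2', paddle.nn.Linear(2, 3)),
    )
    adam = paddle.optimizer.Adam(
        learning_rate=0.1, parameters=model.parameters()
    )

    # Saving: one call per state dict, each with its full suffix; the removed
    # save_dygraph derived both file names from a single prefix instead.
    prefix = os.path.join('saved_dy', 'model_dy')  # illustrative prefix
    paddle.save(model.state_dict(), prefix + '.pdparams')
    paddle.save(adam.state_dict(), prefix + '.pdopt')

    # Loading: pass the full file name, not just the prefix; given only a
    # prefix, paddle.load raises ValueError (see the io.py hunk above).
    model.set_state_dict(paddle.load(prefix + '.pdparams'))
    adam.set_state_dict(paddle.load(prefix + '.pdopt'))

Splitting parameters and optimizer state into separately suffixed files is what lets the updated tests above load each dict with a single `paddle.load` call.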