diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py
index 1772620c3869e389bc42d52deb5996b1c1fd1c1e..5ed4e2d412e49408df6077c0320c597be783d385 100644
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -16,7 +16,7 @@ from __future__ import print_function
 
 import os
 import collections
-from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter
+from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase
 import pickle
 from . import learning_rate_scheduler
 import warnings
@@ -68,20 +68,33 @@ def save_dygraph(state_dict, model_path):
 
     assert len(state_dict) > 0, "state_dict is empty, no need to save"
 
     for k, v in state_dict.items():
-        if not isinstance(v, Parameter):
+        if not isinstance(v, ParamBase):
             suffix = ".pdopt"
             break
 
-    core._save_dygraph_dict(model_path + suffix, state_dict)
+    model_dict = {}
+    name_table = {}
+    for k, v in state_dict.items():
+        if isinstance(v, (Variable, core.VarBase)):
+            model_dict[k] = v.numpy()
+            name_table[k] = v.name
+        else:
+            model_dict[k] = v
+    model_dict["StructuredToParameterName@@"] = name_table
+
+    with open(model_path + suffix, 'wb') as f:
+        pickle.dump(model_dict, f)
 
 
 @dygraph_only
-def load_dygraph(model_path):
+def load_dygraph(model_path, keep_name_table=False):
     '''
     Load parameter state_dict from disk.
 
     Args:
         model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams')
+        keep_name_table(bool, optional) : Whether to keep the structured-name to parameter-name conversion table in the output dict.
+                                          Default: False
 
     Returns:
         state_dict(dict) : the dict store the state_dict
@@ -111,45 +124,15 @@
         raise RuntimeError("Parameter file [ {} ] not exists".format(
             params_file_path))
 
-    para_dict = core._load_dygraph_dict(params_file_path)
+    with open(params_file_path, 'rb') as f:
+        para_dict = pickle.load(f)
+    if not keep_name_table and "StructuredToParameterName@@" in para_dict:
+        del para_dict["StructuredToParameterName@@"]
 
     opti_dict = None
     opti_file_path = model_path + ".pdopt"
     if os.path.exists(opti_file_path):
-        opti_dict = core._load_dygraph_dict(opti_file_path)
+        with open(opti_file_path, 'rb') as f:
+            opti_dict = pickle.load(f)
 
     return para_dict, opti_dict
-
-
-@dygraph_only
-def load_optimizer(model_path):
-    '''
-    Load optimizer state_dict from disk.
-
-    Args:
-        model_path(str) : The file prefix store the state_dict. (The path should Not contain shuffix '.pdparams')
-
-    Returns:
-        state_dict(dict) : the dict store the state_dict
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                adam = fluid.optimizer.Adam(0.001)
-
-                state_dict = adam.state_dict()
-                fluid.save_optimizer( state_dict, "opt_adam")
-
-                fluid.load_optimizer( "opt_adam")
-
-    '''
-
-    assert in_dygraph_mode(), "save_optimizer only work in dygraph mode"
-    opt_file_path = model_path + ".pdopt"
-    if not os.path.exists(opt_file_path):
-        raise RuntimeError("Optimizer file [ {} ] not exists".format(
-            opt_file_path))
-    return core._load_dygraph_dict(opt_file_path)
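Taken together, `save_dygraph` now pickles a plain dict of numpy arrays plus a `StructuredToParameterName@@` name table, and `load_dygraph` strips that table unless asked to keep it. A minimal round-trip sketch (assuming the `fluid.save_dygraph`/`fluid.load_dygraph` aliases used in the surrounding docstrings):

```python
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])

    state_dict = emb.state_dict()
    fluid.save_dygraph(state_dict, "paddle_dy")  # writes paddle_dy.pdparams

    # opti_dict is None here because no .pdopt file was written;
    # keep_name_table=True preserves the structured-name mapping.
    para_dict, opti_dict = fluid.load_dygraph("paddle_dy", keep_name_table=True)
    print(para_dict["StructuredToParameterName@@"])  # structured name -> parameter name
```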
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index 6461a7ba8d0194ecf33aa7a2ae547a4349b1f659..65920297531629b0f589d44fea51d61e61446cbc 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -25,6 +25,8 @@ from .layer_object_helper import LayerObjectHelper
 from .base import program_desc_tracing_guard
 from paddle.fluid import framework
 from ..param_attr import ParamAttr
+import copy
+import warnings
 
 __all__ = ['Layer']
 
@@ -99,11 +101,10 @@ class Layer(core.Layer):
         Returns:
             :ref:`api_guide_Variable_en` : created parameter.
         """
-        if isinstance(attr, ParamAttr) and (attr.name is not None):
-            attr.name = ".".join([self._full_name, attr.name])
-        elif isinstance(attr, six.string_types):
-            attr = ".".join([self._full_name, attr])
-        return self._helper.create_parameter(attr, shape, dtype, is_bias,
+        temp_attr = copy.deepcopy(attr)
+        if isinstance(temp_attr, six.string_types) and temp_attr == "":
+            temp_attr = None
+        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
                                              default_initializer)
 
     # TODO: Add more parameter list when we need them
@@ -283,7 +284,10 @@ class Layer(core.Layer):
         else:
             object.__delattr__(self, name)
 
-    def state_dict(self, destination=None, include_sublayers=True):
+    def state_dict(self,
+                   destination=None,
+                   include_sublayers=True,
+                   structured_name_prefix=""):
         '''
         Get all parameters of current layer and its sub-layers. And set all the parameters into a dict
 
@@ -310,25 +314,31 @@
         destination = collections.OrderedDict()
         for name, data in self._parameters.items():
             if data is not None:
-                destination[data.name] = data
+                destination[structured_name_prefix + name] = data
         if include_sublayers:
             for layer_name, layer_item in self._sub_layers.items():
                 if layer_item is not None:
                     destination_temp = destination.copy()
                     destination_temp.update(
-                        layer_item.state_dict(destination_temp,
-                                              include_sublayers))
+                        layer_item.state_dict(
+                            destination_temp, include_sublayers,
+                            structured_name_prefix + layer_name + "."))
                     destination = destination_temp
         return destination
 
-    def set_dict(self, stat_dict, include_sublayers=True):
+    def set_dict(self,
+                 stat_dict,
+                 include_sublayers=True,
+                 use_structured_name=True):
         '''
         Set parameters from stat_dict. All the parameters will be reset by the tensor in the stat_dict
 
         Parameters:
             state_dict(dict) : Dict contains all the parameters
             include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured names as keys; otherwise, use parameter names as keys.
+                                                  Default: True
 
         Returns:
             None
 
@@ -347,9 +357,15 @@ class Layer(core.Layer):
                 emb.set_dict( para_state_dict )
 
         '''
-        self.load_dict(stat_dict, include_sublayers=include_sublayers)
-
-    def load_dict(self, stat_dict, include_sublayers=True):
+        self.load_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
+
+    def load_dict(self,
+                  stat_dict,
+                  include_sublayers=True,
+                  use_structured_name=True):
         '''
         Set parameters from stat_dict. All the parameters will be reset by the tensor in the stat_dict
 
@@ -358,6 +374,8 @@
         Parameters:
             state_dict(dict) : Dict contains all the parameters
             include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured names as keys; otherwise, use parameter names as keys.
+                                                  Default: True
 
         Returns:
             None
 
@@ -377,16 +395,22 @@
 
         '''
 
-        self._loaddict_holder = stat_dict
-        for name, item in self.__dict__.get('_parameters', None).items():
-            if item.name in stat_dict:
-                item.set_value(stat_dict[item.name])
+        inner_state_dict = self.state_dict()
+
+        for name, para in inner_state_dict.items():
+            key_name = name if use_structured_name else para.name
+            if key_name in stat_dict:
+                para.set_value(stat_dict[key_name])
             else:
                 raise RuntimeError(
-                    "Parameter not found, Can't not find [ {} ] in stat_dict".
-                    format(item.name))
-
-        if include_sublayers:
-            for layer_name, layer_item in self._sub_layers.items():
-                if layer_item is not None:
-                    layer_item.load_dict(stat_dict)
+                    "Parameter not found: cannot find [ {} ] in stat_dict; "
+                    "use_structured_name is set to [{}]".format(
+                        key_name, use_structured_name))
+        unused_para_list = []
+        for k, v in stat_dict.items():
+            if k not in inner_state_dict:
+                unused_para_list.append(k)
+        if len(unused_para_list) > 0:
+            warnings.warn(
+                "Variables [ {} ] are not used, because they are not included in the layer's state_dict".
+                format(" ".join(unused_para_list)))
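With these changes `state_dict()` is keyed by structured attribute names (with prefixes built from sublayer names) instead of the underlying parameter names, and `set_dict`/`load_dict` can match by either kind of key. A short sketch of both paths; values may be VarBase or numpy arrays, since loading goes through `set_value`:

```python
import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])

    # Keys are structured attribute names, not parameter names
    # like "embedding_0.w_0".
    state_dict = emb.state_dict()

    # Default: match entries by structured name.
    emb.set_dict(state_dict)

    # Alternative: match by parameter name, e.g. for dicts keyed
    # the old way.
    name_keyed = {p.name: p.numpy() for p in emb.parameters()}
    emb.set_dict(name_keyed, use_structured_name=False)
```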
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index f660396780f93604dbe0886e0fb8866266085015..08a36a9265df1f30509fb814f070220c24faa25c 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -19,9 +19,11 @@ from .. import core
 from ..layers import utils
 from ..dygraph import dygraph_utils
 from . import layers
-from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter, _dygraph_tracer_
+from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
 from ..param_attr import ParamAttr
 from ..initializer import Normal, Constant, NumpyArrayInitializer
+from .. import unique_name
+from .layer_object_helper import LayerObjectHelper
 import numpy as np
 import numbers
 import logging
@@ -1287,6 +1289,9 @@ class BatchNorm(layers.Layer):
         self._bias_attr = bias_attr
         self._act = act
 
+        self._full_name = unique_name.generate("batch_norm")
+        self._helper = LayerObjectHelper(self._full_name)
+
         assert bias_attr is not False, "bias_attr should not be False in batch_norm."
if dtype == "float16": @@ -1618,6 +1623,10 @@ class LayerNorm(layers.Layer): super(LayerNorm, self).__init__() if isinstance(normalized_shape, numbers.Integral): normalized_shape = [normalized_shape] + + self._full_name = unique_name.generate("layer_norm") + self._helper = LayerObjectHelper(self._full_name) + self._normalized_shape = list(normalized_shape) self._scale = scale self._shift = shift diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 09755405143537935a8e1943b32345ccb0874207..a82a75e10bb1ae3c35435401554fcf9a8208843e 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -71,6 +71,9 @@ class ParamAttr(object): gradient_clip=None, do_model_average=True): self.name = name + if isinstance(self.name, six.string_types) and self.name == "": + raise ValueError("name of ParamAttr can not be empty str") + self.initializer = initializer self.learning_rate = learning_rate self.regularizer = regularizer diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py index 4f35b02194f96bd7b34d0c57f5b1d0e98243da33..4ce67676c3e85e6bcef618ff74d0d1f543c011cb 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_se_resnext.py @@ -101,8 +101,7 @@ class ConvBNLayer(fluid.dygraph.Layer): padding=(filter_size - 1) // 2, groups=groups, act=None, - bias_attr=False, - param_attr=fluid.ParamAttr(name="weights")) + bias_attr=False) # disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape # self._batch_norm = BatchNorm(num_filters, act=act) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py new file mode 100644 index 0000000000000000000000000000000000000000..d25e3a76cb7f31cbf97db8e82d3b43be3e9c4c46 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -0,0 +1,200 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
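The two changes above work together: `ParamAttr` now rejects an empty-string name up front, and `Layer.create_parameter` (earlier in this diff) treats an empty-string attr as `None` instead of building a name from it. A quick sketch of the new behavior:

```python
import paddle.fluid as fluid

try:
    fluid.ParamAttr(name="")            # now raises immediately
except ValueError as e:
    print(e)                            # empty-string names are rejected

w_attr = fluid.ParamAttr(name="my_weight")  # explicit names still work
```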
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
new file mode 100644
index 0000000000000000000000000000000000000000..d25e3a76cb7f31cbf97db8e82d3b43be3e9c4c46
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
@@ -0,0 +1,200 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.fluid as fluid
+import paddle.fluid.framework as framework
+from paddle.fluid.dygraph.nn import *
+import numpy as np
+
+
+class TestDygraphLoadStatic(unittest.TestCase):
+    def testLoadStaticModel(self):
+        # static mode
+        a = fluid.data(name="a", shape=[10, 10])
+        conv_in = fluid.data(name="conv_in", shape=[None, 10, 10, 10])
+
+        fc_out1 = fluid.layers.fc(a, 10)
+        fc_out2 = fluid.layers.fc(a, 20)
+
+        conv_out_1 = fluid.layers.conv2d(
+            conv_in, num_filters=10, filter_size=5, act="relu")
+        conv_out_2 = fluid.layers.conv2d(
+            conv_in, num_filters=10, filter_size=5, act="relu")
+
+        conv3d_in = fluid.data(
+            name='conv3d_in', shape=[None, 3, 12, 32, 32], dtype='float32')
+        conv3d_out_1 = fluid.layers.conv3d(
+            input=conv3d_in, num_filters=2, filter_size=3, act="relu")
+        conv3d_out_2 = fluid.layers.conv3d(
+            input=conv3d_in, num_filters=2, filter_size=3, act="relu")
+
+        batchnorm_in = fluid.data(
+            name="batchnorm_in", shape=[None, 10], dtype='float32')
+        batchnorm_out_1 = fluid.layers.batch_norm(batchnorm_in)
+        batchnorm_out_2 = fluid.layers.batch_norm(batchnorm_in)
+
+        emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64')
+        emb_out_1 = fluid.embedding(emb_in, [1000, 100])
+        emb_out_2 = fluid.embedding(emb_in, [2000, 200])
+
+        layernorm = fluid.data(name="ln", shape=[None, 10], dtype='float32')
+        layernorm_1 = fluid.layers.layer_norm(layernorm)
+        layernorm_2 = fluid.layers.layer_norm(layernorm)
+
+        nce_in = fluid.data(name="nce_in", shape=[None, 100], dtype='float32')
+        nce_label = fluid.data(
+            name="nce_label", shape=[None, 10], dtype='int64')
+        nce_out_1 = fluid.layers.nce(nce_in, nce_label, 10000)
+        nce_out_2 = fluid.layers.nce(nce_in, nce_label, 10000)
+
+        prelu_in = fluid.data(
+            name="prelu_in", shape=[None, 5, 10, 10], dtype='float32')
+        prelu_out_1 = fluid.layers.prelu(prelu_in, "channel")
+        prelu_out_2 = fluid.layers.prelu(prelu_in, "channel")
+
+        bilinear_tensor_pro_x = fluid.data(
+            "t1", shape=[None, 5], dtype="float32")
+        bilinear_tensor_pro_y = fluid.data(
+            "t2", shape=[None, 4], dtype="float32")
+        bilinear_tensor_pro_out_1 = fluid.layers.bilinear_tensor_product(
+            x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
+        bilinear_tensor_pro_out_2 = fluid.layers.bilinear_tensor_product(
+            x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
+
+        conv2d_trans_in = fluid.data(
+            name="conv2d_trans_in", shape=[None, 10, 10, 10])
+        conv2d_trans_out_1 = fluid.layers.conv2d_transpose(
+            conv2d_trans_in, num_filters=10, filter_size=5, act="relu")
+        conv2d_trans_out_2 = fluid.layers.conv2d_transpose(
+            conv2d_trans_in, num_filters=10, filter_size=5, act="relu")
+
+        conv3d_trans_in = fluid.data(
+            name='conv3d_trans_in',
+            shape=[None, 3, 12, 32, 32],
+            dtype='float32')
+        conv3d_trans_out_1 = fluid.layers.conv3d_transpose(
+            input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
+        conv3d_trans_out_2 = fluid.layers.conv3d_transpose(
+            input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
+
+        groupnorm_in = fluid.data(
+            name='groupnorm_in', shape=[None, 8, 32, 32], dtype='float32')
+        groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
+        groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
+        '''
+        spec_norm = fluid.data(name='spec_norm', shape=[2, 8, 32, 32], dtype='float32')
+        spe_norm_out_1 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
+        spe_norm_out_2 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
+        '''
+
+        nodes_vector = fluid.data(
+            name='vectors', shape=[None, 10, 5], dtype='float32')
+        edge_set = fluid.data(
+            name='edge_set', shape=[None, 10, 2], dtype='float32')
+        tree_conv_out1 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set,
+                                                        6, 1, 2)
+        tree_conv_out2 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set,
+                                                        6, 1, 2)
+
+        para1 = fluid.layers.create_parameter(
+            [100, 100], 'float32', name="weight_test_1")
+        para2 = fluid.layers.create_parameter(
+            [20, 200], 'float32', name="weight_test_2")
+
+        para_list = fluid.default_main_program().list_vars()
+
+        exe = fluid.Executor(fluid.CPUPlace(
+        ) if not fluid.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+        out = exe.run(framework.default_startup_program())
+
+        fluid.save(framework.default_main_program(), "./test_1")
+
+        para_dict = fluid.load_program_state("./test_1")
+
+        new_dict = {}
+        for k, v in para_dict.items():
+            if k.startswith("fc"):
+                name = k.replace("fc", "linear", 1)
+                new_dict[name] = v
+            else:
+                new_dict[k] = v
+
+        with fluid.dygraph.guard():
+
+            class MyTest(fluid.dygraph.Layer):
+                def __init__(self):
+                    super(MyTest, self).__init__()
+
+                    self.linear1 = Linear(10, 10)
+                    self.linear2 = Linear(10, 20)
+
+                    self.conv2d_1 = Conv2D(
+                        num_channels=10,
+                        num_filters=10,
+                        filter_size=5,
+                        act="relu")
+                    self.conv2d_2 = Conv2D(
+                        num_channels=10,
+                        num_filters=10,
+                        filter_size=5,
+                        act="relu")
+
+                    self.conv3d_1 = Conv3D(
+                        num_channels=3,
+                        num_filters=2,
+                        filter_size=3,
+                        act="relu")
+                    self.conv3d_2 = Conv3D(
+                        num_channels=3,
+                        num_filters=2,
+                        filter_size=3,
+                        act="relu")
+
+                    self.batch_norm_1 = BatchNorm(10)
+                    self.batch_norm_2 = BatchNorm(10)
+
+                    self.emb1 = Embedding([1000, 100])
+                    self.emb2 = Embedding([2000, 200])
+
+                    self.layer_norm_1 = LayerNorm([10])
+                    self.layer_norm_2 = LayerNorm(10)
+
+                    self.nce1 = NCE(10000, 100)
+                    self.nce2 = NCE(10000, 100)
+
+                    self.prelu1 = PRelu("channel", [-1, 5, 10, 10])
+                    self.prelu2 = PRelu("channel", [-1, 5, 10, 10])
+
+                    self.group_norm1 = GroupNorm(8, 4)
+                    self.group_norm2 = GroupNorm(8, 4)
+
+                    self.w_1 = self.create_parameter(
+                        [100, 100], dtype='float32', attr="weight_test_1")
+                    self.w_2 = self.create_parameter(
+                        [20, 200], dtype='float32', attr="weight_test_2")
+
+            my_test = MyTest()
+            my_test.set_dict(new_dict, use_structured_name=False)
+            for k, v in my_test.state_dict().items():
+                self.assertTrue(np.array_equal(v.numpy(), new_dict[v.name]))
+
+
+if __name__ == '__main__':
+    unittest.main()
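The new test above exercises the end-to-end flow this PR enables: save parameters from a static-graph program, then load them into a dygraph model by parameter name. Condensed (names such as `./test_1`, the fc-to-linear renaming, and `my_test` come from the test itself):

```python
para_dict = fluid.load_program_state("./test_1")

# fluid.layers.fc parameters are named "fc_*", while the dygraph Linear
# layers that replace them are named "linear_*", so remap the prefix.
new_dict = {(k.replace("fc", "linear", 1) if k.startswith("fc") else k): v
            for k, v in para_dict.items()}

# Match by parameter name rather than structured attribute name.
my_test.set_dict(new_dict, use_structured_name=False)
```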
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 3566a37e97f6926835884ae1d64cdf507e69f7ae..3d2868a97651341ea6b58da073ae38122af4ee4c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -17,7 +17,7 @@ from __future__ import print_function
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.dygraph.nn import Embedding, FC
+from paddle.fluid.dygraph.nn import Embedding, Linear
 import paddle.fluid.framework as framework
 from paddle.fluid.optimizer import Adam
 from paddle.fluid.dygraph.base import to_variable
@@ -29,13 +29,12 @@ import six
 
 class SimpleLSTMRNN(fluid.Layer):
     def __init__(self,
-                 name_scope,
                  hidden_size,
                  num_steps,
                  num_layers=2,
                  init_scale=0.1,
                  dropout=None):
-        super(SimpleLSTMRNN, self).__init__(name_scope)
+        super(SimpleLSTMRNN, self).__init__()
         self._hidden_size = hidden_size
         self._num_layers = num_layers
         self._init_scale = init_scale
@@ -44,8 +43,6 @@ class SimpleLSTMRNN(fluid.Layer):
         self._num_steps = num_steps
         self.cell_array = []
         self.hidden_array = []
-
-    def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
         self.weight_1_arr = []
         self.weight_2_arr = []
         self.bias_arr = []
@@ -149,7 +146,6 @@ class PtbModel(fluid.Layer):
         self.num_steps = num_steps
         self.dropout = dropout
         self.simple_lstm_rnn = SimpleLSTMRNN(
-            self.full_name(),
             hidden_size,
             num_steps,
             num_layers=num_layers,
@@ -164,9 +160,7 @@
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale)))
 
-        self.out_project = FC(self.full_name(),
-                              self.vocab_size,
-                              num_flatten_dims=2)
+        self.out_project = Linear(self.hidden_size, self.vocab_size)
 
     def forward(self, input, label, init_hidden, init_cell):
         init_h = fluid.layers.reshape(
@@ -277,10 +271,11 @@ class TestDygraphPtbRnn(unittest.TestCase):
         fluid.save_dygraph(self.opti_dict, "./test_dy")
 
         self.state_dict = ptb_model.state_dict()
+
         self.model_base = {}
         for k, v in self.state_dict.items():
             np_t = v.numpy()
-            self.model_base[v.name] = np_t
+            self.model_base[k] = np_t
 
         fluid.save_dygraph(self.state_dict, "./test_dy")
 
@@ -386,7 +381,7 @@
 
         for k, v in state_dict.items():
             new_t = v.numpy()
-            base_t = self.model_base[v.name]
+            base_t = self.model_base[k]
 
             self.assertTrue(np.array_equal(new_t, base_t))
 
@@ -491,7 +486,7 @@
 
         for k, v in state_dict.items():
             new_t = v.numpy()
-            base_t = self.model_base[v.name]
+            base_t = self.model_base[k]
 
             self.assertTrue(np.array_equal(new_t, base_t))
 
@@ -588,7 +583,7 @@
         np_state_dict = {}
         for k, v in state_dict.items():
             np_t = v.numpy()
-            np_state_dict[v.name] = np_t
+            np_state_dict[k] = np_t
             var = v.value().get_tensor()
             var.set(np.zeros_like(np_t), place)
 
@@ -600,7 +595,7 @@
 
         for k, v in state_dict.items():
             new_t = v.numpy()
-            base_t = self.model_base[v.name]
+            base_t = self.model_base[k]
 
             self.assertTrue(np.array_equal(new_t, base_t))
 
@@ -626,20 +621,10 @@
             num_steps=num_steps,
             init_scale=init_scale)
 
-        bd = []
-        lr_arr = [1.0]
-        # this a fake lr decay strategy
-        for i in range(1, 10):
-            bd.append(100 * i)
-            #set lr to 0.0, not udpate parameter
-            new_lr = 0.0
-            lr_arr.append(new_lr)
-
         place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
         ) else fluid.CUDAPlace(0)
         adam = Adam(
-            learning_rate=fluid.layers.piecewise_decay(
-                boundaries=bd, values=lr_arr),
+            learning_rate=0.0,
             beta1=0.8,
             beta2=0.6,
             parameter_list=ptb_model.parameters())
@@ -686,14 +671,12 @@
                     np.array_equal(v.numpy(), self.base_opti[v.name] *
                                    adam._beta2))
 
-        # check parameter
-
         state_dict = ptb_model.state_dict()
 
         for k, v in state_dict.items():
             new_t = v.numpy()
-            base_t = self.model_base[v.name]
+            base_t = self.model_base[k]
             self.assertTrue(np.array_equal(new_t, base_t))
 
     def testLoadAndSetVarBaseBeforeTrain(self):
@@ -719,7 +702,7 @@
             init_scale=init_scale)
 
         bd = []
-        lr_arr = [1.0]
+        lr_arr = [0.0]
         # this a fake lr decay strategy
         for i in range(1, 10):
             bd.append(100 * i)
@@ -730,8 +713,7 @@
         place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
         ) else fluid.CUDAPlace(0)
         adam = Adam(
-            learning_rate=fluid.layers.piecewise_decay(
-                boundaries=bd, values=lr_arr),
+            learning_rate=0.0,
             beta1=0.8,
             beta2=0.6,
             parameter_list=ptb_model.parameters())
@@ -786,7 +768,7 @@
 
         for k, v in state_dict.items():
             new_t = v.numpy()
-            base_t = self.model_base[v.name]
+            base_t = self.model_base[k]
 
             self.assertTrue(np.array_equal(new_t, base_t))
 
     def testSetNumpyBeforeTrain(self):
@@ -812,7 +794,7 @@
             init_scale=init_scale)
 
         bd = []
-        lr_arr = [1.0]
+        lr_arr = [0.0]
         # this a fake lr decay strategy
         for i in range(1, 10):
             bd.append(100 * i)
@@ -841,11 +823,10 @@
                 np_opti_dict[v.name] = v.numpy()
 
             for k, v in self.state_dict.items():
-                np_state_dict[v.name] = v.numpy()
+                np_state_dict[k] = v.numpy()
 
             adam.set_dict(np_opti_dict)
             ptb_model.set_dict(np_state_dict)
-
             for i in range(1):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
@@ -887,7 +868,7 @@
 
         for k, v in state_dict.items():
             new_t = v.numpy()
-            base_t = self.model_base[v.name]
+            base_t = self.model_base[k]
 
             self.assertTrue(np.array_equal(new_t, base_t))
 
     def testOnlyLoadParams(self):