Unverified Commit 2f49cf70 authored by H hong, committed by GitHub

Support dygraph structured name (#21930)

* support dygraph structured name; test=develop

* add load static param unit test and fix save/load; test=develop

* fix VarBase import error; test=develop

* fix unit test error; test=develop

* add comment for parameter; test=develop

* fix unit test error; test=develop

* change parallel se-resnet; test=develop

* fix dygraph se-resnext parallel test; test=develop

* remove useless code; test=develop

* remove useless code; test=develop
Parent f220be4f
@@ -16,7 +16,7 @@ from __future__ import print_function
 import os
 import collections
-from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter
+from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase
 import pickle
 from . import learning_rate_scheduler
 import warnings
@@ -68,20 +68,33 @@ def save_dygraph(state_dict, model_path):
     assert len(state_dict) > 0, "state_dict is empty, no need to save"
     for k, v in state_dict.items():
-        if not isinstance(v, Parameter):
+        if not isinstance(v, ParamBase):
             suffix = ".pdopt"
             break
-    core._save_dygraph_dict(model_path + suffix, state_dict)
+    model_dict = {}
+    name_table = {}
+    for k, v in state_dict.items():
+        if isinstance(v, (Variable, core.VarBase)):
+            model_dict[k] = v.numpy()
+            name_table[k] = v.name
+        else:
+            model_dict[k] = v
+    model_dict["StructuredToParameterName@@"] = name_table
+
+    with open(model_path + suffix, 'wb') as f:
+        pickle.dump(model_dict, f)
 @dygraph_only
-def load_dygraph(model_path):
+def load_dygraph(model_path, keep_name_table=False):
     '''
     Load parameter state_dict from disk.

     Args:
         model_path(str) : The file prefix that stores the state_dict. (The path should not contain the suffix '.pdparams')
+        keep_name_table(bool, optional) : Whether to keep the structured-name to parameter-name conversion table in the output dict.
+                                          Default: False

     Returns:
         state_dict(dict) : the dict that stores the state_dict
@@ -111,45 +124,15 @@ def load_dygraph(model_path):
         raise RuntimeError("Parameter file [ {} ] does not exist".format(
             params_file_path))

-    para_dict = core._load_dygraph_dict(params_file_path)
+    with open(params_file_path, 'rb') as f:
+        para_dict = pickle.load(f)
+
+    if not keep_name_table and "StructuredToParameterName@@" in para_dict:
+        del para_dict["StructuredToParameterName@@"]

     opti_dict = None
     opti_file_path = model_path + ".pdopt"
     if os.path.exists(opti_file_path):
-        opti_dict = core._load_dygraph_dict(opti_file_path)
+        with open(opti_file_path, 'rb') as f:
+            opti_dict = pickle.load(f)

     return para_dict, opti_dict
-@dygraph_only
-def load_optimizer(model_path):
-    '''
-    Load optimizer state_dict from disk.
-
-    Args:
-        model_path(str) : The file prefix store the state_dict. (The path should Not contain shuffix '.pdparams')
-
-    Returns:
-        state_dict(dict) : the dict store the state_dict
-
-    Examples:
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-
-            with fluid.dygraph.guard():
-                adam = fluid.optimizer.Adam(0.001)
-                state_dict = adam.state_dict()
-                fluid.save_optimizer( state_dict, "opt_adam")
-
-            fluid.load_optimizer( "opt_adam")
-    '''
-    assert in_dygraph_mode(), "save_optimizer only work in dygraph mode"
-    opt_file_path = model_path + ".pdopt"
-    if not os.path.exists(opt_file_path):
-        raise RuntimeError("Optimizer file [ {} ] not exists".format(
-            opt_file_path))
-
-    return core._load_dygraph_dict(opt_file_path)
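
Taken together, save_dygraph now pickles a plain dict of numpy arrays plus a "StructuredToParameterName@@" table, and load_dygraph strips that table unless asked to keep it. A minimal sketch of the round trip, assuming a trivial Embedding layer and an illustrative path (neither is part of this diff):

import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])

    # Keys are structured attribute names; values are pickled as numpy arrays.
    fluid.save_dygraph(emb.state_dict(), "./emb")  # writes ./emb.pdparams

    # The name table is dropped by default; opti_dict is None without ./emb.pdopt.
    para_dict, opti_dict = fluid.load_dygraph("./emb")

    # keep_name_table=True preserves the structured-name -> parameter-name map.
    with_table, _ = fluid.load_dygraph("./emb", keep_name_table=True)
    name_table = with_table["StructuredToParameterName@@"]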
@@ -25,6 +25,8 @@ from .layer_object_helper import LayerObjectHelper
 from .base import program_desc_tracing_guard
 from paddle.fluid import framework
 from ..param_attr import ParamAttr
+import copy
+import warnings

 __all__ = ['Layer']
@@ -99,11 +101,10 @@ class Layer(core.Layer):
         Returns:
             :ref:`api_guide_Variable_en` : created parameter.
         """
-        if isinstance(attr, ParamAttr) and (attr.name is not None):
-            attr.name = ".".join([self._full_name, attr.name])
-        elif isinstance(attr, six.string_types):
-            attr = ".".join([self._full_name, attr])
-        return self._helper.create_parameter(attr, shape, dtype, is_bias,
+        temp_attr = copy.deepcopy(attr)
+        if isinstance(temp_attr, six.string_types) and temp_attr == "":
+            temp_attr = None
+        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
                                              default_initializer)

     # TODO: Add more parameter list when we need them
@@ -283,7 +284,10 @@ class Layer(core.Layer):
         else:
             object.__delattr__(self, name)

-    def state_dict(self, destination=None, include_sublayers=True):
+    def state_dict(self,
+                   destination=None,
+                   include_sublayers=True,
+                   structured_name_prefix=""):
         '''
         Get all parameters of the current layer and its sub-layers, and set them into a dict

@@ -310,25 +314,31 @@ class Layer(core.Layer):
             destination = collections.OrderedDict()
         for name, data in self._parameters.items():
             if data is not None:
-                destination[data.name] = data
+                destination[structured_name_prefix + name] = data
         if include_sublayers:
             for layer_name, layer_item in self._sub_layers.items():
                 if layer_item is not None:
                     destination_temp = destination.copy()
                     destination_temp.update(
-                        layer_item.state_dict(destination_temp,
-                                              include_sublayers))
+                        layer_item.state_dict(
+                            destination_temp, include_sublayers,
+                            structured_name_prefix + layer_name + "."))
                     destination = destination_temp
         return destination
-    def set_dict(self, stat_dict, include_sublayers=True):
+    def set_dict(self,
+                 stat_dict,
+                 include_sublayers=True,
+                 use_structured_name=True):
         '''
         Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict

         Parameters:
             state_dict(dict) : Dict that contains all the parameters
             include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
         Returns:
             None
@@ -347,9 +357,15 @@ class Layer(core.Layer):
                 emb.set_dict( para_state_dict )

         '''
-        self.load_dict(stat_dict, include_sublayers=include_sublayers)
+        self.load_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)

-    def load_dict(self, stat_dict, include_sublayers=True):
+    def load_dict(self,
+                  stat_dict,
+                  include_sublayers=True,
+                  use_structured_name=True):
         '''
         Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict
@@ -358,6 +374,8 @@ class Layer(core.Layer):
         Parameters:
             state_dict(dict) : Dict that contains all the parameters
             include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
         Returns:
             None
@@ -377,16 +395,22 @@ class Layer(core.Layer):
         '''
-        self._loaddict_holder = stat_dict
-        for name, item in self.__dict__.get('_parameters', None).items():
-            if item.name in stat_dict:
-                item.set_value(stat_dict[item.name])
+        inner_state_dict = self.state_dict()
+
+        for name, para in inner_state_dict.items():
+            key_name = name if use_structured_name else para.name
+            if key_name in stat_dict:
+                para.set_value(stat_dict[key_name])
             else:
                 raise RuntimeError(
-                    "Parameter not found, Can't not find [ {} ] in stat_dict".
-                    format(item.name))
+                    "Parameter not found: cannot find [ {} ] in stat_dict; "
+                    "use_structured_name is set to [{}]".format(
+                        key_name, use_structured_name))
-        if include_sublayers:
-            for layer_name, layer_item in self._sub_layers.items():
-                if layer_item is not None:
-                    layer_item.load_dict(stat_dict)
+        unused_para_list = []
+        for k, v in stat_dict.items():
+            if k not in inner_state_dict:
+                unused_para_list.append(k)
+        if len(unused_para_list) > 0:
+            warnings.warn(
+                "Variables [ {} ] are not used, because they are not included in the layer's state_dict".
+                format(" ".join(unused_para_list)))
@@ -19,9 +19,11 @@ from .. import core
 from ..layers import utils
 from ..dygraph import dygraph_utils
 from . import layers
-from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter, _dygraph_tracer_
+from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
 from ..param_attr import ParamAttr
 from ..initializer import Normal, Constant, NumpyArrayInitializer
+from .. import unique_name
+from .layer_object_helper import LayerObjectHelper
 import numpy as np
 import numbers
 import logging
@@ -1287,6 +1289,9 @@ class BatchNorm(layers.Layer):
         self._bias_attr = bias_attr
         self._act = act

+        self._full_name = unique_name.generate("batch_norm")
+        self._helper = LayerObjectHelper(self._full_name)
+
         assert bias_attr is not False, "bias_attr should not be False in batch_norm."

         if dtype == "float16":
@@ -1618,6 +1623,10 @@ class LayerNorm(layers.Layer):
         super(LayerNorm, self).__init__()
         if isinstance(normalized_shape, numbers.Integral):
             normalized_shape = [normalized_shape]
+
+        self._full_name = unique_name.generate("layer_norm")
+        self._helper = LayerObjectHelper(self._full_name)
+
         self._normalized_shape = list(normalized_shape)
         self._scale = scale
         self._shift = shift
......
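BatchNorm and LayerNorm create their parameters inside __init__, before the layer is attached to a parent, so each instance now draws its own prefix from unique_name and builds a private LayerObjectHelper with it. Roughly, the naming behaves like this sketch (the generated suffixes are illustrative):

from paddle.fluid import unique_name

# Each call returns a fresh, process-wide unique prefix for the namespace.
print(unique_name.generate("batch_norm"))  # e.g. "batch_norm_0"
print(unique_name.generate("batch_norm"))  # e.g. "batch_norm_1"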
@@ -71,6 +71,9 @@ class ParamAttr(object):
                  gradient_clip=None,
                  do_model_average=True):
         self.name = name
+        if isinstance(self.name, six.string_types) and self.name == "":
+            raise ValueError("name of ParamAttr can not be empty str")
+
         self.initializer = initializer
         self.learning_rate = learning_rate
         self.regularizer = regularizer
......
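The new check rejects an empty-string name at construction time instead of letting it surface later as a confusing parameter-name error. A quick sketch (the attribute name is illustrative):

import paddle.fluid as fluid

attr = fluid.ParamAttr(name="w_attr")  # a named attribute is fine
try:
    fluid.ParamAttr(name="")           # an empty string is now rejected
except ValueError as e:
    print(e)  # name of ParamAttr can not be empty str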
@@ -101,8 +101,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
             padding=(filter_size - 1) // 2,
             groups=groups,
             act=None,
-            bias_attr=False,
-            param_attr=fluid.ParamAttr(name="weights"))
+            bias_attr=False)

         # disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape
         # self._batch_norm = BatchNorm(num_filters, act=act)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from paddle.fluid.dygraph.nn import *
import numpy as np
print("11")
class TestDygraphLoadStatic(unittest.TestCase):
def testLoadStaticModel(self):
# static mode
a = fluid.data(name="a", shape=[10, 10])
conv_in = fluid.data(name="conv_in", shape=[None, 10, 10, 10])
fc_out1 = fluid.layers.fc(a, 10)
fc_out2 = fluid.layers.fc(a, 20)
conv_out_1 = fluid.layers.conv2d(
conv_in, num_filters=10, filter_size=5, act="relu")
conv_out_2 = fluid.layers.conv2d(
conv_in, num_filters=10, filter_size=5, act="relu")
conv3d_in = fluid.data(
name='conv3d_in', shape=[None, 3, 12, 32, 32], dtype='float32')
conv3d_out_1 = fluid.layers.conv3d(
input=conv3d_in, num_filters=2, filter_size=3, act="relu")
conv3d_out_2 = fluid.layers.conv3d(
input=conv3d_in, num_filters=2, filter_size=3, act="relu")
batchnorm_in = fluid.data(
name="batchnorm_in", shape=[None, 10], dtype='float32')
batchnorm_out_1 = fluid.layers.batch_norm(batchnorm_in)
batchnorm_out_2 = fluid.layers.batch_norm(batchnorm_in)
emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64')
emb_out_1 = fluid.embedding(emb_in, [1000, 100])
emb_out_2 = fluid.embedding(emb_in, [2000, 200])
layernorm = fluid.data(name="ln", shape=[None, 10], dtype='float32')
layernorm_1 = fluid.layers.layer_norm(layernorm)
layernorm_2 = fluid.layers.layer_norm(layernorm)
nce_in = fluid.data(name="nce_in", shape=[None, 100], dtype='float32')
nce_label = fluid.data(
name="nce_label", shape=[None, 10], dtype='int64')
nce_out_1 = fluid.layers.nce(nce_in, nce_label, 10000)
nce_out_2 = fluid.layers.nce(nce_in, nce_label, 10000)
prelu_in = fluid.data(
name="prelu_in", shape=[None, 5, 10, 10], dtype='float32')
prelu_out_1 = fluid.layers.prelu(prelu_in, "channel")
prelu_out_2 = fluid.layers.prelu(prelu_in, "channel")
bilinear_tensor_pro_x = fluid.data(
"t1", shape=[None, 5], dtype="float32")
bilinear_tensor_pro_y = fluid.data(
"t2", shape=[None, 4], dtype="float32")
bilinear_tensor_pro_out_1 = fluid.layers.bilinear_tensor_product(
x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
bilinear_tensor_pro_out_2 = fluid.layers.bilinear_tensor_product(
x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
conv2d_trans_in = fluid.data(
name="conv2d_trans_in", shape=[None, 10, 10, 10])
conv2d_trans_out_1 = fluid.layers.conv2d_transpose(
conv2d_trans_in, num_filters=10, filter_size=5, act="relu")
conv2d_trans_out_2 = fluid.layers.conv2d_transpose(
conv2d_trans_in, num_filters=10, filter_size=5, act="relu")
conv3d_trans_in = fluid.data(
name='conv3d_trans_in',
shape=[None, 3, 12, 32, 32],
dtype='float32')
conv3d_trans_out_1 = fluid.layers.conv3d_transpose(
input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
conv3d_trans_out_2 = fluid.layers.conv3d_transpose(
input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
groupnorm_in = fluid.data(
name='groupnorm_in', shape=[None, 8, 32, 32], dtype='float32')
groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
'''
spec_norm = fluid.data(name='spec_norm', shape=[2, 8, 32, 32], dtype='float32')
spe_norm_out_1 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
spe_norm_out_2 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
'''
nodes_vector = fluid.data(
name='vectors', shape=[None, 10, 5], dtype='float32')
edge_set = fluid.data(
name='edge_set', shape=[None, 10, 2], dtype='float32')
tree_conv_out1 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set,
6, 1, 2)
tree_conv_out2 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set,
6, 1, 2)
para1 = fluid.layers.create_parameter(
[100, 100], 'float32', name="weight_test_1")
para2 = fluid.layers.create_parameter(
[20, 200], 'float32', name="weight_test_2")
para_list = fluid.default_main_program().list_vars()
exe = fluid.Executor(fluid.CPUPlace(
) if not fluid.is_compiled_with_cuda() else fluid.CUDAPlace(0))
out = exe.run(framework.default_startup_program())
fluid.save(framework.default_main_program(), "./test_1")
para_dict = fluid.load_program_state("./test_1")
new_dict = {}
for k, v in para_dict.items():
#print( k, v.shape )
if k.startswith("fc"):
name = k.replace("fc", "linear", 1)
new_dict[name] = v
else:
new_dict[k] = v
with fluid.dygraph.guard():
class MyTest(fluid.dygraph.Layer):
def __init__(self):
super(MyTest, self).__init__()
self.linear1 = Linear(10, 10)
self.linear2 = Linear(10, 20)
self.conv2d_1 = Conv2D(
num_channels=10,
num_filters=10,
filter_size=5,
act="relu")
self.conv2d_2 = Conv2D(
num_channels=10,
num_filters=10,
filter_size=5,
act="relu")
self.conv3d_1 = Conv3D(
num_channels=3,
num_filters=2,
filter_size=3,
act="relu")
self.conv3d_2 = Conv3D(
num_channels=3,
num_filters=2,
filter_size=3,
act="relu")
self.batch_norm_1 = BatchNorm(10)
self.batch_norm_2 = BatchNorm(10)
self.emb1 = Embedding([1000, 100])
self.emb2 = Embedding([2000, 200])
self.layer_norm_1 = LayerNorm([10])
self.layer_norm_2 = LayerNorm(10)
self.nce1 = NCE(10000, 100)
self.nce2 = NCE(10000, 100)
self.prelu1 = PRelu("channel", [-1, 5, 10, 10])
self.prelu2 = PRelu("channel", [-1, 5, 10, 10])
self.group_norm1 = GroupNorm(8, 4)
self.group_norm2 = GroupNorm(8, 4)
self.w_1 = self.create_parameter(
[100, 100], dtype='float32', attr="weight_test_1")
self.w_2 = self.create_parameter(
[20, 200], dtype='float32', attr="weight_test_2")
my_test = MyTest()
my_test.set_dict(new_dict, use_structured_name=False)
for k, v in my_test.state_dict().items():
self.assertTrue(np.array_equal(v.numpy(), new_dict[v.name]))
if __name__ == '__main__':
unittest.main()
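
The test above boils down to one pattern: dump the static program's parameters with fluid.load_program_state, remap keys where the dygraph layer family is named differently (fc vs. linear), and load by parameter name. A condensed sketch of just that flow, assuming a single static fc and its dygraph Linear counterpart (shapes, paths, and the "fc_0.w_0"-style names are illustrative defaults):

import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear

# Build and save a small static-graph fc.
x = fluid.data(name="x", shape=[None, 10], dtype="float32")
out = fluid.layers.fc(x, 10)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
fluid.save(fluid.default_main_program(), "./static_fc")

# {parameter_name: numpy array}, e.g. "fc_0.w_0", "fc_0.b_0".
para_state = fluid.load_program_state("./static_fc")

with fluid.dygraph.guard():
    linear = Linear(10, 10)
    # Dygraph Linear names its parameters "linear_0.w_0"/"linear_0.b_0",
    # so remap the static keys before matching by parameter name.
    remapped = {k.replace("fc", "linear", 1): v for k, v in para_state.items()}
    linear.set_dict(remapped, use_structured_name=False)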
@@ -17,7 +17,7 @@ from __future__ import print_function
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.dygraph.nn import Embedding, FC
+from paddle.fluid.dygraph.nn import Embedding, Linear
 import paddle.fluid.framework as framework
 from paddle.fluid.optimizer import Adam
 from paddle.fluid.dygraph.base import to_variable
@@ -29,13 +29,12 @@ import six

 class SimpleLSTMRNN(fluid.Layer):
     def __init__(self,
-                 name_scope,
                  hidden_size,
                  num_steps,
                  num_layers=2,
                  init_scale=0.1,
                  dropout=None):
-        super(SimpleLSTMRNN, self).__init__(name_scope)
+        super(SimpleLSTMRNN, self).__init__()
         self._hidden_size = hidden_size
         self._num_layers = num_layers
         self._init_scale = init_scale
@@ -44,8 +43,6 @@ class SimpleLSTMRNN(fluid.Layer):
         self._num_steps = num_steps
         self.cell_array = []
         self.hidden_array = []
-
-    def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
         self.weight_1_arr = []
         self.weight_2_arr = []
         self.bias_arr = []
@@ -149,7 +146,6 @@ class PtbModel(fluid.Layer):
         self.num_steps = num_steps
         self.dropout = dropout
         self.simple_lstm_rnn = SimpleLSTMRNN(
-            self.full_name(),
             hidden_size,
             num_steps,
             num_layers=num_layers,
@@ -164,9 +160,7 @@ class PtbModel(fluid.Layer):
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale)))
-        self.out_project = FC(self.full_name(),
-                              self.vocab_size,
-                              num_flatten_dims=2)
+        self.out_project = Linear(self.hidden_size, self.vocab_size)

     def forward(self, input, label, init_hidden, init_cell):
         init_h = fluid.layers.reshape(
@@ -277,10 +271,11 @@ class TestDygraphPtbRnn(unittest.TestCase):
             fluid.save_dygraph(self.opti_dict, "./test_dy")

             self.state_dict = ptb_model.state_dict()
             self.model_base = {}
             for k, v in self.state_dict.items():
                 np_t = v.numpy()
-                self.model_base[v.name] = np_t
+                self.model_base[k] = np_t

             fluid.save_dygraph(self.state_dict, "./test_dy")
@@ -386,7 +381,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for k, v in state_dict.items():
                 new_t = v.numpy()
-                base_t = self.model_base[v.name]
+                base_t = self.model_base[k]
                 self.assertTrue(np.array_equal(new_t, base_t))
@@ -491,7 +486,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for k, v in state_dict.items():
                 new_t = v.numpy()
-                base_t = self.model_base[v.name]
+                base_t = self.model_base[k]
                 self.assertTrue(np.array_equal(new_t, base_t))
@@ -588,7 +583,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             np_state_dict = {}
             for k, v in state_dict.items():
                 np_t = v.numpy()
-                np_state_dict[v.name] = np_t
+                np_state_dict[k] = np_t
                 var = v.value().get_tensor()
                 var.set(np.zeros_like(np_t), place)
@@ -600,7 +595,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for k, v in state_dict.items():
                 new_t = v.numpy()
-                base_t = self.model_base[v.name]
+                base_t = self.model_base[k]
                 self.assertTrue(np.array_equal(new_t, base_t))
@@ -626,20 +621,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
                 num_steps=num_steps,
                 init_scale=init_scale)

-            bd = []
-            lr_arr = [1.0]
-            # this a fake lr decay strategy
-            for i in range(1, 10):
-                bd.append(100 * i)
-                #set lr to 0.0, not udpate parameter
-                new_lr = 0.0
-                lr_arr.append(new_lr)
-
             place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
             ) else fluid.CUDAPlace(0)
             adam = Adam(
-                learning_rate=fluid.layers.piecewise_decay(
-                    boundaries=bd, values=lr_arr),
+                learning_rate=0.0,
                 beta1=0.8,
                 beta2=0.6,
                 parameter_list=ptb_model.parameters())
@@ -686,14 +671,12 @@ class TestDygraphPtbRnn(unittest.TestCase):
                     np.array_equal(v.numpy(), self.base_opti[v.name] *
                                    adam._beta2))

-            # check parameter
             state_dict = ptb_model.state_dict()

             for k, v in state_dict.items():
                 new_t = v.numpy()
-                base_t = self.model_base[v.name]
+                base_t = self.model_base[k]
                 self.assertTrue(np.array_equal(new_t, base_t))

     def testLoadAndSetVarBaseBeforeTrain(self):
@@ -719,7 +702,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
                 init_scale=init_scale)

             bd = []
-            lr_arr = [1.0]
+            lr_arr = [0.0]
             # this is a fake lr decay strategy
             for i in range(1, 10):
                 bd.append(100 * i)
@@ -730,8 +713,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
             ) else fluid.CUDAPlace(0)
             adam = Adam(
-                learning_rate=fluid.layers.piecewise_decay(
-                    boundaries=bd, values=lr_arr),
+                learning_rate=0.0,
                 beta1=0.8,
                 beta2=0.6,
                 parameter_list=ptb_model.parameters())
@@ -786,7 +768,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for k, v in state_dict.items():
                 new_t = v.numpy()
-                base_t = self.model_base[v.name]
+                base_t = self.model_base[k]
                 self.assertTrue(np.array_equal(new_t, base_t))

     def testSetNumpyBeforeTrain(self):
@@ -812,7 +794,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
                 init_scale=init_scale)

             bd = []
-            lr_arr = [1.0]
+            lr_arr = [0.0]
             # this is a fake lr decay strategy
             for i in range(1, 10):
                 bd.append(100 * i)
@@ -841,11 +823,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
                 np_opti_dict[v.name] = v.numpy()

             for k, v in self.state_dict.items():
-                np_state_dict[v.name] = v.numpy()
+                np_state_dict[k] = v.numpy()

             adam.set_dict(np_opti_dict)
             ptb_model.set_dict(np_state_dict)
-
             for i in range(1):
                 x_data = np.arange(12).reshape(4, 3).astype('int64')
                 y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
@@ -887,7 +868,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
             for k, v in state_dict.items():
                 new_t = v.numpy()
-                base_t = self.model_base[v.name]
+                base_t = self.model_base[k]
                 self.assertTrue(np.array_equal(new_t, base_t))

     def testOnlyLoadParams(self):
......