Unverified commit 2f49cf70, authored by H hong, committed by GitHub

Support dygraph structured name (#21930)

* support dygraph structured name; test=develop

* add load static param unit test and fix save load; test=develop

* fix VarBase import error; test=develop

* fix unit test error; test=develop

* add comment for parameter; test=develop

* fix unit test error; test=develop

* change parallel se-resnet; test=develop

* fix dygraph se resnext parallel test; test=develop

* remove useless code; test=develop

* remove useless code; test=develop
Parent f220be4f
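For context, a minimal sketch of the workflow this commit enables (not taken from the diff; the Net layer below is hypothetical): state_dict keys become structured attribute paths such as "linear1.weight" instead of auto-generated parameter names such as "linear_0.w_0", and save/load round-trips through these structured names.

import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear

with fluid.dygraph.guard():
    class Net(fluid.dygraph.Layer):
        def __init__(self):
            super(Net, self).__init__()
            self.linear1 = Linear(10, 10)

        def forward(self, x):
            return self.linear1(x)

    net = Net()
    # keys are structured names like "linear1.weight", "linear1.bias"
    state_dict = net.state_dict()
    fluid.save_dygraph(state_dict, "./net")
    # load_dygraph returns (parameter dict, optimizer dict)
    para_dict, _ = fluid.load_dygraph("./net")
    net.set_dict(para_dict)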
......@@ -16,7 +16,7 @@ from __future__ import print_function
import os
import collections
from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter
from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase
import pickle
from . import learning_rate_scheduler
import warnings
......@@ -68,20 +68,33 @@ def save_dygraph(state_dict, model_path):
assert len(state_dict) > 0, "state_dict is empty, no need to save"
for k, v in state_dict.items():
if not isinstance(v, Parameter):
if not isinstance(v, ParamBase):
suffix = ".pdopt"
break
core._save_dygraph_dict(model_path + suffix, state_dict)
model_dict = {}
name_table = {}
for k, v in state_dict.items():
if isinstance(v, (Variable, core.VarBase)):
model_dict[k] = v.numpy()
else:
model_dict[k] = v
name_table[k] = v.name
model_dict["StructuredToParameterName@@"] = name_table
with open(model_path + suffix, 'wb') as f:
pickle.dump(model_dict, f)
@dygraph_only
def load_dygraph(model_path):
def load_dygraph(model_path, keep_name_table=False):
'''
Load parameter state_dict from disk.
Args:
model_path(str) : The file prefix that stores the state_dict. (The path should not contain the suffix '.pdparams')
keep_name_table(bool, optional) : Whether to keep the structured-name to parameter-name conversion table in the output dict.
Default : False
Returns:
state_dict(dict) : the dict that stores the state_dict
......@@ -111,45 +124,15 @@ def load_dygraph(model_path):
raise RuntimeError("Parameter file [ {} ] not exists".format(
params_file_path))
para_dict = core._load_dygraph_dict(params_file_path)
with open(params_file_path, 'rb') as f:
para_dict = pickle.load(f)
if not keep_name_table and "StructuredToParameterName@@" in para_dict:
del para_dict["StructuredToParameterName@@"]
opti_dict = None
opti_file_path = model_path + ".pdopt"
if os.path.exists(opti_file_path):
opti_dict = core._load_dygraph_dict(opti_file_path)
with open(opti_file_path, 'rb') as f:
opti_dict = pickle.load(f)
return para_dict, opti_dict
@dygraph_only
def load_optimizer(model_path):
'''
Load optimizer state_dict from disk.
Args:
model_path(str) : The file prefix that stores the state_dict. (The path should not contain the suffix '.pdparams')
Returns:
state_dict(dict) : the dict that stores the state_dict
Examples:
.. code-block:: python
import paddle.fluid as fluid
with fluid.dygraph.guard():
adam = fluid.optimizer.Adam(0.001)
state_dict = adam.state_dict()
fluid.save_optimizer( state_dict, "opt_adam")
fluid.load_optimizer( "opt_adam")
'''
assert in_dygraph_mode(), "load_optimizer only works in dygraph mode"
opt_file_path = model_path + ".pdopt"
if not os.path.exists(opt_file_path):
raise RuntimeError("Optimizer file [ {} ] not exists".format(
opt_file_path))
return core._load_dygraph_dict(opt_file_path)
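A hedged sketch of the new keep_name_table flag in load_dygraph, assuming a small Embedding layer (the printed names are illustrative, not guaranteed):

import paddle.fluid as fluid

with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding([10, 10])
    fluid.save_dygraph(emb.state_dict(), "./emb")

    # by default the "StructuredToParameterName@@" entry is stripped;
    # keep_name_table=True keeps the structured-name -> parameter-name map
    para_dict, _ = fluid.load_dygraph("./emb", keep_name_table=True)
    print(para_dict["StructuredToParameterName@@"])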
......@@ -25,6 +25,8 @@ from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
import copy
import warnings
__all__ = ['Layer']
......@@ -99,11 +101,10 @@ class Layer(core.Layer):
Returns:
:ref:`api_guide_Variable_en` : created parameter.
"""
if isinstance(attr, ParamAttr) and (attr.name is not None):
attr.name = ".".join([self._full_name, attr.name])
elif isinstance(attr, six.string_types):
attr = ".".join([self._full_name, attr])
return self._helper.create_parameter(attr, shape, dtype, is_bias,
temp_attr = copy.deepcopy(attr)
if isinstance(temp_attr, six.string_types) and temp_attr == "":
temp_attr = None
return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
default_initializer)
# TODO: Add more parameter list when we need them
......@@ -283,7 +284,10 @@ class Layer(core.Layer):
else:
object.__delattr__(self, name)
def state_dict(self, destination=None, include_sublayers=True):
def state_dict(self,
destination=None,
include_sublayers=True,
structured_name_prefix=""):
'''
Get all parameters of current layer and its sub-layers. And set all the parameters into a dict
......@@ -310,25 +314,31 @@ class Layer(core.Layer):
destination = collections.OrderedDict()
for name, data in self._parameters.items():
if data is not None:
destination[data.name] = data
destination[structured_name_prefix + name] = data
if include_sublayers:
for layer_name, layer_item in self._sub_layers.items():
if layer_item is not None:
destination_temp = destination.copy()
destination_temp.update(
layer_item.state_dict(destination_temp,
include_sublayers))
layer_item.state_dict(
destination_temp, include_sublayers,
structured_name_prefix + layer_name + "."))
destination = destination_temp
return destination
def set_dict(self, stat_dict, include_sublayers=True):
def set_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict
Parameters:
state_dict(dict) : Dict that contains all the parameters
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
Default: True
Returns:
None
......@@ -347,9 +357,15 @@ class Layer(core.Layer):
emb.set_dict( para_state_dict )
'''
self.load_dict(stat_dict, include_sublayers=include_sublayers)
def load_dict(self, stat_dict, include_sublayers=True):
self.load_dict(
stat_dict,
include_sublayers=include_sublayers,
use_structured_name=use_structured_name)
def load_dict(self,
stat_dict,
include_sublayers=True,
use_structured_name=True):
'''
Set parameters from stat_dict. All the parameters will be reset by the tensors in the stat_dict
......@@ -358,6 +374,8 @@ class Layer(core.Layer):
Parameters:
state_dict(dict) : Dict that contains all the parameters
include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
Default: True
Returns:
None
......@@ -377,16 +395,22 @@ class Layer(core.Layer):
'''
self._loaddict_holder = stat_dict
for name, item in self.__dict__.get('_parameters', None).items():
if item.name in stat_dict:
item.set_value(stat_dict[item.name])
inner_state_dict = self.state_dict()
for name, para in inner_state_dict.items():
key_name = name if use_structured_name else para.name
if key_name in stat_dict:
para.set_value(stat_dict[key_name])
else:
raise RuntimeError(
"Parameter not found, Can't not find [ {} ] in stat_dict".
format(item.name))
if include_sublayers:
for layer_name, layer_item in self._sub_layers.items():
if layer_item is not None:
layer_item.load_dict(stat_dict)
"Parameter not found, Can't not find [ {} ] in stat_dict"
"use_structured_name is set to [{}]".format(
key_name, use_structured_name))
unused_para_list = []
for k, v in stat_dict.items():
if k not in inner_state_dict:
unused_para_list.append(k)
if len(unused_para_list) > 0:
warnings.warn(
"Varibale [ {} ] are not used, because not included in layers state_dict".
format(" ".join(unused_para_list)))
......@@ -19,9 +19,11 @@ from .. import core
from ..layers import utils
from ..dygraph import dygraph_utils
from . import layers
from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter, _dygraph_tracer_
from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
from ..param_attr import ParamAttr
from ..initializer import Normal, Constant, NumpyArrayInitializer
from .. import unique_name
from .layer_object_helper import LayerObjectHelper
import numpy as np
import numbers
import logging
......@@ -1287,6 +1289,9 @@ class BatchNorm(layers.Layer):
self._bias_attr = bias_attr
self._act = act
self._full_name = unique_name.generate("batch_norm")
self._helper = LayerObjectHelper(self._full_name)
assert bias_attr is not False, "bias_attr should not be False in batch_norm."
if dtype == "float16":
......@@ -1618,6 +1623,10 @@ class LayerNorm(layers.Layer):
super(LayerNorm, self).__init__()
if isinstance(normalized_shape, numbers.Integral):
normalized_shape = [normalized_shape]
self._full_name = unique_name.generate("layer_norm")
self._helper = LayerObjectHelper(self._full_name)
self._normalized_shape = list(normalized_shape)
self._scale = scale
self._shift = shift
......
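Because BatchNorm and LayerNorm now generate their own unique full names via unique_name instead of relying on a caller-supplied name scope, two instances get distinct parameter names automatically; a small sketch assuming the post-change constructor signature:

import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import BatchNorm

with fluid.dygraph.guard():
    bn1 = BatchNorm(10)
    bn2 = BatchNorm(10)
    # each instance owns its own name scope, so the parameter names differ
    print([p.name for p in bn1.parameters()])
    print([p.name for p in bn2.parameters()])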
......@@ -71,6 +71,9 @@ class ParamAttr(object):
gradient_clip=None,
do_model_average=True):
self.name = name
if isinstance(self.name, six.string_types) and self.name == "":
raise ValueError("name of ParamAttr can not be empty str")
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
......
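A quick sketch of the new ParamAttr validation: an empty-string name is now rejected up front instead of silently producing a broken parameter name.

import paddle.fluid as fluid

try:
    fluid.ParamAttr(name="")
except ValueError as e:
    print(e)  # name of ParamAttr can not be empty str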
......@@ -101,8 +101,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False,
param_attr=fluid.ParamAttr(name="weights"))
bias_attr=False)
# disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape
# self._batch_norm = BatchNorm(num_filters, act=act)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from paddle.fluid.dygraph.nn import *
import numpy as np
print("11")
class TestDygraphLoadStatic(unittest.TestCase):
def testLoadStaticModel(self):
# static mode
a = fluid.data(name="a", shape=[10, 10])
conv_in = fluid.data(name="conv_in", shape=[None, 10, 10, 10])
fc_out1 = fluid.layers.fc(a, 10)
fc_out2 = fluid.layers.fc(a, 20)
conv_out_1 = fluid.layers.conv2d(
conv_in, num_filters=10, filter_size=5, act="relu")
conv_out_2 = fluid.layers.conv2d(
conv_in, num_filters=10, filter_size=5, act="relu")
conv3d_in = fluid.data(
name='conv3d_in', shape=[None, 3, 12, 32, 32], dtype='float32')
conv3d_out_1 = fluid.layers.conv3d(
input=conv3d_in, num_filters=2, filter_size=3, act="relu")
conv3d_out_2 = fluid.layers.conv3d(
input=conv3d_in, num_filters=2, filter_size=3, act="relu")
batchnorm_in = fluid.data(
name="batchnorm_in", shape=[None, 10], dtype='float32')
batchnorm_out_1 = fluid.layers.batch_norm(batchnorm_in)
batchnorm_out_2 = fluid.layers.batch_norm(batchnorm_in)
emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64')
emb_out_1 = fluid.embedding(emb_in, [1000, 100])
emb_out_2 = fluid.embedding(emb_in, [2000, 200])
layernorm = fluid.data(name="ln", shape=[None, 10], dtype='float32')
layernorm_1 = fluid.layers.layer_norm(layernorm)
layernorm_2 = fluid.layers.layer_norm(layernorm)
nce_in = fluid.data(name="nce_in", shape=[None, 100], dtype='float32')
nce_label = fluid.data(
name="nce_label", shape=[None, 10], dtype='int64')
nce_out_1 = fluid.layers.nce(nce_in, nce_label, 10000)
nce_out_2 = fluid.layers.nce(nce_in, nce_label, 10000)
prelu_in = fluid.data(
name="prelu_in", shape=[None, 5, 10, 10], dtype='float32')
prelu_out_1 = fluid.layers.prelu(prelu_in, "channel")
prelu_out_2 = fluid.layers.prelu(prelu_in, "channel")
bilinear_tensor_pro_x = fluid.data(
"t1", shape=[None, 5], dtype="float32")
bilinear_tensor_pro_y = fluid.data(
"t2", shape=[None, 4], dtype="float32")
bilinear_tensor_pro_out_1 = fluid.layers.bilinear_tensor_product(
x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
bilinear_tensor_pro_out_2 = fluid.layers.bilinear_tensor_product(
x=bilinear_tensor_pro_x, y=bilinear_tensor_pro_y, size=1000)
conv2d_trans_in = fluid.data(
name="conv2d_trans_in", shape=[None, 10, 10, 10])
conv2d_trans_out_1 = fluid.layers.conv2d_transpose(
conv2d_trans_in, num_filters=10, filter_size=5, act="relu")
conv2d_trans_out_2 = fluid.layers.conv2d_transpose(
conv2d_trans_in, num_filters=10, filter_size=5, act="relu")
conv3d_trans_in = fluid.data(
name='conv3d_trans_in',
shape=[None, 3, 12, 32, 32],
dtype='float32')
conv3d_trans_out_1 = fluid.layers.conv3d_transpose(
input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
conv3d_trans_out_2 = fluid.layers.conv3d_transpose(
input=conv3d_trans_in, num_filters=2, filter_size=3, act="relu")
groupnorm_in = fluid.data(
name='groupnorm_in', shape=[None, 8, 32, 32], dtype='float32')
groupnorm_out1 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
groupnorm_out2 = fluid.layers.group_norm(input=groupnorm_in, groups=4)
'''
spec_norm = fluid.data(name='spec_norm', shape=[2, 8, 32, 32], dtype='float32')
spe_norm_out_1 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
spe_norm_out_2 = fluid.layers.spectral_norm(weight=spec_norm, dim=1, power_iters=2)
'''
nodes_vector = fluid.data(
name='vectors', shape=[None, 10, 5], dtype='float32')
edge_set = fluid.data(
name='edge_set', shape=[None, 10, 2], dtype='float32')
tree_conv_out1 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set,
6, 1, 2)
tree_conv_out2 = fluid.contrib.layers.tree_conv(nodes_vector, edge_set,
6, 1, 2)
para1 = fluid.layers.create_parameter(
[100, 100], 'float32', name="weight_test_1")
para2 = fluid.layers.create_parameter(
[20, 200], 'float32', name="weight_test_2")
para_list = fluid.default_main_program().list_vars()
exe = fluid.Executor(fluid.CPUPlace(
) if not fluid.is_compiled_with_cuda() else fluid.CUDAPlace(0))
out = exe.run(framework.default_startup_program())
fluid.save(framework.default_main_program(), "./test_1")
para_dict = fluid.load_program_state("./test_1")
new_dict = {}
for k, v in para_dict.items():
#print( k, v.shape )
if k.startswith("fc"):
name = k.replace("fc", "linear", 1)
new_dict[name] = v
else:
new_dict[k] = v
with fluid.dygraph.guard():
class MyTest(fluid.dygraph.Layer):
def __init__(self):
super(MyTest, self).__init__()
self.linear1 = Linear(10, 10)
self.linear2 = Linear(10, 20)
self.conv2d_1 = Conv2D(
num_channels=10,
num_filters=10,
filter_size=5,
act="relu")
self.conv2d_2 = Conv2D(
num_channels=10,
num_filters=10,
filter_size=5,
act="relu")
self.conv3d_1 = Conv3D(
num_channels=3,
num_filters=2,
filter_size=3,
act="relu")
self.conv3d_2 = Conv3D(
num_channels=3,
num_filters=2,
filter_size=3,
act="relu")
self.batch_norm_1 = BatchNorm(10)
self.batch_norm_2 = BatchNorm(10)
self.emb1 = Embedding([1000, 100])
self.emb2 = Embedding([2000, 200])
self.layer_norm_1 = LayerNorm([10])
self.layer_norm_2 = LayerNorm(10)
self.nce1 = NCE(10000, 100)
self.nce2 = NCE(10000, 100)
self.prelu1 = PRelu("channel", [-1, 5, 10, 10])
self.prelu2 = PRelu("channel", [-1, 5, 10, 10])
self.group_norm1 = GroupNorm(8, 4)
self.group_norm2 = GroupNorm(8, 4)
self.w_1 = self.create_parameter(
[100, 100], dtype='float32', attr="weight_test_1")
self.w_2 = self.create_parameter(
[20, 200], dtype='float32', attr="weight_test_2")
my_test = MyTest()
my_test.set_dict(new_dict, use_structured_name=False)
for k, v in my_test.state_dict().items():
self.assertTrue(np.array_equal(v.numpy(), new_dict[v.name]))
if __name__ == '__main__':
unittest.main()
......@@ -17,7 +17,7 @@ from __future__ import print_function
import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding, FC
from paddle.fluid.dygraph.nn import Embedding, Linear
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import Adam
from paddle.fluid.dygraph.base import to_variable
......@@ -29,13 +29,12 @@ import six
class SimpleLSTMRNN(fluid.Layer):
def __init__(self,
name_scope,
hidden_size,
num_steps,
num_layers=2,
init_scale=0.1,
dropout=None):
super(SimpleLSTMRNN, self).__init__(name_scope)
super(SimpleLSTMRNN, self).__init__()
self._hidden_size = hidden_size
self._num_layers = num_layers
self._init_scale = init_scale
......@@ -44,8 +43,6 @@ class SimpleLSTMRNN(fluid.Layer):
self._num_steps = num_steps
self.cell_array = []
self.hidden_array = []
def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
self.weight_1_arr = []
self.weight_2_arr = []
self.bias_arr = []
......@@ -149,7 +146,6 @@ class PtbModel(fluid.Layer):
self.num_steps = num_steps
self.dropout = dropout
self.simple_lstm_rnn = SimpleLSTMRNN(
self.full_name(),
hidden_size,
num_steps,
num_layers=num_layers,
......@@ -164,9 +160,7 @@ class PtbModel(fluid.Layer):
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))
self.out_project = FC(self.full_name(),
self.vocab_size,
num_flatten_dims=2)
self.out_project = Linear(self.hidden_size, self.vocab_size)
def forward(self, input, label, init_hidden, init_cell):
init_h = fluid.layers.reshape(
......@@ -277,10 +271,11 @@ class TestDygraphPtbRnn(unittest.TestCase):
fluid.save_dygraph(self.opti_dict, "./test_dy")
self.state_dict = ptb_model.state_dict()
self.model_base = {}
for k, v in self.state_dict.items():
np_t = v.numpy()
self.model_base[v.name] = np_t
self.model_base[k] = np_t
fluid.save_dygraph(self.state_dict, "./test_dy")
......@@ -386,7 +381,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in state_dict.items():
new_t = v.numpy()
base_t = self.model_base[v.name]
base_t = self.model_base[k]
self.assertTrue(np.array_equal(new_t, base_t))
......@@ -491,7 +486,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in state_dict.items():
new_t = v.numpy()
base_t = self.model_base[v.name]
base_t = self.model_base[k]
self.assertTrue(np.array_equal(new_t, base_t))
......@@ -588,7 +583,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
np_state_dict = {}
for k, v in state_dict.items():
np_t = v.numpy()
np_state_dict[v.name] = np_t
np_state_dict[k] = np_t
var = v.value().get_tensor()
var.set(np.zeros_like(np_t), place)
......@@ -600,7 +595,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in state_dict.items():
new_t = v.numpy()
base_t = self.model_base[v.name]
base_t = self.model_base[k]
self.assertTrue(np.array_equal(new_t, base_t))
......@@ -626,20 +621,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
num_steps=num_steps,
init_scale=init_scale)
bd = []
lr_arr = [1.0]
# this is a fake lr decay strategy
for i in range(1, 10):
bd.append(100 * i)
# set lr to 0.0, do not update parameters
new_lr = 0.0
lr_arr.append(new_lr)
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
learning_rate=0.0,
beta1=0.8,
beta2=0.6,
parameter_list=ptb_model.parameters())
......@@ -686,14 +671,12 @@ class TestDygraphPtbRnn(unittest.TestCase):
np.array_equal(v.numpy(), self.base_opti[v.name] *
adam._beta2))
# check parameter
state_dict = ptb_model.state_dict()
for k, v in state_dict.items():
new_t = v.numpy()
base_t = self.model_base[v.name]
base_t = self.model_base[k]
self.assertTrue(np.array_equal(new_t, base_t))
def testLoadAndSetVarBaseBeforeTrain(self):
......@@ -719,7 +702,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
init_scale=init_scale)
bd = []
lr_arr = [1.0]
lr_arr = [0.0]
# this is a fake lr decay strategy
for i in range(1, 10):
bd.append(100 * i)
......@@ -730,8 +713,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
learning_rate=0.0,
beta1=0.8,
beta2=0.6,
parameter_list=ptb_model.parameters())
......@@ -786,7 +768,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in state_dict.items():
new_t = v.numpy()
base_t = self.model_base[v.name]
base_t = self.model_base[k]
self.assertTrue(np.array_equal(new_t, base_t))
def testSetNumpyBeforeTrain(self):
......@@ -812,7 +794,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
init_scale=init_scale)
bd = []
lr_arr = [1.0]
lr_arr = [0.0]
# this is a fake lr decay strategy
for i in range(1, 10):
bd.append(100 * i)
......@@ -841,11 +823,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
np_opti_dict[v.name] = v.numpy()
for k, v in self.state_dict.items():
np_state_dict[v.name] = v.numpy()
np_state_dict[k] = v.numpy()
adam.set_dict(np_opti_dict)
ptb_model.set_dict(np_state_dict)
for i in range(1):
x_data = np.arange(12).reshape(4, 3).astype('int64')
y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
......@@ -887,7 +868,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
for k, v in state_dict.items():
new_t = v.numpy()
base_t = self.model_base[v.name]
base_t = self.model_base[k]
self.assertTrue(np.array_equal(new_t, base_t))
def testOnlyLoadParams(self):
......