Unverified commit 838e36e9 authored by Chen Weihang, committed by GitHub

Fix loaded variable suffix repeat error (#26169)

* fix loaded var suffix repeat error

* use new dygraph name for loaded param
Parent e656ca47
@@ -64,9 +64,6 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
/// Variables with this suffix are the new Gradient.
constexpr char kNewGradSuffix[] = "@NEWGRAD@";
/// Variables with this suffix are loaded from a pre-trained model.
constexpr char kLoadedVarSuffix[] = "@LOADED";
/// RuntimeContext is used to relate input/output names of Operator with
/// the corresponding variables in name scope.
/// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same
@@ -1213,8 +1213,6 @@ All parameter, weight, gradient are variables in Paddle.
[]() { return std::string(framework::kEmptyVarName); });
m.def("grad_var_suffix",
[]() { return std::string(framework::kGradVarSuffix); });
m.def("loaded_var_suffix",
[]() { return std::string(framework::kLoadedVarSuffix); });
m.def_submodule(
"var_names",
"The module will return special predefined variable name in Paddle")
@@ -23,6 +23,7 @@ from paddle import compat as cpt
from paddle.fluid import core
from paddle.fluid import framework
from paddle.fluid import backward
from paddle.fluid import unique_name
from paddle.fluid.dygraph import layers
from paddle.fluid.layers import nn
from paddle.fluid.dygraph.base import switch_to_static_graph
@@ -31,6 +32,9 @@ __all__ = ['TranslatedLayer']
VARIABLE_FILENAME = "__variables__"
EXTRA_VAR_INFO_FILENAME = "__variables.info__"
LOADED_VAR_SUFFIX = "load"
PARAMETER_NAME_PREFIX = "param"
BUFFER_NAME_PREFIX = "buffer"
def _load_program_desc(model_file_path):
@@ -107,33 +111,30 @@ def _get_all_var_names(program_desc):
return all_var_names
@switch_to_static_graph
def _append_loaded_suffix(name):
"""
Append the loaded suffix to the given variable name
e.g. x ==> x@LOADED
e.g. x ==> x.load_0, x.load_0 ==> x.load_0.load_0
"""
suffix = core.loaded_var_suffix()
suffix = LOADED_VAR_SUFFIX
name = cpt.to_text(name)
if suffix not in name:
name = name + suffix
return name
new_name = unique_name.generate_with_ignorable_key('.'.join((name, suffix)))
return new_name
def _remove_loaded_suffix(name):
"""
Remove the loaded suffix from the given variable name
e.g. x@LOADED ==> x
"""
suffix = core.loaded_var_suffix()
name = cpt.to_text(name)
return name.replace(suffix, '')
@switch_to_static_graph
def _generate_unique_var_name(prefix):
return unique_name.generate_with_ignorable_key(prefix)
def _append_loaded_suffix_to_var(program_desc):
suffix_varname_dict = dict()
persistable_vars = _get_persistable_vars(program_desc)
for var_desc in persistable_vars:
old_name = var_desc.name()
new_name = _append_loaded_suffix(var_desc.name())
suffix_varname_dict[new_name] = old_name
var_desc.set_name(new_name)
for block_idx in six.moves.range(program_desc.num_blocks()):
block = program_desc.block(block_idx)
@@ -141,6 +142,7 @@ def _append_loaded_suffix_to_var(program_desc):
op = block.op(op_idx)
op._rename_input(old_name, new_name)
op._rename_output(old_name, new_name)
return suffix_varname_dict
@switch_to_static_graph
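
Note: the renaming above is the core of this fix. Below is a minimal standalone sketch of the new behavior, using a plain per-prefix counter in place of fluid's unique_name module (whose real implementation also handles name guards and dygraph-to-static keys), with hypothetical variable names:

import collections

_suffix_counter = collections.defaultdict(int)

def append_loaded_suffix(name, suffix='load'):
    # mimics unique_name.generate_with_ignorable_key('.'.join((name, suffix)))
    prefix = '{}.{}'.format(name, suffix)
    idx = _suffix_counter[prefix]
    _suffix_counter[prefix] += 1
    return '{}_{}'.format(prefix, idx)

print(append_loaded_suffix('linear_0.w_0'))  # linear_0.w_0.load_0
print(append_loaded_suffix('linear_0.w_0'))  # linear_0.w_0.load_1, no clash
# The old check `if suffix not in name` appended '@LOADED' at most once,
# so loading the same model twice produced two variables with one name.
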
@@ -187,6 +189,9 @@ class _ProgramHolder(object):
# execution scope
self._inner_scope = core.Scope()
# append suffix var name dict
self._suffix_varname_dict = None
# forward program
self._infer_program_desc = self._preprocess(program_desc)
# forward + backward program
@@ -272,7 +277,7 @@
self._append_scale_to_output(tmp_program)
# 4. Persistable vars processing
# - append @LOADED suffix to persistable vars
# - append loaded suffix to persistable vars
# NOTE: [why need to append suffix to persistable vars]
# Dygraph and static graph mode use the same naming mechanism.
# If users want to load the model for fine-tuning, it is possible
@@ -281,10 +286,7 @@
# and later after loading, a new linear is added. At this time,
# there will be a problem of duplicate names, so here we uniformly
# add the loaded suffix to the parameters of the model loaded
# during training. And in order to avoid multiple @LOADED suffixes
# being appended to one variable name, we only append the @LOADED
# suffix to variables that do not already contain it.
_append_loaded_suffix_to_var(program_desc)
self._suffix_varname_dict = _append_loaded_suffix_to_var(program_desc)
# - get persistable var
self._persistable_names = _get_persistable_var_names(program_desc)
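
To make the duplicate-name problem described in the NOTE concrete, here is a hedged sketch with illustrative names (not taken from a real run): a Linear layer created after loading restarts the dygraph name counters, so it can reuse exactly the names stored in the translated program.

loaded_names = {'linear_0.w_0', 'linear_0.b_0'}  # persistable vars in the saved program
fresh_names = {'linear_0.w_0', 'linear_0.b_0'}   # a new Linear built after loading
assert loaded_names & fresh_names                # name clash without renaming

renamed = {'linear_0.w_0.load_0', 'linear_0.b_0.load_0'}
assert not (renamed & fresh_names)               # disjoint once the suffix is appended
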
@@ -298,7 +300,7 @@
for i, out in enumerate(self._output_descs):
var = program.global_block().var(out.name())
var = nn.scale(
var, 1., name="static_model_runner/scale_{}".format(i))
var, 1., name="translated_layer/scale_{}".format(i))
scale_output_vars.append(var)
# 2. update output names & descs
for i, var in enumerate(scale_output_vars):
@@ -363,7 +365,7 @@ def _load_persistable_vars_by_program(model_path,
persistable_vars = _get_persistable_vars(program_holder.infer_program)
load_var_dict = {}
for each_var in persistable_vars:
orig_each_name = _remove_loaded_suffix(each_var.name())
orig_each_name = program_holder._suffix_varname_dict[each_var.name()]
if _is_parameter(each_var, program_holder.infer_program):
# create output varbase
new_var = framework.ParamBase(
@@ -421,6 +423,7 @@
def _load_persistable_vars(model_path,
var_info_path,
program_holder,
separate_params=False,
params_filename=None):
# 1. load extra var info
@@ -430,10 +433,14 @@ def _load_persistable_vars(model_path,
# 2. construct var dict
load_var_dict = dict()
load_var_list = []
inv_suffix_varname_dict = {
value: key
for key, value in program_holder._suffix_varname_dict.items()
}
# NOTE: some var may not be Parameter
for name in sorted(extra_var_info):
# append suffix, see [why need to append suffix to persistable vars]
new_name = _append_loaded_suffix(name)
# get suffix var name, see [why need to append suffix to persistable vars]
new_name = inv_suffix_varname_dict[name]
# create output varbase
if extra_var_info[name].get('trainable', None) is not None:
# use default shape and dtype
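
Note on the inverse map built above: `_append_loaded_suffix_to_var` records new name -> old name, while the `__variables.info__` file is keyed by the original names, so loading has to walk the mapping in the other direction. A tiny sketch with hypothetical names:

suffix_varname_dict = {'fc_0.w_0.load_0': 'fc_0.w_0'}  # new -> old, filled during preprocessing
inv_suffix_varname_dict = {v: k for k, v in suffix_varname_dict.items()}
print(inv_suffix_varname_dict['fc_0.w_0'])             # fc_0.w_0.load_0
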
@@ -506,7 +513,8 @@ def _construct_params_and_buffers(model_path,
var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
if os.path.exists(var_info_path):
var_dict = _load_persistable_vars(model_path, var_info_path,
separate_params, params_filename)
programs['forward'], separate_params,
params_filename)
else:
var_dict = _load_persistable_vars_by_program(
model_path, programs['forward'], params_filename)
@@ -625,11 +633,23 @@ class TranslatedLayer(layers.Layer):
self._program_holder_dict = programs
# NOTE(chenweihang): [ why not use var name directly? ]
# When adding a parameter or buffer to a Layer by the following APIs,
# the variable name can't contain `.`, because it may cause
# AttributeError when accessing the newly added parameter or buffer
# in the form of `self.**.**`, but the ParamBase or VarBase
# name contains `.` originally, such as `linear_0.w_0`, so here
# we need to generate a new var name for each var
self._persistable_var_name_dict = dict()
for name, var in persistable_vars.items():
if isinstance(var, framework.ParamBase):
self.add_parameter(name, var)
dy_name = _generate_unique_var_name(PARAMETER_NAME_PREFIX)
self._persistable_var_name_dict[name] = dy_name
self.add_parameter(dy_name, var)
elif isinstance(var, core.VarBase):
self.register_buffer(name, var)
dy_name = _generate_unique_var_name(BUFFER_NAME_PREFIX)
self._persistable_var_name_dict[name] = dy_name
self.register_buffer(dy_name, var)
else:
raise TypeError(
"Adding persistent variable which to layer is not supported now"
@@ -700,10 +720,11 @@
persistable_vars = []
for var_name in program_holder.persistable_names:
if var_name in self._parameters:
persistable_vars.append(self._parameters[var_name])
elif var_name in self._buffers:
persistable_vars.append(self._buffers[var_name])
dy_var_name = self._persistable_var_name_dict[var_name]
if dy_var_name in self._parameters:
persistable_vars.append(self._parameters[dy_var_name])
elif dy_var_name in self._buffers:
persistable_vars.append(self._buffers[dy_var_name])
else:
raise ValueError(
"The persistable variable %s is not exists in current TranslatedLayer."
@@ -25,6 +25,8 @@ import paddle.fluid as fluid
from paddle.fluid import core
from test_imperative_base import new_program_scope
LOADED_VAR_SUFFIX = ".load_0"
def convolutional_neural_network(img):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
@@ -307,14 +309,14 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
self.assertTrue(np.array_equal(static_x_data, dy_x_data))
for key, value in six.iteritems(static_param_init_value):
key += core.loaded_var_suffix()
key += LOADED_VAR_SUFFIX
self.assertTrue(np.array_equal(value, dy_param_init_value[key]))
# np.testing.assert_array_almost_equal(static_out, dy_out)
self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04))
for key, value in six.iteritems(static_param_value):
key += core.loaded_var_suffix()
key += LOADED_VAR_SUFFIX
self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))
def test_mnist_train_with_params_filename(self):
@@ -335,14 +337,14 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
self.assertTrue(np.array_equal(static_x_data, dy_x_data))
for key, value in six.iteritems(static_param_init_value):
key += core.loaded_var_suffix()
key += LOADED_VAR_SUFFIX
self.assertTrue(np.array_equal(value, dy_param_init_value[key]))
# np.testing.assert_array_almost_equal(static_out, dy_out)
self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04))
for key, value in six.iteritems(static_param_value):
key += core.loaded_var_suffix()
key += LOADED_VAR_SUFFIX
self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))
def test_mnist_infer_no_params_filename(self):
@@ -27,6 +27,8 @@ from test_imperative_base import new_program_scope
import paddle.fluid.transpiler.details.program_utils as pu
LOADED_VAR_SUFFIX = ".load_0"
def while_softmax_regression(img):
def cond(i, times, pred):
@@ -219,13 +221,13 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
# Phase 3. compare
for key, value in six.iteritems(static_param_init_value):
key += core.loaded_var_suffix()
key += LOADED_VAR_SUFFIX
self.assertTrue(np.array_equal(value, dy_param_init_value[key]))
self.assertTrue(np.allclose(static_out, dy_out))
for key, value in six.iteritems(static_param_value):
key += core.loaded_var_suffix()
key += LOADED_VAR_SUFFIX
self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
@@ -29,18 +29,18 @@ BATCH_NUM = 20
SEED = 10
def random_batch_reader():
def _get_random_images_and_labels(image_shape, label_shape):
def random_batch_reader(input_size, label_size):
def _get_random_inputs_and_labels(input_size, label_size):
np.random.seed(SEED)
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
input = np.random.random(size=input_size).astype('float32')
label = np.random.random(size=label_size).astype('int64')
return input, label
def __reader__():
for _ in range(BATCH_NUM):
batch_image, batch_label = _get_random_images_and_labels(
[BATCH_SIZE, 784], [BATCH_SIZE, 1])
yield batch_image, batch_label
batch_input, batch_label = _get_random_inputs_and_labels(
[BATCH_SIZE, input_size], [BATCH_SIZE, label_size])
yield batch_input, batch_label
return __reader__
@@ -77,13 +77,14 @@ class LinearNetReturnLoss(fluid.dygraph.Layer):
return z, loss
def train(layer):
def train(layer, input_size=784, label_size=1):
# create optimizer
adam = fluid.optimizer.SGDOptimizer(
learning_rate=0.01, parameter_list=layer.parameters())
# create data loader
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_batch_generator(random_batch_reader())
train_loader.set_batch_generator(
random_batch_reader(input_size, label_size))
# train
for data in train_loader():
img, label = data
@@ -100,11 +101,6 @@ def train(layer):
return [img], layer, avg_loss
def infer(layer):
x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
return layer(x)
class TestJitSaveLoad(unittest.TestCase):
def setUp(self):
self.model_path = "model.test_jit_save_load"
@@ -279,5 +275,48 @@ class TestJitSaveLoadConfig(unittest.TestCase):
np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy()))
class MultiLoadingLinearNet(fluid.dygraph.Layer):
def __init__(self, size, model_path):
super(MultiLoadingLinearNet, self).__init__()
self._linear = Linear(size, size)
self._load_linear1 = fluid.dygraph.jit.load(model_path)
self._load_linear2 = fluid.dygraph.jit.load(model_path)
@declarative
def forward(self, x):
tmp1 = self._linear(x)
tmp2 = self._load_linear1(tmp1)
tmp3 = self._load_linear2(tmp2)
y = self._linear(tmp3)
return y
class TestJitMultipleLoading(unittest.TestCase):
def setUp(self):
self.linear_size = 4
self.model_path = "model.jit_multi_load"
# enable dygraph mode
fluid.enable_dygraph()
# config seed
fluid.default_main_program().random_seed = SEED
# train and save base model
self.train_and_save_orig_model()
def train_and_save_orig_model(self):
layer = LinearNet(self.linear_size, self.linear_size)
example_inputs, layer, _ = train(layer, self.linear_size, 1)
fluid.dygraph.jit.save(
layer=layer, model_path=self.model_path, input_spec=example_inputs)
def test_load_model_retransform_inference(self):
multi_loaded_layer = MultiLoadingLinearNet(self.linear_size,
self.model_path)
state_dict = multi_loaded_layer.state_dict()
name_set = set()
for _, var in state_dict.items():
self.assertTrue(var.name not in name_set)
name_set.add(var.name)
if __name__ == '__main__':
unittest.main()