Unverified commit 838e36e9 authored by Chen Weihang, committed by GitHub

Fix loaded variable suffix repeat error (#26169)

* fix loaded var suffix repeat error

* use new dygraph name for loaded param
Parent e656ca47
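
In short: the old scheme appended a literal `@LOADED` suffix only when it was not already present, so loading the same saved model twice produced two parameters with the same name. The new scheme hands naming to the unique-name generator, so every load gets a fresh `.load_N` suffix. A minimal sketch of the difference (the helper functions below are illustrative, not Paddle internals):

```python
def old_append_suffix(name, suffix="@LOADED"):
    # Old behavior: the suffix is added at most once, so two independently
    # loaded copies of "linear_0.w_0" both end up as "linear_0.w_0@LOADED".
    return name if suffix in name else name + suffix

_counters = {}

def new_append_suffix(name, suffix="load"):
    # New behavior (modeled on unique_name.generate_with_ignorable_key):
    # every call yields a fresh name, e.g. x.load_0, then x.load_1, ...
    key = '.'.join((name, suffix))
    _counters[key] = _counters.get(key, -1) + 1
    return "{}_{}".format(key, _counters[key])

# Two loads collide under the old scheme but stay distinct under the new one.
assert old_append_suffix("linear_0.w_0") == old_append_suffix("linear_0.w_0@LOADED")
assert new_append_suffix("linear_0.w_0") != new_append_suffix("linear_0.w_0")
```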
@@ -64,9 +64,6 @@ constexpr char kZeroVarSuffix[] = "@ZERO";
 /// Variables with this suffix are the new Gradient.
 constexpr char kNewGradSuffix[] = "@NEWGRAD@";
-/// Variables with this suffix are the loaded from pre-train model.
-constexpr char kLoadedVarSuffix[] = "@LOADED";
 /// RuntimeContext is used to relate input/output names of Operator with
 /// the corresponding variables in name scope.
 /// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same
......
@@ -1213,8 +1213,6 @@ All parameter, weight, gradient are variables in Paddle.
       []() { return std::string(framework::kEmptyVarName); });
   m.def("grad_var_suffix",
         []() { return std::string(framework::kGradVarSuffix); });
-  m.def("loaded_var_suffix",
-        []() { return std::string(framework::kLoadedVarSuffix); });
   m.def_submodule(
       "var_names",
       "The module will return special predefined variable name in Paddle")
......
@@ -23,6 +23,7 @@ from paddle import compat as cpt
 from paddle.fluid import core
 from paddle.fluid import framework
 from paddle.fluid import backward
+from paddle.fluid import unique_name
 from paddle.fluid.dygraph import layers
 from paddle.fluid.layers import nn
 from paddle.fluid.dygraph.base import switch_to_static_graph
@@ -31,6 +32,9 @@ __all__ = ['TranslatedLayer']
 VARIABLE_FILENAME = "__variables__"
 EXTRA_VAR_INFO_FILENAME = "__variables.info__"

+LOADED_VAR_SUFFIX = "load"
+PARAMETER_NAME_PREFIX = "param"
+BUFFER_NAME_PREFIX = "buffer"

 def _load_program_desc(model_file_path):
@@ -107,33 +111,30 @@ def _get_all_var_names(program_desc):
     return all_var_names


+@switch_to_static_graph
 def _append_loaded_suffix(name):
     """
     Append loaded suffix to the given variable name
-    e.g. x ==> x@LOADED
+    e.g. x ==> x.load_0, x.load_0 ==> x.load_0.load_0
     """
-    suffix = core.loaded_var_suffix()
+    suffix = LOADED_VAR_SUFFIX
     name = cpt.to_text(name)
-    if suffix not in name:
-        name = name + suffix
-    return name
+    new_name = unique_name.generate_with_ignorable_key('.'.join((name, suffix)))
+    return new_name


-def _remove_loaded_suffix(name):
-    """
-    Remove loaded suffix to the given variable name
-    e.g. x@LOADED ==> x
-    """
-    suffix = core.loaded_var_suffix()
-    name = cpt.to_text(name)
-    return name.replace(suffix, '')
+@switch_to_static_graph
+def _generate_unique_var_name(prefix):
+    return unique_name.generate_with_ignorable_key(prefix)


 def _append_loaded_suffix_to_var(program_desc):
+    suffix_varname_dict = dict()
     persistable_vars = _get_persistable_vars(program_desc)
     for var_desc in persistable_vars:
         old_name = var_desc.name()
         new_name = _append_loaded_suffix(var_desc.name())
+        suffix_varname_dict[new_name] = old_name
         var_desc.set_name(new_name)
         for block_idx in six.moves.range(program_desc.num_blocks()):
             block = program_desc.block(block_idx)
@@ -141,6 +142,7 @@ def _append_loaded_suffix_to_var(program_desc):
             op = block.op(op_idx)
             op._rename_input(old_name, new_name)
             op._rename_output(old_name, new_name)
+    return suffix_varname_dict


 @switch_to_static_graph
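
For context, `unique_name.generate_with_ignorable_key` appends a process-wide counter to the given key when the static-graph name generator is active, which is why both helpers above carry the `@switch_to_static_graph` decorator. A rough usage sketch; the exact indices depend on how many names were generated before:

```python
from paddle.fluid import unique_name
from paddle.fluid.dygraph.base import switch_to_static_graph

@switch_to_static_graph
def make_loaded_name(name):
    # e.g. 'linear_0.w_0' -> 'linear_0.w_0.load_0' on the first call,
    # 'linear_0.w_0.load_1' on the second, and so on.
    return unique_name.generate_with_ignorable_key(name + '.load')

print(make_loaded_name('linear_0.w_0'))
print(make_loaded_name('linear_0.w_0'))  # different from the first result
```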
@@ -187,6 +189,9 @@ class _ProgramHolder(object):
         # execution scope
         self._inner_scope = core.Scope()

+        # append suffix var name dict
+        self._suffix_varname_dict = None
+
         # forward program
         self._infer_program_desc = self._preprocess(program_desc)
         # forward + backward program
@@ -272,7 +277,7 @@
         self._append_scale_to_output(tmp_program)

         # 4. Persistable vars processing
-        # - append @LOADED suffix to persistable vars
+        # - append loaded suffix to persistable vars
         # NOTE: [why need to append suffix to persistable vars]
         # Dygraph and static graph mode use the same naming mechanism.
         # If users want to load the model fine-tune, it is possible
@@ -281,10 +286,7 @@
         # and later after loading, a new linear is added. At this time,
         # there will be a problem of duplicate names, so here is unified
         # to add the LOADED suffix to the parameters of the model loaded
-        # during training. And in order to avoid multiple @LOADED suffix
-        # are appended to variable name, we only append @LOADED suffix to
-        # the variable that not contains @LOADED suffix.
-        _append_loaded_suffix_to_var(program_desc)
+        self._suffix_varname_dict = _append_loaded_suffix_to_var(program_desc)
         # - get persistable var
         self._persistable_names = _get_persistable_var_names(program_desc)
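
To see why the rename is needed at all: a freshly created dygraph `Linear` and a loaded model's parameters can both claim names like `linear_0.w_0`, since dygraph and static graph share one naming mechanism. A hedged repro sketch against the fluid API of this era (sizes and printed names are illustrative):

```python
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

fluid.enable_dygraph()

# A new dygraph Linear draws names from the same global counter that the
# saved model's parameters were named with, e.g. linear_0.w_0 / linear_0.b_0.
new_linear = Linear(4, 4)
print(new_linear.weight.name)  # e.g. 'linear_0.w_0'

# A parameter loaded from a saved model may carry exactly that name, so
# without the rename the two would collide in one program. After this patch
# the loaded copy becomes e.g. 'linear_0.w_0.load_0'.
```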
@@ -298,7 +300,7 @@
         for i, out in enumerate(self._output_descs):
             var = program.global_block().var(out.name())
             var = nn.scale(
-                var, 1., name="static_model_runner/scale_{}".format(i))
+                var, 1., name="translated_layer/scale_{}".format(i))
             scale_output_vars.append(var)
         # 2. update output names & descs
         for i, var in enumerate(scale_output_vars):
@@ -363,7 +365,7 @@ def _load_persistable_vars_by_program(model_path,
     persistable_vars = _get_persistable_vars(program_holder.infer_program)
     load_var_dict = {}
     for each_var in persistable_vars:
-        orig_each_name = _remove_loaded_suffix(each_var.name())
+        orig_each_name = program_holder._suffix_varname_dict[each_var.name()]
         if _is_parameter(each_var, program_holder.infer_program):
             # create output varbase
             new_var = framework.ParamBase(
@@ -421,6 +423,7 @@ def _load_persistable_vars_by_program(model_path,

 def _load_persistable_vars(model_path,
                            var_info_path,
+                           program_holder,
                            separate_params=False,
                            params_filename=None):
     # 1. load extra var info
@@ -430,10 +433,14 @@ def _load_persistable_vars(model_path,
     # 2. construct var dict
     load_var_dict = dict()
     load_var_list = []
+    inv_suffix_varname_dict = {
+        value: key
+        for key, value in program_holder._suffix_varname_dict.items()
+    }

     # NOTE: some var may not be Parameter
     for name in sorted(extra_var_info):
-        # append suffix, see [why need to append suffix to persistable vars]
-        new_name = _append_loaded_suffix(name)
+        # get suffix var name, see [why need to append suffix to persistable vars]
+        new_name = inv_suffix_varname_dict[name]
         # create output varbase
         if extra_var_info[name].get('trainable', None) is not None:
             # use default shape and dtype
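
The inverse dict above simply flips the mapping recorded by `_append_loaded_suffix_to_var`, so the original names stored in `__variables.info__` can be resolved to the suffixed names now used in the program desc. A small sketch with made-up names:

```python
# _suffix_varname_dict: suffixed program var name -> original saved name
suffix_varname_dict = {
    'linear_0.w_0.load_0': 'linear_0.w_0',
    'linear_0.b_0.load_0': 'linear_0.b_0',
}

# Invert it so the saved name (the key in the extra var info file) resolves
# to the name actually present in the loaded program.
inv = {orig: suffixed for suffixed, orig in suffix_varname_dict.items()}
assert inv['linear_0.w_0'] == 'linear_0.w_0.load_0'
```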
@@ -506,7 +513,8 @@ def _construct_params_and_buffers(model_path,
     var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
     if os.path.exists(var_info_path):
         var_dict = _load_persistable_vars(model_path, var_info_path,
-                                          separate_params, params_filename)
+                                          programs['forward'], separate_params,
+                                          params_filename)
     else:
         var_dict = _load_persistable_vars_by_program(
             model_path, programs['forward'], params_filename)
@@ -625,11 +633,23 @@ class TranslatedLayer(layers.Layer):

         self._program_holder_dict = programs

+        # NOTE(chenweihang): [ why not use var name directly? ]
+        # When adding a parameter or buffer to a Layer via the following
+        # APIs, the variable name can't contain `.`, because it may cause
+        # an AttributeError when accessing the newly added parameter or
+        # buffer in the form of `self.**.**`, but a ParamBase or VarBase
+        # name may contain `.` originally, such as `linear_0.w_0`, so here
+        # we need to generate a new var name for each var
+        self._persistable_var_name_dict = dict()
         for name, var in persistable_vars.items():
             if isinstance(var, framework.ParamBase):
-                self.add_parameter(name, var)
+                dy_name = _generate_unique_var_name(PARAMETER_NAME_PREFIX)
+                self._persistable_var_name_dict[name] = dy_name
+                self.add_parameter(dy_name, var)
             elif isinstance(var, core.VarBase):
-                self.register_buffer(name, var)
+                dy_name = _generate_unique_var_name(BUFFER_NAME_PREFIX)
+                self._persistable_var_name_dict[name] = dy_name
+                self.register_buffer(dy_name, var)
             else:
                 raise TypeError(
                     "Adding persistent variables to a layer is not supported now"
@@ -700,10 +720,11 @@

         persistable_vars = []
         for var_name in program_holder.persistable_names:
-            if var_name in self._parameters:
-                persistable_vars.append(self._parameters[var_name])
-            elif var_name in self._buffers:
-                persistable_vars.append(self._buffers[var_name])
+            dy_var_name = self._persistable_var_name_dict[var_name]
+            if dy_var_name in self._parameters:
+                persistable_vars.append(self._parameters[dy_var_name])
+            elif dy_var_name in self._buffers:
+                persistable_vars.append(self._buffers[dy_var_name])
             else:
                 raise ValueError(
                     "The persistable variable %s does not exist in the current TranslatedLayer."
......
@@ -25,6 +25,8 @@ import paddle.fluid as fluid
 from paddle.fluid import core
 from test_imperative_base import new_program_scope

+LOADED_VAR_SUFFIX = ".load_0"
+

 def convolutional_neural_network(img):
     conv_pool_1 = fluid.nets.simple_img_conv_pool(
@@ -307,14 +309,14 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
         self.assertTrue(np.array_equal(static_x_data, dy_x_data))

         for key, value in six.iteritems(static_param_init_value):
-            key += core.loaded_var_suffix()
+            key += LOADED_VAR_SUFFIX
             self.assertTrue(np.array_equal(value, dy_param_init_value[key]))

         # np.testing.assert_array_almost_equal(static_out, dy_out)
         self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04))

         for key, value in six.iteritems(static_param_value):
-            key += core.loaded_var_suffix()
+            key += LOADED_VAR_SUFFIX
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))

     def test_mnist_train_with_params_filename(self):
@@ -335,14 +337,14 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
         self.assertTrue(np.array_equal(static_x_data, dy_x_data))

         for key, value in six.iteritems(static_param_init_value):
-            key += core.loaded_var_suffix()
+            key += LOADED_VAR_SUFFIX
             self.assertTrue(np.array_equal(value, dy_param_init_value[key]))

         # np.testing.assert_array_almost_equal(static_out, dy_out)
         self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04))

         for key, value in six.iteritems(static_param_value):
-            key += core.loaded_var_suffix()
+            key += LOADED_VAR_SUFFIX
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))

     def test_mnist_infer_no_params_filename(self):
......
@@ -27,6 +27,8 @@ from test_imperative_base import new_program_scope
 import paddle.fluid.transpiler.details.program_utils as pu

+LOADED_VAR_SUFFIX = ".load_0"
+

 def while_softmax_regression(img):
     def cond(i, times, pred):
@@ -219,13 +221,13 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
         # Phase 3. compare
         for key, value in six.iteritems(static_param_init_value):
-            key += core.loaded_var_suffix()
+            key += LOADED_VAR_SUFFIX
             self.assertTrue(np.array_equal(value, dy_param_init_value[key]))

         self.assertTrue(np.allclose(static_out, dy_out))

         for key, value in six.iteritems(static_param_value):
-            key += core.loaded_var_suffix()
+            key += LOADED_VAR_SUFFIX
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
......
@@ -29,18 +29,18 @@ BATCH_NUM = 20
 SEED = 10

-def random_batch_reader():
-    def _get_random_images_and_labels(image_shape, label_shape):
+def random_batch_reader(input_size, label_size):
+    def _get_random_inputs_and_labels(input_size, label_size):
         np.random.seed(SEED)
-        image = np.random.random(size=image_shape).astype('float32')
-        label = np.random.random(size=label_shape).astype('int64')
-        return image, label
+        input = np.random.random(size=input_size).astype('float32')
+        label = np.random.random(size=label_size).astype('int64')
+        return input, label

     def __reader__():
         for _ in range(BATCH_NUM):
-            batch_image, batch_label = _get_random_images_and_labels(
-                [BATCH_SIZE, 784], [BATCH_SIZE, 1])
-            yield batch_image, batch_label
+            batch_input, batch_label = _get_random_inputs_and_labels(
+                [BATCH_SIZE, input_size], [BATCH_SIZE, label_size])
+            yield batch_input, batch_label

     return __reader__
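
The reader is now parameterized by input and label size so the new multi-load test can train a tiny 4-unit linear net with the same helper; existing callers just pass the old defaults. A usage sketch:

```python
# 784-dim inputs with scalar int64 labels, as the MNIST-style tests use:
reader = random_batch_reader(784, 1)
for batch_input, batch_label in reader():
    # batch_input: (BATCH_SIZE, 784) float32; batch_label: (BATCH_SIZE, 1) int64
    pass
```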
@@ -77,13 +77,14 @@ class LinearNetReturnLoss(fluid.dygraph.Layer):
         return z, loss

-def train(layer):
+def train(layer, input_size=784, label_size=1):
     # create optimizer
     adam = fluid.optimizer.SGDOptimizer(
         learning_rate=0.01, parameter_list=layer.parameters())
     # create data loader
     train_loader = fluid.io.DataLoader.from_generator(capacity=5)
-    train_loader.set_batch_generator(random_batch_reader())
+    train_loader.set_batch_generator(
+        random_batch_reader(input_size, label_size))
     # train
     for data in train_loader():
         img, label = data
@@ -100,11 +101,6 @@ def train(layer):
     return [img], layer, avg_loss

-def infer(layer):
-    x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32'))
-    return layer(x)
-

 class TestJitSaveLoad(unittest.TestCase):
     def setUp(self):
         self.model_path = "model.test_jit_save_load"
@@ -279,5 +275,48 @@ class TestJitSaveLoadConfig(unittest.TestCase):
             np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy()))

+class MultiLoadingLinearNet(fluid.dygraph.Layer):
+    def __init__(self, size, model_path):
+        super(MultiLoadingLinearNet, self).__init__()
+        self._linear = Linear(size, size)
+        self._load_linear1 = fluid.dygraph.jit.load(model_path)
+        self._load_linear2 = fluid.dygraph.jit.load(model_path)
+
+    @declarative
+    def forward(self, x):
+        tmp1 = self._linear(x)
+        tmp2 = self._load_linear1(tmp1)
+        tmp3 = self._load_linear2(tmp2)
+        y = self._linear(tmp3)
+        return y
+
+
+class TestJitMultipleLoading(unittest.TestCase):
+    def setUp(self):
+        self.linear_size = 4
+        self.model_path = "model.jit_multi_load"
+        # enable dygraph mode
+        fluid.enable_dygraph()
+        # config seed
+        fluid.default_main_program().random_seed = SEED
+        # train and save base model
+        self.train_and_save_orig_model()
+
+    def train_and_save_orig_model(self):
+        layer = LinearNet(self.linear_size, self.linear_size)
+        example_inputs, layer, _ = train(layer, self.linear_size, 1)
+        fluid.dygraph.jit.save(
+            layer=layer, model_path=self.model_path, input_spec=example_inputs)
+
+    def test_load_model_retransform_inference(self):
+        multi_loaded_layer = MultiLoadingLinearNet(self.linear_size,
+                                                   self.model_path)
+        state_dict = multi_loaded_layer.state_dict()
+        name_set = set()
+        for _, var in state_dict.items():
+            self.assertTrue(var.name not in name_set)
+            name_set.add(var.name)
+

 if __name__ == '__main__':
     unittest.main()