From 838e36e9eda2354f3919a596ffd7aa18df99c6d2 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 13 Aug 2020 10:31:20 +0800 Subject: [PATCH] Fix loaded variable suffix repeat error (#26169) * fix loaded var suffix repeat error * use new dygraph name for loaded param --- paddle/fluid/framework/operator.h | 3 - paddle/fluid/pybind/pybind.cc | 2 - python/paddle/fluid/dygraph/io.py | 79 ++++++++++++------- .../test_imperative_static_runner_mnist.py | 10 ++- .../test_imperative_static_runner_while.py | 6 +- .../tests/unittests/test_jit_save_load.py | 69 ++++++++++++---- 6 files changed, 114 insertions(+), 55 deletions(-) diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 709f132813c..ebecbf0498c 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -64,9 +64,6 @@ constexpr char kZeroVarSuffix[] = "@ZERO"; /// Variables with this suffix are the new Gradient. constexpr char kNewGradSuffix[] = "@NEWGRAD@"; -/// Variables with this suffix are the loaded from pre-train model. -constexpr char kLoadedVarSuffix[] = "@LOADED"; - /// RuntimeContext is used to relate input/output names of Operator with /// the corresponding variables in name scope. /// If an Op has attribute kEnableCacheRuntimeContext, it means that in a same diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d6a8b226637..d68e225849e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1213,8 +1213,6 @@ All parameter, weight, gradient are variables in Paddle. []() { return std::string(framework::kEmptyVarName); }); m.def("grad_var_suffix", []() { return std::string(framework::kGradVarSuffix); }); - m.def("loaded_var_suffix", - []() { return std::string(framework::kLoadedVarSuffix); }); m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 7396289392a..0da5c57f1bc 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -23,6 +23,7 @@ from paddle import compat as cpt from paddle.fluid import core from paddle.fluid import framework from paddle.fluid import backward +from paddle.fluid import unique_name from paddle.fluid.dygraph import layers from paddle.fluid.layers import nn from paddle.fluid.dygraph.base import switch_to_static_graph @@ -31,6 +32,9 @@ __all__ = ['TranslatedLayer'] VARIABLE_FILENAME = "__variables__" EXTRA_VAR_INFO_FILENAME = "__variables.info__" +LOADED_VAR_SUFFIX = "load" +PARAMETER_NAME_PREFIX = "param" +BUFFER_NAME_PREFIX = "buffer" def _load_program_desc(model_file_path): @@ -107,33 +111,30 @@ def _get_all_var_names(program_desc): return all_var_names +@switch_to_static_graph def _append_loaded_suffix(name): """ Append loaded suffix to the given variable name - e.g. x ==> x@LOADED + e.g. x ==> x.load_0, x.load_0 ==> x.load_0.load_0 """ - suffix = core.loaded_var_suffix() + suffix = LOADED_VAR_SUFFIX name = cpt.to_text(name) - if suffix not in name: - name = name + suffix - return name + new_name = unique_name.generate_with_ignorable_key('.'.join((name, suffix))) + return new_name -def _remove_loaded_suffix(name): - """ - Remove loaded suffix to the given variable name - e.g. 
x@LOADED ==> x - """ - suffix = core.loaded_var_suffix() - name = cpt.to_text(name) - return name.replace(suffix, '') +@switch_to_static_graph +def _generate_unique_var_name(prefix): + return unique_name.generate_with_ignorable_key(prefix) def _append_loaded_suffix_to_var(program_desc): + suffix_varname_dict = dict() persistable_vars = _get_persistable_vars(program_desc) for var_desc in persistable_vars: old_name = var_desc.name() new_name = _append_loaded_suffix(var_desc.name()) + suffix_varname_dict[new_name] = old_name var_desc.set_name(new_name) for block_idx in six.moves.range(program_desc.num_blocks()): block = program_desc.block(block_idx) @@ -141,6 +142,7 @@ def _append_loaded_suffix_to_var(program_desc): op = block.op(op_idx) op._rename_input(old_name, new_name) op._rename_output(old_name, new_name) + return suffix_varname_dict @switch_to_static_graph @@ -187,6 +189,9 @@ class _ProgramHolder(object): # execution scope self._inner_scope = core.Scope() + # append suffix var name dict + self._suffix_varname_dict = None + # forward program self._infer_program_desc = self._preprocess(program_desc) # forward + backward program @@ -272,7 +277,7 @@ class _ProgramHolder(object): self._append_scale_to_output(tmp_program) # 4. Persistable vars processing - # - append @LOADED suffix to persistable vars + # - append loaded suffix to persistable vars # NOTE: [why need to append suffix to persistable vars] # Dygraph and static graph mode use the same naming mechanism. # If users want to load the model fine-tune, it is possible @@ -281,10 +286,7 @@ class _ProgramHolder(object): # and later after loading, a new linear is added. At this time, # there will be a problem of duplicate names, so here is unified # to add the LOADED suffix to the parameters of the model loaded - # during training. And in order to avoid multiple @LOADED suffix - # are appended to variable name, we only append @LOADED suffix to - # the variable that not contains @LOADED suffix. - _append_loaded_suffix_to_var(program_desc) + self._suffix_varname_dict = _append_loaded_suffix_to_var(program_desc) # - get persistable var self._persistable_names = _get_persistable_var_names(program_desc) @@ -298,7 +300,7 @@ class _ProgramHolder(object): for i, out in enumerate(self._output_descs): var = program.global_block().var(out.name()) var = nn.scale( - var, 1., name="static_model_runner/scale_{}".format(i)) + var, 1., name="translated_layer/scale_{}".format(i)) scale_output_vars.append(var) # 2. update output names & descs for i, var in enumerate(scale_output_vars): @@ -363,7 +365,7 @@ def _load_persistable_vars_by_program(model_path, persistable_vars = _get_persistable_vars(program_holder.infer_program) load_var_dict = {} for each_var in persistable_vars: - orig_each_name = _remove_loaded_suffix(each_var.name()) + orig_each_name = program_holder._suffix_varname_dict[each_var.name()] if _is_parameter(each_var, program_holder.infer_program): # create output varbase new_var = framework.ParamBase( @@ -421,6 +423,7 @@ def _load_persistable_vars_by_program(model_path, def _load_persistable_vars(model_path, var_info_path, + program_holder, separate_params=False, params_filename=None): # 1. load extra var info @@ -430,10 +433,14 @@ def _load_persistable_vars(model_path, # 2. 
construct var dict
    load_var_dict = dict()
    load_var_list = []
+    inv_suffix_varname_dict = {
+        value: key
+        for key, value in program_holder._suffix_varname_dict.items()
+    }
    # NOTE: some var may not be Parameter
    for name in sorted(extra_var_info):
-        # append suffix, see [why need to append suffix to persistable vars]
-        new_name = _append_loaded_suffix(name)
+        # get suffix var name, see [why need to append suffix to persistable vars]
+        new_name = inv_suffix_varname_dict[name]
        # create output varbase
        if extra_var_info[name].get('trainable', None) is not None:
            # use default shape and dtype
@@ -506,7 +513,8 @@ def _construct_params_and_buffers(model_path,
    var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME)
    if os.path.exists(var_info_path):
        var_dict = _load_persistable_vars(model_path, var_info_path,
-                                          separate_params, params_filename)
+                                          programs['forward'], separate_params,
+                                          params_filename)
    else:
        var_dict = _load_persistable_vars_by_program(
            model_path, programs['forward'], params_filename)
@@ -625,11 +633,23 @@ class TranslatedLayer(layers.Layer):
 
        self._program_holder_dict = programs
 
+        # NOTE(chenweihang): [ why not use var name directly? ]
+        # When adding a parameter or buffer to a Layer with the following
+        # APIs, the variable name cannot contain `.`, because it may cause
+        # an AttributeError when accessing the newly added parameter or
+        # buffer in the form of `self.**.**`, but the ParamBase or VarBase
+        # name originally contains `.`, such as `linear_0.w_0`, so we need
+        # to generate a new variable name for each var here.
+        self._persistable_var_name_dict = dict()
        for name, var in persistable_vars.items():
            if isinstance(var, framework.ParamBase):
-                self.add_parameter(name, var)
+                dy_name = _generate_unique_var_name(PARAMETER_NAME_PREFIX)
+                self._persistable_var_name_dict[name] = dy_name
+                self.add_parameter(dy_name, var)
            elif isinstance(var, core.VarBase):
-                self.register_buffer(name, var)
+                dy_name = _generate_unique_var_name(BUFFER_NAME_PREFIX)
+                self._persistable_var_name_dict[name] = dy_name
+                self.register_buffer(dy_name, var)
            else:
                raise TypeError(
                    "Adding persistent variable which to layer is not supported now"
@@ -700,10 +720,11 @@ class TranslatedLayer(layers.Layer):
 
        persistable_vars = []
        for var_name in program_holder.persistable_names:
-            if var_name in self._parameters:
-                persistable_vars.append(self._parameters[var_name])
-            elif var_name in self._buffers:
-                persistable_vars.append(self._buffers[var_name])
+            dy_var_name = self._persistable_var_name_dict[var_name]
+            if dy_var_name in self._parameters:
+                persistable_vars.append(self._parameters[dy_var_name])
+            elif dy_var_name in self._buffers:
+                persistable_vars.append(self._buffers[dy_var_name])
            else:
                raise ValueError(
                    "The persistable variable %s is not exists in current TranslatedLayer." 
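Note: the renaming behavior introduced above can be summarized with a small, self-contained sketch. The helper and counter dict below are illustrative stand-ins, not Paddle's actual unique_name.generate_with_ignorable_key implementation; they only mimic the behavior documented in the _append_loaded_suffix docstring (x ==> x.load_0, and loading an already-loaded variable nests the suffix):

    # Illustrative mimic of the suffixing scheme; `_name_counters` and
    # `append_loaded_suffix` are hypothetical names, not part of Paddle.
    _name_counters = {}

    def append_loaded_suffix(name, suffix="load"):
        # One counter per distinct "<name>.<suffix>" key, so each load of
        # the same variable yields a fresh, non-colliding name.
        key = "{}.{}".format(name, suffix)
        idx = _name_counters.get(key, 0)
        _name_counters[key] = idx + 1
        return "{}_{}".format(key, idx)

    assert append_loaded_suffix("linear_0.w_0") == "linear_0.w_0.load_0"
    assert append_loaded_suffix("linear_0.w_0") == "linear_0.w_0.load_1"
    # Unlike the removed `if suffix not in name` guard, a second load is
    # never silently skipped; an already-suffixed name is nested instead:
    assert append_loaded_suffix("linear_0.w_0.load_0") == "linear_0.w_0.load_0.load_0"

The old scheme appended the fixed @LOADED suffix at most once, so loading the same model twice produced identical variable names and the second load collided with the first; the counter-based names keep every load distinct.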
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py index afdab0148cb..acc56b7db27 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py @@ -25,6 +25,8 @@ import paddle.fluid as fluid from paddle.fluid import core from test_imperative_base import new_program_scope +LOADED_VAR_SUFFIX = ".load_0" + def convolutional_neural_network(img): conv_pool_1 = fluid.nets.simple_img_conv_pool( @@ -307,14 +309,14 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): self.assertTrue(np.array_equal(static_x_data, dy_x_data)) for key, value in six.iteritems(static_param_init_value): - key += core.loaded_var_suffix() + key += LOADED_VAR_SUFFIX self.assertTrue(np.array_equal(value, dy_param_init_value[key])) # np.testing.assert_array_almost_equal(static_out, dy_out) self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04)) for key, value in six.iteritems(static_param_value): - key += core.loaded_var_suffix() + key += LOADED_VAR_SUFFIX self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4)) def test_mnist_train_with_params_filename(self): @@ -335,14 +337,14 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): self.assertTrue(np.array_equal(static_x_data, dy_x_data)) for key, value in six.iteritems(static_param_init_value): - key += core.loaded_var_suffix() + key += LOADED_VAR_SUFFIX self.assertTrue(np.array_equal(value, dy_param_init_value[key])) # np.testing.assert_array_almost_equal(static_out, dy_out) self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04)) for key, value in six.iteritems(static_param_value): - key += core.loaded_var_suffix() + key += LOADED_VAR_SUFFIX self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4)) def test_mnist_infer_no_params_filename(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py index f501593d09d..0792582175e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py @@ -27,6 +27,8 @@ from test_imperative_base import new_program_scope import paddle.fluid.transpiler.details.program_utils as pu +LOADED_VAR_SUFFIX = ".load_0" + def while_softmax_regression(img): def cond(i, times, pred): @@ -219,13 +221,13 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase): # Phase 3. 
compare for key, value in six.iteritems(static_param_init_value): - key += core.loaded_var_suffix() + key += LOADED_VAR_SUFFIX self.assertTrue(np.array_equal(value, dy_param_init_value[key])) self.assertTrue(np.allclose(static_out, dy_out)) for key, value in six.iteritems(static_param_value): - key += core.loaded_var_suffix() + key += LOADED_VAR_SUFFIX self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5)) diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index a61d31e8825..89b12da9cf9 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -29,18 +29,18 @@ BATCH_NUM = 20 SEED = 10 -def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): +def random_batch_reader(input_size, label_size): + def _get_random_inputs_and_labels(input_size, label_size): np.random.seed(SEED) - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + input = np.random.random(size=input_size).astype('float32') + label = np.random.random(size=label_size).astype('int64') + return input, label def __reader__(): for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + batch_input, batch_label = _get_random_inputs_and_labels( + [BATCH_SIZE, input_size], [BATCH_SIZE, label_size]) + yield batch_input, batch_label return __reader__ @@ -77,13 +77,14 @@ class LinearNetReturnLoss(fluid.dygraph.Layer): return z, loss -def train(layer): +def train(layer, input_size=784, label_size=1): # create optimizer adam = fluid.optimizer.SGDOptimizer( learning_rate=0.01, parameter_list=layer.parameters()) # create data loader train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) + train_loader.set_batch_generator( + random_batch_reader(input_size, label_size)) # train for data in train_loader(): img, label = data @@ -100,11 +101,6 @@ def train(layer): return [img], layer, avg_loss -def infer(layer): - x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) - return layer(x) - - class TestJitSaveLoad(unittest.TestCase): def setUp(self): self.model_path = "model.test_jit_save_load" @@ -279,5 +275,48 @@ class TestJitSaveLoadConfig(unittest.TestCase): np.array_equal(train_layer(x)[0].numpy(), infer_layer(x).numpy())) +class MultiLoadingLinearNet(fluid.dygraph.Layer): + def __init__(self, size, model_path): + super(MultiLoadingLinearNet, self).__init__() + self._linear = Linear(size, size) + self._load_linear1 = fluid.dygraph.jit.load(model_path) + self._load_linear2 = fluid.dygraph.jit.load(model_path) + + @declarative + def forward(self, x): + tmp1 = self._linear(x) + tmp2 = self._load_linear1(tmp1) + tmp3 = self._load_linear2(tmp2) + y = self._linear(tmp3) + return y + + +class TestJitMultipleLoading(unittest.TestCase): + def setUp(self): + self.linear_size = 4 + self.model_path = "model.jit_multi_load" + # enable dygraph mode + fluid.enable_dygraph() + # config seed + fluid.default_main_program().random_seed = SEED + # train and save base model + self.train_and_save_orig_model() + + def train_and_save_orig_model(self): + layer = LinearNet(self.linear_size, self.linear_size) + example_inputs, layer, _ = train(layer, self.linear_size, 1) + fluid.dygraph.jit.save( + 
layer=layer, model_path=self.model_path, input_spec=example_inputs) + + def test_load_model_retransform_inference(self): + multi_loaded_layer = MultiLoadingLinearNet(self.linear_size, + self.model_path) + state_dict = multi_loaded_layer.state_dict() + name_set = set() + for _, var in state_dict.items(): + self.assertTrue(var.name not in name_set) + name_set.add(var.name) + + if __name__ == '__main__': unittest.main() -- GitLab
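For reference, the user-facing scenario this patch fixes can be exercised directly. A minimal sketch, assuming a model has already been saved under "model.jit_multi_load" as in TestJitMultipleLoading above:

    import paddle.fluid as fluid

    fluid.enable_dygraph()

    # Load the same saved model twice, as MultiLoadingLinearNet does.
    loaded1 = fluid.dygraph.jit.load("model.jit_multi_load")
    loaded2 = fluid.dygraph.jit.load("model.jit_multi_load")

    # Before this fix both loads appended the same `@LOADED` suffix, so the
    # two copies collided on parameter names; now each persistable variable
    # receives a fresh `.load_N`-style unique name.
    names1 = {param.name for param in loaded1.parameters()}
    names2 = {param.name for param in loaded2.parameters()}
    assert names1.isdisjoint(names2)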