From 2057f76ac09f843a5141f6ef5af97478168d02bd Mon Sep 17 00:00:00 2001 From: hong <43953930+phlrain@users.noreply.github.com> Date: Sun, 16 Feb 2020 19:57:41 +0800 Subject: [PATCH] Enhance load program state (#22546) (#22589) * enhance load program state; test=develop * optimize commet; test=develop --- python/paddle/fluid/io.py | 103 +++++- .../tests/unittests/test_static_save_load.py | 312 ++++++++++++++++++ 2 files changed, 412 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index fa442a749d..09adb1fb13 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -20,6 +20,7 @@ import warnings import six import logging import pickle +import contextlib from functools import reduce import numpy as np @@ -180,6 +181,17 @@ def _clone_var_in_block_(block, var): persistable=True) +@contextlib.contextmanager +def _load_program_scope(main=None, startup=None, scope=None): + prog = main if main else paddle.fluid.Program() + startup_prog = startup if startup else paddle.fluid.Program() + scope = scope if scope else paddle.fluid.core.Scope() + with paddle.fluid.scope_guard(scope): + with paddle.fluid.program_guard(prog, startup_prog): + with paddle.fluid.unique_name.guard(): + yield + + def _get_valid_program(main_program): if main_program is None: main_program = default_main_program() @@ -1749,12 +1761,17 @@ def load(program, model_path, executor=None, var_list=None): set_var(v, load_dict[v.name]) -def load_program_state(model_path): +def load_program_state(model_path, var_list=None): """ Load program state from local file Args: model_path(str): The file prefix store the program + var_list(list, optional): The variable list to load saved with + [ save_params, save_persistables, save_vars ]. + Default: None. + The var_list is only used to get name, + will not be modified. Returns: state_dict(dict): the dict store Parameter and optimizer information @@ -1775,14 +1792,94 @@ def load_program_state(model_path): program_state = fluid.load_program_state( "./temp") """ - parameter_file_name = model_path + ".pdparams" + model_prefix = model_path + if model_prefix.endswith(".pdparams"): + model_prefix = model_prefix[:-9] + elif model_prefix.endswith(".pdopt"): + model_prefix = model_prefix[:-6] + elif model_prefix.endswith(".pdmodel"): + model_prefix = model_prefix[:-8] + + parameter_file_name = model_prefix + ".pdparams" + if not os.path.exists(parameter_file_name): + # model file saved with fluid.save is not found, try to load model file saved with + # [save_vars, save_params, save_persistables] + _logger.warning( + "{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]". + format(parameter_file_name)) + + var_name_list = [] + if var_list is None and os.path.isfile(model_path): + raise ValueError( + "var_list can not be None when model_path is a file type") + + for root, dirs, files in os.walk(model_path, topdown=False): + for f in files: + file_path = os.path.join(root, f) + var_temp_name = os.path.relpath(file_path, model_path) + var_temp_name = var_temp_name.replace("\\", "/") + var_name_list.append(var_temp_name) + + with _load_program_scope(): + load_prog = Program() + load_block = load_prog.global_block() + + def clone_var_to_block(block, var): + if not isinstance(var, Variable): + raise TypeError("value in var_list must be variable") + return block.create_var( + name=var.name, + shape=var.shape, + dtype=var.dtype, + type=var.type, + lod_level=var.lod_level + if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR else + None, + persistable=True) + + loaded_var_list = [] + + if var_list is not None: + for var in var_list: + loaded_var_list.append(clone_var_to_block(load_block, var)) + else: + for var_name in var_name_list: + loaded_var_list.append( + load_block.create_var( + name=var_name, persistable=True)) + + place = paddle.fluid.CPUPlace() + exe = paddle.fluid.Executor(place) + + try: + if os.path.isfile(model_path): + dir_name, file_name = os.path.split(model_path) + else: + dir_name = model_path + file_name = None + load_vars( + executor=exe, + dirname=dir_name, + vars=loaded_var_list, + filename=file_name) + except: + raise RuntimeError( + "Failed to load model file , please make sure model file is saved with the " + "following APIs: save_params, save_persistables, save_vars") + res_dict = {} + for var in loaded_var_list: + res_dict[var.name] = np.asarray(paddle.fluid.global_scope( + ).find_var(var.name).get_tensor()) + + return res_dict + assert os.path.exists(parameter_file_name), \ "Parameter file [{}] not exits".format(parameter_file_name) with open(parameter_file_name, 'rb') as f: para_dict = pickle.load(f) - opt_file_name = model_path + ".pdopt" + opt_file_name = model_prefix + ".pdopt" if os.path.exists(opt_file_name): with open(opt_file_name, 'rb') as f: opti_dict = pickle.load(f) diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 24b61f514c..d4b92f9849 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -625,6 +625,16 @@ class TestProgramStatePartial(unittest.TestCase): #fluid.load(test_program, "./test_1", None ) program_state = fluid.load_program_state( os.path.join('some_dir', 'test_1')) + + program_state_1 = fluid.load_program_state( + os.path.join('some_dir', 'test_1.pdparams')) + + program_state_2 = fluid.load_program_state( + os.path.join('some_dir', 'test_1.pdopt')) + + program_state_3 = fluid.load_program_state( + os.path.join('some_dir', 'test_1.pdmodel')) + fluid.set_program_state(test_program, program_state) for var in test_program.list_vars(): @@ -634,6 +644,66 @@ class TestProgramStatePartial(unittest.TestCase): base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) + # check 1 + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.set_program_state(test_program, program_state_1) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + # check 2 + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.set_program_state(test_program, program_state_2) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + # check 3 + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.set_program_state(test_program, program_state_3) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + class TestVariableInit(unittest.TestCase): def test_variable_init(self): @@ -984,5 +1054,247 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): all_var_list + [temp_var]) +class TestProgramStateOldSave(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_program = fluid.default_main_program().clone(for_test=True) + + add_1 = fluid.layers.fc(static_last_hidden, + size=hidden_size, + num_flatten_dims=2, + bias_attr=False) + + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.io.save_persistables(exe, "test_program_1", main_program) + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + #fluid.load(test_program, "./test_1", None ) + program_state = fluid.load_program_state("test_program_1") + fluid.set_program_state(main_program, program_state) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + +class TestProgramStateOldSaveSingleModel(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_program = fluid.default_main_program().clone(for_test=True) + + add_1 = fluid.layers.fc(static_last_hidden, + size=hidden_size, + num_flatten_dims=2, + bias_attr=False) + + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.io.save_persistables( + exe, "test_program_2", main_program, filename="model_1") + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimzier var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + #fluid.load(test_program, "./test_1", None ) + program_state = fluid.load_program_state( + os.path.join("test_program_2", "model_1"), + var_list=fluid.io.get_program_persistable_vars(main_program)) + fluid.set_program_state(main_program, program_state) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + with self.assertRaises(ValueError): + fluid.load_program_state( + os.path.join("test_program_2", "model_1")) + + with self.assertRaises(TypeError): + fluid.load_program_state( + os.path.join("test_program_2", "model_1"), + var_list=["str"]) + + with self.assertRaises(RuntimeError): + fluid.load_program_state( + os.path.join("test_program_2", "model_1"), + var_list=[ + main_program.global_block().create_var( + name="fake_var_name", persistable=True) + ]) + + if __name__ == '__main__': unittest.main() -- GitLab