未验证 提交 2057f76a 编写于 作者: H hong 提交者: GitHub

Enhance load program state (#22546) (#22589)

* enhance load program state; test=develop

* optimize comment; test=develop
上级 3f4687b8
...@@ -20,6 +20,7 @@ import warnings ...@@ -20,6 +20,7 @@ import warnings
import six import six
import logging import logging
import pickle import pickle
import contextlib
from functools import reduce from functools import reduce
import numpy as np import numpy as np
...@@ -180,6 +181,17 @@ def _clone_var_in_block_(block, var): ...@@ -180,6 +181,17 @@ def _clone_var_in_block_(block, var):
persistable=True) persistable=True)
@contextlib.contextmanager
def _load_program_scope(main=None, startup=None, scope=None):
    """
    Run the enclosed block under a dedicated scope, program pair and
    unique-name guard.

    Args:
        main: Object passed to ``program_guard`` as the main program. A new
            ``paddle.fluid.Program()`` is created when this is falsy.
        startup: Object passed to ``program_guard`` as the startup program.
            A new ``paddle.fluid.Program()`` is created when this is falsy.
        scope: Object passed to ``scope_guard``. A new
            ``paddle.fluid.core.Scope()`` is created when this is falsy.

    Yields:
        None. The guards are active for the duration of the ``with`` body.
    """
    main_prog = main or paddle.fluid.Program()
    startup_prog = startup or paddle.fluid.Program()
    active_scope = scope or paddle.fluid.core.Scope()
    # Guards are entered left to right and exited in reverse order, exactly
    # like the equivalent nested ``with`` statements.
    with paddle.fluid.scope_guard(active_scope), \
            paddle.fluid.program_guard(main_prog, startup_prog), \
            paddle.fluid.unique_name.guard():
        yield
def _get_valid_program(main_program): def _get_valid_program(main_program):
if main_program is None: if main_program is None:
main_program = default_main_program() main_program = default_main_program()
...@@ -1749,12 +1761,17 @@ def load(program, model_path, executor=None, var_list=None): ...@@ -1749,12 +1761,17 @@ def load(program, model_path, executor=None, var_list=None):
set_var(v, load_dict[v.name]) set_var(v, load_dict[v.name])
def load_program_state(model_path): def load_program_state(model_path, var_list=None):
""" """
Load program state from local file Load program state from local file
Args: Args:
model_path(str): The file prefix store the program model_path(str): The file prefix store the program
var_list(list, optional): The variable list to load saved with
[ save_params, save_persistables, save_vars ].
Default: None.
The var_list is only used to get name,
will not be modified.
Returns: Returns:
state_dict(dict): the dict store Parameter and optimizer information state_dict(dict): the dict store Parameter and optimizer information
...@@ -1775,14 +1792,94 @@ def load_program_state(model_path): ...@@ -1775,14 +1792,94 @@ def load_program_state(model_path):
program_state = fluid.load_program_state( "./temp") program_state = fluid.load_program_state( "./temp")
""" """
parameter_file_name = model_path + ".pdparams" model_prefix = model_path
if model_prefix.endswith(".pdparams"):
model_prefix = model_prefix[:-9]
elif model_prefix.endswith(".pdopt"):
model_prefix = model_prefix[:-6]
elif model_prefix.endswith(".pdmodel"):
model_prefix = model_prefix[:-8]
parameter_file_name = model_prefix + ".pdparams"
if not os.path.exists(parameter_file_name):
# model file saved with fluid.save is not found, try to load model file saved with
# [save_vars, save_params, save_persistables]
_logger.warning(
"{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]".
format(parameter_file_name))
var_name_list = []
if var_list is None and os.path.isfile(model_path):
raise ValueError(
"var_list can not be None when model_path is a file type")
for root, dirs, files in os.walk(model_path, topdown=False):
for f in files:
file_path = os.path.join(root, f)
var_temp_name = os.path.relpath(file_path, model_path)
var_temp_name = var_temp_name.replace("\\", "/")
var_name_list.append(var_temp_name)
with _load_program_scope():
load_prog = Program()
load_block = load_prog.global_block()
def clone_var_to_block(block, var):
if not isinstance(var, Variable):
raise TypeError("value in var_list must be variable")
return block.create_var(
name=var.name,
shape=var.shape,
dtype=var.dtype,
type=var.type,
lod_level=var.lod_level
if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR else
None,
persistable=True)
loaded_var_list = []
if var_list is not None:
for var in var_list:
loaded_var_list.append(clone_var_to_block(load_block, var))
else:
for var_name in var_name_list:
loaded_var_list.append(
load_block.create_var(
name=var_name, persistable=True))
place = paddle.fluid.CPUPlace()
exe = paddle.fluid.Executor(place)
try:
if os.path.isfile(model_path):
dir_name, file_name = os.path.split(model_path)
else:
dir_name = model_path
file_name = None
load_vars(
executor=exe,
dirname=dir_name,
vars=loaded_var_list,
filename=file_name)
except:
raise RuntimeError(
"Failed to load model file , please make sure model file is saved with the "
"following APIs: save_params, save_persistables, save_vars")
res_dict = {}
for var in loaded_var_list:
res_dict[var.name] = np.asarray(paddle.fluid.global_scope(
).find_var(var.name).get_tensor())
return res_dict
assert os.path.exists(parameter_file_name), \ assert os.path.exists(parameter_file_name), \
"Parameter file [{}] not exits".format(parameter_file_name) "Parameter file [{}] not exits".format(parameter_file_name)
with open(parameter_file_name, 'rb') as f: with open(parameter_file_name, 'rb') as f:
para_dict = pickle.load(f) para_dict = pickle.load(f)
opt_file_name = model_path + ".pdopt" opt_file_name = model_prefix + ".pdopt"
if os.path.exists(opt_file_name): if os.path.exists(opt_file_name):
with open(opt_file_name, 'rb') as f: with open(opt_file_name, 'rb') as f:
opti_dict = pickle.load(f) opti_dict = pickle.load(f)
......
...@@ -625,6 +625,16 @@ class TestProgramStatePartial(unittest.TestCase): ...@@ -625,6 +625,16 @@ class TestProgramStatePartial(unittest.TestCase):
#fluid.load(test_program, "./test_1", None ) #fluid.load(test_program, "./test_1", None )
program_state = fluid.load_program_state( program_state = fluid.load_program_state(
os.path.join('some_dir', 'test_1')) os.path.join('some_dir', 'test_1'))
program_state_1 = fluid.load_program_state(
os.path.join('some_dir', 'test_1.pdparams'))
program_state_2 = fluid.load_program_state(
os.path.join('some_dir', 'test_1.pdopt'))
program_state_3 = fluid.load_program_state(
os.path.join('some_dir', 'test_1.pdmodel'))
fluid.set_program_state(test_program, program_state) fluid.set_program_state(test_program, program_state)
for var in test_program.list_vars(): for var in test_program.list_vars():
...@@ -634,6 +644,66 @@ class TestProgramStatePartial(unittest.TestCase): ...@@ -634,6 +644,66 @@ class TestProgramStatePartial(unittest.TestCase):
base_t = base_map[var.name] base_t = base_map[var.name]
self.assertTrue(np.array_equal(new_t, base_t)) self.assertTrue(np.array_equal(new_t, base_t))
# check 1
for var in main_program.list_vars():
if isinstance(var, framework.Parameter) or var.persistable:
ten = fluid.global_scope().find_var(var.name).get_tensor()
ten.set(np.zeros_like(np.array(ten)), place)
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
# make sure all the paramerter or optimzier var have been set to zero
self.assertTrue(np.sum(np.abs(new_t)) == 0)
fluid.set_program_state(test_program, program_state_1)
for var in test_program.list_vars():
if isinstance(var, framework.Parameter) or var.persistable:
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
base_t = base_map[var.name]
self.assertTrue(np.array_equal(new_t, base_t))
# check 2
for var in main_program.list_vars():
if isinstance(var, framework.Parameter) or var.persistable:
ten = fluid.global_scope().find_var(var.name).get_tensor()
ten.set(np.zeros_like(np.array(ten)), place)
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
# make sure all the paramerter or optimzier var have been set to zero
self.assertTrue(np.sum(np.abs(new_t)) == 0)
fluid.set_program_state(test_program, program_state_2)
for var in test_program.list_vars():
if isinstance(var, framework.Parameter) or var.persistable:
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
base_t = base_map[var.name]
self.assertTrue(np.array_equal(new_t, base_t))
# check 3
for var in main_program.list_vars():
if isinstance(var, framework.Parameter) or var.persistable:
ten = fluid.global_scope().find_var(var.name).get_tensor()
ten.set(np.zeros_like(np.array(ten)), place)
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
# make sure all the paramerter or optimzier var have been set to zero
self.assertTrue(np.sum(np.abs(new_t)) == 0)
fluid.set_program_state(test_program, program_state_3)
for var in test_program.list_vars():
if isinstance(var, framework.Parameter) or var.persistable:
new_t = np.array(fluid.global_scope().find_var(var.name)
.get_tensor())
base_t = base_map[var.name]
self.assertTrue(np.array_equal(new_t, base_t))
class TestVariableInit(unittest.TestCase): class TestVariableInit(unittest.TestCase):
def test_variable_init(self): def test_variable_init(self):
...@@ -984,5 +1054,247 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): ...@@ -984,5 +1054,247 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase):
all_var_list + [temp_var]) all_var_list + [temp_var])
class TestProgramStateOldSave(unittest.TestCase):
    """Round-trip test: save with ``fluid.io.save_persistables`` (no
    ``filename``), then restore with ``fluid.load_program_state`` and
    ``fluid.set_program_state``.
    """

    def test_ptb_rnn_cpu_float32(self):
        """Train a small PtbModel, save it, zero every persistable variable,
        reload the state and check all values match the pre-save snapshot.
        """
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with new_program_scope():
            # Fix the seeds on both default programs before building the graph.
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            ptb_model = PtbModel(
                "ptb_model",
                hidden_size=hidden_size,
                vocab_size=vocab_size,
                num_layers=num_layers,
                num_steps=num_steps,
                init_scale=init_scale)

            # Use the first CUDA device when available, otherwise the CPU.
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)
            exe = fluid.Executor(place)
            sgd = Adam(learning_rate=1e-3)
            x = fluid.layers.data(
                name="x", shape=[-1, num_steps], dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(
                name="init_hidden", shape=[1], dtype='float32')
            init_cell = fluid.layers.data(
                name="init_cell", shape=[1], dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            # NOTE(review): test_program is never read again in this test.
            test_program = fluid.default_main_program().clone(for_test=True)

            # NOTE(review): add_1 is never read again; the fc call is
            # presumably kept for the layer it adds to the program - confirm.
            add_1 = fluid.layers.fc(static_last_hidden,
                                    size=hidden_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)

            sgd.minimize(static_loss)
            # NOTE(review): the two dicts below are never used in this test.
            static_param_updated = dict()
            static_param_init = dict()

            out = exe.run(framework.default_startup_program())

            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            # Run batch_num training steps on the same fixed input so the
            # non-zero check on persistable variables below can succeed.
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [static_loss, static_last_hidden, static_last_cell]
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

            # Snapshot every parameter / persistable variable before saving.
            main_program = framework.default_main_program()
            base_map = {}
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    t = np.array(fluid.global_scope().find_var(var.name)
                                 .get_tensor())
                    # Make sure every parameter or optimizer variable has
                    # been updated (is not all zeros).
                    self.assertTrue(np.sum(np.abs(t)) != 0)
                    base_map[var.name] = t

            # Save with the older save_persistables API, without `filename`.
            fluid.io.save_persistables(exe, "test_program_1", main_program)

            # Overwrite every saved variable with zeros in the global scope.
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    ten = fluid.global_scope().find_var(var.name).get_tensor()
                    ten.set(np.zeros_like(np.array(ten)), place)

                    new_t = np.array(fluid.global_scope().find_var(var.name)
                                     .get_tensor())
                    # Make sure every parameter or optimizer variable has
                    # been set to zero.
                    self.assertTrue(np.sum(np.abs(new_t)) == 0)

            # Alternative kept by the original author:
            # fluid.load(test_program, "./test_1", None)
            program_state = fluid.load_program_state("test_program_1")
            fluid.set_program_state(main_program, program_state)

            # Every restored variable must equal its pre-save snapshot.
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    new_t = np.array(fluid.global_scope().find_var(var.name)
                                     .get_tensor())
                    base_t = base_map[var.name]
                    self.assertTrue(np.array_equal(new_t, base_t))
class TestProgramStateOldSaveSingleModel(unittest.TestCase):
    """Round-trip test: save with ``fluid.io.save_persistables`` using
    ``filename="model_1"``, then restore with ``fluid.load_program_state``
    given an explicit ``var_list``; also checks three error cases.
    """

    def test_ptb_rnn_cpu_float32(self):
        """Train a small PtbModel, save it with a ``filename``, zero every
        persistable variable, reload the state and compare against the
        pre-save snapshot; then verify the expected exceptions.
        """
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200

        with new_program_scope():
            # Fix the seeds on both default programs before building the graph.
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            ptb_model = PtbModel(
                "ptb_model",
                hidden_size=hidden_size,
                vocab_size=vocab_size,
                num_layers=num_layers,
                num_steps=num_steps,
                init_scale=init_scale)

            # Use the first CUDA device when available, otherwise the CPU.
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)
            exe = fluid.Executor(place)
            sgd = Adam(learning_rate=1e-3)
            x = fluid.layers.data(
                name="x", shape=[-1, num_steps], dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(
                name="init_hidden", shape=[1], dtype='float32')
            init_cell = fluid.layers.data(
                name="init_cell", shape=[1], dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            # NOTE(review): test_program is never read again in this test.
            test_program = fluid.default_main_program().clone(for_test=True)

            # NOTE(review): add_1 is never read again; the fc call is
            # presumably kept for the layer it adds to the program - confirm.
            add_1 = fluid.layers.fc(static_last_hidden,
                                    size=hidden_size,
                                    num_flatten_dims=2,
                                    bias_attr=False)

            sgd.minimize(static_loss)
            # NOTE(review): the two dicts below are never used in this test.
            static_param_updated = dict()
            static_param_init = dict()

            out = exe.run(framework.default_startup_program())

            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            # Run batch_num training steps on the same fixed input so the
            # non-zero check on persistable variables below can succeed.
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [static_loss, static_last_hidden, static_last_cell]
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

            # Snapshot every parameter / persistable variable before saving.
            main_program = framework.default_main_program()
            base_map = {}
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    t = np.array(fluid.global_scope().find_var(var.name)
                                 .get_tensor())
                    # Make sure every parameter or optimizer variable has
                    # been updated (is not all zeros).
                    self.assertTrue(np.sum(np.abs(t)) != 0)
                    base_map[var.name] = t

            # Save with the older save_persistables API, passing `filename`.
            fluid.io.save_persistables(
                exe, "test_program_2", main_program, filename="model_1")

            # Overwrite every saved variable with zeros in the global scope.
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    ten = fluid.global_scope().find_var(var.name).get_tensor()
                    ten.set(np.zeros_like(np.array(ten)), place)

                    new_t = np.array(fluid.global_scope().find_var(var.name)
                                     .get_tensor())
                    # Make sure every parameter or optimizer variable has
                    # been set to zero.
                    self.assertTrue(np.sum(np.abs(new_t)) == 0)

            # Alternative kept by the original author:
            # fluid.load(test_program, "./test_1", None)
            # model_path points at the saved file, so var_list is supplied
            # to tell the loader which variables to read.
            program_state = fluid.load_program_state(
                os.path.join("test_program_2", "model_1"),
                var_list=fluid.io.get_program_persistable_vars(main_program))
            fluid.set_program_state(main_program, program_state)

            # Every restored variable must equal its pre-save snapshot.
            for var in main_program.list_vars():
                if isinstance(var, framework.Parameter) or var.persistable:
                    new_t = np.array(fluid.global_scope().find_var(var.name)
                                     .get_tensor())
                    base_t = base_map[var.name]
                    self.assertTrue(np.array_equal(new_t, base_t))

            # model_path is a file and var_list is omitted -> ValueError.
            with self.assertRaises(ValueError):
                fluid.load_program_state(
                    os.path.join("test_program_2", "model_1"))

            # var_list entries that are not Variable instances -> TypeError.
            with self.assertRaises(TypeError):
                fluid.load_program_state(
                    os.path.join("test_program_2", "model_1"),
                    var_list=["str"])

            # A variable named "fake_var_name" is expected to make loading
            # fail, which load_program_state reports as RuntimeError.
            with self.assertRaises(RuntimeError):
                fluid.load_program_state(
                    os.path.join("test_program_2", "model_1"),
                    var_list=[
                        main_program.global_block().create_var(
                            name="fake_var_name", persistable=True)
                    ])
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册