Unverified · Commit 2057f76a authored by H hong, committed by GitHub

Enhance load program state (#22546) (#22589)

* enhance load program state; test=develop

* optimize comment; test=develop
Parent 3f4687b8
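In short: fluid.load_program_state now strips a trailing ".pdparams", ".pdopt", or ".pdmodel" from model_path before looking for the saved state, and a new optional var_list argument lets it fall back to models saved with the older save_params / save_persistables / save_vars interfaces. A minimal usage sketch, not part of this commit (the "./my_model" prefix and the already-built program main_prog are illustrative assumptions):

import paddle.fluid as fluid

# main_prog is assumed to be a fluid.Program whose state was saved earlier
# with fluid.save(main_prog, "./my_model").
state = fluid.load_program_state("./my_model")
# After this commit, the suffixed spellings resolve to the same prefix:
state = fluid.load_program_state("./my_model.pdparams")
state = fluid.load_program_state("./my_model.pdopt")

fluid.set_program_state(main_prog, state)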
@@ -20,6 +20,7 @@ import warnings
 import six
 import logging
 import pickle
+import contextlib
 from functools import reduce
 import numpy as np
@@ -180,6 +181,17 @@ def _clone_var_in_block_(block, var):
         persistable=True)


+@contextlib.contextmanager
+def _load_program_scope(main=None, startup=None, scope=None):
+    prog = main if main else paddle.fluid.Program()
+    startup_prog = startup if startup else paddle.fluid.Program()
+    scope = scope if scope else paddle.fluid.core.Scope()
+    with paddle.fluid.scope_guard(scope):
+        with paddle.fluid.program_guard(prog, startup_prog):
+            with paddle.fluid.unique_name.guard():
+                yield
+
+
 def _get_valid_program(main_program):
     if main_program is None:
         main_program = default_main_program()
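The new _load_program_scope helper stacks Paddle's three guards so that the throwaway program, scope, and unique-name state created while loading stay isolated from the caller's defaults. A rough sketch of what code inside the context effectively gets (illustrative only, not the committed code):

import paddle.fluid as fluid

scope = fluid.core.Scope()
with fluid.scope_guard(scope):  # tensors go into a throwaway scope
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        # ops built here land in fresh main/startup programs
        with fluid.unique_name.guard():
            # name counters restart, so generated names cannot
            # collide with the caller's variables
            pass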
@@ -1749,12 +1761,17 @@ def load(program, model_path, executor=None, var_list=None):
             set_var(v, load_dict[v.name])


-def load_program_state(model_path):
+def load_program_state(model_path, var_list=None):
     """
     Load program state from a local file

     Args:
         model_path(str): The file prefix with which the program state was saved
+        var_list(list, optional): The list of variables to load, as saved with
+                                  [ save_params, save_persistables, save_vars ].
+                                  Default: None.
+                                  var_list is only used to look up names; the
+                                  variables themselves are not modified.
     Returns:
         state_dict(dict): the dict storing Parameter and optimizer information
@@ -1775,14 +1792,94 @@ def load_program_state(model_path):
             program_state = fluid.load_program_state("./temp")
     """
-    parameter_file_name = model_path + ".pdparams"
+    model_prefix = model_path
+    if model_prefix.endswith(".pdparams"):
+        model_prefix = model_prefix[:-9]
+    elif model_prefix.endswith(".pdopt"):
+        model_prefix = model_prefix[:-6]
+    elif model_prefix.endswith(".pdmodel"):
+        model_prefix = model_prefix[:-8]
+
+    parameter_file_name = model_prefix + ".pdparams"
+    if not os.path.exists(parameter_file_name):
+        # A model file saved with fluid.save was not found; try to load a model
+        # saved with [ save_vars, save_params, save_persistables ] instead.
+        _logger.warning(
+            "{} not found, try to load model file saved with [ save_params, save_persistables, save_vars ]".
+            format(parameter_file_name))
+
+        var_name_list = []
+        if var_list is None and os.path.isfile(model_path):
+            raise ValueError(
+                "var_list can not be None when model_path is a file type")
+
+        for root, dirs, files in os.walk(model_path, topdown=False):
+            for f in files:
+                file_path = os.path.join(root, f)
+                var_temp_name = os.path.relpath(file_path, model_path)
+                var_temp_name = var_temp_name.replace("\\", "/")
+                var_name_list.append(var_temp_name)
+
+        with _load_program_scope():
+            load_prog = Program()
+            load_block = load_prog.global_block()
+
+            def clone_var_to_block(block, var):
+                if not isinstance(var, Variable):
+                    raise TypeError("value in var_list must be variable")
+                return block.create_var(
+                    name=var.name,
+                    shape=var.shape,
+                    dtype=var.dtype,
+                    type=var.type,
+                    lod_level=var.lod_level
+                    if var.desc.type() == core.VarDesc.VarType.LOD_TENSOR else
+                    None,
+                    persistable=True)
+
+            loaded_var_list = []
+            if var_list is not None:
+                for var in var_list:
+                    loaded_var_list.append(clone_var_to_block(load_block, var))
+            else:
+                for var_name in var_name_list:
+                    loaded_var_list.append(
+                        load_block.create_var(
+                            name=var_name, persistable=True))
+
+            place = paddle.fluid.CPUPlace()
+            exe = paddle.fluid.Executor(place)
+
+            try:
+                if os.path.isfile(model_path):
+                    dir_name, file_name = os.path.split(model_path)
+                else:
+                    dir_name = model_path
+                    file_name = None
+                load_vars(
+                    executor=exe,
+                    dirname=dir_name,
+                    vars=loaded_var_list,
+                    filename=file_name)
+            except Exception:
+                raise RuntimeError(
+                    "Failed to load model file, please make sure the model file is saved "
+                    "with one of the following APIs: save_params, save_persistables, save_vars")
+
+            res_dict = {}
+            for var in loaded_var_list:
+                res_dict[var.name] = np.asarray(paddle.fluid.global_scope(
+                ).find_var(var.name).get_tensor())
+
+            return res_dict

     assert os.path.exists(parameter_file_name), \
         "Parameter file [{}] does not exist".format(parameter_file_name)

     with open(parameter_file_name, 'rb') as f:
         para_dict = pickle.load(f)

-    opt_file_name = model_path + ".pdopt"
+    opt_file_name = model_prefix + ".pdopt"
     if os.path.exists(opt_file_name):
         with open(opt_file_name, 'rb') as f:
             opti_dict = pickle.load(f)
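The fallback branch above covers two on-disk layouts: a directory with one file per variable, where names can be recovered by walking the directory, and a single combined file, where they cannot, so var_list is mandatory (hence the ValueError). A hedged sketch of both paths, assuming a program main_prog saved earlier with the older APIs ("old_dir" and "model_1" are illustrative names):

import os
import paddle.fluid as fluid

# Directory layout: one file per variable, names recovered from file names,
# e.g. after fluid.io.save_persistables(exe, "old_dir", main_prog)
state = fluid.load_program_state("old_dir")

# Single-file layout: variable names are not stored on disk, so they must
# be supplied via var_list, e.g. after
# fluid.io.save_persistables(exe, "old_dir", main_prog, filename="model_1")
state = fluid.load_program_state(
    os.path.join("old_dir", "model_1"),
    var_list=fluid.io.get_program_persistable_vars(main_prog))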
@@ -625,6 +625,16 @@ class TestProgramStatePartial(unittest.TestCase):
         #fluid.load(test_program, "./test_1", None )
         program_state = fluid.load_program_state(
             os.path.join('some_dir', 'test_1'))

+        program_state_1 = fluid.load_program_state(
+            os.path.join('some_dir', 'test_1.pdparams'))
+
+        program_state_2 = fluid.load_program_state(
+            os.path.join('some_dir', 'test_1.pdopt'))
+
+        program_state_3 = fluid.load_program_state(
+            os.path.join('some_dir', 'test_1.pdmodel'))
+
         fluid.set_program_state(test_program, program_state)
         for var in test_program.list_vars():
@@ -634,6 +644,66 @@
                 base_t = base_map[var.name]
                 self.assertTrue(np.array_equal(new_t, base_t))

+        # check 1
+        for var in main_program.list_vars():
+            if isinstance(var, framework.Parameter) or var.persistable:
+                ten = fluid.global_scope().find_var(var.name).get_tensor()
+                ten.set(np.zeros_like(np.array(ten)), place)
+
+                new_t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                # make sure all the parameter or optimizer vars have been set to zero
+                self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+        fluid.set_program_state(test_program, program_state_1)
+        for var in test_program.list_vars():
+            if isinstance(var, framework.Parameter) or var.persistable:
+                new_t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                base_t = base_map[var.name]
+                self.assertTrue(np.array_equal(new_t, base_t))
+
+        # check 2
+        for var in main_program.list_vars():
+            if isinstance(var, framework.Parameter) or var.persistable:
+                ten = fluid.global_scope().find_var(var.name).get_tensor()
+                ten.set(np.zeros_like(np.array(ten)), place)
+
+                new_t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                # make sure all the parameter or optimizer vars have been set to zero
+                self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+        fluid.set_program_state(test_program, program_state_2)
+        for var in test_program.list_vars():
+            if isinstance(var, framework.Parameter) or var.persistable:
+                new_t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                base_t = base_map[var.name]
+                self.assertTrue(np.array_equal(new_t, base_t))
+
+        # check 3
+        for var in main_program.list_vars():
+            if isinstance(var, framework.Parameter) or var.persistable:
+                ten = fluid.global_scope().find_var(var.name).get_tensor()
+                ten.set(np.zeros_like(np.array(ten)), place)
+
+                new_t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                # make sure all the parameter or optimizer vars have been set to zero
+                self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+        fluid.set_program_state(test_program, program_state_3)
+        for var in test_program.list_vars():
+            if isinstance(var, framework.Parameter) or var.persistable:
+                new_t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                base_t = base_map[var.name]
+                self.assertTrue(np.array_equal(new_t, base_t))
+

 class TestVariableInit(unittest.TestCase):
     def test_variable_init(self):
@@ -984,5 +1054,247 @@ class TestLoadFromOldInterfaceSingleFile(unittest.TestCase):
                 all_var_list + [temp_var])


+class TestProgramStateOldSave(unittest.TestCase):
+    def test_ptb_rnn_cpu_float32(self):
+        seed = 90
+        hidden_size = 10
+        vocab_size = 1000
+        num_layers = 1
+        num_steps = 3
+        init_scale = 0.1
+        batch_size = 4
+        batch_num = 200
+
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            ptb_model = PtbModel(
+                "ptb_model",
+                hidden_size=hidden_size,
+                vocab_size=vocab_size,
+                num_layers=num_layers,
+                num_steps=num_steps,
+                init_scale=init_scale)
+
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            sgd = Adam(learning_rate=1e-3)
+            x = fluid.layers.data(
+                name="x", shape=[-1, num_steps], dtype='int64')
+            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
+            init_hidden = fluid.layers.data(
+                name="init_hidden", shape=[1], dtype='float32')
+            init_cell = fluid.layers.data(
+                name="init_cell", shape=[1], dtype='float32')
+
+            static_loss, static_last_hidden, static_last_cell = ptb_model(
+                x, y, init_hidden, init_cell)
+
+            test_program = fluid.default_main_program().clone(for_test=True)
+
+            add_1 = fluid.layers.fc(static_last_hidden,
+                                    size=hidden_size,
+                                    num_flatten_dims=2,
+                                    bias_attr=False)
+
+            sgd.minimize(static_loss)
+            static_param_updated = dict()
+            static_param_init = dict()
+
+            out = exe.run(framework.default_startup_program())
+
+            static_loss_value = None
+            static_last_cell_value = None
+            static_last_hidden_value = None
+            for i in range(batch_num):
+                x_data = np.arange(12).reshape(4, 3).astype('int64')
+                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
+                x_data = x_data.reshape((-1, num_steps, 1))
+                y_data = y_data.reshape((-1, 1))
+                init_hidden_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                init_cell_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                fetch_list = [static_loss, static_last_hidden, static_last_cell]
+                out = exe.run(fluid.default_main_program(),
+                              feed={
+                                  "x": x_data,
+                                  "y": y_data,
+                                  "init_hidden": init_hidden_data,
+                                  "init_cell": init_cell_data
+                              },
+                              fetch_list=fetch_list)
+                static_loss_value = out[0]
+                static_last_hidden_value = out[1]
+                static_last_cell_value = out[2]
+
+            # get values before save
+            main_program = framework.default_main_program()
+            base_map = {}
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                    # make sure all the parameter or optimizer vars have been updated
+                    self.assertTrue(np.sum(np.abs(t)) != 0)
+                    base_map[var.name] = t
+
+            fluid.io.save_persistables(exe, "test_program_1", main_program)
+
+            # set vars to zero
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    ten = fluid.global_scope().find_var(var.name).get_tensor()
+                    ten.set(np.zeros_like(np.array(ten)), place)
+
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    # make sure all the parameter or optimizer vars have been set to zero
+                    self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+            #fluid.load(test_program, "./test_1", None )
+            program_state = fluid.load_program_state("test_program_1")
+            fluid.set_program_state(main_program, program_state)
+
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    base_t = base_map[var.name]
+                    self.assertTrue(np.array_equal(new_t, base_t))
+
+
+class TestProgramStateOldSaveSingleModel(unittest.TestCase):
+    def test_ptb_rnn_cpu_float32(self):
+        seed = 90
+        hidden_size = 10
+        vocab_size = 1000
+        num_layers = 1
+        num_steps = 3
+        init_scale = 0.1
+        batch_size = 4
+        batch_num = 200
+
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            ptb_model = PtbModel(
+                "ptb_model",
+                hidden_size=hidden_size,
+                vocab_size=vocab_size,
+                num_layers=num_layers,
+                num_steps=num_steps,
+                init_scale=init_scale)
+
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            sgd = Adam(learning_rate=1e-3)
+            x = fluid.layers.data(
+                name="x", shape=[-1, num_steps], dtype='int64')
+            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
+            init_hidden = fluid.layers.data(
+                name="init_hidden", shape=[1], dtype='float32')
+            init_cell = fluid.layers.data(
+                name="init_cell", shape=[1], dtype='float32')
+
+            static_loss, static_last_hidden, static_last_cell = ptb_model(
+                x, y, init_hidden, init_cell)
+
+            test_program = fluid.default_main_program().clone(for_test=True)
+
+            add_1 = fluid.layers.fc(static_last_hidden,
+                                    size=hidden_size,
+                                    num_flatten_dims=2,
+                                    bias_attr=False)
+
+            sgd.minimize(static_loss)
+            static_param_updated = dict()
+            static_param_init = dict()
+
+            out = exe.run(framework.default_startup_program())
+
+            static_loss_value = None
+            static_last_cell_value = None
+            static_last_hidden_value = None
+            for i in range(batch_num):
+                x_data = np.arange(12).reshape(4, 3).astype('int64')
+                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
+                x_data = x_data.reshape((-1, num_steps, 1))
+                y_data = y_data.reshape((-1, 1))
+                init_hidden_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                init_cell_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                fetch_list = [static_loss, static_last_hidden, static_last_cell]
+                out = exe.run(fluid.default_main_program(),
+                              feed={
+                                  "x": x_data,
+                                  "y": y_data,
+                                  "init_hidden": init_hidden_data,
+                                  "init_cell": init_cell_data
+                              },
+                              fetch_list=fetch_list)
+                static_loss_value = out[0]
+                static_last_hidden_value = out[1]
+                static_last_cell_value = out[2]
+
+            # get values before save
+            main_program = framework.default_main_program()
+            base_map = {}
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                    # make sure all the parameter or optimizer vars have been updated
+                    self.assertTrue(np.sum(np.abs(t)) != 0)
+                    base_map[var.name] = t
+
+            fluid.io.save_persistables(
+                exe, "test_program_2", main_program, filename="model_1")
+
+            # set vars to zero
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    ten = fluid.global_scope().find_var(var.name).get_tensor()
+                    ten.set(np.zeros_like(np.array(ten)), place)
+
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    # make sure all the parameter or optimizer vars have been set to zero
+                    self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+            #fluid.load(test_program, "./test_1", None )
+            program_state = fluid.load_program_state(
+                os.path.join("test_program_2", "model_1"),
+                var_list=fluid.io.get_program_persistable_vars(main_program))
+            fluid.set_program_state(main_program, program_state)
+
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    base_t = base_map[var.name]
+                    self.assertTrue(np.array_equal(new_t, base_t))
+
+            with self.assertRaises(ValueError):
+                fluid.load_program_state(
+                    os.path.join("test_program_2", "model_1"))
+
+            with self.assertRaises(TypeError):
+                fluid.load_program_state(
+                    os.path.join("test_program_2", "model_1"),
+                    var_list=["str"])
+
+            with self.assertRaises(RuntimeError):
+                fluid.load_program_state(
+                    os.path.join("test_program_2", "model_1"),
+                    var_list=[
+                        main_program.global_block().create_var(
+                            name="fake_var_name", persistable=True)
+                    ])


 if __name__ == '__main__':
     unittest.main()