Unverified commit d555c700, authored by Chen Weihang, committed by GitHub

Fix some problems in StaticModelRunner (#24082) (#24163)

* adapt old version pretrain model load, test=develop

* fix infer error & multiple input error, test=develop
Parent b0c17e76
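
For context, a minimal usage sketch of the behavior this commit fixes (paths and shapes are hypothetical; assuming the fluid 1.x dygraph API that appears in the diff below):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # load a model saved earlier by fluid.io.save_inference_model
        mnist = fluid.dygraph.static_runner.StaticModelRunner(
            model_dir="mnist.inference.model")

        x = np.random.random(size=(4, 1, 28, 28)).astype('float32')

        # after this commit, inputs are passed positionally, in the same
        # order as the feed variables at save time; train mode is the default
        out = mnist(x)

        # eval() now switches execution to the loaded inference program desc
        mnist.eval()
        pred = mnist(x)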
@@ -168,8 +168,13 @@ class StaticModelRunner(layers.Layer):
         super(StaticModelRunner, self).__init__()

         # Step 0. key variable definitions
-        self._load_program_desc = None
-        self._program_desc = None
+        # loaded inference program desc
+        self._infer_program_desc = None
+        # recovered train program desc
+        self._train_program_desc = None
+        # program desc actually executed by StaticModelRunner,
+        # switched between infer and train by train() and eval()
+        self._trace_program_desc = None
         self._inner_scope = core.Scope()
         # the layer outputs var desc
         self._output_descs = []
@@ -182,46 +187,47 @@ class StaticModelRunner(layers.Layer):
         # Step 1. load program desc from disk
         # the saved model holds feed, fetch & scale ops, which are not
         # needed here and can be removed
-        self._load_program_desc = self._load_static_model(model_dir,
-                                                          model_filename)
+        self._infer_program_desc = self._load_static_model(model_dir,
+                                                           model_filename)

-        # Step 2. set all `is_test` attributes to False
-        self._change_is_test_status(False)
-
-        # Step 3. load all parameters
+        # Step 2. load all parameters
         self._load_persisitable_dict(model_dir, params_filename)

-        # Step 4. generate backward program desc
-        self._program_desc = self._append_backward_desc()
+        # Step 3. generate backward program desc
+        self._train_program_desc = self._append_backward_desc()

-        # Step 5. recheck parameters' stop gradients
+        # Step 4. recheck parameters' stop gradients
         self._recheck_stop_gradients()

+        # Step 5. set default mode to train
+        self.train()
+
     def train(self):
         self._is_test = False
-        self._change_is_test_status(False)
+        self._trace_program_desc = self._train_program_desc

     def eval(self):
         self._is_test = True
-        self._change_is_test_status(True)
+        self._trace_program_desc = self._infer_program_desc

-    def forward(self, inputs):
+    def forward(self, *args):
         """
         Executes the forward part of the StaticModelRunner Layer.
         Generally called directly through the Layer object.

         Args:
-            inputs(np.ndarray|Variable|list[np.ndarray|Variable]): the inputs of StaticModelRunner
+            args(tuple(np.ndarray|Variable)): the inputs of StaticModelRunner.
                 The order of the input variables needs to be the same as the order
                 of the feed variables when `save_inference_model` was used to save the model.

         Returns:
             Variable|list[Variable]: The forward outputs of the StaticModelRunner Layer.
                 If there is only one output, a Variable is returned;
                 if there are multiple outputs, a list[Variable] is returned.
         """
         # Step 1. prepare inputs, outputs, attrs
-        if not isinstance(inputs, (list, tuple)):
-            inputs = [inputs]
-
         input_vars = []
-        for i, value in enumerate(inputs):
+        for i, value in enumerate(args):
             if not isinstance(value, (np.ndarray, core.VarBase)):
                 raise TypeError(
                     "The type of inputs.value in StaticModelRunner.forward must be numpy array or Variable(VarBase), but received %s."
@@ -265,9 +271,9 @@ class StaticModelRunner(layers.Layer):
             outputs={'Out': output_vars,
                      'OutScope': tmp_scope_vec},
             attrs={
-                'global_block': self._program_desc.block(0),
+                'global_block': self._trace_program_desc.block(0),
                 'start_op_index': 0,
-                'end_op_index': self._load_program_desc.block(0).op_size(),
+                'end_op_index': self._infer_program_desc.block(0).op_size(),
                 'is_test': self._is_test
             })
@@ -280,7 +286,7 @@ class StaticModelRunner(layers.Layer):
         # be user wanted result.
         for param in params:
             grad_name = param.name + core.grad_var_suffix()
-            grad_var = self._program_desc.block(0).find_var(
+            grad_var = self._trace_program_desc.block(0).find_var(
                 cpt.to_bytes(grad_name))
             # NOTE: a missing grad var desc may be fine, e.g. in batch_norm
             if grad_var is None:
@@ -334,11 +340,15 @@ class StaticModelRunner(layers.Layer):
                 self._output_names.append(cpt.to_bytes(op.input('X')[0]))
                 self._output_descs.append(
                     root_block.find_var(cpt.to_bytes(op.input('X')[0])))
-            elif op.type() == 'fetch' and op.input('X')[0].startswith(
-                    'save_infer_model/scale_'):
+            elif op.type() == 'fetch':
                 ops_to_remove.append(i)
                 fetch_var_name = cpt.to_bytes(op.output('Out')[0])
                 root_block._remove_var(fetch_var_name)
+                # NOTE: some old pre-trained models have no extra scale op
+                if not op.input('X')[0].startswith('save_infer_model/scale_'):
+                    self._output_names.append(cpt.to_bytes(op.input('X')[0]))
+                    self._output_descs.append(
+                        root_block.find_var(cpt.to_bytes(op.input('X')[0])))
             else:
                 if op.has_attr("op_callstack"):
                     op.remove_attr("op_callstack")
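
The new `elif` branch handles two fetch layouts; condensed into a standalone predicate (a sketch using only APIs already visible in this file):

    def fetch_reads_raw_output(op):
        # newer saved models fetch through an inserted scale op:
        #     out -> scale -> save_infer_model/scale_0 -> fetch
        # while some old pre-trained models fetch the raw output directly:
        #     out -> fetch
        # only the direct case must still be recorded as a layer output here,
        # since the scaled case was recorded when the scale op was visited
        return op.type() == 'fetch' and not op.input('X')[0].startswith(
            'save_infer_model/scale_')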
@@ -346,14 +356,20 @@ class StaticModelRunner(layers.Layer):
         for op_idx in reversed(ops_to_remove):
             root_block._remove_op(op_idx, op_idx + 1)

+        # NOTE: reverse feed vars
+        self._input_names.reverse()
+
         return program_desc

     @switch_to_static_graph
     def _append_backward_desc(self):
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
-        program_desc_copy = core.ProgramDesc(self._load_program_desc)
+        assert self._infer_program_desc is not None, "The StaticModelRunner is not initialized properly."
+        program_desc_copy = core.ProgramDesc(self._infer_program_desc)
+
+        # Step 1. set all `is_test` attributes to False
+        self._change_is_test_status(program_desc_copy, False)

-        # Step 1. prepare program and related var
+        # Step 2. prepare program and related var
         # NOTE: To reuse the backward interfaces, first build a Program.
         # Building one is not strictly necessary, but without it a series
         # of append_backward methods would have to be rewritten for ProgramDesc.
@@ -366,15 +382,15 @@ class StaticModelRunner(layers.Layer):
         for out in self._output_descs:
             targets.append(program.global_block().var(out.name()))

-        # Step 2. append backward
+        # Step 3. append backward
         backward.gradients(targets=targets, inputs=[])

         return program.desc

     def _load_persisitable_dict(self, model_dir, params_filename=None):
         load_dirname = os.path.normpath(model_dir)
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
+        assert self._infer_program_desc is not None, "The StaticModelRunner is not initialized properly."

-        persis_vars = self._get_persis_vars(self._load_program_desc)
+        persis_vars = self._get_persis_vars(self._infer_program_desc)
         load_var_map = {}
         for each_var in persis_vars:
             orig_each_name = each_var.name()
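
For reference, a standalone sketch of the append-backward step above (toy program; `backward.gradients` used as in this hunk, where its useful effect is the grad ops it appends to the program desc, not its return value):

    import paddle.fluid as fluid
    from paddle.fluid import backward

    program = fluid.Program()
    with fluid.program_guard(program):
        x = fluid.data(name='x', shape=[None, 8], dtype='float32')
        out = fluid.layers.fc(input=x, size=1)
        # with inputs=[] no input gradients are returned; backward op
        # descs are appended to `program` as a side effect
        backward.gradients(targets=[out], inputs=[])

    # program.desc now holds forward and backward op descs, analogous
    # to the _train_program_desc recovered by _append_backward_desc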
@@ -425,12 +441,12 @@ class StaticModelRunner(layers.Layer):
             self._param_names.append(param.name)

     def _recheck_stop_gradients(self):
-        assert self._program_desc is not None, "The StaticModelRunner not initialized properly."
+        assert self._train_program_desc is not None, "The StaticModelRunner is not initialized properly."
         # NOTE: After loading the model, the stop_gradient information
         # of the original variable is lost, but if a parameter does not
         # have a corresponding @GRAD variable in the backward program,
         # it can be regarded as stop_gradient as well
-        all_var_names = self._get_all_var_names(self._program_desc)
+        all_var_names = self._get_all_var_names(self._train_program_desc)
         for param_name in self._parameters:
             param_grad_name = param_name + core.grad_var_suffix()
             if param_grad_name not in all_var_names:
@@ -472,19 +488,19 @@ class StaticModelRunner(layers.Layer):
         return var_desc.persistable()

     def _is_parameter(self, persis_var_desc):
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
+        assert self._infer_program_desc is not None, "The StaticModelRunner is not initialized properly."
         # 1. first, a param should be the input of some op
         input_ops = []  # ops can be repeated
-        for block_idx in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(block_idx)
+        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
+            block = self._infer_program_desc.block(block_idx)
             for op_idx in six.moves.range(block.op_size()):
                 op = block.op(op_idx)
                 # NOTE: a parameter is the input of a certain op
                 if persis_var_desc.name() in op.input_arg_names():
                     input_ops.append(op)
         # 2. second, a param should not be the output of any op, including the op that reads it
-        for block_idx in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(block_idx)
+        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
+            block = self._infer_program_desc.block(block_idx)
             for op_idx in six.moves.range(block.op_size()):
                 op = block.op(op_idx)
                 if persis_var_desc.name() in op.output_arg_names():
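
The parameter check above, condensed into one predicate (a sketch reusing only the ProgramDesc APIs visible in this diff):

    import six

    def is_parameter(program_desc, var_name):
        # a persistable var counts as a parameter iff some op reads it
        # and no op (including the reader itself) ever writes it
        read, written = False, False
        for block_idx in six.moves.range(program_desc.num_blocks()):
            block = program_desc.block(block_idx)
            for op_idx in six.moves.range(block.op_size()):
                op = block.op(op_idx)
                read = read or var_name in op.input_arg_names()
                written = written or var_name in op.output_arg_names()
        return read and not written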
@@ -495,11 +511,10 @@ class StaticModelRunner(layers.Layer):
                 return False
         return True

-    def _change_is_test_status(self, is_test):
+    def _change_is_test_status(self, program_desc, is_test):
         # change all `is_test` attributes
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
-        for i in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(i)
+        for i in six.moves.range(program_desc.num_blocks()):
+            block = program_desc.block(i)
             for j in six.moves.range(block.op_size()):
                 op = block.op(j)
                 if op.has_attr('is_test'):
@@ -520,8 +535,8 @@ class StaticModelRunner(layers.Layer):
         old_name = param_desc.name()
         new_name = self._append_loaded_suffix(param_desc.name())
         param_desc.set_name(new_name)
-        for block_idx in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(block_idx)
+        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
+            block = self._infer_program_desc.block(block_idx)
             for op_idx in six.moves.range(block.op_size()):
                 op = block.op(op_idx)
                 op._rename_input(old_name, new_name)
...
@@ -149,7 +149,7 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                 label = data[1]
                 label.stop_gradient = True

-                cost = mnist(inputs=img)
+                cost = mnist(img)
                 loss = fluid.layers.cross_entropy(cost, label)
                 avg_loss = fluid.layers.mean(loss)
@@ -229,7 +229,67 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
         return static_x_data, static_out, static_param_init_value, static_param_value

-    def test_mnist_no_params_filename(self):
+    def load_and_infer_dygraph(self):
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.dygraph.guard(place):
+            fluid.default_main_program().random_seed = self.seed
+
+            mnist = fluid.dygraph.static_runner.StaticModelRunner(
+                model_dir=self.save_dirname, model_filename=self.model_filename)
+
+            train_reader = paddle.batch(
+                self.reader_decorator(paddle.dataset.mnist.test()),
+                batch_size=self.batch_size,
+                drop_last=True)
+            train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+            train_loader.set_sample_list_generator(train_reader, places=place)
+
+            mnist.eval()
+
+            for batch_id, data in enumerate(train_loader()):
+                img = data[0]
+                cost = mnist(img)
+
+                if batch_id >= 1:
+                    break
+
+                dy_x_data = img.numpy()
+                dy_out = cost.numpy()
+
+        return dy_x_data, dy_out
+
+    def load_and_infer_static(self):
+        with new_program_scope():
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+            exe = fluid.Executor(place)
+
+            [infer_program, feed_target_names,
+             fetch_targets] = fluid.io.load_inference_model(self.save_dirname,
+                                                            exe)
+
+            infer_program.random_seed = self.seed
+
+            train_reader = paddle.batch(
+                self.reader_decorator(paddle.dataset.mnist.test()),
+                batch_size=self.batch_size,
+                drop_last=True)
+
+            for batch_id, data in enumerate(train_reader()):
+                static_x_data = np.array([x[0] for x in data])
+
+                out = exe.run(infer_program,
+                              feed={feed_target_names[0]: static_x_data},
+                              fetch_list=fetch_targets)
+
+                if batch_id >= 1:
+                    break
+
+                static_out = out[0]
+
+        return static_x_data, static_out
+
+    def test_mnist_train_no_params_filename(self):
         self.save_dirname = "mnist.inference.model.noname"
         self.model_filename = None
         self.params_filename = None
@@ -257,7 +317,7 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                 key += core.loaded_var_suffix()
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))

-    def test_mnist_with_params_filename(self):
+    def test_mnist_train_with_params_filename(self):
         self.save_dirname = "mnist.inference.model"
         self.model_filename = "mnist.model"
         self.params_filename = "mnist.params"
@@ -285,6 +345,26 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                 key += core.loaded_var_suffix()
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))

+    def test_mnist_infer_no_params_filename(self):
+        self.save_dirname = "mnist.inference.model.noname"
+        self.model_filename = None
+        self.params_filename = None
+
+        # Phase 1. run and save static model
+        self.train_and_save_model()
+
+        # Phase 2. load model & infer in dygraph and in static mode
+        dy_x_data, dy_out = \
+            self.load_and_infer_dygraph()
+
+        static_x_data, static_out = \
+            self.load_and_infer_static()
+
+        # Phase 3. compare
+        self.assertTrue(np.array_equal(static_x_data, dy_x_data))
+        np.testing.assert_array_almost_equal(static_out, dy_out)
+        self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04))
+

 if __name__ == '__main__':
     unittest.main()
@@ -136,7 +136,7 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
                 label = data[1]
                 label.stop_gradient = True

-                cost = while_net(inputs=img)
+                cost = while_net(img)
                 loss = fluid.layers.cross_entropy(cost, label)
                 avg_loss = fluid.layers.mean(loss)
...