From 9b3086cfb7507cf0882024ffa73d6aa17ed5626b Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Sun, 26 Apr 2020 13:45:48 +0800
Subject: [PATCH] Fix some problems in StaticModelRunner (#24082)

* adapt old version pretrain model load, test=develop

* fix infer error & multiple input error, test=develop
---
 python/paddle/fluid/dygraph/static_runner.py  | 101 ++++++++++--------
 .../test_imperative_static_runner_mnist.py    |  86 ++++++++++++++-
 .../test_imperative_static_runner_while.py    |   2 +-
 3 files changed, 142 insertions(+), 47 deletions(-)

diff --git a/python/paddle/fluid/dygraph/static_runner.py b/python/paddle/fluid/dygraph/static_runner.py
index aff00b61e5c..e1aeb7e43cd 100644
--- a/python/paddle/fluid/dygraph/static_runner.py
+++ b/python/paddle/fluid/dygraph/static_runner.py
@@ -168,8 +168,13 @@ class StaticModelRunner(layers.Layer):
         super(StaticModelRunner, self).__init__()
 
         # Step 0. key variable definitions
-        self._load_program_desc = None
-        self._program_desc = None
+        # loaded inference program desc
+        self._infer_program_desc = None
+        # recovered train program desc
+        self._train_program_desc = None
+        # the program desc executed by StaticModelRunner,
+        # switched between infer and train by train() and eval()
+        self._trace_program_desc = None
         self._inner_scope = core.Scope()
         # the layer outputs var desc
         self._output_descs = []
@@ -182,46 +187,47 @@ class StaticModelRunner(layers.Layer):
 
         # Step 1. load program desc from disk
         # the saved model hold feed, fetch & scale op, no need, can be remove
-        self._load_program_desc = self._load_static_model(model_dir,
-                                                          model_filename)
+        self._infer_program_desc = self._load_static_model(model_dir,
+                                                           model_filename)
 
-        # Step 2. set all `is_test` attributes to False
-        self._change_is_test_status(False)
-
-        # Step 3. load all parameters
+        # Step 2. load all parameters
         self._load_persisitable_dict(model_dir, params_filename)
 
-        # Step 4. generate backwar program desc
-        self._program_desc = self._append_backward_desc()
+        # Step 3. generate backward program desc
+        self._train_program_desc = self._append_backward_desc()
 
-        # Step 5. recheck parameters stop gradients
+        # Step 4. recheck parameters stop gradients
         self._recheck_stop_gradients()
 
+        # Step 5. set default mode to train
+        self.train()
+
     def train(self):
         self._is_test = False
-        self._change_is_test_status(False)
+        self._trace_program_desc = self._train_program_desc
 
     def eval(self):
         self._is_test = True
-        self._change_is_test_status(True)
+        self._trace_program_desc = self._infer_program_desc
 
-    def forward(self, inputs):
+    def forward(self, *args):
         """
         Executed forward part of StaticModelRunner Layer.
         Generally execute directly using the Layer object.
 
         Args:
-            inputs(np.ndarray|Variable|list[np.ndarray|Variable]): the inputs of StaticModelRunner
+            args(tuple(np.ndarray|Variable)): the inputs of StaticModelRunner.
+                The order of input variables needs to be the same as the order
+                of feed variables when using `save_inference_model` to save the model.
 
         Returns:
             Variable|list[Variable]: The forward outputs of StaticModelRunner Layer.
+                If there is only one output, return Variable;
+                if there are multiple outputs, return list[Variable].
         """
         # Step 1. prepare inputs, outputs, attrs
-        if not isinstance(inputs, (list, tuple)):
-            inputs = [inputs]
-
         input_vars = []
-        for i, value in enumerate(inputs):
+        for i, value in enumerate(args):
             if not isinstance(value, (np.ndarray, core.VarBase)):
                 raise TypeError(
                     "The type of inputs.value in StaticModelRunner.forward must be numpy array or Variable(VarBase), but received %s."
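Usage sketch (not part of the diff): after this change, StaticModelRunner is constructed from a saved inference model and called with positional inputs. The directory name and input shape below are illustrative assumptions, not taken from this patch:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard(fluid.CPUPlace()):
        # load a model previously saved by fluid.io.save_inference_model;
        # "mnist.inference.model" is an assumed directory name
        mnist = fluid.dygraph.static_runner.StaticModelRunner(
            model_dir="mnist.inference.model")

        # forward now takes positional inputs, in the same order as the
        # feed variables given to save_inference_model
        x = np.random.random((32, 1, 28, 28)).astype('float32')

        # the default mode is train(); eval() switches the traced
        # program desc to the loaded inference desc
        mnist.eval()
        out = mnist(x)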
@@ -265,9 +271,9 @@ class StaticModelRunner(layers.Layer):
             outputs={'Out': output_vars,
                      'OutScope': tmp_scope_vec},
             attrs={
-                'global_block': self._program_desc.block(0),
+                'global_block': self._trace_program_desc.block(0),
                 'start_op_index': 0,
-                'end_op_index': self._load_program_desc.block(0).op_size(),
+                'end_op_index': self._infer_program_desc.block(0).op_size(),
                 'is_test': self._is_test
             })
 
@@ -280,7 +286,7 @@ class StaticModelRunner(layers.Layer):
         # be user wanted result.
         for param in params:
             grad_name = param.name + core.grad_var_suffix()
-            grad_var = self._program_desc.block(0).find_var(
+            grad_var = self._trace_program_desc.block(0).find_var(
                 cpt.to_bytes(grad_name))
             # NOTE: cannot find var desc maybe no problem, such as in batch_norm
             if grad_var is None:
@@ -334,11 +340,15 @@ class StaticModelRunner(layers.Layer):
                 self._output_names.append(cpt.to_bytes(op.input('X')[0]))
                 self._output_descs.append(
                     root_block.find_var(cpt.to_bytes(op.input('X')[0])))
-            elif op.type() == 'fetch' and op.input('X')[0].startswith(
-                    'save_infer_model/scale_'):
+            elif op.type() == 'fetch':
                 ops_to_remove.append(i)
                 fetch_var_name = cpt.to_bytes(op.output('Out')[0])
                 root_block._remove_var(fetch_var_name)
+                # NOTE: some old pre-trained models have no extra scale_op
+                if not op.input('X')[0].startswith('save_infer_model/scale_'):
+                    self._output_names.append(cpt.to_bytes(op.input('X')[0]))
+                    self._output_descs.append(
+                        root_block.find_var(cpt.to_bytes(op.input('X')[0])))
             else:
                 if op.has_attr("op_callstack"):
                     op.remove_attr("op_callstack")
@@ -346,14 +356,20 @@ class StaticModelRunner(layers.Layer):
         for op_idx in reversed(ops_to_remove):
             root_block._remove_op(op_idx, op_idx + 1)
 
+        # NOTE: feed vars are collected in reverse order, so restore them
+        self._input_names.reverse()
+
         return program_desc
 
     @switch_to_static_graph
     def _append_backward_desc(self):
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
-        program_desc_copy = core.ProgramDesc(self._load_program_desc)
+        assert self._infer_program_desc is not None, "The StaticModelRunner not initialized properly."
+        program_desc_copy = core.ProgramDesc(self._infer_program_desc)
+
+        # Step 1. set all `is_test` attributes to False
+        self._change_is_test_status(program_desc_copy, False)
 
-        # Step 1. prepare program and related var
+        # Step 2. prepare program and related var
         # NOTE: To reuse backward interfaces, build Program firstly.
         # Originally, there is no need to build a program, but need to almost
         # rewrite a series of methods for append_backward for program_desc.
@@ -366,15 +382,15 @@ class StaticModelRunner(layers.Layer):
         for out in self._output_descs:
             targets.append(program.global_block().var(out.name()))
 
-        # Step 2. append backward
+        # Step 3. append backward
         backward.gradients(targets=targets, inputs=[])
 
         return program.desc
 
     def _load_persisitable_dict(self, model_dir, params_filename=None):
         load_dirname = os.path.normpath(model_dir)
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
+        assert self._infer_program_desc is not None, "The StaticModelRunner not initialized properly."
 
-        persis_vars = self._get_persis_vars(self._load_program_desc)
+        persis_vars = self._get_persis_vars(self._infer_program_desc)
         load_var_map = {}
         for each_var in persis_vars:
             orig_each_name = each_var.name()
@@ -425,12 +441,12 @@ class StaticModelRunner(layers.Layer):
             self._param_names.append(param.name)
 
     def _recheck_stop_gradients(self):
-        assert self._program_desc is not None, "The StaticModelRunner not initialized properly."
+        assert self._train_program_desc is not None, "The StaticModelRunner not initialized properly."
         # NOTE: After loading the model, the stop_gradient information
         # of the original variable is lost, but if a parameter does not
         # have a corresponding @GRAD variable in the backward program,
         # it can be said that it is also stop_gradient
-        all_var_names = self._get_all_var_names(self._program_desc)
+        all_var_names = self._get_all_var_names(self._train_program_desc)
         for param_name in self._parameters:
             param_grad_name = param_name + core.grad_var_suffix()
             if param_grad_name not in all_var_names:
@@ -472,19 +488,19 @@ class StaticModelRunner(layers.Layer):
         return var_desc.persistable()
 
     def _is_parameter(self, persis_var_desc):
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
+        assert self._infer_program_desc is not None, "The StaticModelRunner not initialized properly."
         # 1. firstly, param should be input of op
         input_ops = []  # op can be repeated
-        for block_idx in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(block_idx)
+        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
+            block = self._infer_program_desc.block(block_idx)
             for op_idx in six.moves.range(block.op_size()):
                 op = block.op(op_idx)
                 # NOTE: parameter is the input of a certain op
                 if persis_var_desc.name() in op.input_arg_names():
                     input_ops.append(op)
         # 2. secondly, param should not be output of op or be same op's output
-        for block_idx in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(block_idx)
+        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
+            block = self._infer_program_desc.block(block_idx)
             for op_idx in six.moves.range(block.op_size()):
                 op = block.op(op_idx)
                 if persis_var_desc.name() in op.output_arg_names():
@@ -495,11 +511,10 @@ class StaticModelRunner(layers.Layer):
                     return False
         return True
 
-    def _change_is_test_status(self, is_test):
+    def _change_is_test_status(self, program_desc, is_test):
         # change all `is_test` attributes
-        assert self._load_program_desc is not None, "The StaticModelRunner not initialized properly."
-        for i in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(i)
+        for i in six.moves.range(program_desc.num_blocks()):
+            block = program_desc.block(i)
             for j in six.moves.range(block.op_size()):
                 op = block.op(j)
                 if op.has_attr('is_test'):
@@ -520,8 +535,8 @@ class StaticModelRunner(layers.Layer):
         old_name = param_desc.name()
         new_name = self._append_loaded_suffix(param_desc.name())
         param_desc.set_name(new_name)
-        for block_idx in six.moves.range(self._load_program_desc.num_blocks()):
-            block = self._load_program_desc.block(block_idx)
+        for block_idx in six.moves.range(self._infer_program_desc.num_blocks()):
+            block = self._infer_program_desc.block(block_idx)
             for op_idx in six.moves.range(block.op_size()):
                 op = block.op(op_idx)
                 op._rename_input(old_name, new_name)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
index 17afadedc3e..afdab0148cb 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
@@ -149,7 +149,7 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                     label = data[1]
                     label.stop_gradient = True
 
-                    cost = mnist(inputs=img)
+                    cost = mnist(img)
 
                     loss = fluid.layers.cross_entropy(cost, label)
                     avg_loss = fluid.layers.mean(loss)
@@ -229,7 +229,67 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
 
         return static_x_data, static_out, static_param_init_value, static_param_value
 
-    def test_mnist_no_params_filename(self):
+    def load_and_infer_dygraph(self):
+        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+        with fluid.dygraph.guard(place):
+            fluid.default_main_program().random_seed = self.seed
+
+            mnist = fluid.dygraph.static_runner.StaticModelRunner(
+                model_dir=self.save_dirname, model_filename=self.model_filename)
+
+            train_reader = paddle.batch(
+                self.reader_decorator(paddle.dataset.mnist.test()),
+                batch_size=self.batch_size,
+                drop_last=True)
+            train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+            train_loader.set_sample_list_generator(train_reader, places=place)
+
+            mnist.eval()
+
+            for batch_id, data in enumerate(train_loader()):
+                img = data[0]
+                cost = mnist(img)
+
+                if batch_id >= 1:
+                    break
+
+            dy_x_data = img.numpy()
+            dy_out = cost.numpy()
+
+        return dy_x_data, dy_out
+
+    def load_and_infer_static(self):
+        with new_program_scope():
+            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+            ) else fluid.CPUPlace()
+
+            exe = fluid.Executor(place)
+            [infer_program, feed_target_names,
+             fetch_targets] = fluid.io.load_inference_model(self.save_dirname,
+                                                            exe)
+            infer_program.random_seed = self.seed
+
+            train_reader = paddle.batch(
+                self.reader_decorator(paddle.dataset.mnist.test()),
+                batch_size=self.batch_size,
+                drop_last=True)
+
+            for batch_id, data in enumerate(train_reader()):
+                static_x_data = np.array([x[0] for x in data])
+                out = exe.run(infer_program,
+                              feed={feed_target_names[0]: static_x_data},
+                              fetch_list=fetch_targets)
+
+                if batch_id >= 1:
+                    break
+
+            static_param_value = {}
+            static_out = out[0]
+
+        return static_x_data, static_out
+
+    def test_mnist_train_no_params_filename(self):
         self.save_dirname = "mnist.inference.model.noname"
         self.model_filename = None
         self.params_filename = None
@@ -257,7 +317,7 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                 key += core.loaded_var_suffix()
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))
 
-    def test_mnist_with_params_filename(self):
+    def test_mnist_train_with_params_filename(self):
         self.save_dirname = "mnist.inference.model"
         self.model_filename = "mnist.model"
         self.params_filename = "mnist.params"
@@ -285,6 +345,26 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                 key += core.loaded_var_suffix()
             self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-4))
 
+    def test_mnist_infer_no_params_filename(self):
+        self.save_dirname = "mnist.inference.model.noname"
+        self.model_filename = None
+        self.params_filename = None
+        # Phase 1. run and save static model
+        self.train_and_save_model()
+
+        # Phase 2. load model & infer dygraph
+        dy_x_data, dy_out = \
+            self.load_and_infer_dygraph()
+
+        static_x_data, static_out = \
+            self.load_and_infer_static()
+
+        # Phase 3. compare
+        self.assertTrue(np.array_equal(static_x_data, dy_x_data))
+
+        np.testing.assert_array_almost_equal(static_out, dy_out)
+        self.assertTrue(np.allclose(static_out, dy_out, atol=1e-04))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
index f15fe74d1ab..486b656262c 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
@@ -136,7 +136,7 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
                     label = data[1]
                     label.stop_gradient = True
 
-                    cost = while_net(inputs=img)
+                    cost = while_net(img)
 
                     loss = fluid.layers.cross_entropy(cost, label)
                     avg_loss = fluid.layers.mean(loss)
--
GitLab
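Save-side counterpart (a sketch, not part of the diff): the feed order that forward(*args) relies on is fixed when the model is saved. A minimal static-graph save, assuming a toy fc classifier; all names and shapes here are illustrative:

    import paddle.fluid as fluid

    img = fluid.data(name='img', shape=[None, 1, 28, 28], dtype='float32')
    prediction = fluid.layers.fc(input=img, size=10, act='softmax')

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # the order of feeded_var_names fixes the positional order that
    # StaticModelRunner.forward(*args) will expect after loading
    fluid.io.save_inference_model(
        dirname="mnist.inference.model",
        feeded_var_names=[img.name],
        target_vars=[prediction],
        executor=exe)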