From 4d236354314cab4a3cf778795e3f576d15ec2fd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=98=8E=E5=86=AC?= <78149749+winter-wang@users.noreply.github.com> Date: Wed, 15 Sep 2021 17:57:02 +0800 Subject: [PATCH] clip op extra information when export model. (#35447) * clip op extra information when export model,test=ocr * rename clip_extra parameter to kwargs in save_inference_model, test=ocr --- paddle/fluid/framework/op_desc.cc | 5 ++ paddle/fluid/framework/op_desc.h | 2 + paddle/fluid/pybind/protobuf.cc | 1 + .../slim/quantization/imperative/qat.py | 3 +- .../tests/test_quantization_scale_pass.py | 8 ++- .../slim/tests/test_quantize_transpiler_v2.py | 8 ++- .../tests/test_image_classification_fp16.py | 12 +++- .../contrib/tests/test_quantize_transpiler.py | 7 +- python/paddle/fluid/dygraph/jit.py | 11 +-- python/paddle/fluid/framework.py | 67 ++++++++++++++++++- python/paddle/fluid/io.py | 13 ++-- .../fluid/tests/book/test_fit_a_line.py | 6 +- .../ir/inference/quant_dequant_test.py | 9 ++- .../test_imperative_static_runner_mnist.py | 3 +- .../test_imperative_static_runner_while.py | 3 +- .../tests/unittests/test_operator_desc.py | 2 + python/paddle/static/io.py | 7 +- 17 files changed, 137 insertions(+), 30 deletions(-) diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 1b4d8adeb5..2c5fcf2810 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -460,6 +460,11 @@ void OpDesc::RemoveOutput(const std::string &name) { need_update_ = true; } +void OpDesc::RemoveInput(const std::string &name) { + inputs_.erase(name); + need_update_ = true; +} + bool OpDesc::HasProtoAttr(const std::string &name) const { auto &op_info = OpInfoMap::Instance(); if (op_info.Has(desc_.type())) { diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 6b5969f412..51e5df3e16 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -68,6 +68,8 @@ class OpDesc { const std::vector &args); void RemoveOutput(const std::string &name); + void RemoveInput(const std::string &name); + bool HasAttr(const std::string &name) const { return attrs_.find(name) != attrs_.end(); } diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 596bd004e1..99607d7f97 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -267,6 +267,7 @@ void BindOpDesc(pybind11::module *m) { self.SetOutput(name, vec_var_name); }) .def("remove_output", &pd::OpDesc::RemoveOutput) + .def("remove_input", &pd::OpDesc::RemoveInput) .def("input_arg_names", &pd::OpDesc::InputArgumentNames) .def("output_arg_names", &pd::OpDesc::OutputArgumentNames) .def("_rename_input", &pd::OpDesc::RenameInput) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index f959b922c8..125d9fa88d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -495,7 +495,8 @@ class ImperativeQuantizeOutputs(object): executor=exe, main_program=infer_program.clone(), model_filename=model_filename, - params_filename=params_filename) + params_filename=params_filename, + clip_extra=True) if is_dynamic_mode: paddle.disable_static() diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py index b03281546a..ec2c7a91f9 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py @@ -169,9 +169,11 @@ class TestQuantizationScalePass(unittest.TestCase): f.write(str(server_program)) with fluid.scope_guard(scope): - fluid.io.save_inference_model('quant_scale_model' + dev_name, - ['image', 'label'], [loss], exe, - server_program) + fluid.io.save_inference_model( + 'quant_scale_model' + dev_name, ['image', 'label'], [loss], + exe, + server_program, + clip_extra=True) def test_quant_scale_cuda(self): if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py b/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py index aa9f6a1801..f5eb7d347c 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py @@ -141,9 +141,11 @@ class TestQuantizeProgramPass(unittest.TestCase): qt.convert(test_program, scope) if not for_ci: with fluid.scope_guard(scope): - fluid.io.save_inference_model('./infer_model', - ['image', 'label'], [loss], exe, - test_program) + fluid.io.save_inference_model( + './infer_model', ['image', 'label'], [loss], + exe, + test_program, + clip_extra=True) def test_gpu_1(self): if fluid.core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 0280dfcf67..66af517c3e 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -201,7 +201,8 @@ def train(net_type, use_cuda, save_dirname, is_local): fluid.io.save_inference_model( save_dirname, ["pixel"], [predict], exe, - main_program=train_program) + main_program=train_program, + clip_extra=True) return if is_local: @@ -258,8 +259,13 @@ def infer(use_cuda, save_dirname=None): print("infer results: ", results[0]) - fluid.io.save_inference_model(save_dirname, feed_target_names, - fetch_targets, exe, inference_program) + fluid.io.save_inference_model( + save_dirname, + feed_target_names, + fetch_targets, + exe, + inference_program, + clip_extra=True) def main(net_type, use_cuda, is_local=True): diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py index 342be7db3e..c3099ec88f 100644 --- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py +++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py @@ -258,8 +258,11 @@ class TestQuantizeTranspiler(unittest.TestCase): # Convert parameter to 8-bit. quant_transpiler.convert_to_int8(test_program, place) # Save the 8-bit parameter and model file. - fluid.io.save_inference_model('model_8bit', ['image', 'label'], - [loss], exe, test_program) + fluid.io.save_inference_model( + 'model_8bit', ['image', 'label'], [loss], + exe, + test_program, + clip_extra=True) # Test whether the 8-bit parameter and model file can be loaded successfully. [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit', exe) diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index b17b796812..fa71923200 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -855,7 +855,8 @@ def save(layer, path, input_spec=None, **configs): model_filename=model_filename, params_filename=params_filename, export_for_deployment=configs._export_for_deployment, - program_only=configs._program_only) + program_only=configs._program_only, + clip_extra=False) # NOTE(chenweihang): [ Save extra variable info ] # save_inference_model will lose some important variable information, including: @@ -1342,7 +1343,7 @@ class TracedLayer(object): return self._run(self._build_feed(inputs)) @switch_to_static_graph - def save_inference_model(self, path, feed=None, fetch=None): + def save_inference_model(self, path, feed=None, fetch=None, **kwargs): """ Save the TracedLayer to a model for inference. The saved inference model can be loaded by C++ inference APIs. @@ -1360,6 +1361,7 @@ class TracedLayer(object): saved inference model. If None, all output variables of the TracedLayer object would be the outputs of the saved inference model. Default None. + kwargs: Supported keys including 'clip_extra'.set to True if you want to clip extra information for every operator. Returns: None @@ -1409,7 +1411,7 @@ class TracedLayer(object): for f in fetch: check_type(f, "each element of fetch", int, "fluid.dygraph.jit.TracedLayer.save_inference_model") - + clip_extra = kwargs.get('clip_extra', False) # path check file_prefix = os.path.basename(path) if file_prefix == "": @@ -1449,4 +1451,5 @@ class TracedLayer(object): executor=self._exe, main_program=self._program.clone(), model_filename=model_filename, - params_filename=params_filename) + params_filename=params_filename, + clip_extra=clip_extra) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 4089e4f615..92afe0fdaf 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -5135,7 +5135,7 @@ class Program(object): res._sync_with_cpp() return res - def _remove_training_info(self): + def _remove_training_info(self, clip_extra=True): """ This method will create a new program and do following adjustments on it: 1. Remove all variable's `is_parameter` attribute if exist. @@ -5160,6 +5160,71 @@ class Program(object): for var in block.all_vars(): var.clear_is_parameter() var.clear_stop_gradient() + if not clip_extra: + continue + for op_idx in range(0, block.op_size()): + op = block.op(op_idx) + if op.type() not in OpProtoHolder.instance().op_proto_map: + continue + proto = OpProtoHolder.instance().get_op_proto(op.type()) + remove_input_list = [] + for name in op.input_names(): + find = False + for input_proto in proto.inputs: + if input_proto.name != name: + continue + if input_proto.extra: + remove_input_list.append(name) + find = True + break + if not find: + remove_input_list.append(name) + for name in remove_input_list: + op.remove_input(name) + + remove_output_list = [] + for name in op.output_names(): + find = False + for output_proto in proto.outputs: + if output_proto.name != name: + continue + if output_proto.extra: + remove_output_list.append(name) + find = True + break + if not find: + remove_output_list.append(name) + for name in remove_output_list: + op.remove_output(name) + + remove_attr_list = [] + op_quant_name = core.op_proto_and_checker_maker.kOpWithQuantAttrName( + ) + quant = bool(op.attr(op_quant_name) + ) if op_quant_name in op.attr_names() else False + quant_attrs = [ + op_quant_name, "quantization_type", "skip_quant", + "activation_bits", "bit_length", "quantize_weight_bits", + "weight_quant_scale" + ] + for name in op.attr_names(): + if quant: + if name in quant_attrs: + continue + if name.endswith("_threshold"): + continue + find = False + for attr_proto in proto.attrs: + if attr_proto.name != name: + continue + if attr_proto.extra: + remove_attr_list.append(name) + find = True + break + if not find: + remove_attr_list.append(name) + for name in remove_attr_list: + op.remove_attr(name) return res @staticmethod diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 9d02809e54..417e5ace8c 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -1251,7 +1251,8 @@ def save_inference_model(dirname, model_filename=None, params_filename=None, export_for_deployment=True, - program_only=False): + program_only=False, + clip_extra=False): """ :api_attr: Static Graph @@ -1432,14 +1433,16 @@ def save_inference_model(dirname, main_program.desc._set_version() paddle.fluid.core.save_op_version_info(main_program.desc) with open(model_basename, "wb") as f: - f.write(main_program._remove_training_info() - .desc.serialize_to_string()) + f.write( + main_program._remove_training_info(clip_extra=clip_extra) + .desc.serialize_to_string()) else: # TODO(panyx0718): Save more information so that it can also be used # for training and more flexible post-processing. with open(model_basename + ".main_program", "wb") as f: - f.write(main_program._remove_training_info() - .desc.serialize_to_string()) + f.write( + main_program._remove_training_info(clip_extra=clip_extra) + .desc.serialize_to_string()) if program_only: warnings.warn( diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index a533d1b40c..8db8b79359 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -86,8 +86,10 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16): fetch_list=[avg_cost]) if avg_loss_value[0] < 10.0: if save_dirname is not None: - paddle.static.save_inference_model(save_dirname, [x], - [y_predict], exe) + paddle.static.save_inference_model( + save_dirname, [x], [y_predict], + exe, + clip_extra=False) return if math.isnan(float(avg_loss_value)): sys.exit("got NaN loss, training failed.") diff --git a/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py b/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py index a75911232c..1ca7799963 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py @@ -111,8 +111,13 @@ class QuantDequantTest(unittest.TestCase): def _save_models(self, dirname, feeded_var_names, target_vars, executor, program, scope): with fluid.scope_guard(scope): - fluid.io.save_inference_model(dirname, feeded_var_names, - target_vars, executor, program) + fluid.io.save_inference_model( + dirname, + feeded_var_names, + target_vars, + executor, + program, + clip_extra=True) def _get_paddle_outs(self, feed, fetch_list, executor, program, scope): ''' diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py index bab2674e87..8a0d92fa41 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py @@ -115,7 +115,8 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase): self.save_dirname, ["img"], [prediction], exe, model_filename=self.model_filename, - params_filename=self.params_filename) + params_filename=self.params_filename, + clip_extra=False) def load_and_train_dygraph(self): place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py index 841df6d089..13ed7a4d33 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py @@ -104,7 +104,8 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase): self.save_dirname, ["img"], [pred], exe, model_filename=self.model_filename, - params_filename=self.params_filename) + params_filename=self.params_filename, + clip_extra=False) def load_and_train_dygraph(self): place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index cf1f12411e..17eeedc524 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -81,6 +81,8 @@ class TestOperator(unittest.TestCase): self.assertEqual(mul_op.attr("y_num_col_dims"), 1) self.assertEqual(mul_op.idx, 0) self.assertEqual(mul_out.op, mul_op) + mul_op.desc.remove_input("X") + self.assertEqual(mul_op.input_names, ["Y"]) def test_mult_input(self): program = Program() diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index 677218cc4c..05a3389fd1 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -447,8 +447,9 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor, fetch_vars(Variable | list[Variable]): Variables returned by inference. executor(Executor): The executor that saves the inference model. You can refer to :ref:`api_guide_executor_en` for more details. - kwargs: Supported keys including 'program'.Attention please, kwargs is used for backward compatibility mainly. + kwargs: Supported keys including 'program' and "clip_extra". Attention please, kwargs is used for backward compatibility mainly. - program(Program): specify a program if you don't want to use default main program. + - clip_extra(bool): set to True if you want to clip extra information for every operator. Returns: None @@ -509,9 +510,11 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor, _check_vars('fetch_vars', fetch_vars) program = _get_valid_program(kwargs.get('program', None)) + clip_extra = kwargs.get('clip_extra', False) program = normalize_program(program, feed_vars, fetch_vars) # serialize and save program - program_bytes = _serialize_program(program._remove_training_info()) + program_bytes = _serialize_program( + program._remove_training_info(clip_extra=clip_extra)) save_to_file(model_path, program_bytes) # serialize and save params params_bytes = _serialize_persistables(program, executor) -- GitLab