From 4d236354314cab4a3cf778795e3f576d15ec2fd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E6=98=8E=E5=86=AC?=
 <78149749+winter-wang@users.noreply.github.com>
Date: Wed, 15 Sep 2021 17:57:02 +0800
Subject: [PATCH] Clip op extra information when exporting a model. (#35447)

* clip op extra information when exporting a model, test=ocr

* rename clip_extra parameter to kwargs in save_inference_model, test=ocr
---
 paddle/fluid/framework/op_desc.cc             |  5 ++
 paddle/fluid/framework/op_desc.h              |  2 +
 paddle/fluid/pybind/protobuf.cc               |  1 +
 .../slim/quantization/imperative/qat.py       |  3 +-
 .../tests/test_quantization_scale_pass.py     |  8 ++-
 .../slim/tests/test_quantize_transpiler_v2.py |  8 ++-
 .../tests/test_image_classification_fp16.py   | 12 +++-
 .../contrib/tests/test_quantize_transpiler.py |  7 +-
 python/paddle/fluid/dygraph/jit.py            | 11 +--
 python/paddle/fluid/framework.py              | 67 ++++++++++++++++++-
 python/paddle/fluid/io.py                     | 13 ++--
 .../fluid/tests/book/test_fit_a_line.py       |  6 +-
 .../ir/inference/quant_dequant_test.py        |  9 ++-
 .../test_imperative_static_runner_mnist.py    |  3 +-
 .../test_imperative_static_runner_while.py    |  3 +-
 .../tests/unittests/test_operator_desc.py     |  2 +
 python/paddle/static/io.py                    |  7 +-
 17 files changed, 137 insertions(+), 30 deletions(-)

diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 1b4d8adeb5..2c5fcf2810 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -460,6 +460,11 @@ void OpDesc::RemoveOutput(const std::string &name) {
   need_update_ = true;
 }
 
+void OpDesc::RemoveInput(const std::string &name) {
+  inputs_.erase(name);  // drop the inputs_ entry keyed by `name`
+  need_update_ = true;  // set the update flag, as RemoveOutput does
+}
+
 bool OpDesc::HasProtoAttr(const std::string &name) const {
   auto &op_info = OpInfoMap::Instance();
   if (op_info.Has(desc_.type())) {
diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h
index 6b5969f412..51e5df3e16 100644
--- a/paddle/fluid/framework/op_desc.h
+++ b/paddle/fluid/framework/op_desc.h
@@ -68,6 +68,8 @@ class OpDesc {
                  const std::vector<std::string> &args);
   void RemoveOutput(const std::string &name);
 
+  void RemoveInput(const std::string &name);
+
   bool HasAttr(const std::string &name) const {
     return attrs_.find(name) != attrs_.end();
   }
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index 596bd004e1..99607d7f97 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -267,6 +267,7 @@ void BindOpDesc(pybind11::module *m) {
              self.SetOutput(name, vec_var_name);
            })
       .def("remove_output", &pd::OpDesc::RemoveOutput)
+      .def("remove_input", &pd::OpDesc::RemoveInput)
       .def("input_arg_names", &pd::OpDesc::InputArgumentNames)
       .def("output_arg_names", &pd::OpDesc::OutputArgumentNames)
       .def("_rename_input", &pd::OpDesc::RenameInput)
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
index f959b922c8..125d9fa88d 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -495,7 +495,8 @@ class ImperativeQuantizeOutputs(object):
             executor=exe,
             main_program=infer_program.clone(),
             model_filename=model_filename,
-            params_filename=params_filename)
+            params_filename=params_filename,
+            clip_extra=True)
 
         if is_dynamic_mode:
             paddle.disable_static()
diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py
index b03281546a..ec2c7a91f9 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py
@@ -169,9 +169,11 @@ class TestQuantizationScalePass(unittest.TestCase):
             f.write(str(server_program))
 
         with fluid.scope_guard(scope):
-            fluid.io.save_inference_model('quant_scale_model' + dev_name,
-                                          ['image', 'label'], [loss], exe,
-                                          server_program)
+            fluid.io.save_inference_model(
+                'quant_scale_model' + dev_name, ['image', 'label'], [loss],
+                exe,
+                server_program,
+                clip_extra=True)
 
     def test_quant_scale_cuda(self):
         if fluid.core.is_compiled_with_cuda():
diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py b/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py
index aa9f6a1801..f5eb7d347c 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_quantize_transpiler_v2.py
@@ -141,9 +141,11 @@ class TestQuantizeProgramPass(unittest.TestCase):
         qt.convert(test_program, scope)
         if not for_ci:
             with fluid.scope_guard(scope):
-                fluid.io.save_inference_model('./infer_model',
-                                              ['image', 'label'], [loss], exe,
-                                              test_program)
+                fluid.io.save_inference_model(
+                    './infer_model', ['image', 'label'], [loss],
+                    exe,
+                    test_program,
+                    clip_extra=True)
 
     def test_gpu_1(self):
         if fluid.core.is_compiled_with_cuda():
diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
index 0280dfcf67..66af517c3e 100644
--- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
+++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py
@@ -201,7 +201,8 @@ def train(net_type, use_cuda, save_dirname, is_local):
                         fluid.io.save_inference_model(
                             save_dirname, ["pixel"], [predict],
                             exe,
-                            main_program=train_program)
+                            main_program=train_program,
+                            clip_extra=True)
                         return
 
     if is_local:
@@ -258,8 +259,13 @@ def infer(use_cuda, save_dirname=None):
 
         print("infer results: ", results[0])
 
-        fluid.io.save_inference_model(save_dirname, feed_target_names,
-                                      fetch_targets, exe, inference_program)
+        fluid.io.save_inference_model(
+            save_dirname,
+            feed_target_names,
+            fetch_targets,
+            exe,
+            inference_program,
+            clip_extra=True)
 
 
 def main(net_type, use_cuda, is_local=True):
diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
index 342be7db3e..c3099ec88f 100644
--- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
@@ -258,8 +258,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
             # Convert parameter to 8-bit.
             quant_transpiler.convert_to_int8(test_program, place)
             # Save the 8-bit parameter and model file.
-            fluid.io.save_inference_model('model_8bit', ['image', 'label'],
-                                          [loss], exe, test_program)
+            fluid.io.save_inference_model(
+                'model_8bit', ['image', 'label'], [loss],
+                exe,
+                test_program,
+                clip_extra=True)
             # Test whether the 8-bit parameter and model file can be loaded successfully.
             [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit',
                                                                  exe)
diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py
index b17b796812..fa71923200 100644
--- a/python/paddle/fluid/dygraph/jit.py
+++ b/python/paddle/fluid/dygraph/jit.py
@@ -855,7 +855,8 @@ def save(layer, path, input_spec=None, **configs):
                 model_filename=model_filename,
                 params_filename=params_filename,
                 export_for_deployment=configs._export_for_deployment,
-                program_only=configs._program_only)
+                program_only=configs._program_only,
+                clip_extra=False)
 
     # NOTE(chenweihang): [ Save extra variable info ]
     # save_inference_model will lose some important variable information, including:
@@ -1342,7 +1343,7 @@ class TracedLayer(object):
             return self._run(self._build_feed(inputs))
 
     @switch_to_static_graph
-    def save_inference_model(self, path, feed=None, fetch=None):
+    def save_inference_model(self, path, feed=None, fetch=None, **kwargs):
         """
         Save the TracedLayer to a model for inference. The saved
         inference model can be loaded by C++ inference APIs.
@@ -1360,6 +1361,7 @@ class TracedLayer(object):
                 saved inference model. If None, all output variables of the
                 TracedLayer object would be the outputs of the saved inference
                 model. Default None.
+            kwargs: Supported keys include 'clip_extra'. Set 'clip_extra' to True if you want to clip extra information for every operator.
 
         Returns:
             None
@@ -1409,7 +1411,7 @@ class TracedLayer(object):
             for f in fetch:
                 check_type(f, "each element of fetch", int,
                            "fluid.dygraph.jit.TracedLayer.save_inference_model")
-
+        clip_extra = kwargs.get('clip_extra', False)
         # path check
         file_prefix = os.path.basename(path)
         if file_prefix == "":
@@ -1449,4 +1451,5 @@ class TracedLayer(object):
                 executor=self._exe,
                 main_program=self._program.clone(),
                 model_filename=model_filename,
-                params_filename=params_filename)
+                params_filename=params_filename,
+                clip_extra=clip_extra)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 4089e4f615..92afe0fdaf 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -5135,7 +5135,7 @@ class Program(object):
         res._sync_with_cpp()
         return res
 
-    def _remove_training_info(self):
+    def _remove_training_info(self, clip_extra=True):
         """
         This method will create a new program and do following adjustments on it:
         1. Remove all variable's `is_parameter` attribute if exist.
@@ -5160,6 +5160,71 @@ class Program(object):
             for var in block.all_vars():
                 var.clear_is_parameter()
                 var.clear_stop_gradient()
+            if not clip_extra:
+                continue
+            for op_idx in range(0, block.op_size()):
+                op = block.op(op_idx)
+                if op.type() not in OpProtoHolder.instance().op_proto_map:
+                    continue
+                proto = OpProtoHolder.instance().get_op_proto(op.type())
+                remove_input_list = []
+                for name in op.input_names():
+                    find = False
+                    for input_proto in proto.inputs:
+                        if input_proto.name != name:
+                            continue
+                        if input_proto.extra:
+                            remove_input_list.append(name)
+                        find = True
+                        break
+                    if not find:
+                        remove_input_list.append(name)
+                for name in remove_input_list:
+                    op.remove_input(name)
+
+                remove_output_list = []
+                for name in op.output_names():
+                    find = False
+                    for output_proto in proto.outputs:
+                        if output_proto.name != name:
+                            continue
+                        if output_proto.extra:
+                            remove_output_list.append(name)
+                        find = True
+                        break
+                    if not find:
+                        remove_output_list.append(name)
+                for name in remove_output_list:
+                    op.remove_output(name)
+
+                remove_attr_list = []
+                op_quant_name = core.op_proto_and_checker_maker.kOpWithQuantAttrName(
+                )
+                quant = bool(op.attr(op_quant_name)
+                             ) if op_quant_name in op.attr_names() else False
+                quant_attrs = [
+                    op_quant_name, "quantization_type", "skip_quant",
+                    "activation_bits", "bit_length", "quantize_weight_bits",
+                    "weight_quant_scale"
+                ]
+                for name in op.attr_names():
+                    if quant:
+                        if name in quant_attrs:
+                            continue
+                        if name.endswith("_threshold"):
+                            continue
+                    find = False
+                    for attr_proto in proto.attrs:
+                        if attr_proto.name != name:
+                            continue
+                        if attr_proto.extra:
+                            remove_attr_list.append(name)
+                        find = True
+                        break
+                    if not find:
+                        remove_attr_list.append(name)
+                for name in remove_attr_list:
+                    op.remove_attr(name)
         return res
 
     @staticmethod
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 9d02809e54..417e5ace8c 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -1251,7 +1251,8 @@ def save_inference_model(dirname,
                          model_filename=None,
                          params_filename=None,
                          export_for_deployment=True,
-                         program_only=False):
+                         program_only=False,
+                         clip_extra=False):
     """
     :api_attr: Static Graph
 
@@ -1432,14 +1433,16 @@ def save_inference_model(dirname,
         main_program.desc._set_version()
         paddle.fluid.core.save_op_version_info(main_program.desc)
         with open(model_basename, "wb") as f:
-            f.write(main_program._remove_training_info()
-                    .desc.serialize_to_string())
+            f.write(
+                main_program._remove_training_info(clip_extra=clip_extra)
+                .desc.serialize_to_string())
     else:
         # TODO(panyx0718): Save more information so that it can also be used
         # for training and more flexible post-processing.
         with open(model_basename + ".main_program", "wb") as f:
-            f.write(main_program._remove_training_info()
-                    .desc.serialize_to_string())
+            f.write(
+                main_program._remove_training_info(clip_extra=clip_extra)
+                .desc.serialize_to_string())
 
     if program_only:
         warnings.warn(
diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py
index a533d1b40c..8db8b79359 100644
--- a/python/paddle/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/fluid/tests/book/test_fit_a_line.py
@@ -86,8 +86,10 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16):
                                           fetch_list=[avg_cost])
                 if avg_loss_value[0] < 10.0:
                     if save_dirname is not None:
-                        paddle.static.save_inference_model(save_dirname, [x],
-                                                           [y_predict], exe)
+                        paddle.static.save_inference_model(
+                            save_dirname, [x], [y_predict],
+                            exe,
+                            clip_extra=False)
                     return
                 if math.isnan(float(avg_loss_value)):
                     sys.exit("got NaN loss, training failed.")
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py b/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py
index a75911232c..1ca7799963 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/quant_dequant_test.py
@@ -111,8 +111,13 @@ class QuantDequantTest(unittest.TestCase):
     def _save_models(self, dirname, feeded_var_names, target_vars, executor,
                      program, scope):
         with fluid.scope_guard(scope):
-            fluid.io.save_inference_model(dirname, feeded_var_names,
-                                          target_vars, executor, program)
+            fluid.io.save_inference_model(
+                dirname,
+                feeded_var_names,
+                target_vars,
+                executor,
+                program,
+                clip_extra=True)
 
     def _get_paddle_outs(self, feed, fetch_list, executor, program, scope):
         '''
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
index bab2674e87..8a0d92fa41 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_mnist.py
@@ -115,7 +115,8 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
                 self.save_dirname, ["img"], [prediction],
                 exe,
                 model_filename=self.model_filename,
-                params_filename=self.params_filename)
+                params_filename=self.params_filename,
+                clip_extra=False)
 
     def load_and_train_dygraph(self):
         place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
index 841df6d089..13ed7a4d33 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_static_runner_while.py
@@ -104,7 +104,8 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
             self.save_dirname, ["img"], [pred],
             exe,
             model_filename=self.model_filename,
-            params_filename=self.params_filename)
+            params_filename=self.params_filename,
+            clip_extra=False)
 
     def load_and_train_dygraph(self):
         place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py
index cf1f12411e..17eeedc524 100644
--- a/python/paddle/fluid/tests/unittests/test_operator_desc.py
+++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py
@@ -81,6 +81,8 @@ class TestOperator(unittest.TestCase):
         self.assertEqual(mul_op.attr("y_num_col_dims"), 1)
         self.assertEqual(mul_op.idx, 0)
         self.assertEqual(mul_out.op, mul_op)
+        mul_op.desc.remove_input("X")
+        self.assertEqual(mul_op.input_names, ["Y"])
 
     def test_mult_input(self):
         program = Program()
diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py
index 677218cc4c..05a3389fd1 100644
--- a/python/paddle/static/io.py
+++ b/python/paddle/static/io.py
@@ -447,8 +447,9 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor,
         fetch_vars(Variable | list[Variable]): Variables returned by inference.
         executor(Executor): The executor that saves the inference model. You can refer
                             to :ref:`api_guide_executor_en` for more details.
-        kwargs: Supported keys including 'program'.Attention please, kwargs is used for backward compatibility mainly.
+        kwargs: Supported keys include 'program' and 'clip_extra'. Note that kwargs is mainly used for backward compatibility.
           - program(Program): specify a program if you don't want to use default main program.
+          - clip_extra(bool): set to True if you want to clip extra information for every operator. Defaults to False.
     Returns:
         None
 
@@ -509,9 +510,11 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor,
     _check_vars('fetch_vars', fetch_vars)
 
     program = _get_valid_program(kwargs.get('program', None))
+    clip_extra = kwargs.get('clip_extra', False)
     program = normalize_program(program, feed_vars, fetch_vars)
     # serialize and save program
-    program_bytes = _serialize_program(program._remove_training_info())
+    program_bytes = _serialize_program(
+        program._remove_training_info(clip_extra=clip_extra))
     save_to_file(model_path, program_bytes)
     # serialize and save params
     params_bytes = _serialize_persistables(program, executor)
-- 
GitLab