Unverified commit 4d236354 authored by 王明冬, committed by GitHub

Clip op extra information when exporting a model. (#35447)

* clip op extra information when exporting a model, test=ocr

* rename clip_extra parameter to kwargs in save_inference_model, test=ocr
Parent 86d4af39
......@@ -460,6 +460,11 @@ void OpDesc::RemoveOutput(const std::string &name) {
need_update_ = true;
}
void OpDesc::RemoveInput(const std::string &name) {
inputs_.erase(name);
need_update_ = true;
}
bool OpDesc::HasProtoAttr(const std::string &name) const {
auto &op_info = OpInfoMap::Instance();
if (op_info.Has(desc_.type())) {
......
......@@ -68,6 +68,8 @@ class OpDesc {
const std::vector<std::string> &args);
void RemoveOutput(const std::string &name);
void RemoveInput(const std::string &name);
bool HasAttr(const std::string &name) const {
return attrs_.find(name) != attrs_.end();
}
......
......@@ -267,6 +267,7 @@ void BindOpDesc(pybind11::module *m) {
self.SetOutput(name, vec_var_name);
})
.def("remove_output", &pd::OpDesc::RemoveOutput)
.def("remove_input", &pd::OpDesc::RemoveInput)
.def("input_arg_names", &pd::OpDesc::InputArgumentNames)
.def("output_arg_names", &pd::OpDesc::OutputArgumentNames)
.def("_rename_input", &pd::OpDesc::RenameInput)
......
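The remove_input binding added above mirrors the existing remove_output binding, so extra input slots can now be dropped from an op's description directly from Python. A minimal sketch of driving it, modeled on the updated unit test below; the toy network and slot names are illustrative, not part of this commit:

import paddle
import paddle.fluid as fluid

paddle.enable_static()
prog = fluid.Program()
with fluid.program_guard(prog):
    x = fluid.data(name="x", shape=[None, 4], dtype="float32")
    y = fluid.data(name="y", shape=[4, 3], dtype="float32")
    out = fluid.layers.mul(x=x, y=y)

mul_op = prog.global_block().ops[-1]
# The mul op declares inputs "X" and "Y"; drop one through the new binding.
mul_op.desc.remove_input("X")
print(mul_op.input_names)  # ["Y"]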
......@@ -495,7 +495,8 @@ class ImperativeQuantizeOutputs(object):
executor=exe,
main_program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename)
params_filename=params_filename,
clip_extra=True)
if is_dynamic_mode:
paddle.disable_static()
......
......@@ -169,9 +169,11 @@ class TestQuantizationScalePass(unittest.TestCase):
f.write(str(server_program))
with fluid.scope_guard(scope):
fluid.io.save_inference_model('quant_scale_model' + dev_name,
['image', 'label'], [loss], exe,
server_program)
fluid.io.save_inference_model(
'quant_scale_model' + dev_name, ['image', 'label'], [loss],
exe,
server_program,
clip_extra=True)
def test_quant_scale_cuda(self):
if fluid.core.is_compiled_with_cuda():
......
......@@ -141,9 +141,11 @@ class TestQuantizeProgramPass(unittest.TestCase):
qt.convert(test_program, scope)
if not for_ci:
with fluid.scope_guard(scope):
fluid.io.save_inference_model('./infer_model',
['image', 'label'], [loss], exe,
test_program)
fluid.io.save_inference_model(
'./infer_model', ['image', 'label'], [loss],
exe,
test_program,
clip_extra=True)
def test_gpu_1(self):
if fluid.core.is_compiled_with_cuda():
......
......@@ -201,7 +201,8 @@ def train(net_type, use_cuda, save_dirname, is_local):
fluid.io.save_inference_model(
save_dirname, ["pixel"], [predict],
exe,
main_program=train_program)
main_program=train_program,
clip_extra=True)
return
if is_local:
......@@ -258,8 +259,13 @@ def infer(use_cuda, save_dirname=None):
print("infer results: ", results[0])
fluid.io.save_inference_model(save_dirname, feed_target_names,
fetch_targets, exe, inference_program)
fluid.io.save_inference_model(
save_dirname,
feed_target_names,
fetch_targets,
exe,
inference_program,
clip_extra=True)
def main(net_type, use_cuda, is_local=True):
......
......@@ -258,8 +258,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
# Convert parameter to 8-bit.
quant_transpiler.convert_to_int8(test_program, place)
# Save the 8-bit parameter and model file.
fluid.io.save_inference_model('model_8bit', ['image', 'label'],
[loss], exe, test_program)
fluid.io.save_inference_model(
'model_8bit', ['image', 'label'], [loss],
exe,
test_program,
clip_extra=True)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[infer, feed, fetch] = fluid.io.load_inference_model('model_8bit',
exe)
......
......@@ -855,7 +855,8 @@ def save(layer, path, input_spec=None, **configs):
model_filename=model_filename,
params_filename=params_filename,
export_for_deployment=configs._export_for_deployment,
program_only=configs._program_only)
program_only=configs._program_only,
clip_extra=False)
# NOTE(chenweihang): [ Save extra variable info ]
# save_inference_model will lose some important variable information, including:
......@@ -1342,7 +1343,7 @@ class TracedLayer(object):
return self._run(self._build_feed(inputs))
@switch_to_static_graph
def save_inference_model(self, path, feed=None, fetch=None):
def save_inference_model(self, path, feed=None, fetch=None, **kwargs):
"""
Save the TracedLayer to a model for inference. The saved
inference model can be loaded by C++ inference APIs.
......@@ -1360,6 +1361,7 @@ class TracedLayer(object):
saved inference model. If None, all output variables of the
TracedLayer object would be the outputs of the saved inference
model. Default None.
kwargs: Supported keys include 'clip_extra'. Set it to True if you want to clip extra information for every operator.
Returns:
None
......@@ -1409,7 +1411,7 @@ class TracedLayer(object):
for f in fetch:
check_type(f, "each element of fetch", int,
"fluid.dygraph.jit.TracedLayer.save_inference_model")
clip_extra = kwargs.get('clip_extra', False)
# path check
file_prefix = os.path.basename(path)
if file_prefix == "":
......@@ -1449,4 +1451,5 @@ class TracedLayer(object):
executor=self._exe,
main_program=self._program.clone(),
model_filename=model_filename,
params_filename=params_filename)
params_filename=params_filename,
clip_extra=clip_extra)
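With the kwargs pass-through above, callers of TracedLayer.save_inference_model can opt into clipping when exporting. A hedged usage sketch, assuming an illustrative dygraph layer that is not taken from this commit:

import paddle
from paddle.fluid.dygraph import TracedLayer

layer = paddle.nn.Linear(10, 3)   # illustrative module
x = paddle.rand([4, 10])
out, traced = TracedLayer.trace(layer, inputs=[x])
# Forward the new keyword so extra op information is stripped on export.
traced.save_inference_model("./traced_infer", clip_extra=True)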
......@@ -5135,7 +5135,7 @@ class Program(object):
res._sync_with_cpp()
return res
def _remove_training_info(self):
def _remove_training_info(self, clip_extra=True):
"""
This method will create a new program and do following adjustments on it:
1. Remove all variable's `is_parameter` attribute if exist.
......@@ -5160,6 +5160,71 @@ class Program(object):
for var in block.all_vars():
var.clear_is_parameter()
var.clear_stop_gradient()
if not clip_extra:
continue
for op_idx in range(0, block.op_size()):
op = block.op(op_idx)
if op.type() not in OpProtoHolder.instance().op_proto_map:
continue
proto = OpProtoHolder.instance().get_op_proto(op.type())
remove_input_list = []
for name in op.input_names():
find = False
for input_proto in proto.inputs:
if input_proto.name != name:
continue
if input_proto.extra:
remove_input_list.append(name)
find = True
break
if not find:
remove_input_list.append(name)
for name in remove_input_list:
op.remove_input(name)
remove_output_list = []
for name in op.output_names():
find = False
for output_proto in proto.outputs:
if output_proto.name != name:
continue
if output_proto.extra:
remove_output_list.append(name)
find = True
break
if not find:
remove_output_list.append(name)
for name in remove_output_list:
op.remove_output(name)
remove_attr_list = []
op_quant_name = core.op_proto_and_checker_maker.kOpWithQuantAttrName(
)
quant = bool(op.attr(op_quant_name)
) if op_quant_name in op.attr_names() else False
quant_attrs = [
op_quant_name, "quantization_type", "skip_quant",
"activation_bits", "bit_length", "quantize_weight_bits",
"weight_quant_scale"
]
for name in op.attr_names():
if quant:
if name in quant_attrs:
continue
if name.endswith("_threshold"):
continue
find = False
for attr_proto in proto.attrs:
if attr_proto.name != name:
continue
if attr_proto.extra:
remove_attr_list.append(name)
find = True
break
if not find:
remove_attr_list.append(name)
for name in remove_attr_list:
op.remove_attr(name)
return res
@staticmethod
......
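In short, when clip_extra is set the cloned program walks every op, consults its OpProto, and removes any input, output, or attribute that is marked extra or is absent from the proto, while preserving quantization-related attributes on quantized ops. A hedged sketch of calling the (private) helper directly on a small static program, purely for illustration:

import paddle

paddle.enable_static()
main = paddle.static.Program()
with paddle.static.program_guard(main):
    x = paddle.static.data(name="x", shape=[None, 8], dtype="float32")
    y = paddle.static.nn.fc(x, size=2)

clipped = main._remove_training_info(clip_extra=True)
print(clipped)  # ops keep only inputs/outputs/attrs declared non-extra in their proto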
......@@ -1251,7 +1251,8 @@ def save_inference_model(dirname,
model_filename=None,
params_filename=None,
export_for_deployment=True,
program_only=False):
program_only=False,
clip_extra=False):
"""
:api_attr: Static Graph
......@@ -1432,13 +1433,15 @@ def save_inference_model(dirname,
main_program.desc._set_version()
paddle.fluid.core.save_op_version_info(main_program.desc)
with open(model_basename, "wb") as f:
f.write(main_program._remove_training_info()
f.write(
main_program._remove_training_info(clip_extra=clip_extra)
.desc.serialize_to_string())
else:
# TODO(panyx0718): Save more information so that it can also be used
# for training and more flexible post-processing.
with open(model_basename + ".main_program", "wb") as f:
f.write(main_program._remove_training_info()
f.write(
main_program._remove_training_info(clip_extra=clip_extra)
.desc.serialize_to_string())
if program_only:
......
......@@ -86,8 +86,10 @@ def train(use_cuda, save_dirname, is_local, use_bf16, pure_bf16):
fetch_list=[avg_cost])
if avg_loss_value[0] < 10.0:
if save_dirname is not None:
paddle.static.save_inference_model(save_dirname, [x],
[y_predict], exe)
paddle.static.save_inference_model(
save_dirname, [x], [y_predict],
exe,
clip_extra=False)
return
if math.isnan(float(avg_loss_value)):
sys.exit("got NaN loss, training failed.")
......
......@@ -111,8 +111,13 @@ class QuantDequantTest(unittest.TestCase):
def _save_models(self, dirname, feeded_var_names, target_vars, executor,
program, scope):
with fluid.scope_guard(scope):
fluid.io.save_inference_model(dirname, feeded_var_names,
target_vars, executor, program)
fluid.io.save_inference_model(
dirname,
feeded_var_names,
target_vars,
executor,
program,
clip_extra=True)
def _get_paddle_outs(self, feed, fetch_list, executor, program, scope):
'''
......
......@@ -115,7 +115,8 @@ class TestImperativeStaticModelRunnerMnist(unittest.TestCase):
self.save_dirname, ["img"], [prediction],
exe,
model_filename=self.model_filename,
params_filename=self.params_filename)
params_filename=self.params_filename,
clip_extra=False)
def load_and_train_dygraph(self):
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
......
......@@ -104,7 +104,8 @@ class TestImperativeStaticModelRunnerWhile(unittest.TestCase):
self.save_dirname, ["img"], [pred],
exe,
model_filename=self.model_filename,
params_filename=self.params_filename)
params_filename=self.params_filename,
clip_extra=False)
def load_and_train_dygraph(self):
place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
......
......@@ -81,6 +81,8 @@ class TestOperator(unittest.TestCase):
self.assertEqual(mul_op.attr("y_num_col_dims"), 1)
self.assertEqual(mul_op.idx, 0)
self.assertEqual(mul_out.op, mul_op)
mul_op.desc.remove_input("X")
self.assertEqual(mul_op.input_names, ["Y"])
def test_mult_input(self):
program = Program()
......
......@@ -447,8 +447,9 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor,
fetch_vars(Variable | list[Variable]): Variables returned by inference.
executor(Executor): The executor that saves the inference model. You can refer
to :ref:`api_guide_executor_en` for more details.
kwargs: Supported keys including 'program'.Attention please, kwargs is used for backward compatibility mainly.
kwargs: Supported keys include 'program' and 'clip_extra'. Note that kwargs is used mainly for backward compatibility.
- program(Program): specify a program if you don't want to use default main program.
- clip_extra(bool): set to True if you want to clip extra information for every operator.
Returns:
None
......@@ -509,9 +510,11 @@ def save_inference_model(path_prefix, feed_vars, fetch_vars, executor,
_check_vars('fetch_vars', fetch_vars)
program = _get_valid_program(kwargs.get('program', None))
clip_extra = kwargs.get('clip_extra', False)
program = normalize_program(program, feed_vars, fetch_vars)
# serialize and save program
program_bytes = _serialize_program(program._remove_training_info())
program_bytes = _serialize_program(
program._remove_training_info(clip_extra=clip_extra))
save_to_file(model_path, program_bytes)
# serialize and save params
params_bytes = _serialize_persistables(program, executor)
......
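A hedged end-to-end sketch of the new keyword in the 2.0 static API; the toy network and output path are illustrative:

import paddle

paddle.enable_static()
x = paddle.static.data(name="x", shape=[None, 8], dtype="float32")
pred = paddle.static.nn.fc(x, size=2)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(paddle.static.default_startup_program())

# clip_extra rides in through kwargs alongside the optional 'program' key.
paddle.static.save_inference_model(
    "./clipped_model", [x], [pred], exe, clip_extra=True)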