Unverified commit 2deada9a, authored by zyfncg, committed by GitHub

Delete extra input (Bias, ResidualData) in OpMaker of conv2d (#49121)

* remove extra input of conv2d

* fix bug

* fix unittest bug

* adjust conv2d.pbtxt

* fix cpu_quantize_pass_tester

* revert use_addto of conv2d

* fix runtime attribute

* fix bug

* recover force_fp32_output in conv2d

* refine error info

* fix bug
Parent 1a4edcb0
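In short: after this change the plain conv2d op no longer declares the MKL-DNN-only inputs Bias and ResidualData, nor the fuse_*/Scale_* extra attributes; those now belong only to the fused variants (fused_conv2d, conv2d_fusion). Any pass or test that wants the fused behaviour has to retype the op first, which is the recurring pattern in the hunks below. A minimal sketch of that pattern, assuming only the paddle::framework::OpDesc API already used in these diffs (the helper name and its residual_var argument are hypothetical):

```cpp
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_desc.h"

// Hypothetical pass helper: attach a residual connection to a convolution op.
// Plain conv2d no longer declares ResidualData, so the op is retyped to
// fused_conv2d first (the same move CPUQuantizeSquashPass::OpDequantSquash
// makes in one of the hunks below).
void FuseResidualIntoConv(paddle::framework::OpDesc* op,
                          const std::string& residual_var) {
  if (op->Type() == "conv2d") {
    op->SetType("fused_conv2d");
  }
  op->SetInput("ResidualData", {residual_var});
  op->SetAttr("fuse_residual_connection", true);
}
```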
@@ -59,9 +59,6 @@ void SetOp(ProgramDesc* prog,
       op->SetAttr("fuse_residual_connection", false);
     }
     op->SetOutput("Output", {outputs[0]});
-    op->SetAttr("Scale_in", 1.0f);
-    op->SetAttr("Scale_out", 1.0f);
-    op->SetAttr("Scale_weights", std::vector<float>{1.0f});
   } else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
              type == "nearest_interp" || type == "nearest_interp_v2") {
     op->SetInput("X", {inputs[0]});
......
@@ -354,7 +354,9 @@ void CPUQuantizeSquashPass::OpDequantSquash(Graph* graph) const {
         FindOutputNameByVarName(any_op->Op(), dequant_in->Name());
     if (output_name.empty()) return;

+    if (any_op->Op()->Type() == "conv2d") {
+      any_op->Op()->SetType("fused_conv2d");
+    }
     any_op->Op()->SetAttr("force_fp32_output", true);
     any_op->Op()->SetOutput(output_name,
                             std::vector<std::string>({dequant_out->Name()}));
......
@@ -411,6 +411,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
   std::string input_name = "";
   if (quantized_op_type == "conv2d" ||
       quantized_op_type == "depthwise_conv2d" ||
+      quantized_op_type == "fused_conv2d" ||
       quantized_op_type == "conv2d_fusion" ||
       quantized_op_type == "conv2d_transpose") {
     weight_name = "Filter";
@@ -424,9 +425,10 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
     input_name = "Input";
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
-        "QuantDequantFuse: We only support conv2d, conv2d_fusion, "
-        "conv2d_transpose, fc, mul, matmul, matmul_v2 for "
-        "now."));
+        "QuantDequantFuse: We only support conv2d, conv2d_fusion, fused_conv2d,"
+        "conv2d_transpose, fc, mul, matmul, matmul_v2 for now, but received: "
+        "%s.",
+        quantized_op_type));
   }
   const std::string pattern_name = "dequant_fuse";
   GraphPatternDetector gpd;
@@ -559,6 +561,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
       }
     }
   } else if (quantized_op_type == "conv2d" ||
+             quantized_op_type == "fused_conv2d" ||
              quantized_op_type == "depthwise_conv2d") {
     PADDLE_ENFORCE_EQ(
         dequant_type,
@@ -642,6 +645,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
   new_op_desc.SetType(quantized_op_type);
   new_op_desc.SetAttr("enable_int8", true);
   if (quantized_op_type == "conv2d" || quantized_op_type == "conv2d_fusion" ||
+      quantized_op_type == "fused_conv2d" ||
       quantized_op_type == "depthwise_conv2d" ||
       quantized_op_type == "conv2d_transpose") {
     new_op_desc.SetInput("Input", {new_input});
@@ -677,6 +681,7 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
       "fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
   std::unordered_set<std::string> quantized_op_types = {
       "conv2d",
+      "fused_conv2d",
       "mul",
       "matmul",
       "depthwise_conv2d",
......
@@ -671,6 +671,11 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
   if (extra_attr_iter != extra_attr_map.end()) {
     is_runtime_attr = true;
     attrs_ptr = &(this->runtime_attrs_);
+    // When an attribute is found in both attrs and runtime_attrs, it must
+    // be a runtime attribute, so its value in attrs should be removed.
+    if (this->attrs_.find(name) != this->attrs_.end()) {
+      this->attrs_.erase(name);
+    }
   }
   // NOTICE(minqiyang): pybind11 will take the empty list in python as
   // the std::vector<int> type in C++; so we have to change the attr's type
......
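The added branch guarantees that attrs_ and runtime_attrs_ never hold the same name at once. A self-contained analogue of that bookkeeping (standalone C++ with plain std:: containers, not Paddle code; the attribute names are just examples):

```cpp
#include <iostream>
#include <map>
#include <set>
#include <string>

// Sketch of OpDesc::SetAttr's bookkeeping: an attribute listed in the
// extra-attr map goes to runtime_attrs_, and any stale copy in attrs_
// (e.g. written before the op picked up its extra-attr info) is erased.
struct OpDescSketch {
  std::map<std::string, float> attrs_;
  std::map<std::string, float> runtime_attrs_;
  std::set<std::string> extra_attrs_{"force_fp32_output", "use_addto"};

  void SetAttr(const std::string& name, float v) {
    auto* attrs_ptr = &attrs_;
    if (extra_attrs_.count(name)) {
      attrs_ptr = &runtime_attrs_;
      attrs_.erase(name);  // the runtime attribute must not linger in attrs_
    }
    (*attrs_ptr)[name] = v;
  }
};

int main() {
  OpDescSketch op;
  op.attrs_["force_fp32_output"] = 0.f;  // stale copy in the wrong map
  op.SetAttr("force_fp32_output", 1.f);
  std::cout << op.attrs_.count("force_fp32_output") << " "
            << op.runtime_attrs_.count("force_fp32_output") << "\n";  // prints "0 1"
}
```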
@@ -6,12 +6,6 @@ def {
   inputs {
     name: "Filter"
   }
-  inputs {
-    name: "Bias"
-  }
-  inputs {
-    name: "ResidualData"
-  }
   outputs {
     name: "Output"
   }
@@ -69,54 +63,10 @@ extra {
     name: "skip_quant"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_relu_before_depthwise_conv"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "fuse_relu"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "fuse_activation"
-    type: STRING
-  }
-  attrs {
-    name: "fuse_alpha"
-    type: FLOAT
-  }
-  attrs {
-    name: "fuse_beta"
-    type: FLOAT
-  }
   attrs {
     name: "use_addto"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_residual_connection"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "Scale_in"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_out"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_in_eltwise"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_weights"
-    type: FLOATS
-  }
-  attrs {
-    name: "force_fp32_output"
-    type: BOOLEAN
-  }
   attrs {
     name: "workspace_size_MB"
     type: INT
......
@@ -6,12 +6,6 @@ def {
   inputs {
     name: "Filter"
   }
-  inputs {
-    name: "Bias"
-  }
-  inputs {
-    name: "ResidualData"
-  }
   outputs {
     name: "Output"
   }
@@ -65,50 +59,10 @@ extra {
     name: "fuse_relu_before_depthwise_conv"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_relu"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "fuse_activation"
-    type: STRING
-  }
-  attrs {
-    name: "fuse_alpha"
-    type: FLOAT
-  }
-  attrs {
-    name: "fuse_beta"
-    type: FLOAT
-  }
   attrs {
     name: "use_addto"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_residual_connection"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "Scale_in"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_out"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_in_eltwise"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_weights"
-    type: FLOATS
-  }
-  attrs {
-    name: "force_fp32_output"
-    type: BOOLEAN
-  }
   attrs {
     name: "workspace_size_MB"
     type: INT
......
@@ -250,18 +250,6 @@ void Conv2DOpMaker::Make() {
            "H is the height of the filter, and W is the width of the filter. "
            "If the groups attribute is greater than 1, C equals the number of "
            "input image channels divided by the groups.");
-  AddInput("Bias",
-           "(Tensor) Bias to be added to each output of filter application."
-           "The format of output tensor is X (one-dimensional) of size equal"
-           "to the number of output channels. Only used with MKL-DNN.")
-      .AsDispensable()
-      .AsExtra();
-  AddInput("ResidualData",
-           "(Tensor) Tensor with residual data "
-           "to which convolution output will be added."
-           "Used with fuse_residual_connection fusion.")
-      .AsDispensable()
-      .AsExtra();
   AddOutput("Output",
             "(Tensor) The output tensor of convolution operator. "
             "It has the same data format and data type as the Input.");
......
@@ -33,6 +33,16 @@ namespace operators {
 class Conv2DFusionOpMaker : public Conv2DOpMaker {
  protected:
   void Apply() override {
+    AddInput("Bias",
+             "(Tensor) Bias to be added to each output of filter application."
+             "The format of output tensor is X (one-dimensional) of size equal"
+             "to the number of output channels. Only used with MKL-DNN.")
+        .AsDispensable();
+    AddInput("ResidualData",
+             "(Tensor) Tensor with residual data "
+             "to which convolution output will be added."
+             "Used with fuse_residual_connection fusion.")
+        .AsDispensable();
     AddAttr<std::string>(
         "activation",
         "The activation type can be 'identity', 'sigmoid', 'relu', 'relu6' "
......
@@ -23,6 +23,16 @@ namespace operators {
 class FusedConvOpMaker : public Conv2DOpMaker {
  protected:
   void Apply() override {
+    AddInput("Bias",
+             "(Tensor) Bias to be added to each output of filter application."
+             "The format of output tensor is X (one-dimensional) of size equal"
+             "to the number of output channels. Only used with MKL-DNN.")
+        .AsDispensable();
+    AddInput("ResidualData",
+             "(Tensor) Tensor with residual data "
+             "to which convolution output will be added."
+             "Used with fuse_residual_connection fusion.")
+        .AsDispensable();
     AddAttr<std::string>(
         "mkldnn_data_type",
         "(string, default \"float32\"). Data type of mkldnn kernel")
......
@@ -218,9 +218,7 @@ class ExtraInfoUtils {
   // TODO(chenweihang): move these extra inputs into op_compat.yaml
   std::unordered_map<std::string, std::vector<std::string>>
-      g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
-                                  {"conv2d_transpose", {"Bias"}},
-                                  {"conv2d_grad", {"Bias"}}};
+      g_extra_input_names_map_ = {{"conv2d_transpose", {"Bias"}}};
   std::vector<std::string> empty_extra_input_names_;
 };
......
@@ -242,11 +242,8 @@
 - op : conv2d
   backward : conv2d_grad
   extra :
-    attrs : [bool is_test = false, bool use_cudnn = true, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false,
-             bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
-             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
-             bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
-             float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
+    attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false,
+             str mkldnn_data_type = "float32", bool force_fp32_output = false,
              int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]

 - op : conv2d_fusion
@@ -602,6 +599,11 @@
   extra :
     attrs : [bool use_mkldnn = false]

+- op : fused_conv2d
+  extra :
+    attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f,
+             float Scale_out = 1.0f, float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}']
+
 - op : gather
   backward : gather_grad
   extra :
......
@@ -41,29 +41,16 @@ void ConvKernel(const Context& dev_ctx,
                                 dev_ctx.GetDnnAttr("mkldnn_data_type")) ==
                 "bfloat16"
           : false;
-  const auto* bias =
-      dev_ctx.HasDnnInput("Bias") ? dev_ctx.GetDnnInput("Bias") : nullptr;
-  const auto* residual_param = dev_ctx.HasDnnInput("ResidualData")
-                                   ? dev_ctx.GetDnnInput("ResidualData")
-                                   : nullptr;
-  bool fuse_residual_conn =
-      dev_ctx.HasDnnAttr("fuse_residual_connection")
-          ? PADDLE_GET_CONST(bool,
-                             dev_ctx.GetDnnAttr("fuse_residual_connection"))
-          : false;
-  const std::string& fuse_activation =
-      dev_ctx.HasDnnAttr("fuse_activation")
-          ? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
-          : "";
   bool force_fp32_output =
       dev_ctx.HasDnnAttr("force_fp32_output")
           ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output"))
           : false;
   ConvOnednn<T>(dev_ctx,
                 &input,
                 &filter,
-                bias,
-                residual_param,
+                nullptr,
+                nullptr,
                 strides,
                 paddings,
                 padding_algorithm,
@@ -72,8 +59,8 @@ void ConvKernel(const Context& dev_ctx,
                 data_format,
                 is_test,
                 is_BFLOAT16,
-                fuse_activation,
-                fuse_residual_conn,
+                "",
+                false,
                 force_fp32_output,
                 out);
 }
......
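In the slimmed-down conv2d OneDNN kernel only force_fp32_output survives as a runtime knob, read through the HasDnnAttr / GetDnnAttr / PADDLE_GET_CONST pattern above; the fusion-specific arguments are now hard-wired to nullptr, "" and false. If that "attribute or default" lookup were needed in more places, it could be wrapped roughly like this (a hypothetical helper, not part of the commit; it assumes the phi::OneDNNContext type and the PADDLE_GET_CONST macro used in the kernel above):

```cpp
// Hypothetical convenience wrapper around the runtime-attribute lookup in the
// ConvKernel hunk: return the attribute a pass attached, or a default.
template <typename AttrT>
AttrT GetDnnAttrOr(const phi::OneDNNContext& dev_ctx,
                   const std::string& name,
                   const AttrT& default_value) {
  return dev_ctx.HasDnnAttr(name)
             ? PADDLE_GET_CONST(AttrT, dev_ctx.GetDnnAttr(name))
             : default_value;
}

// Usage, equivalent to the force_fp32_output lookup above:
//   bool force_fp32_output = GetDnnAttrOr(dev_ctx, "force_fp32_output", false);
```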
@@ -104,6 +104,7 @@ class TestConv2DBF16Op(TestConv2DOp):
         }

         if self.fuse_residual:
+            self.op_type = "fused_conv2d"
             self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
                 convert_float_to_uint16(self.input_residual)
             )
......
@@ -158,6 +158,9 @@ class TestConv2DInt8Op(TestConv2DOp):
                 input_residual
             )

+        if self.fuse_activation != "" or self.fuse_residual:
+            self.op_type = "fused_conv2d"
+
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
@@ -341,6 +344,7 @@ class TestWithInput1x1Filter1x1(TestConv2DInt8Op):
 def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual):
+    self.op_type = "fused_conv2d"
     self.srctype = input_dt
     self.dsttype = np.uint8 if fuse_activation == "relu" else np.int8
......
@@ -99,6 +99,13 @@ class TestConv2DMKLDNNOp(TestConv2DOp):
             output = np.minimum(np.maximum(output, 0), self.fuse_alpha).astype(
                 self.dsttype
             )
+
+        if (
+            self.fuse_activation != ""
+            or self.fuse_bias
+            or self.fuse_residual_connection
+        ):
+            self.op_type = 'fused_conv2d'
         output = output.astype(self.dtype)

         self.attrs['fuse_bias'] = self.fuse_bias
......
@@ -161,7 +161,7 @@ class QuantInt8MkldnnPass:
         }

         conv_op_node = graph.create_op_node(
-            op_type='conv2d',
+            op_type='fused_conv2d',
             attrs=attrs,
             inputs={'Input': input_var_node, 'Filter': weight_var_node},
             outputs={'Output': output_var_node},
......