Unverified commit 2deada9a, authored by zyfncg, committed by GitHub

Delete extra input (Bias, ResidualData) in OpMaker of conv2d (#49121)

* remove extra input of conv2d

* fix bug

* fix unittest bug

* adjust conv2d.pbtxt

* fix cpu_quantize_pass_tester

* revert use_addto of conv2d

* fix runtime attribute

* fix bug

* recover force_fp32_output in conv2d

* refine error info

* fix bug
Parent 1a4edcb0
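In short: after this change the plain conv2d op no longer declares the MKL-DNN-only inputs Bias and ResidualData, nor the fuse_*/Scale_* extra attributes; those now belong only to the fused variants (fused_conv2d, conv2d_fusion). Any pass or test that wants the fused behaviour has to retype the op first, which is the recurring pattern in the hunks below. A minimal sketch of that pattern, assuming only the paddle::framework::OpDesc API already used in these diffs (the helper name and its residual_var argument are hypothetical):

```cpp
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_desc.h"

// Hypothetical pass helper: attach a residual connection to a convolution op.
// Plain conv2d no longer declares ResidualData, so the op is retyped to
// fused_conv2d first (the same move CPUQuantizeSquashPass::OpDequantSquash
// makes in one of the hunks below).
void FuseResidualIntoConv(paddle::framework::OpDesc* op,
                          const std::string& residual_var) {
  if (op->Type() == "conv2d") {
    op->SetType("fused_conv2d");
  }
  op->SetInput("ResidualData", {residual_var});
  op->SetAttr("fuse_residual_connection", true);
}
```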
@@ -59,9 +59,6 @@ void SetOp(ProgramDesc* prog,
       op->SetAttr("fuse_residual_connection", false);
     }
     op->SetOutput("Output", {outputs[0]});
-    op->SetAttr("Scale_in", 1.0f);
-    op->SetAttr("Scale_out", 1.0f);
-    op->SetAttr("Scale_weights", std::vector<float>{1.0f});
   } else if (type == "pool2d" || type == "transpose2" || type == "reshape2" ||
              type == "nearest_interp" || type == "nearest_interp_v2") {
     op->SetInput("X", {inputs[0]});
......
@@ -354,7 +354,9 @@ void CPUQuantizeSquashPass::OpDequantSquash(Graph* graph) const {
         FindOutputNameByVarName(any_op->Op(), dequant_in->Name());
     if (output_name.empty()) return;

+    if (any_op->Op()->Type() == "conv2d") {
+      any_op->Op()->SetType("fused_conv2d");
+    }
     any_op->Op()->SetAttr("force_fp32_output", true);
     any_op->Op()->SetOutput(output_name,
                             std::vector<std::string>({dequant_out->Name()}));
......
@@ -411,6 +411,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
   std::string input_name = "";
   if (quantized_op_type == "conv2d" ||
       quantized_op_type == "depthwise_conv2d" ||
+      quantized_op_type == "fused_conv2d" ||
       quantized_op_type == "conv2d_fusion" ||
       quantized_op_type == "conv2d_transpose") {
     weight_name = "Filter";
@@ -424,9 +425,10 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
     input_name = "Input";
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
-        "QuantDequantFuse: We only support conv2d, conv2d_fusion, "
-        "conv2d_transpose, fc, mul, matmul, matmul_v2 for "
-        "now."));
+        "QuantDequantFuse: We only support conv2d, conv2d_fusion, fused_conv2d,"
+        "conv2d_transpose, fc, mul, matmul, matmul_v2 for now, but received: "
+        "%s.",
+        quantized_op_type));
   }
   const std::string pattern_name = "dequant_fuse";
   GraphPatternDetector gpd;
@@ -559,6 +561,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
       }
     }
   } else if (quantized_op_type == "conv2d" ||
+             quantized_op_type == "fused_conv2d" ||
              quantized_op_type == "depthwise_conv2d") {
     PADDLE_ENFORCE_EQ(
         dequant_type,
@@ -642,6 +645,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
   new_op_desc.SetType(quantized_op_type);
   new_op_desc.SetAttr("enable_int8", true);
   if (quantized_op_type == "conv2d" || quantized_op_type == "conv2d_fusion" ||
+      quantized_op_type == "fused_conv2d" ||
       quantized_op_type == "depthwise_conv2d" ||
       quantized_op_type == "conv2d_transpose") {
     new_op_desc.SetInput("Input", {new_input});
@@ -677,6 +681,7 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
       "fake_quantize_range_abs_max", "fake_quantize_moving_average_abs_max"};
   std::unordered_set<std::string> quantized_op_types = {
       "conv2d",
+      "fused_conv2d",
       "mul",
       "matmul",
       "depthwise_conv2d",
......
@@ -671,6 +671,11 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
   if (extra_attr_iter != extra_attr_map.end()) {
     is_runtime_attr = true;
     attrs_ptr = &(this->runtime_attrs_);
+    // When an attribute is found in both attrs and runtime_attrs, it must
+    // be a runtime attribute, so its value in attrs should be removed.
+    if (this->attrs_.find(name) != this->attrs_.end()) {
+      this->attrs_.erase(name);
+    }
   }
   // NOTICE(minqiyang): pybind11 will take the empty list in python as
   // the std::vector<int> type in C++; so we have to change the attr's type
......
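The added branch guarantees that attrs_ and runtime_attrs_ never hold the same name at once. A self-contained analogue of that bookkeeping (standalone C++ with plain std:: containers, not Paddle code; the attribute names are just examples):

```cpp
#include <iostream>
#include <map>
#include <set>
#include <string>

// Sketch of OpDesc::SetAttr's bookkeeping: an attribute listed in the
// extra-attr map goes to runtime_attrs_, and any stale copy in attrs_
// (e.g. written before the op picked up its extra-attr info) is erased.
struct OpDescSketch {
  std::map<std::string, float> attrs_;
  std::map<std::string, float> runtime_attrs_;
  std::set<std::string> extra_attrs_{"force_fp32_output", "use_addto"};

  void SetAttr(const std::string& name, float v) {
    auto* attrs_ptr = &attrs_;
    if (extra_attrs_.count(name)) {
      attrs_ptr = &runtime_attrs_;
      attrs_.erase(name);  // the runtime attribute must not linger in attrs_
    }
    (*attrs_ptr)[name] = v;
  }
};

int main() {
  OpDescSketch op;
  op.attrs_["force_fp32_output"] = 0.f;  // stale copy in the wrong map
  op.SetAttr("force_fp32_output", 1.f);
  std::cout << op.attrs_.count("force_fp32_output") << " "
            << op.runtime_attrs_.count("force_fp32_output") << "\n";  // prints "0 1"
}
```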
@@ -6,12 +6,6 @@ def {
   inputs {
     name: "Filter"
   }
-  inputs {
-    name: "Bias"
-  }
-  inputs {
-    name: "ResidualData"
-  }
   outputs {
     name: "Output"
   }
@@ -69,54 +63,10 @@ extra {
     name: "skip_quant"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_relu_before_depthwise_conv"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "fuse_relu"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "fuse_activation"
-    type: STRING
-  }
-  attrs {
-    name: "fuse_alpha"
-    type: FLOAT
-  }
-  attrs {
-    name: "fuse_beta"
-    type: FLOAT
-  }
   attrs {
     name: "use_addto"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_residual_connection"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "Scale_in"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_out"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_in_eltwise"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_weights"
-    type: FLOATS
-  }
-  attrs {
-    name: "force_fp32_output"
-    type: BOOLEAN
-  }
   attrs {
     name: "workspace_size_MB"
     type: INT
......
@@ -6,12 +6,6 @@ def {
   inputs {
     name: "Filter"
   }
-  inputs {
-    name: "Bias"
-  }
-  inputs {
-    name: "ResidualData"
-  }
   outputs {
     name: "Output"
   }
@@ -65,50 +59,10 @@ extra {
     name: "fuse_relu_before_depthwise_conv"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_relu"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "fuse_activation"
-    type: STRING
-  }
-  attrs {
-    name: "fuse_alpha"
-    type: FLOAT
-  }
-  attrs {
-    name: "fuse_beta"
-    type: FLOAT
-  }
   attrs {
     name: "use_addto"
     type: BOOLEAN
   }
-  attrs {
-    name: "fuse_residual_connection"
-    type: BOOLEAN
-  }
-  attrs {
-    name: "Scale_in"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_out"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_in_eltwise"
-    type: FLOAT
-  }
-  attrs {
-    name: "Scale_weights"
-    type: FLOATS
-  }
-  attrs {
-    name: "force_fp32_output"
-    type: BOOLEAN
-  }
   attrs {
     name: "workspace_size_MB"
     type: INT
......
@@ -250,18 +250,6 @@ void Conv2DOpMaker::Make() {
            "H is the height of the filter, and W is the width of the filter. "
            "If the groups attribute is greater than 1, C equals the number of "
            "input image channels divided by the groups.");
-  AddInput("Bias",
-           "(Tensor) Bias to be added to each output of filter application."
-           "The format of output tensor is X (one-dimensional) of size equal"
-           "to the number of output channels. Only used with MKL-DNN.")
-      .AsDispensable()
-      .AsExtra();
-  AddInput("ResidualData",
-           "(Tensor) Tensor with residual data "
-           "to which convolution output will be added."
-           "Used with fuse_residual_connection fusion.")
-      .AsDispensable()
-      .AsExtra();
   AddOutput("Output",
             "(Tensor) The output tensor of convolution operator. "
             "It has the same data format and data type as the Input.");
......
@@ -33,6 +33,16 @@ namespace operators {
 class Conv2DFusionOpMaker : public Conv2DOpMaker {
  protected:
   void Apply() override {
+    AddInput("Bias",
+             "(Tensor) Bias to be added to each output of filter application."
+             "The format of output tensor is X (one-dimensional) of size equal"
+             "to the number of output channels. Only used with MKL-DNN.")
+        .AsDispensable();
+    AddInput("ResidualData",
+             "(Tensor) Tensor with residual data "
+             "to which convolution output will be added."
+             "Used with fuse_residual_connection fusion.")
+        .AsDispensable();
     AddAttr<std::string>(
         "activation",
         "The activation type can be 'identity', 'sigmoid', 'relu', 'relu6' "
......
@@ -23,6 +23,16 @@ namespace operators {
 class FusedConvOpMaker : public Conv2DOpMaker {
  protected:
   void Apply() override {
+    AddInput("Bias",
+             "(Tensor) Bias to be added to each output of filter application."
+             "The format of output tensor is X (one-dimensional) of size equal"
+             "to the number of output channels. Only used with MKL-DNN.")
+        .AsDispensable();
+    AddInput("ResidualData",
+             "(Tensor) Tensor with residual data "
+             "to which convolution output will be added."
+             "Used with fuse_residual_connection fusion.")
+        .AsDispensable();
     AddAttr<std::string>(
         "mkldnn_data_type",
         "(string, default \"float32\"). Data type of mkldnn kernel")
......
@@ -218,9 +218,7 @@ class ExtraInfoUtils {
   // TODO(chenweihang): move these extra inputs into op_compat.yaml
   std::unordered_map<std::string, std::vector<std::string>>
-      g_extra_input_names_map_ = {{"conv2d", {"Bias", "ResidualData"}},
-                                  {"conv2d_transpose", {"Bias"}},
-                                  {"conv2d_grad", {"Bias"}}};
+      g_extra_input_names_map_ = {{"conv2d_transpose", {"Bias"}}};
   std::vector<std::string> empty_extra_input_names_;
 };
......
@@ -242,11 +242,8 @@
 - op : conv2d
   backward : conv2d_grad
   extra :
-    attrs : [bool is_test = false, bool use_cudnn = true, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false,
-             bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false,
-             str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false,
-             bool fuse_residual_connection = false, float Scale_in = 1.0f, float Scale_out = 1.0f,
-             float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}', bool force_fp32_output = false,
+    attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false,
+             str mkldnn_data_type = "float32", bool force_fp32_output = false,
              int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false]

 - op : conv2d_fusion
@@ -602,6 +599,11 @@
   extra :
     attrs : [bool use_mkldnn = false]

+- op : fused_conv2d
+  extra :
+    attrs : [bool use_cudnn = false, float fuse_alpha = 0.0f, float fuse_beta = 0.0f, float Scale_in = 1.0f,
+             float Scale_out = 1.0f, float Scale_in_eltwise = 1.0f, 'float[] Scale_weights = {1.0f}']
+
 - op : gather
   backward : gather_grad
   extra :
......
@@ -41,29 +41,16 @@ void ConvKernel(const Context& dev_ctx,
                                 dev_ctx.GetDnnAttr("mkldnn_data_type")) ==
                 "bfloat16"
           : false;
-  const auto* bias =
-      dev_ctx.HasDnnInput("Bias") ? dev_ctx.GetDnnInput("Bias") : nullptr;
-  const auto* residual_param = dev_ctx.HasDnnInput("ResidualData")
-                                   ? dev_ctx.GetDnnInput("ResidualData")
-                                   : nullptr;
-  bool fuse_residual_conn =
-      dev_ctx.HasDnnAttr("fuse_residual_connection")
-          ? PADDLE_GET_CONST(bool,
-                             dev_ctx.GetDnnAttr("fuse_residual_connection"))
-          : false;
-  const std::string& fuse_activation =
-      dev_ctx.HasDnnAttr("fuse_activation")
-          ? PADDLE_GET_CONST(std::string, dev_ctx.GetDnnAttr("fuse_activation"))
-          : "";
   bool force_fp32_output =
       dev_ctx.HasDnnAttr("force_fp32_output")
           ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output"))
           : false;
   ConvOnednn<T>(dev_ctx,
                 &input,
                 &filter,
-                bias,
-                residual_param,
+                nullptr,
+                nullptr,
                 strides,
                 paddings,
                 padding_algorithm,
@@ -72,8 +59,8 @@ void ConvKernel(const Context& dev_ctx,
                 data_format,
                 is_test,
                 is_BFLOAT16,
-                fuse_activation,
-                fuse_residual_conn,
+                "",
+                false,
                 force_fp32_output,
                 out);
 }
......
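In the slimmed-down conv2d OneDNN kernel only force_fp32_output survives as a runtime knob, read through the HasDnnAttr / GetDnnAttr / PADDLE_GET_CONST pattern above; the fusion-specific arguments are now hard-wired to nullptr, "" and false. If that "attribute or default" lookup were needed in more places, it could be wrapped roughly like this (a hypothetical helper, not part of the commit; it assumes the phi::OneDNNContext type and the PADDLE_GET_CONST macro used in the kernel above):

```cpp
// Hypothetical convenience wrapper around the runtime-attribute lookup in the
// ConvKernel hunk: return the attribute a pass attached, or a default.
template <typename AttrT>
AttrT GetDnnAttrOr(const phi::OneDNNContext& dev_ctx,
                   const std::string& name,
                   const AttrT& default_value) {
  return dev_ctx.HasDnnAttr(name)
             ? PADDLE_GET_CONST(AttrT, dev_ctx.GetDnnAttr(name))
             : default_value;
}

// Usage, equivalent to the force_fp32_output lookup above:
//   bool force_fp32_output = GetDnnAttrOr(dev_ctx, "force_fp32_output", false);
```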
@@ -104,6 +104,7 @@ class TestConv2DBF16Op(TestConv2DOp):
         }

         if self.fuse_residual:
+            self.op_type = "fused_conv2d"
             self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype(
                 convert_float_to_uint16(self.input_residual)
             )
......
@@ -158,6 +158,9 @@ class TestConv2DInt8Op(TestConv2DOp):
                 input_residual
             )

+        if self.fuse_activation != "" or self.fuse_residual:
+            self.op_type = "fused_conv2d"
+
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
@@ -341,6 +344,7 @@ class TestWithInput1x1Filter1x1(TestConv2DInt8Op):
 def init_data_type_with_fusion(self, input_dt, fuse_activation, fuse_residual):
+    self.op_type = "fused_conv2d"
     self.srctype = input_dt
     self.dsttype = np.uint8 if fuse_activation == "relu" else np.int8
......
@@ -99,6 +99,13 @@ class TestConv2DMKLDNNOp(TestConv2DOp):
             output = np.minimum(np.maximum(output, 0), self.fuse_alpha).astype(
                 self.dsttype
             )
+
+        if (
+            self.fuse_activation != ""
+            or self.fuse_bias
+            or self.fuse_residual_connection
+        ):
+            self.op_type = 'fused_conv2d'
         output = output.astype(self.dtype)

         self.attrs['fuse_bias'] = self.fuse_bias
......
@@ -161,7 +161,7 @@ class QuantInt8MkldnnPass:
         }

         conv_op_node = graph.create_op_node(
-            op_type='conv2d',
+            op_type='fused_conv2d',
             attrs=attrs,
             inputs={'Input': input_var_node, 'Filter': weight_var_node},
             outputs={'Output': output_var_node},
......