diff --git a/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc index f124c3cc44adf00ef781622bec35a21fb427a96c..0b591120014e354f407de5b2f991d524c482d8b5 100644 --- a/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc @@ -99,13 +99,15 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, auto conv = pattern->NewNode(conv_repr())->assert_is_op(conv_type_); auto input = pattern->NewNode(input_repr()) ->assert_is_op_input(conv_type_, "Input") - ->AsInput(); + ->AsInput() + ->assert_more([](Node* node) { + return node->Var()->GetShape().size() == 4; + }); auto conv_filter = pattern->NewNode(conv_filter_repr()) ->assert_is_op_input(conv_type_, "Filter") ->AsInput(); auto conv_out = pattern->NewNode(conv_out_repr()) - ->assert_is_op_output(conv_type_, "Output") - ->assert_var_not_persistable(); + ->assert_is_op_output(conv_type_, "Output"); conv->LinksFrom({input, conv_filter}).LinksTo({conv_out}); // ew_bias_add op PDNode* ew_bias_add = nullptr; @@ -116,11 +118,17 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, ew_bias_add_y = pattern->NewNode(ew_bias_add_y_repr()) ->assert_is_op_input("elementwise_add", "Y") ->assert_is_persistable_var() - ->assert_has_n_outputs(1); + ->assert_has_n_outputs(1) + ->assert_more([](Node* node) { + return node->Var()->GetShape().size() == 1; + }); ew_bias_add = pattern->NewNode(ew_bias_add_repr())->assert_is_op("elementwise_add"); ew_bias_add_out = pattern->NewNode(ew_bias_add_out_repr()) ->assert_is_op_output("elementwise_add", "Out"); + if (with_bn_ || with_branch_ || !act_type_.empty()) { + ew_bias_add_out->assert_has_n_outputs(1); + } ew_bias_add->LinksFrom({conv_out, ew_bias_add_y}) .LinksTo({ew_bias_add_out}); } else { @@ -159,6 +167,9 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, bn = pattern->NewNode(bn_repr())->assert_is_op("batch_norm"); bn_out = pattern->NewNode(bn_out_repr())->assert_is_op_output("batch_norm", "Y"); + if (with_branch_ || !act_type_.empty()) { + bn_out->assert_has_n_outputs(1); + } bn_mean_out = pattern->NewNode(bn_mean_out_repr()) ->assert_is_op_output("batch_norm", "MeanOut"); bn_saved_mean = pattern->NewNode(bn_saved_mean_repr()) @@ -179,23 +190,27 @@ Conv2dXPUPattern::Conv2dXPUPattern(PDPattern* pattern, bn_out->assert_is_op_input("elementwise_add", "Y")->AsIntermediate(); ew_branch_add_in = pattern->NewNode(ew_branch_add_in_repr()) ->assert_is_op_input("elementwise_add", "X") - ->AsInput() - ->assert_more([](Node* node) { - return node->Var()->GetShape().size() == 4; - }); + ->AsInput(); } else if (with_branch_y_) { bn_out->assert_is_op_input("elementwise_add", "X")->AsIntermediate(); ew_branch_add_in = pattern->NewNode(ew_branch_add_in_repr()) ->assert_is_op_input("elementwise_add", "Y") - ->AsInput() - ->assert_more([](Node* node) { - return node->Var()->GetShape().size() == 4; - }); + ->AsInput(); } - ew_branch_add = - pattern->NewNode(ew_branch_add_repr())->assert_is_op("elementwise_add"); + ew_branch_add = pattern->NewNode(ew_branch_add_repr()) + ->assert_is_op("elementwise_add") + ->assert_more([](Node* node) { + if (node->inputs.size() != 2) { + return false; + } + return node->inputs[0]->Var()->GetShape() == + node->inputs[1]->Var()->GetShape(); + }); ew_branch_add_out = pattern->NewNode(ew_branch_add_out_repr()) ->assert_is_op_output("elementwise_add", "Out"); + if (!act_type_.empty()) { + ew_branch_add_out->assert_has_n_outputs(1); + } ew_branch_add->LinksFrom({bn_out, ew_branch_add_in}) .LinksTo({ew_branch_add_out}); } else { @@ -401,6 +416,7 @@ int Conv2dXPUFusePass::ApplyImpl(ir::Graph* graph, scope->FindVar(conv_filter->Name())->GetMutable(); auto filter_dims = filter_t->dims(); bool has_bias = with_bn || with_conv_bias; + bool has_branch = with_branch_x || with_branch_y; // Create conv_fusion_bias (conv bias) variable Node* fusion_bias_node = nullptr; if (has_bias) { @@ -501,18 +517,17 @@ int Conv2dXPUFusePass::ApplyImpl(ir::Graph* graph, framework::OpDesc conv2d_xpu_op_desc(block); // set input&output var conv2d_xpu_op_desc.SetType("conv2d_xpu"); - conv2d_xpu_op_desc.SetInput("input", {input->Name()}); + conv2d_xpu_op_desc.SetInput("x", {input->Name()}); conv2d_xpu_op_desc.SetInput("filter", {filter_int16->Name()}); conv2d_xpu_op_desc.SetInput("filter_max", {filter_max->Name()}); - conv2d_xpu_op_desc.SetOutput("output", {conv2d_xpu_out_name}); - conv2d_xpu_op_desc.SetOutput("output_max", {conv_out_max_name}); + conv2d_xpu_op_desc.SetOutput("out", {conv2d_xpu_out_name}); + conv2d_xpu_op_desc.SetOutput("out_max", {conv_out_max_name}); // set fusion_bias input node if (has_bias) { conv2d_xpu_op_desc.SetInput("bias", {fusion_bias_node->Name()}); - conv2d_xpu_op_desc.SetAttr("has_bias", has_bias); } // set ew_branch_add input node - if (ew_branch_add_in != nullptr) { + if (ew_branch_add != nullptr) { conv2d_xpu_op_desc.SetInput("branch", {ew_branch_add_in->Name()}); } // set attrs of conv2d_xpu @@ -566,7 +581,8 @@ int Conv2dXPUFusePass::ApplyImpl(ir::Graph* graph, conv2d_xpu_op_desc.SetAttr("place_z", std::vector{10}); conv2d_xpu_op_desc.SetAttr("paddings", conv_paddings); conv2d_xpu_op_desc.SetAttr("block_lod", std::vector{1}); - conv2d_xpu_op_desc.SetAttr("has_branch", with_branch_x || with_branch_y); + conv2d_xpu_op_desc.SetAttr("has_branch", has_branch); + conv2d_xpu_op_desc.SetAttr("has_bias", has_bias); auto* conv2d_xpu = graph->CreateOpNode(&conv2d_xpu_op_desc); IR_NODE_LINK_TO(input, conv2d_xpu); diff --git a/paddle/phi/api/yaml/fused_ops.yaml b/paddle/phi/api/yaml/fused_ops.yaml index c9fae2a81e3b74056e0cbbfad326ddda7107ac36..b43d02fced54ce6ba7a41d4fa9ff59089f2272b4 100644 --- a/paddle/phi/api/yaml/fused_ops.yaml +++ b/paddle/phi/api/yaml/fused_ops.yaml @@ -5,14 +5,14 @@ # otherwise the operator only could be used in static mode. - op : conv2d_xpu - args : (Tensor input, Tensor input_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, bool has_bias, bool has_branch, int act_type, float act_param) - output : Tensor(output), Tensor(output_max) + args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, bool has_bias, bool has_branch, int act_type, float act_param) + output : Tensor(out), Tensor(out_max) infer_meta : func : Conv2dXPUInferMeta kernel : func : conv2d_xpu - data_type : input - optional : bias, branch, input_max + data_type : x + optional : bias, branch, x_max - op : embedding_with_eltwise_add_xpu args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx) diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc index ad8409487bb5891f5b117a83c9af5063e9cac7ec..5c0aa3b8e89fdbfbed834e00bad405146304828a 100644 --- a/paddle/phi/infermeta/fusion.cc +++ b/paddle/phi/infermeta/fusion.cc @@ -35,8 +35,8 @@ inline int ConvOutSize(int input_size, return output_size; } -void Conv2dXPUInferMeta(const MetaTensor& input, - const MetaTensor& input_max, +void Conv2dXPUInferMeta(const MetaTensor& x, + const MetaTensor& x_max, const MetaTensor& filter, const MetaTensor& filter_max, const MetaTensor& bias, @@ -50,9 +50,9 @@ void Conv2dXPUInferMeta(const MetaTensor& input, bool has_branch, int act_type, float act_param, - MetaTensor* output, - MetaTensor* output_max) { - auto in_dims = input.dims(); + MetaTensor* out, + MetaTensor* out_max) { + auto in_dims = x.dims(); auto filter_dims = filter.dims(); // do some checks PADDLE_ENFORCE_EQ( @@ -157,8 +157,8 @@ void Conv2dXPUInferMeta(const MetaTensor& input, strides[i])); } // set output and output max dims - output->set_dims(DDim(out_shape.data(), out_shape.size())); - output_max->set_dims(phi::make_ddim({4})); + out->set_dims(DDim(out_shape.data(), out_shape.size())); + out_max->set_dims(phi::make_ddim({4})); } void EmbeddingWithEltwiseAddXPUInferMeta( diff --git a/paddle/phi/infermeta/fusion.h b/paddle/phi/infermeta/fusion.h index 9dcf7342ae1936451cf080c70a8a4862ae73e8e0..3105ea8a6d578132740784b4c9c14967fb2e6526 100644 --- a/paddle/phi/infermeta/fusion.h +++ b/paddle/phi/infermeta/fusion.h @@ -22,8 +22,8 @@ namespace phi { // Common InferMeta Functions for fusion operators. // NOTE: The InferMeta Functions in this file are arranged in alphabetic order. -void Conv2dXPUInferMeta(const MetaTensor& input, - const MetaTensor& input_max, +void Conv2dXPUInferMeta(const MetaTensor& x, + const MetaTensor& x_max, const MetaTensor& filter, const MetaTensor& filter_max, const MetaTensor& bias, @@ -37,8 +37,8 @@ void Conv2dXPUInferMeta(const MetaTensor& input, bool has_branch, int act_type, float act_param, - MetaTensor* output, - MetaTensor* output_max); + MetaTensor* out, + MetaTensor* out_max); void EmbeddingWithEltwiseAddXPUInferMeta( const std::vector& ids, diff --git a/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc index 9da39097e0f8d78fbfbada2c1ad8567eeb554599..0f7d8902de3284e69c1330236bd510a01282315c 100644 --- a/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc @@ -21,8 +21,8 @@ namespace fusion { template void Conv2dXPUKernel(const Context& ctx, - const DenseTensor& input, - const paddle::optional& input_max, + const DenseTensor& x, + const paddle::optional& x_max, const DenseTensor& filter, const DenseTensor& filter_max, const paddle::optional& bias, @@ -36,10 +36,10 @@ void Conv2dXPUKernel(const Context& ctx, bool has_branch, int act_type, float act_param, - DenseTensor* output, - DenseTensor* output_max) { + DenseTensor* out, + DenseTensor* out_max) { using XPUType = typename XPUTypeTrait::Type; - auto input_dims = input.dims(); + auto input_dims = x.dims(); auto filter_dims = filter.dims(); // update paddings and dilations accoring to padding_algorithm std::vector paddings_vec = paddings; @@ -62,17 +62,16 @@ void Conv2dXPUKernel(const Context& ctx, int win_h = static_cast(filter_dims[2]); int win_w = static_cast(filter_dims[3]); - auto* input_data = reinterpret_cast(input.data()); - const float* input_max_data = input_max.get_ptr() == nullptr - ? nullptr - : input_max.get_ptr()->data(); + auto* input_data = reinterpret_cast(x.data()); + const float* input_max_data = + x_max.get_ptr() == nullptr ? nullptr : x_max.get_ptr()->data(); auto* branch_data = branch.get_ptr() == nullptr ? nullptr : reinterpret_cast(branch.get_ptr()->data()); const float* bias_data = bias.get_ptr() == nullptr ? nullptr : bias.get_ptr()->data(); - auto* out_data = reinterpret_cast(ctx.template Alloc(output)); + auto* out_data = reinterpret_cast(ctx.template Alloc(out)); xpu::Activation_t act(static_cast(act_type)); if (act_type == xpu::Activation_t::LEAKY_RELU) { @@ -98,13 +97,13 @@ void Conv2dXPUKernel(const Context& ctx, /* int64_t groups */ groups, /* const float* in_maxptr */ input_max_data, /* const float* filter_maxptr */ filter_max.data(), - /* float* out_maxptr */ ctx.template Alloc(output_max), + /* float* out_maxptr */ ctx.template Alloc(out_max), /* bool is_nchw */ true, /* const float* bias */ bias_data, /* const TY* branch */ branch_data, /* const baidu::xpu::api::Activation_t& act */ act, - /* const float* branch_maxptr */ nullptr); - // /* const float* scale */ nullptr); + /* const float* branch_maxptr */ nullptr, + /* const float* scale */ nullptr); PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_xpu"); }