From f3eccb3ff8e5bb4e4abbaeff8cc97908a07adf33 Mon Sep 17 00:00:00 2001 From: huangjiyi <43315610+huangjiyi@users.noreply.github.com> Date: Thu, 1 Jun 2023 10:32:07 +0800 Subject: [PATCH] Support static graph code generation for conv2d, conv3d, depthwise_conv2d (#54201) * update * update cmake * update * update * update * update * Revert "update cmake" This reverts commit 1e1dc1b2bc9967b725201272607f939260070fd4. * update * update * update * update --- paddle/fluid/framework/ir/CMakeLists.txt | 1 - paddle/fluid/operators/conv_op.cc | 683 ------------------ paddle/fluid/operators/conv_op.h | 234 ------ .../fluid/operators/fused/conv_fusion_op.cc | 77 +- .../fluid/operators/fused/fused_conv2d_op.cc | 66 +- .../fused/resnet_basic_block_op_xpu.cc | 1 - .../generator/get_expected_kernel_func.cc | 26 + .../generator/get_expected_kernel_func.h | 4 + paddle/phi/api/yaml/backward.yaml | 72 ++ paddle/phi/api/yaml/legacy_backward.yaml | 67 -- paddle/phi/api/yaml/legacy_ops.yaml | 29 - paddle/phi/api/yaml/op_compat.yaml | 26 +- paddle/phi/api/yaml/op_version.yaml | 27 + paddle/phi/api/yaml/ops.yaml | 27 + paddle/phi/infermeta/multiary.cc | 29 + paddle/phi/infermeta/multiary.h | 17 + .../fusion/onednn/fused_conv_kernel.cc | 31 +- paddle/phi/kernels/onednn/conv_grad_kernel.cc | 36 +- paddle/phi/kernels/onednn/conv_kernel.cc | 36 +- paddle/phi/ops/compat/conv2d_sig.cc | 43 -- paddle/phi/ops/compat/conv3d_sig.cc | 67 -- paddle/phi/ops/compat/depthwise_conv2d_sig.cc | 68 -- test/cpp/fluid/fused/CMakeLists.txt | 2 +- test/cpp/fluid/mkldnn/CMakeLists.txt | 1 - 24 files changed, 445 insertions(+), 1225 deletions(-) delete mode 100644 paddle/fluid/operators/conv_op.cc delete mode 100644 paddle/fluid/operators/conv_op.h delete mode 100644 paddle/phi/ops/compat/conv3d_sig.cc delete mode 100644 paddle/phi/ops/compat/depthwise_conv2d_sig.cc diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index d63af261935..f1583f5312f 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -456,7 +456,6 @@ if(WITH_MKLDNN) set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass - conv_op conv_transpose_op batch_norm_op generated_op diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc deleted file mode 100644 index 9b388f74d6e..00000000000 --- a/paddle/fluid/operators/conv_op.cc +++ /dev/null @@ -1,683 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/conv_op.h" - -#include -#include -#include - -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - -#ifdef PADDLE_WITH_MKLDNN -#include "paddle/fluid/platform/mkldnn_helper.h" -#endif -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/phi/infermeta/binary.h" - -namespace paddle { -namespace operators { - -std::vector ConvOp::ComputeOutputShape( - framework::InferShapeContext* ctx) const { - OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "Conv"); - OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "Conv"); - - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - - std::vector strides = ctx->Attrs().Get>("strides"); - std::vector paddings = ctx->Attrs().Get>("paddings"); - std::string padding_algorithm = - ctx->Attrs().Get("padding_algorithm"); - int groups = ctx->Attrs().Get("groups"); - std::vector dilations = ctx->Attrs().Get>("dilations"); - int dilation_size = dilations.size(); - for (int i = 0; i < dilation_size; ++i) { - PADDLE_ENFORCE_GT( - dilations[i], - 0, - platform::errors::InvalidArgument( - "The dilation of Op(Conv) should be larget than 0, but received " - "dilation is %d.", - dilations[i])); - } - const std::string data_format = ctx->Attrs().Get("data_format"); - - // MKL-DNN Kernels are using NCHW order of dims description - // so we ignore data_format consideration for MKL-DNN kernel - const bool channel_last = (ctx->IsRunMKLDNNKernel() == false) && - (data_format == "NHWC" || data_format == "NDHWC"); - - PADDLE_ENFORCE_EQ( - in_dims.size() == 4 || in_dims.size() == 5, - true, - platform::errors::InvalidArgument( - "The input of Op(Conv) should be a 4-D or 5-D Tensor. But " - "received: input's dimension is %u, input's shape is [%s].", - in_dims.size(), - in_dims)); - - PADDLE_ENFORCE_EQ( - in_dims.size(), - filter_dims.size(), - platform::errors::InvalidArgument( - "The input's dimension and filter's dimension of " - "Op(Conv) should be equal. But received: the input's shape is [%s], " - "the input's dimension is %d; the filter's shape is [%s], " - "the filter's dimension is %d.", - in_dims, - in_dims.size(), - filter_dims, - filter_dims.size())); - - int stride_size = strides.size(); - for (int i = 0; i < stride_size; ++i) { - PADDLE_ENFORCE_GT( - strides[i], - 0, - platform::errors::InvalidArgument( - "The stride of Op(Conv) should be larget than 0, but received " - "stride is %d.", - strides[i])); - } - - int in_sub_stride_size = in_dims.size() - stride_size; - PADDLE_ENFORCE_EQ( - in_dims.size(), - strides.size() + 2U, - platform::errors::InvalidArgument( - "The difference of input's dimension and Attr(strides)'s " - "length must be euqal to 2 for Op(Conv). " - "But received: input's dimension is %d, input's shape is [%s]; " - "Attr(stride)'s length is %d, Attr(stride) is [%s]; " - "difference of input's dimention and Attr(strides)'s length = %u.", - in_dims.size(), - in_dims, - strides.size(), - phi::make_ddim(strides), - in_sub_stride_size)); - - const auto input_channels = - channel_last ? in_dims[in_dims.size() - 1] : in_dims[1]; - - PADDLE_ENFORCE_EQ( - input_channels, - filter_dims[1] * groups, - platform::errors::InvalidArgument( - "The number of input's channels should be equal to filter's channels " - "* groups for Op(Conv). 
But received: the input's channels is %d, " - "the input's shape is [%s]; the filter's channels is %d, the " - "filter's shape is [%s]; the groups is %d, the data_format is %s. " - "The error may come from wrong data_format setting.", - input_channels, - in_dims, - filter_dims[1], - filter_dims, - groups, - data_format)); - PADDLE_ENFORCE_EQ( - filter_dims[0] % groups, - 0, - platform::errors::InvalidArgument( - "The number of output's channels (filter's first dimension) of " - "Op(Conv) should be divided by groups. But received: " - "the output channels is %d, the filter's shape is [%s], " - "the groups is %d.", - filter_dims[0], - filter_dims, - groups)); - - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_GT( - filter_dims[0], - 0, - platform::errors::InvalidArgument( - "the size of filter at axis 0 should be greater than 0")); - } - - framework::DDim in_data_dims; - if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - } - - framework::DDim filter_data_dims = - phi::slice_ddim(filter_dims, 2, filter_dims.size()); - - std::vector ksize = phi::vectorize(filter_data_dims); - UpdatePaddingAndDilation( - &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - - std::vector output_shape({in_dims[0]}); - if (!channel_last) { - output_shape.push_back(filter_dims[0]); - } - for (int i = 0; i < in_data_dims.size(); ++i) { - if ((!ctx->IsRuntime()) && - (in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) { - output_shape.push_back(-1); - } else { - output_shape.push_back(ConvOutputSize(in_data_dims[i], - filter_data_dims[i], - dilations[i], - paddings[2 * i], - paddings[2 * i + 1], - strides[i])); - } - } - if (channel_last) { - output_shape.push_back(filter_dims[0]); - } - - return output_shape; -} - -phi::KernelKey ConvOp::GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { - auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); - // todo enable data layout when it's ready - // (https://github.com/PaddlePaddle/Paddle/pull/20042) - - if (input_data_type != framework::proto::VarType::INT8 && - input_data_type != framework::proto::VarType::UINT8 && - input_data_type != framework::proto::VarType::BF16) { - auto filter_data_type = framework::TransToProtoVarType( - ctx.Input("Filter")->dtype()); - PADDLE_ENFORCE_EQ( - input_data_type, - filter_data_type, - platform::errors::InvalidArgument( - "input and filter data type should be consistent, " - "but received input data type is %s and filter type " - "is %s", - paddle::framework::DataTypeToString(input_data_type), - paddle::framework::DataTypeToString(filter_data_type))); - } - - return phi::KernelKey(input_data_type, ctx.GetPlace()); -} - -phi::KernelKey ConvOp::GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const { -#ifdef PADDLE_WITH_MKLDNN - // Only input require reshaping, weights and - // bias are having shape in NCHW order - if ((var_name == "Input") && - (expected_kernel_type.layout() == phi::DataLayout::ONEDNN) && - (tensor.layout() != phi::DataLayout::ONEDNN)) { - auto attrs = Attrs(); - auto ar = paddle::framework::AttrReader(attrs); - const std::string data_format = ar.Get("data_format"); - auto dl = phi::StringToDataLayout(data_format); - // Some models may have intentionally set "AnyLayout" for conv - // op. 
Treat this as NCHW (default data_format value)
-    if (dl != phi::DataLayout::kAnyLayout) {
-      return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
-    }
-  }
-#endif
-  return phi::KernelKey(
-      tensor.place(), tensor.layout(), expected_kernel_type.dtype());
-}
-
-void Conv2DOpMaker::Make() {
-  AddInput("Input",
-           "(Tensor) The input tensor of convolution operator. "
-           "The format of input tensor is NCHW or NHWC, where N is batch size, "
-           "C is the number of channels, H is the height of the feature, "
-           "and W is the width of the feature.");
-  AddInput("Filter",
-           "(Tensor) The filter tensor of convolution operator. "
-           "The format of the filter tensor is MCHW, where M is the number of "
-           "output image channels, C is the number of input image channels, "
-           "H is the height of the filter, and W is the width of the filter. "
-           "If the groups attribute is greater than 1, C equals the number of "
-           "input image channels divided by the groups.");
-  AddOutput("Output",
-            "(Tensor) The output tensor of convolution operator. "
-            "It has the same data format and data type as the Input.");
-  AddAttr<std::vector<int>>("strides",
-                            "(vector<int> default:{1, 1}), the "
-                            "strides(h_stride, w_stride) of "
-                            "convolution operator.")
-      .SetDefault({1, 1});
-  AddAttr<std::vector<int>>("paddings",
-                            "(vector<int> default:{0, 0}), the "
-                            "paddings(pad_height_top, pad_height_bottom, "
-                            "pad_width_left, pad_width_right) of "
-                            "convolution operator.")
-      .SetDefault({0, 0});
-  AddAttr<std::string>(
-      "padding_algorithm",
-      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
-      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
-      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
-      .SetDefault("EXPLICIT");
-  AddAttr<int>(
-      "groups",
-      "(int default:1), the groups number of the convolution operator. "
-      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
-      "when group=2, the first half of the filters is only connected to the "
-      "first half of the input channels, while the second half of the filters "
-      "is only connected to the second half of the input channels.")
-      .SetDefault(1);
-  AddAttr<std::vector<int>>("dilations",
-                            "(vector<int> default:{1, 1}), the "
-                            "dilations(h_dilation, w_dilation) of "
-                            "convolution operator.")
-      .SetDefault({1, 1});
-  AddAttr<std::string>(
-      "data_format",
-      "(string, default NCHW) An optional string from: \"NHWC\", \"NCHW\". "
-      "Defaults to \"NCHW\". Specify the data format of the output data; "
-      "the input will be transformed automatically. ")
-      .SetDefault("NCHW");
-  // TODO(dzhwinter): need to register layout transform function
-  AddComment(R"DOC(
-Convolution Operator.
-
-The convolution operation calculates the output based on the input, filter
-and the strides, paddings, dilations and groups parameters. The size of each
-dimension of the parameters is checked during infer-shape.
-Input(Input) and Output(Output) are in NCHW or NHWC format, where N is batch
-size, C is the number of channels, H is the height of the feature, and W is
-the width of the feature.
-Filters(Input) is in MCHW format, where M is the number of output image channels, C is
-the number of input image channels, H is the height of the filter, and W
-is the width of the filter.
-Parameters(strides, paddings, dilations) each have two elements, which
-represent height and width, respectively.
-The input(X) size and output(Out) size may be different.
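For concreteness, a minimal self-contained sketch of the output-size rule this doc string states (the helper name ConvOutSize and the example numbers are illustrative, not part of this patch; the same integer arithmetic appears in ConvOutputSize in conv_op.h further down):

```cpp
#include <cstdio>

// H_out = (H_in + pad_top + pad_bottom - (dilation * (H_f - 1) + 1)) / stride + 1
// (integer division, matching the formula in the operator comment above)
int ConvOutSize(int in, int k, int dilation, int pad0, int pad1, int stride) {
  const int dkernel = dilation * (k - 1) + 1;  // effective (dilated) kernel size
  return (in + pad0 + pad1 - dkernel) / stride + 1;
}

int main() {
  // A 3x3 conv, stride 2, symmetric padding 1, dilation 1 on a 224x224 input:
  // (224 + 1 + 1 - 3) / 2 + 1 = 112
  std::printf("H_out = %d\n", ConvOutSize(224, 3, 1, 1, 1, 2));
  return 0;
}
```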
-
-Example:
-  Input:
-       Input shape: $(N, C_{in}, H_{in}, W_{in})$
-       Filter shape: $(C_{out}, C_{in}, H_f, W_f)$
-  Output:
-       Output shape: $(N, C_{out}, H_{out}, W_{out})$
-  Where
-$$
-       H_{out}= \frac{(H_{in} + pad\_height\_top + pad\_height\_bottom - (dilations[0] * (H_f - 1) + 1))}{strides[0]}+ 1 \\
-       W_{out}= \frac{(W_{in} + pad\_width\_left + pad\_width\_right - (dilations[1] * (W_f - 1) + 1))}{strides[1]}+ 1
-$$
-)DOC");
-  Apply();
-}
-
-class DepthwiseConv2DOpMaker : public Conv2DOpMaker {
- protected:
-  void Apply() override {
-    AddAttr<bool>(
-        "use_cudnn",
-        "(bool, default false) Only used by the cuDNN kernel; requires cuDNN "
-        "to be installed.")
-        .SetDefault(false)
-        .AsExtra();
-  }
-};
-
-void Conv3DOpMaker::Make() {
-  AddInput(
-      "Input",
-      "(Tensor) The input tensor of convolution operator. "
-      "The format of input tensor is NCDHW or NDHWC, where N is batch size, C "
-      "is the number of channels, D is the depth of the feature, H is the "
-      "height of the feature, and W is the width of the feature.");
-  AddInput("Filter",
-           "(Tensor) The filter tensor of convolution operator. "
-           "The format of the filter tensor is MCDHW, where M is the number of "
-           "output image channels, C is the number of input image channels, "
-           "D is the depth of the filter, H is the height of the filter, and W "
-           "is the width of the filter. "
-           "If the groups attribute is greater than 1, C equals the number of "
-           "input image channels divided by the groups.");
-  AddOutput("Output",
-            "(Tensor) The output tensor of convolution operator. "
-            "It has the same data format and data type as the Input.");
-  AddAttr<std::vector<int>>("strides",
-                            "(vector<int>, default:{1, 1, 1}), the "
-                            "strides(d_stride, h_stride, w_stride) of "
-                            "convolution operator.")
-      .SetDefault({1, 1, 1});
-  AddAttr<std::vector<int>>(
-      "paddings",
-      "(vector<int>, default:{0, 0, 0}), the "
-      "paddings(pad_depth_front, pad_depth_back, pad_height_top, "
-      "pad_height_bottom, pad_width_left, pad_width_right) of convolution "
-      "operator.")
-      .SetDefault({0, 0, 0});
-  AddAttr<std::string>(
-      "padding_algorithm",
-      "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
-      "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
-      "Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
-      .SetDefault("EXPLICIT");
-  AddAttr<int>(
-      "groups",
-      "(int default:1), the groups number of the convolution operator. "
-      "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
-      "when group=2, the first half of the filters is only connected to the "
-      "first half of the input channels, while the second half of the filters "
-      "is only connected to the second half of the input channels.")
-      .SetDefault(1);
-  AddAttr<std::vector<int>>("dilations",
-                            "(vector<int> default:{1, 1, 1}), the "
-                            "dilations(d_dilation, h_dilation, w_dilation) of "
-                            "convolution operator.")
-      .SetDefault({1, 1, 1});
-  AddAttr<std::string>(
-      "data_format",
-      "(string, default NCDHW) An optional string from: \"NDHWC\", \"NCDHW\". "
-      "Defaults to \"NCDHW\". Specify the data format of the output data; "
-      "the input will be transformed automatically. ")
-      .SetDefault("NCDHW");
-  AddComment(R"DOC(
-Convolution3D Operator.
-
-The convolution operation calculates the output based on the input, filter
-and the strides, paddings, dilations and groups parameters. The size of each
-dimension of the parameters is checked during infer-shape.
-Input(Input) and output(Output) are in NCDHW or NDHWC format, where N is batch
-size, C is the number of channels, D is the depth of the feature, H is the
-height of the feature, and W is the width of the feature.
-Filters(Input) is MCDHW format, where M is the number of output image channels, -C is the number of input image channels, D is the depth of the filter, -H is the height of the filter, and W is the width of the filter. -Parameters(strides, paddings, dilations) are three elements. These three elements -represent depth, height and width, respectively. -The input(X) size and output(Out) size may be different. - -Example: - Input: - Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$ - Filter shape: $(C_{out}, C_{in}, D_f, H_f, W_f)$ - Output: - Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$ - Where - $$ - D_{out}= \frac{(D_{in} + pad_depth_front + pad_depth_back - (dilations[0] * (D_f - 1) + 1))}{ strides[0]}+ 1 \\ - H_{out}= \frac{(H_{in} + pad_height_top + pad_height_bottom - (dilations[1] * (H_f - 1) + 1))}{ strides[1]}+ 1 \\ - W_{out}= \frac{(W_{in} + pad_width_left + pad_width_right - (dilations[2] * (W_f - 1) + 1))}{ strides[2]}+ 1 - $$ -)DOC"); - Apply(); -} - -void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const { - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - if (ctx->HasOutput(framework::GradVarName("Input"))) { - ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); - } - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); - } -} - -phi::KernelKey ConvOpGrad::GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); - return phi::KernelKey(data_type, ctx.GetPlace()); -} - -phi::KernelKey ConvOpGrad::GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const { -#ifdef PADDLE_WITH_MKLDNN - // Only input require reshaping, weights and - // bias are having shape in NCHW order - if (((var_name == "Input") || - (var_name == framework::GradVarName("Output"))) && - (expected_kernel_type.layout() == phi::DataLayout::ONEDNN) && - (tensor.layout() != phi::DataLayout::ONEDNN)) { - auto attrs = Attrs(); - auto ar = paddle::framework::AttrReader(attrs); - const std::string data_format = ar.Get("data_format"); - auto dl = phi::StringToDataLayout(data_format); - // Some models may have intentionally set "AnyLayout" for pool - // op. 
Treat this as NCHW (default data_format value) - if (dl != phi::DataLayout::kAnyLayout) { - return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype()); - } - } -#endif - return phi::KernelKey( - tensor.place(), tensor.layout(), expected_kernel_type.dtype()); -} - -template -class Conv2DGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr op) const override { - op->SetType(this->ForwardOpType() + "_grad"); - op->SetInput("Input", this->Input("Input")); - op->SetInput("Filter", this->Input("Filter")); - op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output")); - - op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input")); - op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter")); - - if (this->HasInput("Bias")) { - op->SetInput("Bias", this->Input("Bias")); - op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); - } - op->SetAttrMap(this->Attrs()); - } -}; - -template -class Conv3DGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr op) const override { - op->SetType(this->ForwardOpType() + "_grad"); - op->SetInput("Input", this->Input("Input")); - op->SetInput("Filter", this->Input("Filter")); - op->SetInput(framework::GradVarName("Output"), this->OutputGrad("Output")); - - op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input")); - op->SetOutput(framework::GradVarName("Filter"), this->InputGrad("Filter")); - - if (this->HasInput("ResidualData")) { - op->SetInput("ResidualData", this->Input("ResidualData")); - } - - op->SetAttrMap(this->Attrs()); - } -}; - -/* - * Inputs: I, W, dO, ddI, ddW - * Outputs: ddO, dW, dI - */ -template -class Conv2DDoubleGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr op) const override { - op->SetType(this->ForwardOpType() + "_grad"); - // I, W, dO, ddI, ddW - op->SetInput("Input", this->Input("Input")); - op->SetInput("Filter", this->Input("Filter")); - op->SetInput("DOutput", this->Input(framework::GradVarName("Output"))); - op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input"))); - op->SetInput("DDFilter", - this->OutputGrad(framework::GradVarName("Filter"))); - - // ddO, dI, dW - // Unlike grad op, double grad op does not use name@GRAD@GRAD - // as key of ops' inputs and outputs. - auto ddx = this->OutputGrad(framework::GradVarName("Input")); - auto ddw = this->OutputGrad(framework::GradVarName("Filter")); - - op->SetOutput("DDOutput", - ddx.empty() - ? this->EmptyInputGrad() - : this->InputGrad(framework::GradVarName("Output"))); - op->SetOutput( - "DFilter", - ddx.empty() ? this->EmptyInputGrad() : this->InputGrad("Filter")); - op->SetOutput( - "DInput", - ddw.empty() ? 
this->EmptyInputGrad() : this->InputGrad("Input")); - - op->SetAttrMap(this->Attrs()); - } -}; - -/* - * Inputs: I, W, dO, ddI, ddW - * Outputs: ddO, dW, dI - */ -template -class Conv3DDoubleGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - void Apply(GradOpPtr op) const override { - op->SetType(this->ForwardOpType() + "_grad"); - // I, W, dO, ddI, ddW - op->SetInput("Input", this->Input("Input")); - op->SetInput("Filter", this->Input("Filter")); - op->SetInput("DOutput", this->Input(framework::GradVarName("Output"))); - op->SetInput("DDInput", this->OutputGrad(framework::GradVarName("Input"))); - op->SetInput("DDFilter", - this->OutputGrad(framework::GradVarName("Filter"))); - - auto ddx = this->OutputGrad(framework::GradVarName("Input")); - auto ddw = this->OutputGrad(framework::GradVarName("Filter")); - - op->SetOutput("DDOutput", - ddx.empty() - ? this->EmptyInputGrad() - : this->InputGrad(framework::GradVarName("Output"))); - op->SetOutput( - "DFilter", - ddx.empty() ? this->EmptyInputGrad() : this->InputGrad("Filter")); - op->SetOutput( - "DInput", - ddw.empty() ? this->EmptyInputGrad() : this->InputGrad("Input")); - - op->SetAttrMap(this->Attrs()); - } -}; - -void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const { - auto x_dims = ctx->GetInputDim("Input"); - auto w_dims = ctx->GetInputDim("Filter"); - auto do_dims = ctx->GetInputDim("DOutput"); - - if (ctx->HasOutput("DDOutput") && - (ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) { - ctx->SetOutputDim("DDOutput", do_dims); - } - if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) { - ctx->SetOutputDim("DFilter", w_dims); - } - if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) { - ctx->SetOutputDim("DInput", x_dims); - } -} - -phi::KernelKey ConvOpDoubleGrad::GetExpectedKernelType( - const framework::ExecutionContext& ctx) const { - auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input"); - return phi::KernelKey(data_type, ctx.GetPlace()); -} - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OPERATOR(conv2d, - ops::ConvOp, - ops::Conv2DOpMaker, - ops::ConvOpInferVarType, - ops::Conv2DGradMaker, - ops::Conv2DGradMaker); -REGISTER_OPERATOR(conv2d_grad, - ops::ConvOpGrad, - ops::Conv2DDoubleGradMaker, - ops::Conv2DDoubleGradMaker); -REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad); - -// depthwise convolution op -REGISTER_OPERATOR(depthwise_conv2d, - ops::ConvOp, - ops::DepthwiseConv2DOpMaker, - ops::ConvOpInferVarType, - ops::Conv2DGradMaker, - ops::Conv2DGradMaker); -REGISTER_OPERATOR(depthwise_conv2d_grad, - ops::ConvOpGrad, - ops::Conv2DDoubleGradMaker, - ops::Conv2DDoubleGradMaker); -REGISTER_OPERATOR(depthwise_conv2d_grad_grad, ops::ConvOpDoubleGrad); - -REGISTER_OPERATOR(conv3d, - ops::ConvOp, - ops::Conv3DOpMaker, - ops::ConvOpInferVarType, - ops::Conv3DGradMaker, - ops::Conv3DGradMaker); -REGISTER_OPERATOR(conv3d_grad, - ops::ConvOpGrad, - ops::Conv3DDoubleGradMaker, - ops::Conv3DDoubleGradMaker); -REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad); - -REGISTER_OP_VERSION(conv2d).AddCheckpoint( - R"ROC( - Upgrade conv2d, add a new attribute [use_addto]. 
- )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "use_addto", - "In order to support new feature (inplace addto strategy) for " - "gradient accumulation.", - false)); - -REGISTER_OP_VERSION(depthwise_conv2d) - .AddCheckpoint( - R"ROC( - Upgrade depthwise_conv2d, add a new attribute [use_addto]. - )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "use_addto", - "In order to support new feature (inplace addto strategy) for " - "gradient accumulation.", - false)); - -REGISTER_OP_VERSION(conv3d).AddCheckpoint( - R"ROC( - Upgrade conv3d, add a new attribute [use_addto]. - )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "use_addto", - "In order to support new feature (inplace addto strategy) for " - "gradient accumulation.", - false)); diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h deleted file mode 100644 index f22ba9079d0..00000000000 --- a/paddle/fluid/operators/conv_op.h +++ /dev/null @@ -1,234 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/layout_utils.h" -#include "paddle/phi/kernels/funcs/blas/blas.h" -#include "paddle/phi/kernels/funcs/im2col.h" -#include "paddle/phi/kernels/funcs/vol2col.h" - -namespace paddle { -namespace operators { - -// Base convolution operator definitions for other conv -// like operators to reuse the implementation. -inline int ConvOutputSize( - int input_size, int filter_size, int dilation, int padding, int stride) { - const int dkernel = dilation * (filter_size - 1) + 1; - int output_size = (input_size + 2 * padding - dkernel) / stride + 1; - PADDLE_ENFORCE_GT( - output_size, - 0, - platform::errors::InvalidArgument( - "The output's size is expected to be greater than 0. " - "But received: output's size is %d. The output's size is computed by " - "((input_size + 2 * padding - (dilation * (filter_size - 1) + 1)) / " - "stride + 1), where input_size is %d, padding is %d, " - "filter_size is %d, dilation is %d, stride is %d.", - output_size, - input_size, - padding, - filter_size, - dilation, - stride)); - - return output_size; -} - -inline int ConvOutputSize(int input_size, - int filter_size, - int dilation, - int padding_1, - int padding_2, - int stride) { - const int dkernel = dilation * (filter_size - 1) + 1; - int output_size = (input_size + padding_1 + padding_2 - dkernel) / stride + 1; - PADDLE_ENFORCE_GT( - output_size, - 0, - platform::errors::InvalidArgument( - "The output's size is expected to be greater than 0. " - "But received: output's size is %d. 
The output's size is computed by " - "((input_size + padding_1 + padding_2 - (dilation * (filter_size - " - "1) + 1)) / stride + 1), where input_size is %d, padding is " - "(%d, %d), filter_size is %d, dilation is %d, stride is %d.", - output_size, - input_size, - padding_1, - padding_2, - filter_size, - dilation, - stride)); - - return output_size; -} - -template -inline void UpdatePaddingAndDilation(std::vector* paddings, - std::vector* dilation, - const std::string padding_algorithm, - const framework::DDim data_dims, - const std::vector& strides, - const std::vector& ksize) { - // set padding size == data_dims.size() * 2 - auto data_shape = phi::vectorize(data_dims); - if (static_cast(paddings->size()) == data_dims.size()) { - for (int i = 0; i < data_dims.size(); ++i) { - T copy_pad = *(paddings->begin() + 2 * i); - paddings->insert(paddings->begin() + 2 * i + 1, copy_pad); - } - } else { - PADDLE_ENFORCE_EQ( - data_dims.size() * 2, - paddings->size(), - platform::errors::InvalidArgument( - "Attribute padding's size should be the same or twice as the " - "input's dimension. " - "But received: padding's size is %d, padding is [%s]; input's " - "dimension is %d, input's shape is [%s].", - paddings->size(), - phi::make_ddim(*paddings), - data_dims.size(), - data_dims)); - } - - // when padding_algorithm is "VALID" or "SAME" - if (padding_algorithm == "SAME") { - for (int i = 0; i < data_dims.size(); ++i) { - T out_size = (data_dims[i] + strides[i] - 1) / strides[i]; - T pad_sum = - std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i], - static_cast(0)); - T pad_0 = pad_sum / 2; - T pad_1 = pad_sum - pad_0; - *(paddings->begin() + i * 2) = pad_0; - *(paddings->begin() + i * 2 + 1) = pad_1; - - // dilation - *(dilation->begin() + i) = 1; - } - - } else if (padding_algorithm == "VALID") { - for (auto it = paddings->begin(); it != paddings->end(); it++) { - *it = 0; - } - } -} - -inline bool IsExpand(const std::vector& filter_dim, - const std::vector& strides, - const std::vector& paddings, - const std::vector& dilations) { - bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; - for (size_t j = 0; j < strides.size(); ++j) { - filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); - strides_1 = strides_1 && (strides[j] == 1); - padding_0 = padding_0 && (paddings[j] == 0); - dilation_1 = dilation_1 && (dilations[j] == 1); - } - if (paddings.size() != strides.size()) { - for (size_t j = 0; j < paddings.size(); ++j) { - padding_0 = padding_0 && (paddings[j] == 0); - } - } - return !(filter_1 && strides_1 && padding_0 && dilation_1); -} - -// Define Op classes in .h file so that other conv -// operator implementations can reuse the code. 
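As a concrete aside before the class definitions, a standalone sketch of the "SAME" branch of UpdatePaddingAndDilation shown above (SamePadding is an illustrative name, not a Paddle API):

```cpp
#include <algorithm>
#include <cstdio>

// "SAME" padding: choose pad_0/pad_1 so that out == ceil(in / stride),
// mirroring the pad_sum / pad_0 / pad_1 computation above.
void SamePadding(int in, int k, int stride, int* pad_0, int* pad_1) {
  const int out = (in + stride - 1) / stride;                 // ceil(in / stride)
  const int pad_sum = std::max((out - 1) * stride + k - in, 0);
  *pad_0 = pad_sum / 2;          // when pad_sum is odd, the extra pixel
  *pad_1 = pad_sum - *pad_0;     // goes to the trailing side (pad_1)
}

int main() {
  int p0 = 0, p1 = 0;
  SamePadding(224, 3, 2, &p0, &p1);  // pad_sum = 1, split as (0, 1)
  std::printf("pad = (%d, %d)\n", p0, p1);
  return 0;
}
```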
-class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() final; - - protected: - virtual void Apply() {} -}; - -class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() final; - - protected: - virtual void Apply() {} -}; - -class ConvOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { - protected: - std::unordered_map& GetInputOutputWithSameType() - const override { - static std::unordered_map m{ - {"Input", /*->*/ "Output"}}; - return m; - } -}; - -class ConvOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - std::vector output_shape = ComputeOutputShape(ctx); - - OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "Conv"); - ctx->SetOutputDim("Output", phi::make_ddim(output_shape)); - ctx->ShareLoD("Input", "Output"); - } - - protected: - std::vector ComputeOutputShape( - framework::InferShapeContext* ctx) const; - - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override; - - phi::KernelKey GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const override; -}; - -class ConvOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override; - - phi::KernelKey GetKernelTypeForVar( - const std::string& var_name, - const phi::DenseTensor& tensor, - const phi::KernelKey& expected_kernel_type) const override; -}; - -class ConvOpDoubleGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override; - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cc b/paddle/fluid/operators/fused/conv_fusion_op.cc index e6c2cda275a..8f9cc4be699 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cc +++ b/paddle/fluid/operators/fused/conv_fusion_op.cc @@ -15,13 +15,42 @@ limitations under the License. */ #include #include -#include "paddle/fluid/operators/conv_op.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/generator/get_expected_kernel_func.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/phi/kernels/cpu/conv_util.h" namespace paddle { namespace operators { +inline int ConvOutputSize(int input_size, + int filter_size, + int dilation, + int padding_1, + int padding_2, + int stride) { + const int dkernel = dilation * (filter_size - 1) + 1; + int output_size = (input_size + padding_1 + padding_2 - dkernel) / stride + 1; + PADDLE_ENFORCE_GT( + output_size, + 0, + platform::errors::InvalidArgument( + "The output's size is expected to be greater than 0. " + "But received: output's size is %d. 
The output's size is computed by " + "((input_size + padding_1 + padding_2 - (dilation * (filter_size - " + "1) + 1)) / stride + 1), where input_size is %d, padding is " + "(%d, %d), filter_size is %d, dilation is %d, stride is %d.", + output_size, + input_size, + padding_1, + padding_2, + filter_size, + dilation, + stride)); + + return output_size; +} + // This fused conv follows the equation: // y = act ( alpha1 * conv(x) + alpha2 * z + bias ). // here, y is Output, @@ -30,9 +59,36 @@ namespace operators { // bias is Bias // When `split_channels` is set, y will be split into multiple outputs, // each output has split_channels[i] number of channels. -class Conv2DFusionOpMaker : public Conv2DOpMaker { +class Conv2DFusionOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Input", "(Tensor), input 0 of conv2d op."); + AddInput("Filter", "(Tensor), input 1 of conv2d op."); + AddOutput("Output", "(Tensor), output 0 of conv2d op."); + AddAttr>("strides", + "(std::vector), attribute 0 for conv2d op.") + .SetDefault({1, 1}); + AddAttr>("paddings", + "(std::vector), attribute 1 for conv2d op.") + .SetDefault({0, 0}); + AddAttr("padding_algorithm", + "(std::string), attribute 2 for conv2d op.") + .SetDefault("EXPLICIT"); + AddAttr>("dilations", + "(std::vector), attribute 3 for conv2d op.") + .SetDefault({1, 1}); + AddAttr("groups", "(int), attribute 4 for conv2d op.").SetDefault(1); + AddAttr("data_format", + "(std::string), attribute 5 for conv2d op.") + .SetDefault("NCHW"); + AddComment(R"DOC( +TODO: Documentation of conv2d op. +)DOC"); + Apply(); + } + protected: - void Apply() override { + void Apply() { AddInput("Bias", "(Tensor) Bias to be added to each output of filter application." "The format of output tensor is X (one-dimensional) of size equal" @@ -73,9 +129,9 @@ class Conv2DFusionOpMaker : public Conv2DOpMaker { } }; -class Conv2DFusionOp : public operators::ConvOp { +class Conv2DFusionOp : public framework::OperatorWithKernel { public: - using operators::ConvOp::ConvOp; + using framework::OperatorWithKernel::OperatorWithKernel; protected: void InferShape(framework::InferShapeContext* ctx) const override { @@ -275,7 +331,7 @@ class Conv2DFusionOp : public operators::ConvOp { } std::vector ksize = phi::vectorize(filter_data_dims); - UpdatePaddingAndDilation( + phi::UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); std::vector output_shape({in_dims[0]}); @@ -301,6 +357,11 @@ class Conv2DFusionOp : public operators::ConvOp { return output_shape; } + + phi::KernelKey GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return GetConvExpectedKernelType(ctx, this); + } }; // TODO(qingqing): add gradient operator for conv2d_fusion @@ -313,7 +374,6 @@ REGISTER_OPERATOR( conv2d_fusion, ops::Conv2DFusionOp, ops::Conv2DFusionOpMaker, - ops::ConvOpInferVarType, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); @@ -323,6 +383,5 @@ REGISTER_OPERATOR( conv2d_fusion_cutlass, ops::Conv2DFusionOp, ops::Conv2DFusionOpMaker, - ops::ConvOpInferVarType, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); diff --git a/paddle/fluid/operators/fused/fused_conv2d_op.cc b/paddle/fluid/operators/fused/fused_conv2d_op.cc index 322bc6944af..df89f63cb32 100644 --- a/paddle/fluid/operators/fused/fused_conv2d_op.cc +++ b/paddle/fluid/operators/fused/fused_conv2d_op.cc @@ -15,14 +15,44 @@ limitations under the License. 
*/
 
 #include <string>
 #include <vector>
 
-#include "paddle/fluid/operators/conv_op.h"
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/generator/get_expected_kernel_func.h"
+#include "paddle/phi/infermeta/multiary.h"
 
 namespace paddle {
 namespace operators {
 
-class FusedConvOpMaker : public Conv2DOpMaker {
+class FusedConvOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("Input", "(Tensor), input 0 of conv2d op.");
+    AddInput("Filter", "(Tensor), input 1 of conv2d op.");
+    AddOutput("Output", "(Tensor), output 0 of conv2d op.");
+    AddAttr<std::vector<int>>("strides",
+                              "(std::vector<int>), attribute 0 for conv2d op.")
+        .SetDefault({1, 1});
+    AddAttr<std::vector<int>>("paddings",
+                              "(std::vector<int>), attribute 1 for conv2d op.")
+        .SetDefault({0, 0});
+    AddAttr<std::string>("padding_algorithm",
+                         "(std::string), attribute 2 for conv2d op.")
+        .SetDefault("EXPLICIT");
+    AddAttr<std::vector<int>>("dilations",
+                              "(std::vector<int>), attribute 3 for conv2d op.")
+        .SetDefault({1, 1});
+    AddAttr<int>("groups", "(int), attribute 4 for conv2d op.").SetDefault(1);
+    AddAttr<std::string>("data_format",
+                         "(std::string), attribute 5 for conv2d op.")
+        .SetDefault("NCHW");
+    AddComment(R"DOC(
+TODO: Documentation of conv2d op.
+)DOC");
+    Apply();
+  }
+
  protected:
-  void Apply() override {
+  void Apply() {
     AddInput("Bias",
              "(Tensor) Bias to be added to each output of filter application."
              "The format of output tensor is X (one-dimensional) of size equal"
@@ -84,25 +114,43 @@
   }
 };
 
+class FusedConvOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  phi::KernelKey GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return GetConvExpectedKernelType(ctx, this);
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
 
+DECLARE_INFER_SHAPE_FUNCTOR(fused_conv2d,
+                            FusedConv2DInferShapeFunctor,
+                            PD_INFER_META(phi::FusedConvInferMeta));
+DECLARE_INFER_SHAPE_FUNCTOR(fused_conv3d,
+                            FusedConv3DInferShapeFunctor,
+                            PD_INFER_META(phi::FusedConvInferMeta));
+
 // fused_conv2d is only used for onednn inference.
 REGISTER_OPERATOR(
     fused_conv2d,
-    ops::ConvOp,
+    ops::FusedConvOp,
     ops::FusedConvOpMaker,
-    ops::ConvOpInferVarType,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
+    FusedConv2DInferShapeFunctor);
 
 // fused_conv3d is only used for onednn inference.
REGISTER_OPERATOR( fused_conv3d, - ops::ConvOp, + ops::FusedConvOp, ops::FusedConvOpMaker, - ops::ConvOpInferVarType, paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); + paddle::framework::EmptyGradOpMaker, + FusedConv3DInferShapeFunctor); diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc index 4d026f4b780..3855ea38544 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc @@ -14,7 +14,6 @@ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc index c9aeb7356d2..e434a8fac13 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc +++ b/paddle/fluid/operators/generator/get_expected_kernel_func.cc @@ -326,5 +326,31 @@ phi::KernelKey GetLayerNormExpectedKernelType( return phi::KernelKey(input_data_type, ctx.GetPlace()); } +phi::KernelKey GetConvExpectedKernelType( + const framework::ExecutionContext& ctx, + const framework::OperatorWithKernel* op_ptr) { + auto input_data_type = op_ptr->IndicateVarDataType(ctx, "Input"); + // todo enable data layout when it's ready + // (https://github.com/PaddlePaddle/Paddle/pull/20042) + + if (input_data_type != framework::proto::VarType::INT8 && + input_data_type != framework::proto::VarType::UINT8 && + input_data_type != framework::proto::VarType::BF16) { + auto filter_data_type = framework::TransToProtoVarType( + ctx.Input("Filter")->dtype()); + PADDLE_ENFORCE_EQ( + input_data_type, + filter_data_type, + platform::errors::InvalidArgument( + "input and filter data type should be consistent, " + "but received input data type is %s and filter type " + "is %s", + paddle::framework::DataTypeToString(input_data_type), + paddle::framework::DataTypeToString(filter_data_type))); + } + + return phi::KernelKey(input_data_type, ctx.GetPlace()); +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.h b/paddle/fluid/operators/generator/get_expected_kernel_func.h index 10f198b580f..81ce7da60c6 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.h +++ b/paddle/fluid/operators/generator/get_expected_kernel_func.h @@ -80,5 +80,9 @@ phi::KernelKey GetLayerNormExpectedKernelType( const framework::ExecutionContext& ctx, const framework::OperatorWithKernel* op_ptr); +phi::KernelKey GetConvExpectedKernelType( + const framework::ExecutionContext& ctx, + const framework::OperatorWithKernel* op_ptr); + } // namespace operators } // namespace paddle diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml index f9868df4913..306f7a77a05 100644 --- a/paddle/phi/api/yaml/backward.yaml +++ b/paddle/phi/api/yaml/backward.yaml @@ -324,6 +324,54 @@ output : Tensor(x_grad) invoke : conj(out_grad) +- backward_op : conv2d_grad + forward : conv2d (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int[] dilations={1, 1}, int groups=1, str data_format="NCHW") -> Tensor(out) + args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str 
data_format) + output : Tensor(input_grad), Tensor(filter_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [input, filter] + kernel : + func : conv2d_grad + data_type : input + backward : conv2d_grad_grad + +- backward_op : conv2d_grad_grad + forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(grad_input), Tensor(grad_filter) + args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) + output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param: [input, filter, grad_out] + kernel : + func : conv2d_double_grad + data_type : input + optional : grad_input_grad, grad_filter_grad + +- backward_op : conv3d_double_grad + forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter) + args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param: [input, filter, grad_out] + kernel : + func : conv3d_double_grad + data_type : input + optional : grad_input_grad, grad_filter_grad + +- backward_op : conv3d_grad + forward : conv3d (Tensor input, Tensor filter, int[] strides={1, 1, 1}, int[] paddings={0, 0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1, 1}, str data_format="NCDHW") -> Tensor(out) + args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(input_grad), Tensor(filter_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [input, filter] + kernel : + func : conv3d_grad + data_type : input + backward : conv3d_double_grad + - backward_op : cos_double_grad forward : cos_grad (Tensor x, Tensor grad_out) -> Tensor(grad_x) args : (Tensor x, Tensor grad_out, Tensor grad_x_grad) @@ -415,6 +463,30 @@ kernel : func : cumprod_grad +- backward_op : depthwise_conv2d_double_grad + forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter) + args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) + infer_meta : + func : GeneralTernaryGradInferMeta + param: [input, filter, grad_out] + kernel : + func : depthwise_conv2d_double_grad + data_type : input + optional : grad_input_grad, grad_filter_grad + +- backward_op : depthwise_conv2d_grad + forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW") -> Tensor(out) + args : (Tensor input, Tensor filter, Tensor out_grad, 
int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) + output : Tensor(input_grad), Tensor(filter_grad) + infer_meta : + func : GeneralBinaryGradInferMeta + param : [input, filter] + kernel : + func : depthwise_conv2d_grad + data_type : input + backward : depthwise_conv2d_double_grad + - backward_op : det_grad forward : det (Tensor x) -> Tensor(out) args : (Tensor x, Tensor out, Tensor out_grad) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 3e625667cf4..58ecf0604ad 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -137,28 +137,6 @@ no_need_buffer : x backward : concat_double_grad -- backward_op : conv2d_grad - forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(out) - args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) - output : Tensor(input_grad), Tensor(filter_grad) - infer_meta : - func : GeneralBinaryGradInferMeta - param : [input, filter] - kernel : - func : conv2d_grad - backward : conv2d_grad_grad - -- backward_op : conv2d_grad_grad - forward : conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) -> Tensor(grad_input), Tensor(grad_filter) - args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) - output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) - infer_meta : - func : GeneralTernaryGradInferMeta - param: [input, filter, grad_out] - kernel : - func : conv2d_double_grad - optional : grad_input_grad, grad_filter_grad - - backward_op : conv2d_transpose_double_grad forward : conv2d_transpose_grad(Tensor x, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_x), Tensor(grad_filter) args : (Tensor x, Tensor filter, Tensor grad_out, Tensor grad_x_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) @@ -178,28 +156,6 @@ func : conv2d_transpose_grad backward : conv2d_transpose_double_grad -- backward_op : conv3d_double_grad - forward : conv3d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter) - args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) - output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) - infer_meta : - func : GeneralTernaryGradInferMeta - param: [input, filter, grad_out] - kernel : - func : conv3d_double_grad - optional : grad_input_grad, grad_filter_grad - -- backward_op : conv3d_grad - forward : conv3d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) - args : (Tensor input, Tensor filter, 
Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) - output : Tensor(input_grad), Tensor(filter_grad) - infer_meta : - func : GeneralBinaryGradInferMeta - param : [input, filter] - kernel : - func : conv3d_grad - backward : conv3d_double_grad - - backward_op : conv3d_transpose_grad forward : conv3d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) @@ -232,29 +188,6 @@ data_type : x optional : mask -- backward_op : depthwise_conv2d_double_grad - forward : depthwise_conv2d_grad (Tensor input, Tensor filter, Tensor grad_out, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(grad_input), Tensor(grad_filter) - args : (Tensor input, Tensor filter, Tensor grad_out, Tensor grad_input_grad, Tensor grad_filter_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) - output : Tensor(input_grad), Tensor(filter_grad), Tensor(grad_out_grad) - infer_meta : - func : GeneralTernaryGradInferMeta - param: [input, filter, grad_out] - kernel : - func : depthwise_conv2d_double_grad - optional : grad_input_grad, grad_filter_grad - -- backward_op : depthwise_conv2d_grad - forward : depthwise_conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) - args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) - output : Tensor(input_grad), Tensor(filter_grad) - infer_meta : - func : GeneralBinaryGradInferMeta - param : [input, filter] - kernel : - func : depthwise_conv2d_grad - param : [input, filter, out_grad, strides, paddings, padding_algorithm, groups, dilations, data_format] - backward : depthwise_conv2d_double_grad - - backward_op : depthwise_conv2d_transpose_grad forward : depthwise_conv2d_transpose(Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) -> Tensor(out) args : (Tensor x, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index e4ab20dc5ee..93d516b7ccc 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -172,15 +172,6 @@ func : concat backward : concat_grad -- op : conv2d - args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int[] dilations, int groups, str data_format) - output : Tensor - infer_meta : - func : ConvInferMeta - kernel : - func : conv2d - backward : conv2d_grad - - op : conv2d_transpose args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) output : Tensor(out) @@ -190,15 +181,6 @@ func : conv2d_transpose backward : conv2d_transpose_grad -- op : conv3d - args : (Tensor input, 
Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) - output : Tensor - infer_meta : - func : Conv3DInferMeta - kernel : - func : conv3d - backward : conv3d_grad - - op : conv3d_transpose args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, int[] output_size, str padding_algorithm, int groups, int[] dilations, str data_format) output : Tensor(out) @@ -244,17 +226,6 @@ optional : mask backward : deformable_conv_grad -- op : depthwise_conv2d - args : (Tensor x, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format) - output : Tensor(out) - infer_meta : - func : DepthwiseConvInferMeta - param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format] - kernel : - func : depthwise_conv2d - param : [x, filter, strides, paddings, padding_algorithm, groups, dilations, data_format] - backward : depthwise_conv2d_grad - - op : depthwise_conv2d_transpose args : (Tensor x, Tensor filter, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format) output : Tensor(out) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 8b6b6d60fdb..277cdf70c25 100755 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -479,11 +479,17 @@ out : Out - op : conv2d - backward : conv2d_grad + backward : conv2d_grad, conv2d_grad_grad + inputs : + {input : Input, filter : Filter} + outputs : + out : Output extra : attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, bool use_addto = false, bool force_fp32_output = false, int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false] + get_expected_kernel_type : + conv2d : GetConvExpectedKernelType - op : conv2d_fusion extra : @@ -503,12 +509,18 @@ int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB()] - op : conv3d - backward : conv3d_grad + backward : conv3d_grad, conv3d_double_grad (conv3d_grad_grad) + inputs : + {input : Input, filter : Filter} + outputs : + out : Output extra : attrs : [bool is_test = false, bool use_cudnn = true, bool use_mkldnn = false, str mkldnn_data_type = "float32", bool fuse_relu = false, str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false, bool fuse_residual_connection = false, bool force_fp32_output = false, int workspace_size_MB = phi::backends::gpu::GetDefaultConvWorkspaceSizeLimitMB(), bool exhaustive_search = false] + get_expected_kernel_type : + conv3d : GetConvExpectedKernelType - op : conv3d_transpose backward : conv3d_transpose_grad @@ -603,14 +615,20 @@ out : Output - op : depthwise_conv2d - backward : depthwise_conv2d_grad + backward : depthwise_conv2d_grad, depthwise_conv2d_double_grad (depthwise_conv2d_grad_grad) + inputs : + {input : Input, filter : Filter} + outputs : + out : Output extra : - attrs : [bool is_test = false, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false, + attrs : [bool is_test = false, bool use_cudnn = false, bool fuse_relu_before_depthwise_conv = false, bool use_mkldnn = false, bool use_quantizer = false, str mkldnn_data_type = "float32", bool fuse_relu = false, str fuse_activation = "", float fuse_alpha = 0.0f, float fuse_beta = 0.0f, bool use_addto = false, bool fuse_residual_connection = false, float Scale_in = 
diff --git a/paddle/phi/api/yaml/op_version.yaml b/paddle/phi/api/yaml/op_version.yaml
index 5d6904ee589..c730bee0e1f 100644
--- a/paddle/phi/api/yaml/op_version.yaml
+++ b/paddle/phi/api/yaml/op_version.yaml
@@ -90,6 +90,33 @@
       of each place to be compatible with before.
       default : -1
 
+- op : conv2d
+  version :
+    - checkpoint : Upgrade conv2d, add a new attribute [use_addto].
+      action :
+        - add_attr : use_addto
+          comment : In order to support new feature (inplace addto strategy) for
+                    gradient accumulation.
+          default : "false"
+
+- op : conv3d
+  version :
+    - checkpoint : Upgrade conv3d, add a new attribute [use_addto].
+      action :
+        - add_attr : use_addto
+          comment : In order to support new feature (inplace addto strategy) for
+                    gradient accumulation.
+          default : "false"
+
+- op : depthwise_conv2d
+  version :
+    - checkpoint : Upgrade depthwise_conv2d, add a new attribute [use_addto].
+      action :
+        - add_attr : use_addto
+          comment : In order to support new feature (inplace addto strategy) for
+                    gradient accumulation.
+          default : "false"
+
 - op : embedding
   version :
     - checkpoint : Upgrade flip, add new attr [axis] and delete attr [dims]
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index a9c10037f80..966444785dd 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -454,6 +454,24 @@
     func : conj
   backward : conj_grad
 
+- op : conv2d
+  args : (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int[] dilations={1, 1}, int groups=1, str data_format="NCHW")
+  output : Tensor
+  infer_meta :
+    func : ConvInferMeta
+  kernel :
+    func : conv2d
+  backward : conv2d_grad
+
+- op : conv3d
+  args : (Tensor input, Tensor filter, int[] strides={1, 1, 1}, int[] paddings={0, 0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1, 1}, str data_format="NCDHW")
+  output : Tensor
+  infer_meta :
+    func : Conv3DInferMeta
+  kernel :
+    func : conv3d
+  backward : conv3d_grad
+
 - op : cos
   args : (Tensor x)
   output : Tensor
@@ -513,6 +531,15 @@
     func : cumprod
   backward : cumprod_grad
 
+- op : depthwise_conv2d
+  args : (Tensor input, Tensor filter, int[] strides={1, 1}, int[] paddings={0, 0}, str padding_algorithm="EXPLICIT", int groups=1, int[] dilations={1, 1}, str data_format="NCHW")
+  output : Tensor(out)
+  infer_meta :
+    func : DepthwiseConvInferMeta
+  kernel :
+    func : depthwise_conv2d
+  backward : depthwise_conv2d_grad
+
 - op : det
   args : (Tensor x)
   output : Tensor
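Unlike the deleted legacy entries, these generated definitions carry attribute defaults, so common call sites can pass only the tensors. A hedged sketch of how such YAML defaults would surface as C++ default arguments in a generated signature; the `Tensor` type and the `conv2d` symbol below are stand-ins, not the actual generated Paddle API:

// Illustrative stand-in, assuming the generator maps YAML defaults to C++
// default arguments. Not the real generated header.
#include <string>
#include <vector>

struct Tensor {};  // hypothetical stand-in for the framework tensor type

Tensor conv2d(const Tensor& input,
              const Tensor& filter,
              const std::vector<int>& strides = {1, 1},
              const std::vector<int>& paddings = {0, 0},
              const std::string& padding_algorithm = "EXPLICIT",
              const std::vector<int>& dilations = {1, 1},
              int groups = 1,
              const std::string& data_format = "NCHW") {
  // ... dispatch to the conv2d kernel ...
  return Tensor{};
}

int main() {
  Tensor input, filter;
  // Thanks to the YAML defaults, the common case needs only two arguments.
  Tensor out = conv2d(input, filter);
  (void)out;
}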
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index efe2e1c65bd..8b55e87aaa4 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -24,6 +24,7 @@ limitations under the License. */
 #include "paddle/phi/core/infermeta_utils.h"
 #include "paddle/phi/core/meta_tensor.h"
 #include "paddle/phi/core/utils/data_type.h"
+#include "paddle/phi/infermeta/binary.h"
 #include "paddle/phi/kernels/funcs/common_shape.h"
 #include "paddle/phi/kernels/funcs/concat_funcs.h"
@@ -3320,6 +3321,34 @@
   }
 }
 
+void FusedConvInferMeta(const MetaTensor& input,
+                        const MetaTensor& filter,
+                        const MetaTensor& bias,
+                        const MetaTensor& residual_param,
+                        const std::vector<int>& strides,
+                        const std::vector<int>& paddings,
+                        const std::string& padding_algorithm,
+                        const std::vector<int>& dilations,
+                        int groups,
+                        const std::string& data_format,
+                        const std::string& mkldnn_data_type,
+                        const std::string& fuse_activation,
+                        bool fuse_residual_conn,
+                        bool force_fp32_output,
+                        MetaTensor* out,
+                        MetaConfig config) {
+  ConvInferMeta(input,
+                filter,
+                strides,
+                paddings,
+                padding_algorithm,
+                dilations,
+                groups,
+                data_format,
+                out,
+                config);
+}
+
 void MoeInferMeta(const MetaTensor& x,
                   const MetaTensor& gate,
                   const MetaTensor& bmm0,
diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h
index 0393a56f3d5..50422a95978 100644
--- a/paddle/phi/infermeta/multiary.h
+++ b/paddle/phi/infermeta/multiary.h
@@ -619,6 +619,23 @@ void FusedAdamInferMeta(
     std::vector<MetaTensor*> beta2_pows_out,
     std::vector<MetaTensor*> master_params_out);
 
+void FusedConvInferMeta(const MetaTensor& input,
+                        const MetaTensor& filter,
+                        const MetaTensor& bias,
+                        const MetaTensor& residual_param,
+                        const std::vector<int>& strides,
+                        const std::vector<int>& paddings,
+                        const std::string& padding_algorithm,
+                        const std::vector<int>& dilations,
+                        int groups,
+                        const std::string& data_format,
+                        const std::string& mkldnn_data_type,
+                        const std::string& fuse_activation,
+                        bool fuse_residual_conn,
+                        bool force_fp32_output,
+                        MetaTensor* out,
+                        MetaConfig config);
+
 void MoeInferMeta(const MetaTensor& x,
                   const MetaTensor& gate,
                   const MetaTensor& bmm0,
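FusedConvInferMeta can forward to ConvInferMeta unchanged because fusing bias, residual connections, or activations does not alter the convolution's output shape. For reference, the per-dimension shape rule that conv shape inference applies under EXPLICIT padding is standard convolution arithmetic; the helper below is a self-contained sketch, not Paddle code:

// Self-contained sketch of the standard conv output-size rule.
#include <cassert>

int ConvOutputSize(int input, int kernel, int stride, int padding,
                   int dilation) {
  // effective kernel extent after dilation
  const int dkernel = dilation * (kernel - 1) + 1;
  return (input + 2 * padding - dkernel) / stride + 1;
}

int main() {
  // 224x224 input, 3x3 kernel, stride 1, padding 1, dilation 1 -> 224x224
  assert(ConvOutputSize(224, 3, 1, 1, 1) == 224);
  // stride 2 halves the spatial size: (224 + 2 - 3) / 2 + 1 = 112
  assert(ConvOutputSize(224, 3, 2, 1, 1) == 112);
  return 0;
}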
diff --git a/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc
index 45bbd2b34fc..6cbf2c2c05f 100644
--- a/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc
+++ b/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc
@@ -133,6 +133,29 @@ void FusedConv3DKernel(const Context& dev_ctx,
       out);
 }
 
+KernelKey ConvGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) {
+  const std::string& var_name = ctx->GetVarName();
+  const DenseTensor& tensor = ctx->GetTensor();
+  const KernelKey& expected_kernel_type = ctx->GetKernelKey();
+  const AttributeMap& attrs = ctx->GetAttrs();
+  // Only the input requires reshaping; the weights and bias
+  // keep their shape in NCHW order.
+  if ((var_name == "Input") &&
+      (expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
+      (tensor.layout() != phi::DataLayout::ONEDNN)) {
+    auto it = attrs.find("data_format");
+    const std::string data_format = PADDLE_GET_CONST(std::string, it->second);
+    auto dl = phi::StringToDataLayout(data_format);
+    // Some models may have intentionally set "AnyLayout" for the conv
+    // op. Treat this as NCHW (the default data_format value).
+    if (dl != phi::DataLayout::kAnyLayout) {
+      return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
+    }
+  }
+  return phi::KernelKey(
+      tensor.place(), tensor.layout(), expected_kernel_type.dtype());
+}
+
 }  // namespace fusion
 }  // namespace phi
 
@@ -143,7 +166,11 @@ PD_REGISTER_KERNEL(fused_conv2d,
                    float,
                    phi::dtype::bfloat16,
                    uint8_t,
-                   int8_t) {}
+                   int8_t) {
+  kernel->get_kerneltype_forvar_fn_ = phi::fusion::ConvGetKernelTypeForVar;
+}
 
 PD_REGISTER_KERNEL(
-    fused_conv3d, OneDNN, ONEDNN, phi::fusion::FusedConv3DKernel, float) {}
+    fused_conv3d, OneDNN, ONEDNN, phi::fusion::FusedConv3DKernel, float) {
+  kernel->get_kerneltype_forvar_fn_ = phi::fusion::ConvGetKernelTypeForVar;
+}
diff --git a/paddle/phi/kernels/onednn/conv_grad_kernel.cc b/paddle/phi/kernels/onednn/conv_grad_kernel.cc
index 2fcb4d7e687..b18b8b5f154 100644
--- a/paddle/phi/kernels/onednn/conv_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/conv_grad_kernel.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/conv_grad_kernel.h"
+#include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/data_layout_transform.h"
@@ -235,6 +236,29 @@ void Conv3DGradKernel(const Context& dev_ctx,
                 filter_grad);
 }
 
+KernelKey ConvGradGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) {
+  const std::string& var_name = ctx->GetVarName();
+  const DenseTensor& tensor = ctx->GetTensor();
+  const KernelKey& expected_kernel_type = ctx->GetKernelKey();
+  const AttributeMap& attrs = ctx->GetAttrs();
+  // Only the input requires reshaping; the weights and bias
+  // keep their shape in NCHW order.
+  if (((var_name == "Input") || (var_name == "Output@GRAD")) &&
+      (expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
+      (tensor.layout() != phi::DataLayout::ONEDNN)) {
+    auto it = attrs.find("data_format");
+    const std::string data_format = PADDLE_GET_CONST(std::string, it->second);
+    auto dl = phi::StringToDataLayout(data_format);
+    // Some models may have intentionally set "AnyLayout" for the conv
+    // op. Treat this as NCHW (the default data_format value).
+    if (dl != phi::DataLayout::kAnyLayout) {
+      return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
+    }
+  }
+  return phi::KernelKey(
+      tensor.place(), tensor.layout(), expected_kernel_type.dtype());
+}
+
 }  // namespace phi
 
 PD_REGISTER_KERNEL(conv2d_grad,
@@ -242,13 +266,19 @@
                    ONEDNN,
                    phi::ConvGradKernel,
                    float,
-                   phi::dtype::bfloat16) {}
+                   phi::dtype::bfloat16) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvGradGetKernelTypeForVar;
+}
 
 PD_REGISTER_KERNEL(depthwise_conv2d_grad,
                    OneDNN,
                    ONEDNN,
                    phi::DepthwiseConvGradKernel,
                    float,
-                   phi::dtype::bfloat16) {}
+                   phi::dtype::bfloat16) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvGradGetKernelTypeForVar;
+}
 
-PD_REGISTER_KERNEL(conv3d_grad, OneDNN, ONEDNN, phi::Conv3DGradKernel, float) {}
+PD_REGISTER_KERNEL(conv3d_grad, OneDNN, ONEDNN, phi::Conv3DGradKernel, float) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvGradGetKernelTypeForVar;
+}
diff --git a/paddle/phi/kernels/onednn/conv_kernel.cc b/paddle/phi/kernels/onednn/conv_kernel.cc
index c2ed2c10410..8039dab862c 100644
--- a/paddle/phi/kernels/onednn/conv_kernel.cc
+++ b/paddle/phi/kernels/onednn/conv_kernel.cc
@@ -14,6 +14,7 @@
 
 #include "paddle/phi/kernels/conv_kernel.h"
 
+#include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/data_layout_transform.h"
@@ -111,6 +112,29 @@ void Conv3DKernel(const Context& dev_ctx,
       out);
 }
 
+KernelKey ConvGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) {
+  const std::string& var_name = ctx->GetVarName();
+  const DenseTensor& tensor = ctx->GetTensor();
+  const KernelKey& expected_kernel_type = ctx->GetKernelKey();
+  const AttributeMap& attrs = ctx->GetAttrs();
+  // Only the input requires reshaping; the weights and bias
+  // keep their shape in NCHW order.
+  if ((var_name == "Input") &&
+      (expected_kernel_type.layout() == phi::DataLayout::ONEDNN) &&
+      (tensor.layout() != phi::DataLayout::ONEDNN)) {
+    auto it = attrs.find("data_format");
+    const std::string data_format = PADDLE_GET_CONST(std::string, it->second);
+    auto dl = phi::StringToDataLayout(data_format);
+    // Some models may have intentionally set "AnyLayout" for the conv
+    // op. Treat this as NCHW (the default data_format value).
+    if (dl != phi::DataLayout::kAnyLayout) {
+      return phi::KernelKey(tensor.place(), dl, expected_kernel_type.dtype());
+    }
+  }
+  return phi::KernelKey(
+      tensor.place(), tensor.layout(), expected_kernel_type.dtype());
+}
+
 }  // namespace phi
 
 PD_REGISTER_KERNEL(conv2d,
@@ -120,7 +144,9 @@
                    float,
                    phi::dtype::bfloat16,
                    uint8_t,
-                   int8_t) {}
+                   int8_t) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvGetKernelTypeForVar;
+}
 
 PD_REGISTER_KERNEL(depthwise_conv2d,
                    OneDNN,
@@ -129,6 +155,10 @@
                    float,
                    phi::dtype::bfloat16,
                    uint8_t,
-                   int8_t) {}
+                   int8_t) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvGetKernelTypeForVar;
+}
 
-PD_REGISTER_KERNEL(conv3d, OneDNN, ONEDNN, phi::Conv3DKernel, float) {}
+PD_REGISTER_KERNEL(conv3d, OneDNN, ONEDNN, phi::Conv3DKernel, float) {
+  kernel->get_kerneltype_forvar_fn_ = phi::ConvGetKernelTypeForVar;
+}
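All of the oneDNN conv kernels above register the same per-variable hook. The decision it encodes, reduced to a self-contained sketch with stand-in types (`Layout` and `LayoutForVar` are illustrative, not Paddle symbols):

// Standalone sketch of the per-variable layout override registered above.
#include <iostream>
#include <string>

enum class Layout { kNCHW, kNHWC, kAnyLayout, kOneDNN };

Layout StringToLayout(const std::string& s) {
  if (s == "NHWC") return Layout::kNHWC;
  if (s == "AnyLayout") return Layout::kAnyLayout;
  return Layout::kNCHW;
}

// Decide which layout the framework should treat `var_name` as having when
// the expected kernel is a oneDNN one but the tensor is not yet in oneDNN
// layout: trust the op's data_format attr unless it is "AnyLayout".
Layout LayoutForVar(const std::string& var_name, Layout expected,
                    Layout actual, const std::string& data_format) {
  if (var_name == "Input" && expected == Layout::kOneDNN &&
      actual != Layout::kOneDNN) {
    Layout dl = StringToLayout(data_format);
    if (dl != Layout::kAnyLayout) return dl;  // honor data_format
  }
  return actual;  // otherwise keep the tensor's own layout
}

int main() {
  Layout l = LayoutForVar("Input", Layout::kOneDNN, Layout::kNCHW, "NHWC");
  std::cout << (l == Layout::kNHWC) << '\n';  // prints 1
}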
diff --git a/paddle/phi/ops/compat/conv2d_sig.cc b/paddle/phi/ops/compat/conv2d_sig.cc
index 04b0e14ecbc..f5363b4adb5 100644
--- a/paddle/phi/ops/compat/conv2d_sig.cc
+++ b/paddle/phi/ops/compat/conv2d_sig.cc
@@ -16,45 +16,6 @@
 
 namespace phi {
 
-KernelSignature Conv2dOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("conv2d",
-                         {"Input", "Filter"},
-                         {"strides",
-                          "paddings",
-                          "padding_algorithm",
-                          "dilations",
-                          "groups",
-                          "data_format"},
-                         {"Output"});
-}
-
-KernelSignature Conv2dGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("conv2d_grad",
-                         {"Input", "Filter", "Output@GRAD"},
-                         {"strides",
-                          "paddings",
-                          "padding_algorithm",
-                          "dilations",
-                          "groups",
-                          "data_format"},
-                         {"Input@GRAD", "Filter@GRAD"});
-}
-
-KernelSignature Conv2dDoubleGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx UNUSED) {
-  return KernelSignature("conv2d_double_grad",
-                         {"Input", "Filter", "DOutput", "DDInput", "DDFilter"},
-                         {"strides",
-                          "paddings",
-                          "padding_algorithm",
-                          "dilations",
-                          "groups",
-                          "data_format"},
-                         {"DInput", "DFilter", "DDOutput"});
-}
-
 KernelSignature Conv2dFusionArgumentMapping(
     const ArgumentMappingContext& ctx UNUSED) {
   return KernelSignature("conv2d_fusion_cutlass",
@@ -71,9 +32,5 @@
 }
 
 }  // namespace phi
 
-PD_REGISTER_ARG_MAPPING_FN(conv2d, phi::Conv2dOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(conv2d_fusion_cutlass,
                            phi::Conv2dFusionArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(conv2d_grad, phi::Conv2dGradOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(conv2d_grad_grad,
-                           phi::Conv2dDoubleGradOpArgumentMapping);
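With the handwritten conv2d mappings deleted, the equivalent kernel signature is now derived by the generator from two YAML sources: the `inputs`/`outputs` name map in op_compat.yaml and the argument order in ops.yaml. A sketch of the assembled result; the `Sig` struct below is a hypothetical stand-in for phi::KernelSignature, not the generator's actual output:

// Sketch of the information the code generator combines automatically.
#include <iostream>
#include <string>
#include <vector>

struct Sig {  // hypothetical stand-in for phi::KernelSignature
  std::string kernel;
  std::vector<std::string> inputs, attrs, outputs;
};

int main() {
  // Equivalent of the deleted Conv2dOpArgumentMapping, assembled from YAML.
  Sig conv2d_sig{
      "conv2d",
      {"Input", "Filter"},  // op_compat.yaml: input->Input, filter->Filter
      {"strides", "paddings", "padding_algorithm", "dilations", "groups",
       "data_format"},      // ops.yaml attribute order
      {"Output"}};          // op_compat.yaml: out->Output
  std::cout << conv2d_sig.kernel << " takes " << conv2d_sig.inputs.size()
            << " inputs\n";
}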
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature Conv3dOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("conv3d", - {"Input", "Filter"}, - { - "strides", - "paddings", - "padding_algorithm", - "groups", - "dilations", - "data_format", - }, - {"Output"}); -} - -KernelSignature Conv3dGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("conv3d_grad", - {"Input", "Filter", "Output@GRAD"}, - {"strides", - "paddings", - "padding_algorithm", - "groups", - "dilations", - "data_format"}, - {"Input@GRAD", "Filter@GRAD"}); -} - -KernelSignature Conv3dDoubleGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("conv3d_double_grad", - {"Input", "Filter", "DOutput", "DDInput", "DDFilter"}, - {"strides", - "paddings", - "padding_algorithm", - "groups", - "dilations", - "data_format"}, - {"DInput", "DFilter", "DDOutput"}); -} - -} // namespace phi - -PD_REGISTER_BASE_KERNEL_NAME(conv3d_grad_grad, conv3d_double_grad); - -PD_REGISTER_ARG_MAPPING_FN(conv3d, phi::Conv3dOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(conv3d_grad, phi::Conv3dGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(conv3d_grad_grad, - phi::Conv3dDoubleGradOpArgumentMapping); diff --git a/paddle/phi/ops/compat/depthwise_conv2d_sig.cc b/paddle/phi/ops/compat/depthwise_conv2d_sig.cc deleted file mode 100644 index 175f9432158..00000000000 --- a/paddle/phi/ops/compat/depthwise_conv2d_sig.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature DepthwiseConv2dOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("depthwise_conv2d", - {"Input", "Filter"}, - {"strides", - "paddings", - "padding_algorithm", - "groups", - "dilations", - "data_format"}, - {"Output"}); -} - -KernelSignature DepthwiseConv2dGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("depthwise_conv2d_grad", - {"Input", "Filter", "Output@GRAD"}, - {"strides", - "paddings", - "padding_algorithm", - "groups", - "dilations", - "data_format"}, - {"Input@GRAD", "Filter@GRAD"}); -} - -KernelSignature DepthwiseConv2dDoubleGradOpArgumentMapping( - const ArgumentMappingContext& ctx UNUSED) { - return KernelSignature("depthwise_conv2d_double_grad", - {"Input", "Filter", "DOutput", "DDInput", "DDFilter"}, - {"strides", - "paddings", - "padding_algorithm", - "groups", - "dilations", - "data_format"}, - {"DInput", "DFilter", "DDOutput"}); -} - -} // namespace phi - -PD_REGISTER_BASE_KERNEL_NAME(depthwise_conv2d_grad_grad, - depthwise_conv2d_double_grad); - -PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d, - phi::DepthwiseConv2dOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d_grad, - phi::DepthwiseConv2dGradOpArgumentMapping); -PD_REGISTER_ARG_MAPPING_FN(depthwise_conv2d_grad_grad, - phi::DepthwiseConv2dDoubleGradOpArgumentMapping); diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt index 6529e13c90c..6411c7f2057 100644 --- a/test/cpp/fluid/fused/CMakeLists.txt +++ b/test/cpp/fluid/fused/CMakeLists.txt @@ -43,7 +43,7 @@ if(WITH_GPU OR WITH_ROCM) cc_test( test_cudnn_norm_conv SRCS cudnn_norm_conv_test.cc - DEPS conv_op + DEPS generated_op depthwise_conv tensor op_registry diff --git a/test/cpp/fluid/mkldnn/CMakeLists.txt b/test/cpp/fluid/mkldnn/CMakeLists.txt index 555a4ea27e4..bf56915f065 100644 --- a/test/cpp/fluid/mkldnn/CMakeLists.txt +++ b/test/cpp/fluid/mkldnn/CMakeLists.txt @@ -16,7 +16,6 @@ set(TEST_MKLDNN_CACHING_DEPS elementwise_mul_op elementwise_add_op activation_op - conv_op phi scope device_context -- GitLab