Unverified commit 1eb96eec, authored by From00, committed by GitHub

Move conv-transpose OPs to phi (#40675)

* Move conv-transpose OPs to phi

* Fix CI errors

* Fix CI errors
Parent b77e20ac
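This commit moves the conv-transpose operator family (conv2d_transpose, conv3d_transpose, depthwise_conv2d_transpose, and their grad and double-grad ops) from fluid to the phi kernel library: the hand-written InferShape methods are replaced by shared phi InferMeta functions, and the per-device REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL calls are replaced by phi's PD_REGISTER_KERNEL. A minimal sketch of the new registration style, adapted from the hunks below:

// Sketch of the phi registration pattern this commit adopts; the real
// registrations appear in the new kernel .cc/.cu files further down.
PD_REGISTER_KERNEL(conv2d_transpose,            // kernel name
                   CPU,                         // backend
                   ALL_LAYOUT,                  // supported layouts
                   phi::Conv2dTransposeKernel,  // templated kernel functor
                   float,                       // instantiated dtypes
                   double) {}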
@@ -28,7 +28,7 @@
USE_OP_ITSELF(batch_norm);
USE_OP_DEVICE_KERNEL(batch_norm, MKLDNN);
-USE_OP(conv2d_transpose);
+USE_OP_ITSELF(conv2d_transpose);
USE_OP_DEVICE_KERNEL(conv2d_transpose, MKLDNN);
USE_OP_ITSELF(elementwise_add);
USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
......
@@ -17,7 +17,7 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
USE_OP_ITSELF(conv2d);
-USE_OP(conv2d_transpose);
+USE_OP_ITSELF(conv2d_transpose);
namespace paddle {
namespace inference {
......
@@ -13,13 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/conv_transpose_op.h"
+#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/backward.h"
+#include "paddle/phi/infermeta/binary.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
@@ -29,165 +33,6 @@ namespace operators {
using DataLayout = framework::DataLayout;
void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "ConvTranspose");
OP_INOUT_CHECK(ctx->HasInput("Filter"), "Input", "Filter", "ConvTranspose");
OP_INOUT_CHECK(ctx->HasOutput("Output"), "Output", "Output", "ConvTranspose");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> output_size =
ctx->Attrs().Get<std::vector<int>>("output_size");
std::vector<int> output_padding =
ctx->Attrs().Get<std::vector<int>>("output_padding");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
int groups = ctx->Attrs().Get<int>("groups");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
const std::string data_layout_str =
ctx->Attrs().Get<std::string>("data_format");
const DataLayout data_layout =
ctx->IsRunMKLDNNKernel() ? DataLayout::kNCHW
: framework::StringToDataLayout(data_layout_str);
PADDLE_ENFORCE_EQ(in_dims.size() == 4 || in_dims.size() == 5, true,
platform::errors::InvalidArgument(
"Input of Op(conv_transpose) should be 4-D or "
"5-D Tensor. But received: %u-D Tensor, "
"the shape of input is [%s]",
in_dims.size(), in_dims));
PADDLE_ENFORCE_EQ(
in_dims.size(), filter_dims.size(),
platform::errors::InvalidArgument(
"The input's dimension size and filter's dimension size of "
"Op (conv_transpose) should be equal. But received: the shape of "
"input is [%s], the dimension size of input is [%d], the shape "
"of filter is [%s], the dimension size of filter is [%d]. ",
in_dims, in_dims.size(), filter_dims, filter_dims.size()));
int stride_size = strides.size();
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i], 0,
platform::errors::InvalidArgument(
"The stride of Op(Conv) should be larget than 0, but received "
"stride is %d.",
strides[i]));
}
int in_sub_stride_size = in_dims.size() - stride_size;
PADDLE_ENFORCE_EQ(
in_dims.size() - strides.size(), 2U,
platform::errors::InvalidArgument(
"The input's dimension size minus Attr(stride)'s size must "
"be euqal to 2 for Op(conv_transpose). But received: [%d], the "
"input's dimension size is [%d], the shape of input "
"is [%s], the Attr(stride)'s size is [%d].",
in_sub_stride_size, in_dims.size(), in_dims, strides.size()));
if (output_size.size())
PADDLE_ENFORCE_EQ(
output_size.size(), strides.size(),
platform::errors::InvalidArgument(
"The Attr(output_size) and Attr(stride) of Op(conv_transpose) "
"should be the same."));
if (output_padding.size())
PADDLE_ENFORCE_EQ(
output_padding.size(), strides.size(),
platform::errors::InvalidArgument(
"The Attr(output_padding) and Attr(stride) of Op(conv_transpose) "
"should be the same."));
const int64_t C =
(data_layout != DataLayout::kNHWC ? in_dims[1]
: in_dims[in_dims.size() - 1]);
PADDLE_ENFORCE_EQ(
C, filter_dims[0],
platform::errors::InvalidArgument(
"The number of input channels should be equal to filter channels "
"for Op(conv_transpose). But received: the input's channels is "
"[%d], the shape of input is [%s], the filter's channels is [%d], "
"the shape of filter is [%s]. The data_format is %s."
"The error may come from wrong data_format setting.",
C, in_dims, filter_dims[0], filter_dims, data_layout_str));
framework::DDim in_data_dims;
if (data_layout != DataLayout::kNHWC) {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
} else {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
}
framework::DDim filter_data_dims =
phi::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
std::vector<int64_t> output_shape({in_dims[0]});
if (data_layout != DataLayout::kNHWC) {
output_shape.push_back(filter_dims[1] * groups);
}
const int offset = (data_layout != DataLayout::kNHWC ? 2 : 1);
for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
auto infer_shape = (ctx->IsRuntime() || in_dims[i + offset] > 0)
? (in_dims[i + offset] - 1) * strides[i] -
paddings[2 * i] - paddings[2 * i + 1] +
filter_extent
: -1;
if (output_size.size()) {
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_GE(
output_size[i], infer_shape,
platform::errors::InvalidArgument(
"output_size of Op(ConvTransposeOp) should not be "
"less than the infered output size. But received output_size = "
"[%s], whose dim %d is less than the infered output size [%s]",
phi::make_ddim(output_size).to_str(), i, infer_shape));
PADDLE_ENFORCE_LT(
output_size[i], infer_shape + strides[i],
platform::errors::InvalidArgument(
"output_size of Op(ConvTransposeOp) should be less "
"than infered size + stride. But received output_size = [%s], "
"whose dim %d is not less than the infered output size (%d) + "
"stride (%d) = %d",
phi::make_ddim(output_size).to_str(), i, infer_shape,
strides[i], infer_shape + strides[i]));
}
output_shape.push_back(output_size[i]);
} else if (output_padding.size()) {
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_GE(
output_padding[i], 0,
platform::errors::InvalidArgument(
"output_padding of Op(ConvTransposeOp) should not be "
"less than the 0. But received output_padding = "
"[%s], whose dim %d is less than 0",
phi::make_ddim(output_padding).to_str(), i));
PADDLE_ENFORCE_LT(
output_padding[i], std::max(strides[i], dilations[i]),
platform::errors::InvalidArgument(
"output_padding of Op(ConvTransposeOp) should be less "
"than either stride or dilation. But received output_size = "
"[%s], "
"whose dim %d is not less than either stride (%d) or "
"dilation (%d)",
phi::make_ddim(output_padding).to_str(), i, strides[i],
dilations[i]));
}
output_shape.push_back((infer_shape + output_padding[i]));
} else {
output_shape.push_back(infer_shape);
}
}
if (data_layout == DataLayout::kNHWC) {
output_shape.push_back(filter_dims[1] * groups);
}
ctx->SetOutputDim("Output", phi::make_ddim(output_shape));
}
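The loop above implements the standard transposed-convolution shape rule: each spatial extent is out = (in - 1) * stride - pad_begin - pad_end + dilation * (ksize - 1) + 1, optionally plus output_padding, or overridden by an explicit output_size. A self-contained check of one dimension, with illustrative values:

// Standalone illustration of the shape arithmetic implemented above.
#include <cstdio>
int main() {
  const int in = 8, stride = 2, pad_begin = 1, pad_end = 1;
  const int dilation = 1, ksize = 3, output_padding = 0;
  const int filter_extent = dilation * (ksize - 1) + 1;  // 3
  const int out =
      (in - 1) * stride - pad_begin - pad_end + filter_extent + output_padding;
  std::printf("out = %d\n", out);  // (8 - 1) * 2 - 2 + 3 + 0 = 15
  return 0;
}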
framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  framework::LibraryType library_{framework::LibraryType::kPlain};
@@ -217,7 +62,7 @@ framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
}
framework::OpKernelType ConvTransposeOp::GetKernelTypeForVar(
-    const std::string& var_name, const Tensor& tensor,
+    const std::string& var_name, const framework::Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) const {
#ifdef PADDLE_WITH_MKLDNN
  // Only input requires reshaping; weights and
@@ -493,17 +338,6 @@ Example:
)DOC");
}
void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) {
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  bool use_cudnn =
@@ -587,24 +421,6 @@ class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker<T> {
  }
};
void ConvTransposeOpDoubleGrad::InferShape(
framework::InferShapeContext* ctx) const {
auto x_dims = ctx->GetInputDim("Input");
auto w_dims = ctx->GetInputDim("Filter");
auto do_dims = ctx->GetInputDim("DOutput");
if (ctx->HasOutput("DDOutput") &&
(ctx->HasInput("DDInput") || (ctx->HasInput("DDFilter")))) {
ctx->SetOutputDim("DDOutput", do_dims);
}
if (ctx->HasOutput("DFilter") && ctx->HasInput("DDInput")) {
ctx->SetOutputDim("DFilter", w_dims);
}
if (ctx->HasOutput("DInput") && ctx->HasInput("DDFilter")) {
ctx->SetOutputDim("DInput", x_dims);
}
}
framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  bool use_cudnn =
@@ -635,59 +451,57 @@ framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
namespace ops = paddle::operators;
// conv2d_transpose
DECLARE_INFER_SHAPE_FUNCTOR(conv2d_transpose, Conv2dTranposeInferShapeFunctor,
PD_INFER_META(phi::ConvTransposeInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(conv2d_transpose_grad,
Conv2dTranposeGradInferShapeFunctor,
PD_INFER_META(phi::ConvTransposeGradInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(
conv2d_transpose_grad_grad, Conv2dTranposeDoubleGradInferShapeFunctor,
PD_INFER_META(phi::Conv2dTransposeDoubleGradInferMeta));
REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp,
                  ops::Conv2DTransposeOpMaker,
                  ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
-                 ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(
-    conv2d_transpose_grad, ops::ConvTransposeOpGrad,
-    ops::ConvTransposeDoubleGradMaker<paddle::framework::OpDesc>,
-    ops::ConvTransposeDoubleGradMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(conv2d_transpose_grad_grad, ops::ConvTransposeOpDoubleGrad);
-REGISTER_OP_CPU_KERNEL(
-    conv2d_transpose,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv2d_transpose_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
+                 ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>,
+                 Conv2dTranposeInferShapeFunctor);
+REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad,
+                  ops::ConvTransposeDoubleGradMaker<paddle::framework::OpDesc>,
+                  ops::ConvTransposeDoubleGradMaker<paddle::imperative::OpBase>,
+                  Conv2dTranposeGradInferShapeFunctor);
+REGISTER_OPERATOR(conv2d_transpose_grad_grad, ops::ConvTransposeOpDoubleGrad,
+                  Conv2dTranposeDoubleGradInferShapeFunctor);
// conv3d_transpose
DECLARE_INFER_SHAPE_FUNCTOR(conv3d_transpose, Conv3dTranposeInferShapeFunctor,
PD_INFER_META(phi::ConvTransposeInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(conv3d_transpose_grad,
Conv3dTranposeGradInferShapeFunctor,
PD_INFER_META(phi::ConvTransposeGradInferMeta));
REGISTER_OPERATOR(conv3d_transpose, ops::ConvTransposeOp,
                  ops::Conv3DTransposeOpMaker,
                  ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
-                 ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(conv3d_transpose_grad, ops::ConvTransposeOpGrad);
-REGISTER_OP_CPU_KERNEL(
-    conv3d_transpose,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv3d_transpose_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
+                 ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>,
+                 Conv3dTranposeInferShapeFunctor);
+REGISTER_OPERATOR(conv3d_transpose_grad, ops::ConvTransposeOpGrad,
+                  Conv3dTranposeGradInferShapeFunctor);
// depthwise conv2d_transpose
DECLARE_INFER_SHAPE_FUNCTOR(depthwise_conv2d_transpose,
DepthWiseConv2dTranposeInferShapeFunctor,
PD_INFER_META(phi::ConvTransposeInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(depthwise_conv2d_transpose_grad,
DepthWiseConv2dTranposeGradInferShapeFunctor,
PD_INFER_META(phi::ConvTransposeGradInferMeta));
REGISTER_OPERATOR(depthwise_conv2d_transpose, ops::ConvTransposeOp,
                  ops::Conv2DTransposeOpMaker,
                  ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
-                 ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OPERATOR(depthwise_conv2d_transpose_grad, ops::ConvTransposeOpGrad);
-REGISTER_OP_CPU_KERNEL(
-    depthwise_conv2d_transpose,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    depthwise_conv2d_transpose_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
+                 ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>,
+                 DepthWiseConv2dTranposeInferShapeFunctor);
+REGISTER_OPERATOR(depthwise_conv2d_transpose_grad, ops::ConvTransposeOpGrad,
+                  DepthWiseConv2dTranposeGradInferShapeFunctor);
REGISTER_OP_VERSION(conv_transpose)
    .AddCheckpoint(
......
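The net effect in this file: the long InferShape overrides are deleted, and shape inference is generated from the phi meta functions instead, via infer-shape functors passed to REGISTER_OPERATOR. The pattern, taken from the conv2d_transpose hunk above and repeated for each variant:

// Bind the phi meta function to the fluid op, then register the generated
// functor in place of a hand-written InferShape.
DECLARE_INFER_SHAPE_FUNCTOR(conv2d_transpose, Conv2dTranposeInferShapeFunctor,
                            PD_INFER_META(phi::ConvTransposeInferMeta));
REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp,
                  ops::Conv2DTransposeOpMaker,
                  ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
                  ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>,
                  Conv2dTranposeInferShapeFunctor);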
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/conv_transpose_op.h"
#include "paddle/phi/kernels/gpu/depthwise_conv.h"
namespace ops = paddle::operators;
using CUDA = paddle::platform::CUDADeviceContext;
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
template <typename DeviceContext, typename T>
class DepthwiseConvTransposeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const std::string data_layout_str =
context.Attr<std::string>("data_format");
const framework::DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const Tensor* input = context.Input<Tensor>("Input");
Tensor filter = *context.Input<Tensor>("Filter");
Tensor* output = context.Output<Tensor>("Output");
output->mutable_data<T>(context.GetPlace());
int groups = context.Attr<int>("groups");
PADDLE_ENFORCE_EQ(
groups, filter.dims()[0],
platform::errors::InvalidArgument(
"groups should be error to the 1st dimension of filter. But "
"received groups is %d and filter dimension[0] is %d",
groups, filter.dims()[0]));
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
for (auto v : dilations) {
PADDLE_ENFORCE_EQ(v, 1, platform::errors::InvalidArgument(
"dilations should be 1 in depthwise conv. "
"But received dilations is %d",
v));
}
auto in_dims = input->dims();
auto filter_dims = filter.dims();
framework::DDim in_data_dims;
if (data_layout != framework::DataLayout::kNHWC) {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
} else {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
}
framework::DDim filter_data_dims =
phi::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
output->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<DeviceContext>();
phi::funcs::SetConstant<DeviceContext, T> set_zero;
set_zero(dev_ctx, output, static_cast<T>(0));
math::DepthwiseConvInputGradFunctor<phi::GPUContext, T>
depthwiseConvInputGrad;
depthwiseConvInputGrad(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*output, filter, *input, strides,
std::vector<int>{paddings[0], paddings[2], paddings[1], paddings[3]},
dilations, output, data_layout);
}
};
template <typename DeviceContext, typename T>
class DepthwiseConvTransposeGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
const std::string data_layout_str =
context.Attr<std::string>("data_format");
const framework::DataLayout data_layout =
framework::StringToDataLayout(data_layout_str);
const Tensor* input = context.Input<Tensor>("Input");
const Tensor* output_grad =
context.Input<Tensor>(framework::GradVarName("Output"));
Tensor* input_grad =
context.Output<Tensor>(framework::GradVarName("Input"));
Tensor* filter_grad =
context.Output<Tensor>(framework::GradVarName("Filter"));
Tensor filter = *context.Input<Tensor>("Filter");
if (!input_grad && !filter_grad) return;
auto& dev_ctx = context.template device_context<DeviceContext>();
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
auto in_dims = input->dims();
auto filter_dims = filter.dims();
framework::DDim in_data_dims;
if (data_layout != framework::DataLayout::kNHWC) {
in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size());
} else {
in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1);
}
framework::DDim filter_data_dims =
phi::slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
in_data_dims, strides, ksize);
if (input_grad) {
math::DepthwiseConvFunctor<phi::GPUContext, T> depthwiseConv;
depthwiseConv(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*output_grad, filter, strides,
std::vector<int>{paddings[0], paddings[2], paddings[1], paddings[3]},
dilations, input_grad, data_layout);
}
if (filter_grad) {
phi::funcs::SetConstant<DeviceContext, T> set_zero;
filter_grad->mutable_data<T>(context.GetPlace());
set_zero(dev_ctx, filter_grad, static_cast<T>(0));
math::DepthwiseConvFilterGradFunctor<phi::GPUContext, T>
depthwiseConvFilterGrad;
depthwiseConvFilterGrad(
static_cast<const typename framework::ConvertToPhiContext<
DeviceContext>::TYPE&>(dev_ctx),
*output_grad, *input, strides,
std::vector<int>{paddings[0], paddings[2], paddings[1], paddings[3]},
dilations, filter_grad, data_layout);
}
}
};
} // namespace operators
} // namespace paddle
// conv2d_transpose
REGISTER_OP_CUDA_KERNEL(conv2d_transpose,
ops::GemmConvTransposeKernel<CUDA, float>,
ops::GemmConvTransposeKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad,
ops::GemmConvTransposeGradKernel<CUDA, float>,
ops::GemmConvTransposeGradKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad_grad,
ops::GemmConvTransposeGradKernel<CUDA, float>,
ops::GemmConvTransposeGradKernel<CUDA, double>);
// conv3d_transpose
REGISTER_OP_CUDA_KERNEL(conv3d_transpose,
ops::GemmConvTransposeKernel<CUDA, float>,
ops::GemmConvTransposeKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(conv3d_transpose_grad,
ops::GemmConvTransposeGradKernel<CUDA, float>,
ops::GemmConvTransposeGradKernel<CUDA, double>);
// depthwise conv2d_transpose
REGISTER_OP_CUDA_KERNEL(depthwise_conv2d_transpose,
ops::DepthwiseConvTransposeKernel<CUDA, float>,
ops::DepthwiseConvTransposeKernel<CUDA, double>);
REGISTER_OP_CUDA_KERNEL(depthwise_conv2d_transpose_grad,
ops::DepthwiseConvTransposeGradKernel<CUDA, float>,
ops::DepthwiseConvTransposeGradKernel<CUDA, double>);
@@ -13,11 +13,15 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/conv_transpose_op.h"
+#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
namespace paddle {
namespace operators {
+using Tensor = framework::Tensor;
using NPUDeviceContext = platform::NPUDeviceContext;
template <typename T>
@@ -55,8 +59,8 @@ class Conv2DTransposeNPUKernel : public framework::OpKernel<T> {
  filter_data_dims = phi::slice_ddim(filter_dims, 2, in_dims.size());
  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-  UpdatePaddingAndDilation(&padding, &dilation, padding_algorithm,
-                           in_data_dims, stride, ksize);
+  phi::UpdatePaddingAndDilation(&padding, &dilation, padding_algorithm,
+                                in_data_dims, stride, ksize);
  // construct NPU attr
  std::vector<int> strides(4, 1);
@@ -137,8 +141,8 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel<T> {
  framework::DDim filter_data_dims =
      phi::slice_ddim(filter_dims, 2, filter_dims.size());
  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
-                           in_data_dims, strides, ksize);
+  phi::UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                                in_data_dims, strides, ksize);
  std::vector<int> strides_vec(4, 1);
  std::vector<int> dilations_vec(4, 1);
......
@@ -8,15 +8,22 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/conv_transpose_op.h"
#include <memory>
#include <string>
#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
#ifdef PADDLE_WITH_XPU
namespace paddle {
namespace operators {
+using Tensor = framework::Tensor;
// target_len == 2 || target_len == 4
inline std::vector<int> vector_extend(const std::vector<int>& src,
                                      int target_len) {
@@ -61,8 +68,8 @@ class Conv2DTransposeXPUKernel : public framework::OpKernel<T> {
  framework::DDim filter_data_dims =
      phi::slice_ddim(filter.dims(), 2, filter.dims().size());
  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
-                           in_data_dims, strides, ksize);
+  phi::UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                                in_data_dims, strides, ksize);
  const int batch_size = static_cast<int>(input->dims()[0]);
  const int img_yc = static_cast<int>(input->dims()[1]);
@@ -135,8 +142,8 @@ class Conv2DTransposeGradXPUKernel : public framework::OpKernel<T> {
  framework::DDim filter_data_dims =
      phi::slice_ddim(filter.dims(), 2, filter.dims().size());
  std::vector<int> ksize = phi::vectorize<int>(filter_data_dims);
-  UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
-                           in_data_dims, strides, ksize);
+  phi::UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm,
+                                in_data_dims, strides, ksize);
  const int batch_size = static_cast<int>(input->dims()[0]);
  const int img_yc = static_cast<int>(input->dims()[1]);
......
@@ -64,6 +64,45 @@ void BilinearTensorProductGradInferMeta(const MetaTensor& x,
  }
}
void ConvTransposeGradInferMeta(const MetaTensor& x,
const MetaTensor& filter,
const MetaTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* dx,
MetaTensor* dfilter) {
GeneralBinaryGradInferMeta(x, filter, dx, dfilter);
}
void Conv2dTransposeDoubleGradInferMeta(const MetaTensor& x,
const MetaTensor& filter,
const MetaTensor& dout,
const MetaTensor& ddx,
const MetaTensor& ddfilter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* dx,
MetaTensor* dfilter,
MetaTensor* ddout) {
GeneralBinaryGradInferMeta(x, filter, dx, dfilter);
if (ddout) {
ddout->share_meta(dout);
}
}
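Both grad meta functions delegate to GeneralBinaryGradInferMeta, whose effect here is simply that each gradient inherits the metadata of the tensor it differentiates. A schematic of that behavior, assuming share_meta semantics rather than quoting the actual implementation:

// What GeneralBinaryGradInferMeta(x, filter, dx, dfilter) amounts to here:
if (dx) dx->share_meta(x);                 // d(Input) mirrors Input
if (dfilter) dfilter->share_meta(filter);  // d(Filter) mirrors Filter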
void GatherNdGradInferMeta(const MetaTensor& x,
                           const MetaTensor& index,
                           const MetaTensor& out_grad,
......
@@ -37,6 +37,37 @@ void BilinearTensorProductGradInferMeta(const MetaTensor& x,
                                        MetaTensor* dweight,
                                        MetaTensor* dbias);
void ConvTransposeGradInferMeta(const MetaTensor& x,
const MetaTensor& filter,
const MetaTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* dx,
MetaTensor* dfilter);
void Conv2dTransposeDoubleGradInferMeta(const MetaTensor& x,
const MetaTensor& filter,
const MetaTensor& dout,
const MetaTensor& ddx,
const MetaTensor& ddfilter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* dx,
MetaTensor* dfilter,
MetaTensor* ddout);
void GatherNdGradInferMeta(const MetaTensor& x,
                           const MetaTensor& index,
                           const MetaTensor& out_grad,
......
@@ -17,8 +17,10 @@ limitations under the License. */
#include <algorithm>
#include <vector>
#include "paddle/phi/common/data_type.h"
+#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
-#include "paddle/phi/kernels/cpu/conv_util.h"
@@ -312,51 +314,6 @@ void CompareAllInferMeta(const MetaTensor& x,
  out->set_dtype(DataType::BOOL);
}
void CrossInferMeta(const MetaTensor& x,
const MetaTensor& y,
int axis,
MetaTensor* out) {
auto x_dim = x.dims();
auto y_dim = y.dims();
auto dim = axis;
bool dims_match = phi::funcs::CheckDims(x_dim, y_dim);
PADDLE_ENFORCE_EQ(
dims_match,
true,
phi::errors::InvalidArgument("The 'shape' of Input(X) should be equal to "
"the 'shape' of Input(Y). But received "
"Input(X).dimensions = [%s], "
"Input(Y).dimensions = [%s]",
x_dim,
y_dim));
if (dim != DDim::kMaxRank) {
PADDLE_ENFORCE_EQ(
dim < x_dim.size() && dim >= (0 - x_dim.size()),
true,
phi::errors::OutOfRange(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d.",
x_dim.size(),
x_dim.size() - 1,
dim));
if (dim < 0) {
dim += x_dim.size();
}
PADDLE_ENFORCE_EQ(x_dim[dim] == 3 && y_dim[dim] == 3,
true,
phi::errors::InvalidArgument(
"Input(X/Y).dims()[dim] should be equal to 3."
"But received Input(X/Y).dims()[dim] = %d.",
x_dim[dim]));
}
out->set_dims(x_dim);
out->set_dtype(x.dtype());
out->set_layout(x.layout());
out->share_lod(x);
}
void ConvInferMeta(const MetaTensor& input,
                   const MetaTensor& filter,
                   const std::vector<int>& strides,
@@ -512,6 +469,241 @@ void ConvInferMeta(const MetaTensor& input,
  out->set_dtype(input.dtype());
}
void ConvTransposeInferMeta(const MetaTensor& x,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* out,
MetaConfig config) {
auto x_dims = x.dims();
auto filter_dims = filter.dims();
std::vector<int> paddings_ = paddings;
std::vector<int> dilations_ = dilations;
const DataLayout data_layout =
config.is_run_mkldnn_kernel
? DataLayout::kNCHW
: paddle::framework::StringToDataLayout(data_format);
PADDLE_ENFORCE_EQ(
x_dims.size() == 4 || x_dims.size() == 5,
true,
errors::InvalidArgument("Input of Op(conv_transpose) should be 4-D or "
"5-D Tensor. But received: %u-D Tensor, "
"the shape of input is [%s]",
x_dims.size(),
x_dims));
PADDLE_ENFORCE_EQ(
x_dims.size(),
filter_dims.size(),
errors::InvalidArgument(
"The input's dimension size and filter's dimension size of "
"Op (conv_transpose) should be equal. But received: the shape of "
"input is [%s], the dimension size of input is [%d], the shape "
"of filter is [%s], the dimension size of filter is [%d]. ",
x_dims,
x_dims.size(),
filter_dims,
filter_dims.size()));
int stride_size = strides.size();
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i],
0,
errors::InvalidArgument(
"The stride of Op(Conv) should be larget than 0, but received "
"stride is %d.",
strides[i]));
}
int in_sub_stride_size = x_dims.size() - stride_size;
PADDLE_ENFORCE_EQ(
x_dims.size() - strides.size(),
2U,
errors::InvalidArgument(
"The input's dimension size minus Attr(stride)'s size must "
"be euqal to 2 for Op(conv_transpose). But received: [%d], the "
"input's dimension size is [%d], the shape of input "
"is [%s], the Attr(stride)'s size is [%d].",
in_sub_stride_size,
x_dims.size(),
x_dims,
strides.size()));
if (output_size.size())
PADDLE_ENFORCE_EQ(
output_size.size(),
strides.size(),
errors::InvalidArgument(
"The Attr(output_size) and Attr(stride) of Op(conv_transpose) "
"should be the same."));
if (output_padding.size())
PADDLE_ENFORCE_EQ(
output_padding.size(),
strides.size(),
errors::InvalidArgument(
"The Attr(output_padding) and Attr(stride) of Op(conv_transpose) "
"should be the same."));
const int64_t C =
(data_layout != DataLayout::kNHWC ? x_dims[1]
: x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE_EQ(
C,
filter_dims[0],
errors::InvalidArgument(
"The number of input channels should be equal to filter channels "
"for Op(conv_transpose). But received: the input's channels is "
"[%d], the shape of input is [%s], the filter's channels is [%d], "
"the shape of filter is [%s]. The data_format is %s."
"The error may come from wrong data_format setting.",
C,
x_dims,
filter_dims[0],
filter_dims,
data_format));
DDim x_data_dims;
if (data_layout != DataLayout::kNHWC) {
x_data_dims = slice_ddim(x_dims, 2, x_dims.size());
} else {
x_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1);
}
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize);
std::vector<int64_t> output_shape({x_dims[0]});
if (data_layout != DataLayout::kNHWC) {
output_shape.push_back(filter_dims[1] * groups);
}
const int offset = (data_layout != DataLayout::kNHWC ? 2 : 1);
for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations_[i] * (filter_dims[i + 2] - 1) + 1;
auto infer_shape = (config.is_runtime || x_dims[i + offset] > 0)
? (x_dims[i + offset] - 1) * strides[i] -
paddings_[2 * i] - paddings_[2 * i + 1] +
filter_extent
: -1;
if (output_size.size()) {
if (config.is_runtime) {
PADDLE_ENFORCE_GE(
output_size[i],
infer_shape,
errors::InvalidArgument(
"output_size of Op(ConvTransposeOp) should not be "
"less than the infered output size. But received output_size = "
"[%s], whose dim %d is less than the infered output size [%s]",
make_ddim(output_size).to_str(),
i,
infer_shape));
PADDLE_ENFORCE_LT(
output_size[i],
infer_shape + strides[i],
errors::InvalidArgument(
"output_size of Op(ConvTransposeOp) should be less "
"than infered size + stride. But received output_size = [%s], "
"whose dim %d is not less than the infered output size (%d) + "
"stride (%d) = %d",
make_ddim(output_size).to_str(),
i,
infer_shape,
strides[i],
infer_shape + strides[i]));
}
output_shape.push_back(output_size[i]);
} else if (output_padding.size()) {
if (config.is_runtime) {
PADDLE_ENFORCE_GE(
output_padding[i],
0,
errors::InvalidArgument(
"output_padding of Op(ConvTransposeOp) should not be "
"less than the 0. But received output_padding = "
"[%s], whose dim %d is less than 0",
make_ddim(output_padding).to_str(),
i));
PADDLE_ENFORCE_LT(
output_padding[i],
std::max(strides[i], dilations_[i]),
errors::InvalidArgument(
"output_padding of Op(ConvTransposeOp) should be less "
"than either stride or dilation. But received output_size = "
"[%s], "
"whose dim %d is not less than either stride (%d) or "
"dilation (%d)",
make_ddim(output_padding).to_str(),
i,
strides[i],
dilations_[i]));
}
output_shape.push_back((infer_shape + output_padding[i]));
} else {
output_shape.push_back(infer_shape);
}
}
if (data_layout == DataLayout::kNHWC) {
output_shape.push_back(filter_dims[1] * groups);
}
out->set_dims(make_ddim(output_shape));
out->set_dtype(x.dtype());
}
void CrossInferMeta(const MetaTensor& x,
const MetaTensor& y,
int axis,
MetaTensor* out) {
auto x_dim = x.dims();
auto y_dim = y.dims();
auto dim = axis;
bool dims_match = phi::funcs::CheckDims(x_dim, y_dim);
PADDLE_ENFORCE_EQ(
dims_match,
true,
phi::errors::InvalidArgument("The 'shape' of Input(X) should be equal to "
"the 'shape' of Input(Y). But received "
"Input(X).dimensions = [%s], "
"Input(Y).dimensions = [%s]",
x_dim,
y_dim));
if (dim != DDim::kMaxRank) {
PADDLE_ENFORCE_EQ(
dim < x_dim.size() && dim >= (0 - x_dim.size()),
true,
phi::errors::OutOfRange(
"Attr(dim) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(dim) = %d.",
x_dim.size(),
x_dim.size() - 1,
dim));
if (dim < 0) {
dim += x_dim.size();
}
PADDLE_ENFORCE_EQ(x_dim[dim] == 3 && y_dim[dim] == 3,
true,
phi::errors::InvalidArgument(
"Input(X/Y).dims()[dim] should be equal to 3."
"But received Input(X/Y).dims()[dim] = %d.",
x_dim[dim]));
}
out->set_dims(x_dim);
out->set_dtype(x.dtype());
out->set_layout(x.layout());
out->share_lod(x);
}
void DistInferMeta(const MetaTensor& x,
                   const MetaTensor& y,
                   float p,
......
@@ -83,6 +83,19 @@ void ConvInferMeta(const MetaTensor& input,
                   MetaTensor* out,
                   MetaConfig config = MetaConfig());
void ConvTransposeInferMeta(const MetaTensor& x,
const MetaTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
MetaTensor* out,
MetaConfig config = MetaConfig());
void CrossInferMeta(const MetaTensor& x,
                    const MetaTensor& y,
                    int axis,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void Conv2dTransposeGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter);
template <typename T, typename Context>
void Conv2dTransposeDoubleGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const DenseTensor& ddx,
const DenseTensor& ddfilter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter,
DenseTensor* ddout);
template <typename T, typename Context>
void Conv3dTransposeGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter);
template <typename T, typename Context>
void DepthwiseConv2dTransposeGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter);
} // namespace phi
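These declarations are backend-agnostic; the CPU and GPU translation units below instantiate them per dtype with PD_REGISTER_KERNEL, and the depthwise and double-grad variants mostly forward to a shared raw kernel from conv_transpose_grad_kernel_impl.h. The forwarding pattern, condensed from the CPU file below (parameter list elided with ... for brevity):

// Specialized variants delegate to the shared implementation.
template <typename T, typename Context>
void DepthwiseConv2dTransposeGradKernel(const Context& ctx, ...) {
  ConvTransposeGradRawKernel<T, Context>(ctx, x, filter, dout, strides,
                                         paddings, padding_algorithm, groups,
                                         dilations, data_format, dx, dfilter);
}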
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void Conv2dTransposeKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out);
template <typename T, typename Context>
void Conv3dTransposeKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out);
template <typename T, typename Context>
void DepthwiseConv2dTransposeKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out);
} // namespace phi
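Because these are plain function templates, a kernel can also be called directly through the functional API. A hypothetical invocation of the declaration above, assuming ctx, x, and filter are already-initialized phi::CPUContext and DenseTensor objects:

// Hypothetical direct call of the functional phi API (illustrative values;
// "EXPLICIT" means the given paddings are used as-is).
phi::DenseTensor out;
phi::Conv2dTransposeKernel<float, phi::CPUContext>(
    ctx, x, filter,
    /*strides=*/{2, 2}, /*paddings=*/{1, 1},
    /*output_padding=*/{}, /*output_size=*/{},
    /*padding_algorithm=*/"EXPLICIT", /*groups=*/1,
    /*dilations=*/{1, 1}, /*data_format=*/"NCHW", &out);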
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_transpose_grad_kernel.h"
#include "paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void DepthwiseConv2dTransposeGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter) {
ConvTransposeGradRawKernel<T, Context>(ctx,
x,
filter,
dout,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
dx,
dfilter);
}
} // namespace phi
PD_REGISTER_KERNEL(conv2d_transpose_grad,
CPU,
ALL_LAYOUT,
phi::Conv2dTransposeGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(conv3d_transpose_grad,
CPU,
ALL_LAYOUT,
phi::Conv3dTransposeGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(depthwise_conv2d_transpose_grad,
CPU,
ALL_LAYOUT,
phi::DepthwiseConv2dTransposeGradKernel,
float,
double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_transpose_kernel.h"
#include "paddle/phi/kernels/impl/conv_transpose_kernel_impl.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void DepthwiseConv2dTransposeKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
ConvTransposeRawKernel<T, Context>(ctx,
x,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
} // namespace phi
PD_REGISTER_KERNEL(conv2d_transpose,
CPU,
ALL_LAYOUT,
phi::Conv2dTransposeKernel,
float,
double) {}
PD_REGISTER_KERNEL(conv3d_transpose,
CPU,
ALL_LAYOUT,
phi::Conv3dTransposeKernel,
float,
double) {}
PD_REGISTER_KERNEL(depthwise_conv2d_transpose,
CPU,
ALL_LAYOUT,
phi::DepthwiseConv2dTransposeKernel,
float,
double) {}
@@ -123,5 +123,56 @@ DenseTensor Slice(const Context& dev_ctx,
  return ret;
}
// Used by the conv_transpose kernels
template <typename Context, typename T, size_t D>
static void Slice(const Context& ctx,
const DenseTensor* input,
DenseTensor* out,
const std::vector<int64_t>& begin_vec,
const std::vector<int64_t>& end_vec,
const std::vector<int64_t>& axes_vec) {
auto& place = *ctx.eigen_device();
auto in_dims = input->dims();
auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
for (size_t i = 0; i < D; ++i) {
offsets[i] = 0;
extents[i] = in_dims[i];
}
std::vector<int64_t> out_shape_vec = vectorize(in_dims);
for (size_t i = 0; i < axes_vec.size(); ++i) {
offsets[axes_vec[i]] = begin_vec[i];
extents[axes_vec[i]] = end_vec[i] - begin_vec[i];
out_shape_vec[axes_vec[i]] = end_vec[i] - begin_vec[i];
}
DDim out_dims(make_ddim(out_shape_vec));
out->Resize(out_dims);
ctx.template Alloc<T>(out);
auto in_t =
EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(*input);
auto out_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
*out, out_dims);
funcs::EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
place, out_t, in_t, offsets, extents);
out->Resize(out_dims);
}
template <typename Context, typename T, size_t D>
static void Slice(const Context& ctx,
const DenseTensor* input,
DenseTensor* out,
int64_t begin_idx,
int64_t end_idx,
int64_t axes) {
std::vector<int64_t> begin_vec = {begin_idx};
std::vector<int64_t> end_vec = {end_idx};
std::vector<int64_t> axes_vec = {axes};
Slice<Context, T, D>(ctx, input, out, begin_vec, end_vec, axes_vec);
}
} // namespace funcs
} // namespace phi
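The Slice overloads added above are used by the conv-transpose kernels (for example, the GPUDNN path below) to crop padded results back to the requested output shape. A hypothetical single-axis call, assuming a prepared context and a 4-D input tensor:

// Crop indices [0, 3) along axis 1 of a 4-D tensor using the convenience
// overload added above (hypothetical usage; ctx and input set up elsewhere).
phi::DenseTensor cropped;
phi::funcs::Slice<phi::GPUContext, float, 4>(
    ctx, &input, &cropped, /*begin_idx=*/0, /*end_idx=*/3, /*axes=*/1);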
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_transpose_grad_kernel.h"
#include "paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/gpu/depthwise_conv.h"
namespace phi {
template <typename T, typename Context>
void Conv2dTransposeDoubleGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const DenseTensor& ddx,
const DenseTensor& ddfilter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter,
DenseTensor* ddout) {
ConvTransposeGradRawKernel<T, Context>(ctx,
x,
filter,
dout,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
dx,
dfilter);
}
template <typename T, typename Context>
void DepthwiseConv2dTransposeGradKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const DenseTensor& dout,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* dx,
DenseTensor* dfilter) {
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_format);
DenseTensor filter_ = filter;
if (!dx && !dfilter) {
return;
}
std::vector<int> paddings_ = paddings;
std::vector<int> dilations_ = dilations;
auto x_dims = x.dims();
auto filter_dims = filter_.dims();
DDim in_data_dims;
if (data_layout != DataLayout::kNHWC) {
in_data_dims = slice_ddim(x_dims, 2, x_dims.size());
} else {
in_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1);
}
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
if (dx) {
paddle::operators::math::DepthwiseConvFunctor<Context, T> depthwiseConv;
depthwiseConv(ctx,
dout,
filter_,
strides,
std::vector<int>{
paddings_[0], paddings_[2], paddings_[1], paddings_[3]},
dilations_,
dx,
data_layout);
}
if (dfilter) {
funcs::SetConstant<Context, T> set_zero;
ctx.template Alloc<T>(dfilter);
set_zero(ctx, dfilter, static_cast<T>(0));
paddle::operators::math::DepthwiseConvFilterGradFunctor<Context, T>
depthwiseConvFilterGrad;
depthwiseConvFilterGrad(
ctx,
dout,
x,
strides,
std::vector<int>{
paddings_[0], paddings_[2], paddings_[1], paddings_[3]},
dilations_,
dfilter,
data_layout);
}
}
} // namespace phi
PD_REGISTER_KERNEL(conv2d_transpose_grad,
GPU,
ALL_LAYOUT,
phi::Conv2dTransposeGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(conv2d_transpose_grad_grad,
GPU,
ALL_LAYOUT,
phi::Conv2dTransposeDoubleGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(conv3d_transpose_grad,
GPU,
ALL_LAYOUT,
phi::Conv3dTransposeGradKernel,
float,
double) {}
PD_REGISTER_KERNEL(depthwise_conv2d_transpose_grad,
GPU,
ALL_LAYOUT,
phi::DepthwiseConv2dTransposeGradKernel,
float,
double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/conv_transpose_kernel.h"
#include "paddle/phi/kernels/impl/conv_transpose_kernel_impl.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/gpu/depthwise_conv.h"
namespace phi {
template <typename T, typename Context>
void DepthwiseConv2dTransposeKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
const DataLayout data_layout =
paddle::framework::StringToDataLayout(data_format);
DenseTensor filter_ = filter;
ctx.template Alloc<T>(out);
PADDLE_ENFORCE_EQ(
groups,
filter_.dims()[0],
errors::InvalidArgument(
"groups should be error to the 1st dimension of filter_. But "
"received groups is %d and filter dimension[0] is %d",
groups,
filter_.dims()[0]));
std::vector<int> paddings_ = paddings;
std::vector<int> dilations_ = dilations;
for (auto v : dilations_) {
PADDLE_ENFORCE_EQ(
v,
1,
errors::InvalidArgument("dilations should be 1 in depthwise conv. "
"But received dilations is %d",
v));
}
auto x_dims = x.dims();
auto filter_dims = filter_.dims();
DDim in_data_dims;
if (data_layout != DataLayout::kNHWC) {
in_data_dims = slice_ddim(x_dims, 2, x_dims.size());
} else {
in_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1);
}
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
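  // Resolve padding_algorithm ("SAME"/"VALID") into explicit per-side
  // paddings and adjusted dilations for the spatial dimensions.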
UpdatePaddingAndDilation(
&paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize);
funcs::SetConstant<Context, T> set_zero;
set_zero(ctx, out, static_cast<T>(0));
paddle::operators::math::DepthwiseConvInputGradFunctor<Context, T>
depthwiseConvInputGrad;
depthwiseConvInputGrad(
ctx,
*out,
filter,
x,
strides,
std::vector<int>{paddings_[0], paddings_[2], paddings_[1], paddings_[3]},
dilations_,
out,
data_layout);
}
} // namespace phi
PD_REGISTER_KERNEL(conv2d_transpose,
GPU,
ALL_LAYOUT,
phi::Conv2dTransposeKernel,
float,
double) {}
PD_REGISTER_KERNEL(conv3d_transpose,
GPU,
ALL_LAYOUT,
phi::Conv3dTransposeKernel,
float,
double) {}
PD_REGISTER_KERNEL(depthwise_conv2d_transpose,
GPU,
ALL_LAYOUT,
phi::DepthwiseConv2dTransposeKernel,
float,
double) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/conv_transpose_kernel.h"
#include <algorithm>
#include "paddle/phi/backends/dynload/cudnn.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/conv_util.h"
#include "paddle/phi/kernels/funcs/padding.h"
#include "paddle/phi/kernels/funcs/slice.h"
#include "paddle/phi/kernels/transpose_kernel.h"
#ifdef PADDLE_WITH_HIP
#include "paddle/fluid/operators/conv_miopen_helper.h"
#include "paddle/fluid/platform/device/gpu/rocm/miopen_helper.h"
#else
#include "paddle/fluid/operators/conv_cudnn_helper.h"
#include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h"
#endif
namespace phi {
using GPUDNNDataLayout = paddle::platform::DataLayout;
template <typename T, typename Context>
void ConvTransposeRawGPUDNNKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
std::vector<int> paddings_ = paddings;
std::vector<int> dilations_ =
dilations; // cudnn v5 does not support dilations
const T* filter_data = filter.data<T>();
const GPUDNNDataLayout data_layout =
(data_format != "NHWC" ? GPUDNNDataLayout::kNCHW
: GPUDNNDataLayout::kNHWC);
std::vector<int> x_vec = vectorize<int>(x.dims());
std::vector<int> out_vec = vectorize<int>(out->dims());
// if channel_last, transpose to channel_first
DenseTensor x_transpose;
if (data_layout == GPUDNNDataLayout::kNHWC) {
if (strides.size() == 2U) {
std::vector<int> axis = {0, 3, 1, 2};
for (size_t i = 0; i < axis.size(); ++i) {
x_vec[i] = x.dims()[axis[i]];
out_vec[i] = out->dims()[axis[i]];
}
x_transpose = Transpose<T, Context>(ctx, x, axis);
} else if (strides.size() == 3U) {
std::vector<int> axis = {0, 4, 1, 2, 3};
for (size_t i = 0; i < axis.size(); ++i) {
x_vec[i] = x.dims()[axis[i]];
out_vec[i] = out->dims()[axis[i]];
}
x_transpose = Transpose<T, Context>(ctx, x, axis);
}
} else {
x_transpose = x;
}
// update padding and dilation
auto x_dims = x_transpose.dims();
auto filter_dims = filter.dims();
DDim x_data_dims;
x_data_dims = slice_ddim(x_dims, 2, x_dims.size());
DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
std::vector<int> ksize = vectorize<int>(filter_data_dims);
UpdatePaddingAndDilation(
&paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize);
int data_dim = strides.size(); // 2d or 3d
bool is_sys_pad = funcs::IsSymmetricPadding(paddings_, data_dim);
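  // cuDNN only accepts one padding value per spatial dimension. If the
  // requested padding is asymmetric, pad the input explicitly up to the
  // common (smaller) padding and crop the enlarged output afterwards.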
std::vector<int> x_pad(x_dims.size() * 2, 0);
DenseTensor transformed_x;
std::vector<int> padding_common(data_dim, 0);
if (!is_sys_pad) {
std::vector<int> padding_diff(data_dim);
std::vector<int> new_x_shape_vec(data_dim + 2);
new_x_shape_vec[0] = x_dims[0];
new_x_shape_vec[1] = x_dims[1];
for (size_t i = 0; i < data_dim; ++i) {
padding_diff[i] = std::abs(paddings_[2 * i] - paddings_[2 * i + 1]);
padding_common[i] = std::min(paddings_[2 * i], paddings_[2 * i + 1]);
new_x_shape_vec[i + 2] = x_dims[i + 2] + padding_diff[i];
x_pad[2 * i + 4] = paddings_[2 * i] - padding_common[i];
x_pad[2 * i + 4 + 1] = paddings_[2 * i + 1] - padding_common[i];
}
DDim new_x_shape(make_ddim(new_x_shape_vec));
transformed_x.Resize(new_x_shape);
ctx.template Alloc<T>(&transformed_x);
const int rank = x_dims.size();
T pad_value(0.0);
switch (rank) {
case 4: {
funcs::PadFunction<Context, T, 4>(
ctx, x_pad, x_transpose, pad_value, &transformed_x);
} break;
case 5: {
funcs::PadFunction<Context, T, 5>(
ctx, x_pad, x_transpose, pad_value, &transformed_x);
} break;
default:
PADDLE_THROW(errors::InvalidArgument(
"Op(ConvTranspose) only supports 4-D or 5-D x DenseTensor."));
}
} else {
transformed_x = x_transpose;
if (paddings_.size() == data_dim) {
for (size_t i = 0; i < data_dim; ++i) {
padding_common[i] = paddings_[i];
}
} else {
for (size_t i = 0; i < data_dim; ++i) {
padding_common[i] = paddings_[2 * i];
}
}
}
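  // Slice window used to crop the raw cuDNN output back to the requested
  // shape when the input had to be padded explicitly above.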
std::vector<int64_t> starts(data_dim, 0);
std::vector<int64_t> ends(data_dim, 0);
std::vector<int64_t> axes(data_dim, 0);
for (size_t i = 0; i < data_dim; ++i) {
starts[i] = x_pad[2 * i + 4] * (strides[i] + 1);
ends[i] = starts[i] + out_vec[i + 2];
axes[i] = i + 2;
}
const T* x_data = transformed_x.data<T>();
x_vec = vectorize<int>(transformed_x.dims());
std::vector<int> transformed_out_vec = out_vec;
for (size_t i = 0; i < data_dim; ++i) {
transformed_out_vec[i + 2] =
out_vec[i + 2] + (x_pad[2 * i + 4] + x_pad[2 * i + 5]) * strides[i] -
2 * padding_common[i] + paddings_[2 * i] + paddings_[2 * i + 1];
}
DenseTensor transformed_out;
if (!is_sys_pad) {
transformed_out.Resize(make_ddim(transformed_out_vec));
ctx.template Alloc<T>(&transformed_out);
} else {
ctx.template Alloc<T>(out);
transformed_out.ShareDataWith(*out);
transformed_out.Resize(make_ddim(transformed_out_vec));
}
T* transformed_out_data = transformed_out.data<T>();
GPUDNNDataLayout layout;
int iwo_groups = groups;
int c_groups = 1;
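  // cuDNN 7+ and MIOpen support grouped convolution natively through the
  // convolution descriptor, so fold groups into c_groups and run one call
  // instead of looping over groups manually.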
#if defined(PADDLE_WITH_HIP) || CUDNN_VERSION_MIN(7, 0, 1)
iwo_groups = 1;
c_groups = groups;
groups = 1;
#endif
if (strides.size() == 2U) {
layout = GPUDNNDataLayout::kNCHW;
} else {
layout = GPUDNNDataLayout::kNCDHW;
}
size_t workspace_size = 0;
#ifdef PADDLE_WITH_HIP
miopenConvBwdDataAlgorithm_t algo{};
#else
cudnnConvolutionBwdDataAlgo_t algo{};
#endif
// ------------------- cudnn conv algorithm ---------------------
auto handle = ctx.cudnn_handle();
auto layout_tensor = paddle::platform::GetCudnnTensorFormat(layout);
bool deterministic = FLAGS_cudnn_deterministic;
auto dtype = paddle::platform::CudnnDataType<T>::type;
// ------------------- cudnn descriptors ---------------------
paddle::operators::ConvArgs args{&transformed_out,
&filter,
&transformed_x,
strides,
padding_common,
dilations_,
dtype};
args.handle = handle;
args.idesc.set(transformed_out, iwo_groups);
args.wdesc.set(filter, layout_tensor, iwo_groups);
args.odesc.set(transformed_x, iwo_groups);
args.cdesc.set(dtype,
padding_common,
strides,
dilations_,
paddle::platform::AllowTF32Cudnn(),
c_groups);
#ifdef PADDLE_WITH_HIP
using search =
paddle::operators::SearchAlgorithm<miopenConvBwdDataAlgorithm_t>;
workspace_size = std::max(workspace_size, search::GetWorkspaceSize(args));
algo = search::Find<T>(args, false, deterministic, workspace_size, ctx);
#else
using search =
paddle::operators::SearchAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>;
algo = search::Find<T>(args, false, deterministic, ctx);
workspace_size =
std::max(workspace_size, search::GetWorkspaceSize(args, algo));
#endif
// ------------------- cudnn conv transpose forward ---------------------
int x_offset = transformed_x.numel() / transformed_x.dims()[0] / groups;
int out_offset = transformed_out.numel() / transformed_out.dims()[0] / groups;
int filter_offset = filter.numel() / groups;
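  // Per-group element offsets for the manual group loop; on the descriptor
  // path above, groups was reset to 1 so the loop body runs exactly once.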
paddle::operators::ScalingParamType<T> alpha = 1.0f;
paddle::operators::ScalingParamType<T> beta = 0.0f;
auto workspace_handle = ctx.cudnn_workspace_handle();
for (int g = 0; g < groups; g++) {
#ifdef PADDLE_WITH_HIP
auto cudnn_func = [&](void* cudnn_workspace) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenConvolutionBackwardData(
handle,
&alpha,
args.odesc.desc(),
x_data + x_offset * g,
args.wdesc.desc(),
filter_data + filter_offset * g,
args.cdesc.desc(),
algo,
&beta,
args.idesc.desc(),
transformed_out_data + out_offset * g,
cudnn_workspace,
workspace_size));
};
#else // PADDLE_WITH_HIP
auto cudnn_func = [&](void* cudnn_workspace) {
PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnConvolutionBackwardData(
handle,
&alpha,
args.wdesc.desc(),
filter_data + filter_offset * g,
args.odesc.desc(),
x_data + x_offset * g,
args.cdesc.desc(),
algo,
cudnn_workspace,
workspace_size,
&beta,
args.idesc.desc(),
transformed_out_data + out_offset * g));
};
#endif // PADDLE_WITH_HIP
workspace_handle.RunFunc(cudnn_func, workspace_size);
}
if (!is_sys_pad && strides.size() == 2U) {
funcs::Slice<Context, T, 4>(ctx, &transformed_out, out, starts, ends, axes);
} else if (!is_sys_pad && strides.size() == 3U) {
funcs::Slice<Context, T, 5>(ctx, &transformed_out, out, starts, ends, axes);
}
if (data_layout == GPUDNNDataLayout::kNHWC) {
DenseTensor out_transpose;
DenseTensor out_nchw;
out_nchw.ShareDataWith(*out);
out_nchw.Resize(make_ddim(out_vec));
if (strides.size() == 2U) {
out_transpose = Transpose<T, Context>(ctx, out_nchw, {0, 2, 3, 1});
} else if (strides.size() == 3U) {
out_transpose = Transpose<T, Context>(ctx, out_nchw, {0, 2, 3, 4, 1});
}
*out = out_transpose;
}
}
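// The wrappers below adapt the full conv_transpose kernel signature.
// output_size and output_padding only influence the output shape, which is
// assumed to be already resolved into out->dims() by InferMeta, so the raw
// kernel does not consume them.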
template <typename T, typename Context>
void Conv2dTransposeGPUDNNKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
ConvTransposeRawGPUDNNKernel<T, Context>(ctx,
x,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
template <typename T, typename Context>
void Conv3dTransposeGPUDNNKernel(const Context& ctx,
const DenseTensor& x,
const DenseTensor& filter,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::vector<int>& output_padding,
const std::vector<int>& output_size,
const std::string& padding_algorithm,
int groups,
const std::vector<int>& dilations,
const std::string& data_format,
DenseTensor* out) {
ConvTransposeRawGPUDNNKernel<T, Context>(ctx,
x,
filter,
strides,
paddings,
padding_algorithm,
groups,
dilations,
data_format,
out);
}
} // namespace phi
using float16 = phi::dtype::float16;
#ifdef PADDLE_WITH_HIP
// MIOPEN does not support double
PD_REGISTER_KERNEL(conv2d_transpose,
GPUDNN,
ALL_LAYOUT,
phi::Conv2dTransposeGPUDNNKernel,
float,
float16) {}
PD_REGISTER_KERNEL(conv3d_transpose,
GPUDNN,
ALL_LAYOUT,
phi::Conv3dTransposeGPUDNNKernel,
float,
float16) {}
#else
PD_REGISTER_KERNEL(conv2d_transpose,
GPUDNN,
ALL_LAYOUT,
phi::Conv2dTransposeGPUDNNKernel,
float,
double,
float16) {}
PD_REGISTER_KERNEL(conv3d_transpose,
GPUDNN,
ALL_LAYOUT,
phi::Conv3dTransposeGPUDNNKernel,
float,
double,
float16) {}
#endif