Unverified · Commit 43a64a76 · authored by qingqing01, committed by GitHub

Merge pull request #5118 from chengduoZH/Add_deconv3d_op

Add 3D convolution transposed operator.
@@ -69,6 +69,13 @@ function(op_library TARGET)
     file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n")
   endif()
+  # conv_transpose_op contains several operators
+  if ("${TARGET}" STREQUAL "conv_transpose_op")
+    set(pybind_flag 1)
+    # It's enough to just add one operator to pybind
+    file(APPEND ${pybind_file} "USE_OP(conv2d_transpose);\n")
+  endif()
   # pool_cudnn_op contains several operators
   if ("${TARGET}" STREQUAL "pool_cudnn_op")
     set(pybind_flag 1)
@@ -139,6 +146,8 @@ set(DEPS_OPS
     sum_op
     pool_op
     pool_with_index_op
+    lstm_op
+    conv_transpose_op
     nccl_op
     sequence_conv_op
     sequence_pool_op
@@ -159,10 +168,12 @@ endif()
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
+op_library(conv_transpose_op DEPS vol2col)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc
            DEPS net_op tensor_array)
 op_library(recurrent_op SRCS recurrent_op.cc DEPS executor)
 list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
 foreach(src ${GENERAL_OPS})
   op_library(${src})
...
@@ -12,7 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/operators/conv2d_transpose_op.h"
+#include "paddle/operators/conv_transpose_op.h"
 namespace paddle {
 namespace operators {
@@ -38,13 +38,13 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(conv2d_transpose_cudnn, ops::Conv2DTransposeOp,
+REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
             ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad,
-            ops::Conv2DTransposeOpGrad);
+            ops::ConvTransposeOpGrad);
 REGISTER_OP_CPU_KERNEL(
     conv2d_transpose_cudnn,
-    ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>);
+    ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(
     conv2d_transpose_cudnn_grad,
-    ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>);
+    ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
@@ -15,7 +15,7 @@
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/memory/memory.h"
-#include "paddle/operators/conv2d_transpose_op.h"
+#include "paddle/operators/conv_transpose_op.h"
 #include "paddle/platform/assert.h"
 #include "paddle/platform/cudnn_helper.h"
...
@@ -12,18 +12,18 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/operators/conv2d_transpose_op.h"
+#include "paddle/operators/conv_transpose_op.h"
 namespace paddle {
 namespace operators {
-void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
+void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
   PADDLE_ENFORCE(ctx->HasInput("Input"),
-                 "Input(Input) of Conv2DTransposeOp should not be null.");
+                 "Input(Input) of ConvTransposeOp should not be null.");
   PADDLE_ENFORCE(ctx->HasInput("Filter"),
-                 "Input(Filter) of Conv2DTransposeOp should not be null.");
+                 "Input(Filter) of ConvTransposeOp should not be null.");
   PADDLE_ENFORCE(ctx->HasOutput("Output"),
-                 "Output(Output) of Conv2DTransposeOp should not be null.");
+                 "Output(Output) of ConvTransposeOp should not be null.");
   auto in_dims = ctx->GetInputDim("Input");
   auto filter_dims = ctx->GetInputDim("Filter");
@@ -35,17 +35,27 @@ void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
                    "No Padding allowed in conv transpose op.");
   }
-  PADDLE_ENFORCE_EQ(in_dims.size(), 4,
-                    "Conv2DTransposeOp input should be 4-D tensor.");
-  PADDLE_ENFORCE_EQ(filter_dims.size(), 4,
-                    "Conv2DTransposeOp filter should be 4-D tensor.");
+  PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
+                 "ConvTransposeOp input should be 4-D or 5-D tensor.");
+  PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(),
+                    "ConvTransposeOp input dimension and filter dimension "
+                    "should be the same.");
+  PADDLE_ENFORCE(in_dims.size() - strides.size() == 2U,
+                 "ConvTransposeOp input dimension and strides dimension should "
+                 "be consistent.");
+  PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
+                    "ConvTransposeOp paddings dimension and Conv strides "
+                    "dimension should be the same.");
   PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
-                    "input and kernel input dimension should be equal.");
+                    "In ConvTransposeOp, the input channel should be the same "
+                    "as the number of filters.");
-  auto output_height = (in_dims[2] - 1) * strides[0] + filter_dims[2];
-  auto output_width = (in_dims[3] - 1) * strides[1] + filter_dims[3];
-  ctx->SetOutputDim("Output",
-                    {in_dims[0], filter_dims[1], output_height, output_width});
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[1]});
+  for (size_t i = 0; i < paddings.size(); ++i) {
+    output_shape.push_back((in_dims[i + 2] - 1) * strides[i] +
+                           filter_dims[i + 2]);
+  }
+  ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
 }
 Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
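For reference, a minimal Python sketch of the output-shape rule the new InferShape applies. It is illustrative only (the helper name is not from the codebase) and assumes the operator's current zero-padding constraint:

```python
def conv_transpose_output_shape(in_dims, filter_dims, strides):
    # in_dims: (N, C, H, W) or (N, C, D, H, W); filter_dims: (C, M, k_h, k_w) or (C, M, k_d, k_h, k_w)
    out = [in_dims[0], filter_dims[1]]  # batch size, output channels
    for i, s in enumerate(strides):
        # Each spatial dim: (in - 1) * stride + kernel, with padding fixed at 0.
        out.append((in_dims[i + 2] - 1) * s + filter_dims[i + 2])
    return out

# Matches the unit tests below: (5 - 1) * 1 + 3 = 7 per spatial dim.
assert conv_transpose_output_shape([2, 3, 5, 5], [3, 6, 3, 3], [1, 1]) == [2, 6, 7, 7]
assert conv_transpose_output_shape([2, 3, 5, 5, 5], [3, 6, 3, 3, 3], [1, 1, 1]) == [2, 6, 7, 7, 7]
```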
@@ -54,37 +64,109 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
   AddInput(
       "Input",
       "(Tensor) The input tensor of convolution transpose operator. "
-      "The format of input tensor is NCHW, where N is batch size, C is the "
-      "number of input channels, H is the height of the image, and "
-      "W is the width of the image.");
+      "The format of input tensor is NCHW. Where N is batch size, C is the "
+      "number of input channels, H is the height of the feature, and "
+      "W is the width of the feature.");
   AddInput("Filter",
-           "(Tensor) The filter tensor of convolution transpose operator."
+           "(Tensor) The filter tensor of convolution transpose operator. "
            "The format of the filter tensor is CMHW, where C is the number of "
            "output image channels, M is the number of input image channels, "
            "H is the height of the filter, and W is the width of the filter. "
            "We enforce groups number == 1 and padding == 0 in "
            "the convolution transpose scenario.");
   AddOutput("Output",
-            "(Tensor) The output tensor of convolution transpose operator."
+            "(Tensor) The output tensor of convolution transpose operator. "
             "The format of output tensor is also NCHW.");
-  AddAttr<std::vector<int>>("strides",
-                            "strides of convolution transpose operator.")
+  AddAttr<std::vector<int>>(
+      "strides",
+      "(vector default:{1, 1}), strides of convolution transpose operator.")
       .SetDefault({1, 1});
-  AddAttr<std::vector<int>>("paddings",
-                            "paddings of convolution transpose operator.")
+  AddAttr<std::vector<int>>(
+      "paddings",
+      "(vector default:{0, 0}), paddings of convolution transpose operator.")
       .SetDefault({0, 0});
   AddComment(R"DOC(
-Convolution Transpose Operator.
-The convolution transpose operation calculates the output based on the input,
-filter, strides, paddings, and groups parameters. The size of each dimension
-of the parameters is checked in the infer-shape method.
+Convolution2D Transpose Operator.
+The convolution transpose operation calculates the output based on the input, filter
+and strides, paddings, groups parameters. The size of each dimension of the
+parameters is checked in the infer-shape.
+Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch
+size, C is the number of channels, H is the height of the feature, and
+W is the width of the feature. Parameters(ksize, strides, paddings) are two elements.
+These two elements represent height and width, respectively.
+The input(X) size and output(Out) size may be different.
+Example:
+  Input:
+       Input shape: (N, C_in, H_in, W_in)
+       Filter shape: (C_in, C_out, H_f, W_f)
+  Output:
+       Output shape: (N, C_out, H_out, W_out)
+  where
+       H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0];
+       W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1];
+)DOC");
+}
+Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
+    framework::OpProto* proto, framework::OpAttrChecker* op_checker)
+    : OpProtoAndCheckerMaker(proto, op_checker) {
+  AddInput("Input",
+           "(Tensor) The input tensor of convolution transpose operator."
+           "The format of input tensor is NCDHW. Where N is batch size, C is "
+           "the number of channels, D is the depth of the feature, H is the "
+           "height of the feature, and "
+           "W is the width of the feature.");
+  AddInput("Filter",
+           "(Tensor) The filter tensor of convolution transpose operator."
+           "The format of the filter tensor is CMDHW, where C is the number of "
+           "output image channels, M is the number of input image channels, D "
+           "is the depth of the filter, H is the height of the filter, and "
+           "W is the width of the filter."
+           "We enforce groups number == 1 and padding == 0 in "
+           "the convolution3d transpose scenario.");
+  AddOutput("Output",
+            "(Tensor) The output tensor of convolution transpose operator."
+            "The format of output tensor is also NCDHW."
+            "Where N is batch size, C is "
+            "the number of channels, D is the depth of the feature, H is the "
+            "height of the feature, and W is the width of the feature.");
+  AddAttr<std::vector<int>>(
+      "strides",
+      "(vector default:{1, 1, 1}), strides of convolution transpose operator.")
+      .SetDefault({1, 1, 1});
+  AddAttr<std::vector<int>>(
+      "paddings",
+      "(vector default:{0, 0, 0}), paddings of convolution transpose operator.")
+      .SetDefault({0, 0, 0});
+  AddComment(R"DOC(
+Convolution3D Transpose Operator.
+The convolution transpose operation calculates the output based on the input, filter
+and strides, paddings, groups parameters. The size of each dimension of the
+parameters is checked in the infer-shape.
+Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch
+size, C is the number of channels, D is the depth of the feature,
+H is the height of the feature, and W is the width of the feature.
+Parameters(ksize, strides, paddings) are three elements.
+These three elements represent depth, height and width, respectively.
+The input(X) size and output(Out) size may be different.
+Example:
+  Input:
+       Input shape: (N, C_in, D_in, H_in, W_in)
+       Filter shape: (C_in, C_out, D_f, H_f, W_f)
+  Output:
+       Output shape: (N, C_out, D_out, H_out, W_out)
+  where
+       D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0];
+       H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1];
+       W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + filter_size[2];
 )DOC");
 }
-void Conv2DTransposeOpGrad::InferShape(
-    framework::InferShapeContext* ctx) const {
+void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   auto in_dims = ctx->GetInputDim("Input");
   auto filter_dims = ctx->GetInputDim("Filter");
   if (ctx->HasOutput(framework::GradVarName("Input"))) {
@@ -99,13 +181,23 @@ void Conv2DTransposeOpGrad::InferShape(
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP(conv2d_transpose, ops::Conv2DTransposeOp,
-            ops::Conv2DTransposeOpMaker, conv2d_transpose_grad,
-            ops::Conv2DTransposeOpGrad);
+REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker,
+            conv2d_transpose_grad, ops::ConvTransposeOpGrad);
 REGISTER_OP_CPU_KERNEL(
     conv2d_transpose,
-    ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>);
+    ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(
     conv2d_transpose_grad,
-    ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>);
+    ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker,
+            conv3d_transpose_grad, ops::ConvTransposeOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    conv3d_transpose,
+    ops::GemmConvTransposeKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    conv3d_transpose_grad,
+    ops::GemmConvTransposeGradKernel<paddle::platform::CPUPlace, float>);
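A quick numeric check of the H_out/W_out/D_out formulas in the two DOC blocks above (a sketch only; the zero-padding case matches the unit tests further down):

```python
def out_size(in_size, stride, padding, k):
    # (in - 1) * stride - 2 * padding + filter_size, per the DOC comment
    return (in_size - 1) * stride - 2 * padding + k

print(out_size(5, 1, 0, 3))  # 7: the 5x5 (or 5x5x5) test inputs grow to 7 per dim
print(out_size(7, 2, 1, 3))  # 13: with stride 2 and padding 1 on a 7-wide input
```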
@@ -12,13 +12,20 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/operators/conv2d_transpose_op.h"
+#include "paddle/operators/conv_transpose_op.h"
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
     conv2d_transpose,
-    ops::GemmConv2DTransposeKernel<paddle::platform::GPUPlace, float>);
+    ops::GemmConvTransposeKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(
     conv2d_transpose_grad,
-    ops::GemmConv2DTransposeGradKernel<paddle::platform::GPUPlace, float>);
+    ops::GemmConvTransposeGradKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    conv3d_transpose,
+    ops::GemmConvTransposeKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    conv3d_transpose_grad,
+    ops::GemmConvTransposeGradKernel<paddle::platform::GPUPlace, float>);
@@ -18,6 +18,7 @@ limitations under the License. */
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/math/im2col.h"
 #include "paddle/operators/math/math_function.h"
+#include "paddle/operators/math/vol2col.h"
 namespace paddle {
 namespace operators {
@@ -33,7 +34,13 @@ class Conv2DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
                          framework::OpAttrChecker* op_checker);
 };
-class Conv2DTransposeOp : public framework::OperatorWithKernel {
+class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  Conv3DTransposeOpMaker(framework::OpProto* proto,
+                         framework::OpAttrChecker* op_checker);
+};
+class ConvTransposeOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -41,7 +48,7 @@ class Conv2DTransposeOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* ctx) const override;
 };
-class Conv2DTransposeOpGrad : public framework::OperatorWithKernel {
+class ConvTransposeOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -50,41 +57,44 @@ class Conv2DTransposeOpGrad : public framework::OperatorWithKernel {
 };
 template <typename Place, typename T>
-class GemmConv2DTransposeKernel : public framework::OpKernel<T> {
+class GemmConvTransposeKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     const Tensor* input = context.Input<Tensor>("Input");
     // The filter will be reshaped, so it should not be constant pointer
     Tensor filter = *context.Input<Tensor>("Filter");
     Tensor* output = context.Output<Tensor>("Output");
     std::vector<int> strides = context.Attr<std::vector<int>>("strides");
     // TODO(Zhuoyuan): Paddings can be added in future.
-    // groups will alway be disabled in conv2d_transpose.
-    const int batch_size = input->dims()[0];
-    const int m = input->dims()[1];
-    const int h = input->dims()[2];
-    const int w = input->dims()[3];
-    const int k_h = filter.dims()[2];
-    const int k_w = filter.dims()[3];
-    const int c = output->dims()[1];  // output channels
-    const int o_h = output->dims()[2];
-    const int o_w = output->dims()[3];
-    paddle::operators::math::Col2ImFunctor<
-        paddle::operators::math::ColFormat::kCFO, Place, T>
-        col2im;
-    // use col_shape in the im2col and col2im calculation
-    DDim col_shape = {c, k_h, k_w, h, w};
+    // groups will alway be disabled in conv2dtranspose.
+    const int batch_size = static_cast<int>(input->dims()[0]);
+    // input_shape_vec: {h, w} or {d, h, w}
+    std::vector<int64_t> input_shape_vec = framework::vectorize(input->dims());
+    input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2);
+    // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
+    std::vector<int64_t> filter_shape_vec = framework::vectorize(filter.dims());
+    filter_shape_vec.erase(filter_shape_vec.begin(),
+                           filter_shape_vec.begin() + 2);
+    // use col_shape in the im2col and col2im (or vol2col and col2vol)
+    // calculation
+    // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
+    std::vector<int64_t> col_shape_vec;
+    col_shape_vec.push_back(output->dims()[1]);
+    col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(),
+                         filter_shape_vec.end());
+    col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(),
+                         input_shape_vec.end());
+    DDim col_shape(framework::make_ddim(col_shape_vec));
     // use col_matrix_shape in the gemm calculation
-    DDim col_matrix_shape = {c * k_h * k_w, h * w};
+    // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
+    DDim col_matrix_shape =
+        framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
     Tensor col;
     col.mutable_data<T>(col_shape, context.GetPlace());
@@ -95,160 +105,189 @@ class GemmConv2DTransposeKernel : public framework::OpKernel<T> {
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
-    DDim output_shape = {c, o_h, o_w};
-    DDim input_matrix_shape = {m, h * w};
-    DDim filter_matrix_shape = {m, c * k_h * k_w};
-    filter.Resize(filter_matrix_shape);
-    // convolution transpose: gemm + col2im (similar to conv-backward on input)
+    // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
+    DDim output_shape =
+        framework::slice_ddim(output->dims(), 1, output->dims().size());
+    // input matrix size: (m, h * w) or (m, d * h * w)
+    DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]};
+    // filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
+    DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]};
+    filter.Resize(filter_matrix_shape);
     output->mutable_data<T>(context.GetPlace());
-    auto t = framework::EigenVector<T>::Flatten(*output);
-    t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
+    math::SetConstant<Place, T> set_zero;
+    set_zero(context.device_context(), output, static_cast<T>(0));
+    // convolution transpose: gemm + col2im or col2vol (similar to conv-backward
+    // on input)
     for (int i = 0; i < batch_size; i++) {
-      // batch with size (M, h * w)
+      // batch with size (m, h * w) or (m, d * h * w)
       Tensor input_batch = input->Slice(i, i + 1).Resize(input_matrix_shape);
-      // filter size: (M, c * k_h * k_w)
-      // output size: (c, o_h, o_w)
+      // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
       Tensor output_batch = output->Slice(i, i + 1).Resize(output_shape);
       // col_matrix = filter * input_batch
-      // of shape (c * k_h * k_w, h * w)
+      // of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
       math::matmul<Place, T>(context.device_context(), filter, true,
-                             input_batch, false, T(1.0), &col_matrix, T(0.0));
+                             input_batch, false, static_cast<T>(1.0),
+                             &col_matrix, static_cast<T>(0.0));
+      if (filter_shape_vec.size() == 2) {
+        // col2im: col_matrix -> dy
+        // from (c * k_h * k_w, h * w) to (c, o_h, o_w)
+        math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
        col2im(context.device_context(), output_batch, col, strides[0],
               strides[1], 0, 0, 0, 0);
+      } else if (filter_shape_vec.size() == 3) {
+        // col2vol: col_matrix -> dy
+        // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
+        math::Col2VolFunctor<Place, T> col2vol;
+        col2vol(context.device_context(), output_batch, col, strides[0],
+                strides[1], strides[2], 0, 0, 0);
+      }
     }
   }
 };
 template <typename Place, typename T>
-class GemmConv2DTransposeGradKernel : public framework::OpKernel<T> {
+class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     const Tensor* input = context.Input<Tensor>("Input");
     const Tensor* output_grad =
         context.Input<Tensor>(framework::GradVarName("Output"));
     // For filter, we do not use const pointer b/c we will do reshape,
     // but we should avoid modifying its value.
     Tensor filter = *context.Input<Tensor>("Filter");
     Tensor* input_grad =
         context.Output<Tensor>(framework::GradVarName("Input"));
     Tensor* filter_grad =
         context.Output<Tensor>(framework::GradVarName("Filter"));
+    if ((!input_grad) && (!filter_grad)) return;
     std::vector<int> strides = context.Attr<std::vector<int>>("strides");
     // Actually, no paddings and groups allowed in conv transpose.
     std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
-    const int batch_size = input->dims()[0];
-    const int m = input->dims()[1];
-    const int h = input->dims()[2];
-    const int w = input->dims()[3];
-    const int k_h = filter.dims()[2];
-    const int k_w = filter.dims()[3];
-    const int c = output_grad->dims()[1];  // output channels
-    const int o_h = output_grad->dims()[2];
-    const int o_w = output_grad->dims()[3];
-    // Only im2col functor required for bp to get to the right shape
-    paddle::operators::math::Im2ColFunctor<
-        paddle::operators::math::ColFormat::kCFO, Place, T>
-        im2col;
-    // use col_shape in the im2col and col2im calculation
-    DDim col_shape = {c, k_h, k_w, h, w};
+    const int batch_size = static_cast<int>(input->dims()[0]);
+    // input_shape_vec: {h, w} or {d, h, w}
+    std::vector<int64_t> input_shape_vec = framework::vectorize(input->dims());
+    input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2);
+    // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w}
+    std::vector<int64_t> filter_shape_vec = framework::vectorize(filter.dims());
+    filter_shape_vec.erase(filter_shape_vec.begin(),
+                           filter_shape_vec.begin() + 2);
+    // use col_shape in the im2col and col2im (or vol2col and col2vol)
+    // calculation
+    // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w}
+    std::vector<int64_t> col_shape_vec;
+    col_shape_vec.push_back(output_grad->dims()[1]);
+    col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(),
+                         filter_shape_vec.end());
+    col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(),
+                         input_shape_vec.end());
+    DDim col_shape(framework::make_ddim(col_shape_vec));
     // use col_matrix_shape in the gemm calculation
-    DDim col_matrix_shape_f = {c * h * w, k_h * k_w};
-    Tensor col;
-    col.mutable_data<T>(col_shape, context.GetPlace());
-    // col_matrix shares the same piece of data with col,
-    // but will be reshaped into a two-dimensional matrix shape
-    // to call the matrix multiplication interface.
-    DDim output_shape = {c, o_h, o_w};
-    DDim input_matrix_shape = {m, h * w};
-    DDim filter_matrix_shape = {m, c * k_h * k_w};
+    // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
+    DDim col_matrix_shape =
+        framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
+    // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w)
+    DDim output_shape = framework::slice_ddim(output_grad->dims(), 1,
+                                              output_grad->dims().size());
+    // input matrix size: (m, h * w) or (m, d * h * w)
+    DDim input_matrix_shape = {input->dims()[1], col_matrix_shape[1]};
+    // filter size: (m, c * k_h * k_w) or (m, c * k_d * k_h * k_w)
+    DDim filter_matrix_shape = {input->dims()[1], col_matrix_shape[0]};
     filter.Resize(filter_matrix_shape);
     // convolution transpose grad on input:
     // im2col + gemm (similar to conv-forward)
     // input need to compute gradient
-    if (input_grad) {
+    if (input_grad || filter_grad) {
+      Tensor col;
+      col.mutable_data<T>(col_shape, context.GetPlace());
+      // col_matrix shares the same piece of data with col,
+      // but will be reshaped into a two-dimensional matrix shape
+      // to call the matrix multiplication interface.
       Tensor col_matrix;
       col_matrix.ShareDataWith(col);
-      DDim col_matrix_shape = {c * k_h * k_w, h * w};
       col_matrix.Resize(col_matrix_shape);
-      input_grad->mutable_data<T>(context.GetPlace());
-      auto t = framework::EigenVector<T>::Flatten(*input_grad);
-      t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
+      Tensor filter_grad_;
+      math::SetConstant<Place, T> set_zero;
+      if (input_grad) {
+        input_grad->mutable_data<T>(context.GetPlace());
+        set_zero(context.device_context(), input_grad, static_cast<T>(0));
+      }
+      if (filter_grad) {  // filter size (m, c, k_h, k_w)
+        filter_grad->mutable_data<T>(context.GetPlace());
+        set_zero(context.device_context(), filter_grad, static_cast<T>(0));
+        filter_grad_ = *filter_grad;
+        filter_grad_.Resize(filter_matrix_shape);
+      }
       for (int i = 0; i < batch_size; i++) {
         // batch with size (c, o_h * o_w)
         Tensor output_grad_batch =
             output_grad->Slice(i, i + 1).Resize(output_shape);
-        // filter of size (m, c * k_h * k_w)
+        if (filter_shape_vec.size() == 2) {
+          // im2col: dy -> col matrix
+          // from (c, o_h, o_w) to (c * k_h * k_w, h * w)
+          math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
+          im2col(context.device_context(), output_grad_batch, col, strides[0],
+                 strides[1], paddings[0], paddings[0], paddings[1],
+                 paddings[1]);
+        } else if (filter_shape_vec.size() == 3) {
+          // vol2col: dy -> col_matrix
+          // from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w)
+          math::Vol2ColFunctor<Place, T> vol2col;
+          vol2col(context.device_context(), output_grad_batch, col, strides[0],
+                  strides[1], strides[2], paddings[0], paddings[1],
+                  paddings[2]);
+        }
+        if (input_grad) {
          // batch with size (m, h, w)
          Tensor input_grad_batch =
              input_grad->Slice(i, i + 1).Resize(input_matrix_shape);
-          // im2col: dy from (c, o_h, o_w) -> (c * k_h * k_w, h * w)
-          im2col(context.device_context(), output_grad_batch, col, strides[0],
-                 strides[1], paddings[0], paddings[0], paddings[1], paddings[1]);
          // gemm: dx = filter * dy
-          // (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, c, h)
+          // (m, c * k_h * k_w) * (c * k_h * k_w, h * w) -> (m, h * w)
+          // or
+          // (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) ->
+          // (m, d * h * w)
          math::matmul<Place, T>(context.device_context(), filter, false,
-                                 col_matrix, false, T(1.0), &input_grad_batch,
-                                 T(0.0));
+                                 col_matrix, false, static_cast<T>(1.0),
+                                 &input_grad_batch, static_cast<T>(0.0));
+        }
-      }
-    }
-    // filter gradient required
-    if (filter_grad) {
-      Tensor col_matrix_f;
-      col_matrix_f.ShareDataWith(col);
-      DDim col_matrix_shape_f = {c * h * w, k_h * k_w};
-      col_matrix_f.Resize(col_matrix_shape_f);
-      filter_grad->mutable_data<T>(context.GetPlace());
-      Tensor filter_grad_ = *filter_grad;
-      filter_grad_.Resize(filter_matrix_shape);
-      auto t = framework::EigenVector<T>::Flatten(filter_grad_);
-      t.device(context.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
-      for (int i = 0; i < batch_size; ++i) {
-        // batch with size (c, o_h, o_w)
-        Tensor output_grad_batch =
-            output_grad->Slice(i, i + 1).Resize(output_shape);
+        if (filter_grad) {
          // input batch
          Tensor in_batch = input->Slice(i, i + 1).Resize(input_matrix_shape);
-          // im2col: (c * h * w, k_h * k_w)
-          im2col(context.device_context(), output_grad_batch, col, strides[0],
-                 strides[1], paddings[0], paddings[0], paddings[1], paddings[1]);
-          // gemm: d_filter = x * y_grad^T
-          // (m, c * h * w) * (k_h * k_w, c * h * w) -> (m, c, h)
+          // gemm: d_filter = x * dy^T
+          // (m, h * w) * (h * w, c * k_h * k_w) -> (m, c * k_h * k_w)
+          // or
+          // (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) ->
+          // (m, c * k_d * k_h * k_w)
          math::matmul<Place, T>(context.device_context(), in_batch, false,
-                                 col_matrix_f, true, T(1.0), &filter_grad_,
-                                 T(1.0));
+                                 col_matrix, true, static_cast<T>(1.0),
+                                 &filter_grad_, static_cast<T>(1.0));
+        }
       }
     }
   }
 };
 }  // namespace operators
 }  // namespace paddle
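To make the shape bookkeeping in GemmConvTransposeKernel easier to follow, here is a small Python sketch of how col_shape, col_matrix_shape, and the matrix views are derived for the 2-D and 3-D cases. It mirrors the C++ above but is illustrative only; the helper name is not part of the patch:

```python
from math import prod

def gemm_col_shapes(input_dims, filter_dims):
    # input_dims: (N, M, H, W) or (N, M, D, H, W); filter_dims: (M, C, k_h, k_w) or (M, C, k_d, k_h, k_w)
    spatial = list(input_dims[2:])       # {h, w} or {d, h, w}
    kernel = list(filter_dims[2:])       # {k_h, k_w} or {k_d, k_h, k_w}
    m, c = input_dims[1], filter_dims[1]
    col_shape = [c] + kernel + spatial   # (c, k_h, k_w, h, w) or (c, k_d, k_h, k_w, d, h, w)
    # Flatten after the kernel dims, as flatten_to_2d does in the kernel.
    col_matrix_shape = (c * prod(kernel), prod(spatial))
    input_matrix_shape = (m, prod(spatial))       # one batch sample as a matrix
    filter_matrix_shape = (m, c * prod(kernel))   # reshaped filter
    return col_shape, col_matrix_shape, input_matrix_shape, filter_matrix_shape

# 2-D example from the tests: input (2, 3, 5, 5), filter (3, 6, 3, 3)
# -> col_matrix (54, 25), filter matrix (3, 54); filter^T * input_batch fills the
#    col buffer that col2im then scatters into the (6, 7, 7) output slice.
print(gemm_col_shapes((2, 3, 5, 5), (3, 6, 3, 3)))
```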
@@ -58,36 +58,37 @@ class TestConv2dTransposeOp(OpTest):
         print 'check output here for', self.op_type
         self.check_output()
-    def init_test_case(self):
-        self.pad = [0, 0]
-        self.stride = [1, 1]
-        self.dilations = [1, 1]
-        self.input_size = [2, 3, 5, 5]  # NCHW
-        f_c = self.input_size[1]
-        self.filter_size = [f_c, 6, 3, 3]
-    def init_op_type(self):
-        self.op_type = "conv2d_transpose"
     def test_check_grad_no_input(self):
         self.check_grad(
             ['Filter'],
             'Output',
-            max_relative_error=0.05,
+            max_relative_error=0.02,
             no_grad_set=set(['Input']))
     def test_check_grad_no_filter(self):
         self.check_grad(
             ['Input'],
             'Output',
-            max_relative_error=0.05,
+            max_relative_error=0.02,
             no_grad_set=set(['Filter']))
     def test_check_grad(self):
         self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.05)
+            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+    def init_test_case(self):
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+    def init_op_type(self):
+        self.op_type = "conv2d_transpose"
+# ------------ test_cudnn ------------
 class TestCudnn(TestConv2dTransposeOp):
     def init_op_type(self):
         self.op_type = "conv2d_transpose_cudnn"
...
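For the 2-D operator, the expected output can be built the same way the new 3-D test below does it: each input position is scaled by the filter and scatter-added into the output window it maps to. A minimal NumPy sketch, adapted from conv3dtranspose_forward_naive in the new test (names here are illustrative, not the project's):

```python
import numpy as np

def conv2d_transpose_forward_naive(input_, filter_, stride):
    in_n, in_c, in_h, in_w = input_.shape
    f_c, out_c, f_h, f_w = filter_.shape
    assert in_c == f_c
    out_h = (in_h - 1) * stride[0] + f_h
    out_w = (in_w - 1) * stride[1] + f_w
    out = np.zeros((in_n, out_c, out_h, out_w))
    for n in range(in_n):
        for i in range(in_h):
            for j in range(in_w):
                # Each input pixel contributes an (f_h, f_w) window per output channel.
                pixel = input_[n, :, i, j].reshape(in_c, 1, 1)
                for k in range(out_c):
                    contrib = np.sum(pixel * filter_[:, k, :, :], axis=0)
                    i1, i2 = i * stride[0], i * stride[0] + f_h
                    j1, j2 = j * stride[1], j * stride[1] + f_w
                    out[n, k, i1:i2, j1:j2] += contrib
    return out

# Input (2, 3, 5, 5) with filter (3, 6, 3, 3) and stride (1, 1) -> output (2, 6, 7, 7).
```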
import unittest
import numpy as np
from op_test import OpTest


def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
    # [2, 3, 5, 5, 5]
    in_n, in_c, in_d, in_h, in_w = input_.shape
    # [3, 6, 3, 3, 3]
    f_c, out_c, f_d, f_h, f_w = filter_.shape
    assert in_c == f_c

    stride, pad = conv3dtranspose_param['stride'], conv3dtranspose_param['pad']
    out_d = (in_d - 1) * stride[0] + f_d
    out_h = (in_h - 1) * stride[1] + f_h
    out_w = (in_w - 1) * stride[2] + f_w

    out = np.zeros((in_n, out_c, out_d, out_h, out_w))

    for n in range(in_n):
        for d in range(in_d):
            for i in range(in_h):
                for j in range(in_w):
                    input_masked = input_[n, :, d, i, j]  # (c)
                    input_masked = np.reshape(input_masked, (in_c, 1, 1, 1))
                    input_masked = np.tile(input_masked, (1, f_d, f_h, f_w))

                    for k in range(out_c):
                        tmp_out = np.sum(input_masked * filter_[:, k, :, :, :],
                                         axis=0)
                        d1, d2 = d * stride[0], d * stride[0] + f_d
                        i1, i2 = i * stride[1], i * stride[1] + f_h
                        j1, j2 = j * stride[2], j * stride[2] + f_w
                        out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out

    return out


class TestConv3dTransposeOp(OpTest):
    def setUp(self):
        # init as conv transpose
        self.init_op_type()

        # [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7]
        self.init_test_case()

        conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad}
        input_ = np.random.random(self.input_size).astype("float32")
        filter_ = np.random.random(self.filter_size).astype("float32")
        output = conv3dtranspose_forward_naive(
            input_, filter_, conv3dtranspose_param).astype("float32")
        # print 'deconv output py', output, output.shape

        self.inputs = {'Input': input_, 'Filter': filter_}
        self.attrs = {
            'strides': self.stride,
            'paddings': self.pad,
            # 'dilations': self.dilations
        }
        self.outputs = {'Output': output}

    def test_check_output(self):
        print 'check output here'
        self.check_output()

    def test_check_grad(self):
        self.check_grad(
            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)

    def test_check_grad_no_filter(self):
        self.check_grad(
            ['Input'],
            'Output',
            max_relative_error=0.02,
            no_grad_set=set(['Filter']))

    def test_check_grad_no_input(self):
        self.check_grad(
            ['Filter'],
            'Output',
            max_relative_error=0.02,
            no_grad_set=set(['Input']))

    def init_test_case(self):
        self.pad = [0, 0, 0]
        self.stride = [1, 1, 1]
        self.dilations = [1, 1, 1]
        self.input_size = [2, 3, 5, 5, 5]  # NCDHW
        f_c = self.input_size[1]
        self.filter_size = [f_c, 6, 3, 3, 3]

    def init_op_type(self):
        self.op_type = "conv3d_transpose"


if __name__ == '__main__':
    unittest.main()
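The naive reference above can also be exercised on its own to confirm the expected output size from the formula (a usage sketch, not part of the test suite):

```python
import numpy as np

inp = np.random.random((2, 3, 5, 5, 5)).astype("float32")
flt = np.random.random((3, 6, 3, 3, 3)).astype("float32")
out = conv3dtranspose_forward_naive(inp, flt, {'stride': [1, 1, 1], 'pad': [0, 0, 0]})
print(out.shape)  # (2, 6, 7, 7, 7), i.e. (5 - 1) * 1 + 3 = 7 per spatial dim
```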