diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index c5ecccf9023d7fba7d0edec4c687c9515da034e6..384f004e0ec8c8af23eb40611409d3aafef05912 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -69,6 +69,13 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") endif() + # conv_op contains several operators + if ("${TARGET}" STREQUAL "conv_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(conv2d);\n") + endif() + # conv_transpose_op contains several operators if ("${TARGET}" STREQUAL "conv_transpose_op") set(pybind_flag 1) @@ -146,6 +153,7 @@ set(DEPS_OPS sum_op pool_op pool_with_index_op + conv_op lstm_op conv_transpose_op nccl_op @@ -158,6 +166,7 @@ set(DEPS_OPS op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) +op_library(conv_op DEPS vol2col) op_library(sum_op DEPS net_op selected_rows_functor) op_library(pool_op DEPS pooling) op_library(pool_with_index_op DEPS pooling) diff --git a/paddle/operators/conv2d_op.cc b/paddle/operators/conv2d_op.cc deleted file mode 100644 index b47cff180dc6f019c1730f68841fae27e95693ee..0000000000000000000000000000000000000000 --- a/paddle/operators/conv2d_op.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/conv2d_op.h" - -namespace paddle { -namespace operators { - -void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of Conv2DOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Filter"), - "Input(Filter) of Conv2DOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Output"), - "Output(Output) of Conv2DOp should not be null."); - - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - std::vector strides = ctx->Attrs().Get>("strides"); - std::vector paddings = ctx->Attrs().Get>("paddings"); - int groups = ctx->Attrs().Get("groups"); - int input_channels = in_dims[1]; - int output_channels = filter_dims[0]; - - PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D."); - PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D."); - PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, - "The number of input channels should be equal to filter " - "channels * groups."); - PADDLE_ENFORCE_EQ( - output_channels % groups, 0, - "The number of output channels should be divided by groups."); - - auto output_height = - OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]); - auto output_width = - OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]); - ctx->SetOutputDim("Output", - {in_dims[0], filter_dims[0], output_height, output_width}); -} - -Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "Input", - "The input tensor of convolution operator. 
" - "The format of input tensor is NCHW, where N is batch size, C is the " - "number of channels, H is the height of the image, " - "and W is the width of the image."); - AddInput("Filter", - "The filter tensor of convolution operator. " - "The format of the filter tensor is MCHW, where M is the number of " - "output image channels, C is the number of input image channels, " - "H is the height of the filter, and W is the width of the filter. " - "If the groups attribute is greater than 1, C equals the number of " - "input image channels divided by the groups."); - AddOutput("Output", - "The output tensor of convolution operator. " - "The format of output tensor is also NCHW."); - AddAttr>("strides", "strides of convolution operator.") - .SetDefault({1, 1}); - AddAttr>("paddings", "paddings of convolution operator.") - .SetDefault({0, 0}); - AddAttr( - "groups", - "Group size of convolution operator. " - "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " - "when group=2, the first half of the filters is only connected to the " - "first half of the input channels, while the second half of the filters " - "is only connected to the second half of the input channels.") - .SetDefault(1); - AddComment(R"DOC( -Convolution Operator. - -The convolution operation calculates the output based on the input, filter, -strides, paddings, and groups parameters. The size of each dimension of the -parameters is checked in the infer-shape method. 
- -)DOC"); -} - -void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const { - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - if (ctx->HasOutput(framework::GradVarName("Input"))) { - ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); - } - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); - } -} - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP(conv2d, ops::Conv2DOp, ops::Conv2DOpMaker, conv2d_grad, - ops::Conv2DOpGrad); - -REGISTER_OP_CPU_KERNEL( - conv2d, ops::GemmConv2DKernel); -REGISTER_OP_CPU_KERNEL( - conv2d_grad, ops::GemmConvGrad2DKernel); diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 62190ebc217be49f549cedfb2de24b9d138fff48..97f31bf22d7072d89bd043045045dcb5bb5518b8 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/operators/conv2d_op.h" +#include "paddle/operators/conv_op.h" namespace paddle { namespace operators { @@ -38,10 +38,11 @@ class CudnnConvOpMaker : public Conv2DOpMaker { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad, - ops::Conv2DOpGrad); -REGISTER_OP_CPU_KERNEL( - conv_cudnn, ops::GemmConv2DKernel); +REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad, + ops::ConvOpGrad); + +REGISTER_OP_CPU_KERNEL(conv_cudnn, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( conv_cudnn_grad, - ops::GemmConvGrad2DKernel); + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_cudnn_op.cu b/paddle/operators/conv_cudnn_op.cu index e2eb157f40c0039f87c41d28f8732cd4901a046d..d115850e2b651e20d82ad6028648c6a88439c9d7 100644 --- a/paddle/operators/conv_cudnn_op.cu +++ b/paddle/operators/conv_cudnn_op.cu @@ -15,7 +15,7 @@ #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" #include "paddle/memory/memory.h" -#include "paddle/operators/conv2d_op.h" +#include "paddle/operators/conv_op.h" #include "paddle/platform/assert.h" #include "paddle/platform/cudnn_helper.h" diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a6f65f10165929316f971d195f3790fd9e7ed376 --- /dev/null +++ b/paddle/operators/conv_op.cc @@ -0,0 +1,209 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/conv_op.h" + +namespace paddle { +namespace operators { + +void ConvOp::InferShape(framework::InferShapeContext* ctx) const { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of ConvOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Filter"), + "Input(Filter) of ConvOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Output"), + "Output(Output) of ConvOp should not be null."); + + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + std::vector strides = ctx->Attrs().Get>("strides"); + std::vector paddings = ctx->Attrs().Get>("paddings"); + int groups = ctx->Attrs().Get("groups"); + int input_channels = in_dims[1]; + int output_channels = filter_dims[0]; + + PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, + "Conv input should be 4-D or 5-D tensor."); + PADDLE_ENFORCE_EQ( + in_dims.size(), filter_dims.size(), + "Conv input dimension and filter dimension should be the same."); + PADDLE_ENFORCE( + in_dims.size() - strides.size() == 2U, + "Conv input dimension and strides dimension should be consistent."); + PADDLE_ENFORCE_EQ( + paddings.size(), strides.size(), + "Conv paddings dimension and Conv strides dimension should be the same."); + PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, + "The number of input channels should be equal to filter " + "channels * groups."); + PADDLE_ENFORCE_EQ( + output_channels % groups, 0, + "The number of output channels should be divided by groups."); + + std::vector output_shape({in_dims[0], filter_dims[0]}); + for (size_t i = 0; i < paddings.size(); ++i) { + output_shape.push_back(OutputSize(in_dims[i + 2], filter_dims[i + 2], + paddings[i], strides[i])); + } + ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); +} + +Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* 
op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "Input", + "(Tensor) The input tensor of convolution operator. " + "The format of input tensor is NCHW, where N is batch size, C is the " + "number of channels, H is the height of the feature, " + "and W is the width of the feature."); + AddInput("Filter", + "(Tensor) The filter tensor of convolution operator. " + "The format of the filter tensor is MCHW, where M is the number of " + "output image channels, C is the number of input image channels, " + "H is the height of the filter, and W is the width of the filter. " + "If the groups attribute is greater than 1, C equals the number of " + "input image channels divided by the groups."); + AddOutput("Output", + "(Tensor) The output tensor of convolution operator. " + "The format of output tensor is also NCHW."); + AddAttr>("strides", "strides of convolution operator.") + .SetDefault({1, 1}); + AddAttr>("paddings", "paddings of convolution operator.") + .SetDefault({0, 0}); + AddAttr( + "groups", + "(int default:1), the group size of convolution operator. " + "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " + "when group=2, the first half of the filters is only connected to the " + "first half of the input channels, while the second half of the filters " + "is only connected to the second half of the input channels.") + .SetDefault(1); + AddComment(R"DOC( +Convolution Operator. + +The convolution operation calculates the output based on the input, filter +and strides, paddings, groups parameters. The size of each dimension of the +parameters is checked in the infer-shape. +Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch +size, C is the number of channels, H is the height of the feature, and W is +the width of the feature. Parameters(ksize, strides, paddings) are two elements. +These two elements represent height and width, respectively. 
+The input(Input) size and output(Output) size may be different. + +Example: + Input: + Input shape: (N, C_in, H_in, W_in) + Filter shape: (C_out, C_in, H_f, W_f) + Output: + Output shape: (N, C_out, H_out, W_out) + where + H_out = (H_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; + W_out = (W_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; +)DOC"); +} + +Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "Input", + "(Tensor) The input tensor of convolution operator. " + "The format of input tensor is NCDHW. Where N is batch size, C is the " + "number of channels, D is the depth of the feature, H is the height of " + "the feature, " + "and W is the width of the feature."); + AddInput("Filter", + "(Tensor) The filter tensor of convolution operator. " + "The format of the filter tensor is MCDHW, where M is the number of " + "output image channels, C is the number of input image channels, " + "D is the depth of the filter, H is the height of the filter, and W " + "is the width of the filter. " + "If the groups attribute is greater than 1, C equals the number of " + "input image channels divided by the groups."); + AddOutput("Output", + "(Tensor) The output tensor of convolution operator. " + "The format of output tensor is also NCDHW."); + AddAttr>( + "strides", + "(vector, default:{1, 1, 1}), the strides of convolution operator.") + .SetDefault({1, 1, 1}); + AddAttr>( + "paddings", + "(vector, default:{0, 0, 0}), the paddings of convolution operator.") + .SetDefault({0, 0, 0}); + AddAttr( + "groups", + "(int default:1), the group size of convolution operator. 
" + "According to grouped convolution in Alex Krizhevsky's Deep CNN paper: " + "when group=2, the first half of the filters is only connected to the " + "first half of the input channels, while the second half of the filters " + "is only connected to the second half of the input channels.") + .SetDefault(1); + + AddComment(R"DOC( +Convolution3D Operator. + +The convolution operation calculates the output based on the input, filter +and strides, paddings, groups parameters. The size of each dimension of the +parameters is checked in the infer-shape. +Input(Input, Filter) and output(Output) are in NCDHW format. Where N is batch +size, C is the number of channels, D is the depth of the feature, H is the height of +the feature, and W is the width of the feature. Parameters(ksize, strides, paddings) +are three elements. These three elements represent depth, height and width, respectively. +The input(Input) size and output(Output) size may be different. + +Example: + Input: + Input shape: (N, C_in, D_in, H_in, W_in) + Filter shape: (C_out, C_in, D_f, H_f, W_f) + Output: + Output shape: (N, C_out, D_out, H_out, W_out) + where + D_out = (D_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; + H_out = (H_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; + W_out = (W_in - filter_size[2] + 2 * paddings[2]) / strides[2] + 1; +)DOC"); +} + +void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const { + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + if (ctx->HasOutput(framework::GradVarName("Input"))) { + ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); + } + if (ctx->HasOutput(framework::GradVarName("Filter"))) { + ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); + } +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad, + ops::ConvOpGrad); +namespace ops = paddle::operators; 
+REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad, + ops::ConvOpGrad); + +REGISTER_OP_CPU_KERNEL(conv2d, + ops::GemmConvKernel); +REGISTER_OP_CPU_KERNEL( + conv2d_grad, ops::GemmConvGradKernel); + +REGISTER_OP_CPU_KERNEL(conv3d, + ops::GemmConvKernel); +REGISTER_OP_CPU_KERNEL( + conv3d_grad, ops::GemmConvGradKernel); diff --git a/paddle/operators/conv2d_op.cu b/paddle/operators/conv_op.cu similarity index 64% rename from paddle/operators/conv2d_op.cu rename to paddle/operators/conv_op.cu index c697c9466d34c29af6976f3a4d2d0a24ba778ceb..8e6f9da455b7291049aee57189dae15b8bcc2150 100644 --- a/paddle/operators/conv2d_op.cu +++ b/paddle/operators/conv_op.cu @@ -12,11 +12,16 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/conv2d_op.h" +#include "paddle/operators/conv_op.h" namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(conv2d, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv2d, ops::GemmConv2DKernel); + conv2d_grad, ops::GemmConvGradKernel); + +REGISTER_OP_GPU_KERNEL(conv3d, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv2d_grad, ops::GemmConvGrad2DKernel); + conv3d_grad, ops::GemmConvGradKernel); diff --git a/paddle/operators/conv2d_op.h b/paddle/operators/conv_op.h similarity index 54% rename from paddle/operators/conv2d_op.h rename to paddle/operators/conv_op.h index 0621389a79eee6b5e75b1eab309b49f8aa4a97ca..7c1729213bf3f5f3987afbf2d51d5b5339ae521d 100644 --- a/paddle/operators/conv2d_op.h +++ b/paddle/operators/conv_op.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/framework/op_registry.h" #include "paddle/operators/math/im2col.h" #include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/vol2col.h" namespace paddle { namespace operators { @@ -40,14 +41,20 @@ class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { framework::OpAttrChecker* op_checker); }; -class Conv2DOp : public framework::OperatorWithKernel { +class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Conv3DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + +class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; }; -class Conv2DOpGrad : public framework::OperatorWithKernel { +class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -55,7 +62,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel { }; template -class GemmConv2DKernel : public framework::OpKernel { +class GemmConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("Input"); @@ -70,51 +77,78 @@ class GemmConv2DKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); int groups = context.Attr("groups"); - int batch_size = input->dims()[0]; - int input_channels = input->dims()[1]; - int filter_height = filter.dims()[filter.dims().size() - 2]; - int filter_width = filter.dims()[filter.dims().size() - 1]; - int output_channels = output->dims()[1]; - int output_height = output->dims()[2]; - int output_width = output->dims()[3]; - - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - im2col; + const int batch_size = static_cast(input->dims()[0]); + + // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + std::vector 
filter_shape_vec(framework::vectorize(filter.dims())); + filter_shape_vec.erase(filter_shape_vec.begin(), + filter_shape_vec.begin() + 2); + + // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + std::vector output_shape_vec(framework::vectorize(output->dims())); + output_shape_vec.erase(output_shape_vec.begin(), + output_shape_vec.begin() + 2); + // use col_shape in the im2col calculation - framework::DDim col_shape = {input_channels / groups, filter_height, - filter_width, output_height, output_width}; + // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, + // o_h, o_w} + std::vector col_shape_vec; + col_shape_vec.push_back(input->dims()[1] / groups); + col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), + filter_shape_vec.end()); + col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), + output_shape_vec.end()); + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + // use col_matrix_shape in the gemm calculation - framework::DDim col_matrix_shape = { - input_channels / groups * filter_height * filter_width, - output_height * output_width}; + // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d * + // o_h * o_w) + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + Tensor col; col.mutable_data(col_shape, context.GetPlace()); // col_matrix shares the same piece of data with col, // but will be reshaped into a two-dimensional matrix shape // to call the matrix multiplication interface. 
- Tensor col_matrix = col; + Tensor col_matrix; + col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); - framework::DDim input_shape = {input->dims()[1], input->dims()[2], - input->dims()[3]}; + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); + framework::DDim filter_matrix_shape = {filter.dims()[0], filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); - framework::DDim output_matrix_shape = {output_channels, - output_height * output_width}; - // convolution operator: im2col + gemm - int in_step = input_channels / groups; - int out_step = output_channels / groups; + framework::DDim output_matrix_shape = { + output->dims()[1], + output->numel() / (output->dims()[0] * output->dims()[1])}; + + // convolution operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output->dims()[1]) / groups; + for (int i = 0; i < batch_size; i++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); for (int g = 0; g < groups; g++) { - // im2col Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - im2col(context.device_context(), in_slice, col, strides[0], strides[1], - paddings[0], paddings[0], paddings[1], paddings[1]); + + if (filter_shape_vec.size() == 2) { + // im2col + math::Im2ColFunctor im2col; + im2col(context.device_context(), in_slice, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + } else if (filter_shape_vec.size() == 3) { + // vol2col + math::Vol2ColFunctor vol2col; + vol2col(context.device_context(), in_slice, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } // gemm Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step); @@ -127,7 +161,7 @@ class GemmConv2DKernel : public framework::OpKernel { }; template -class 
GemmConvGrad2DKernel : public framework::OpKernel { +class GemmConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("Input"); @@ -137,64 +171,79 @@ class GemmConvGrad2DKernel : public framework::OpKernel { context.Output(framework::GradVarName("Input")); Tensor* filter_grad = context.Output(framework::GradVarName("Filter")); - // The filter and filter_grad will be reshaped in the calculations, // so here use an assignment operation, // that avoids modifying the variable in the Scope. Tensor filter = *context.Input("Filter"); + if (!input_grad && !filter_grad) return; + std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); int groups = context.Attr("groups"); - int batch_size = input->dims()[0]; - int input_channels = input->dims()[1]; - int filter_height = filter.dims()[filter.dims().size() - 2]; - int filter_width = filter.dims()[filter.dims().size() - 1]; - int output_channels = output_grad->dims()[1]; - int output_height = output_grad->dims()[2]; - int output_width = output_grad->dims()[3]; - - paddle::operators::math::Col2ImFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - col2im; - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kCFO, Place, T> - im2col; - // use col_shape in the im2col and col2im calculation - framework::DDim col_shape = {input_channels / groups, filter_height, - filter_width, output_height, output_width}; + const int batch_size = static_cast(input->dims()[0]); + + // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + std::vector filter_shape_vec(framework::vectorize(filter.dims())); + filter_shape_vec.erase(filter_shape_vec.begin(), + filter_shape_vec.begin() + 2); + + // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + std::vector output_shape_vec( + framework::vectorize(output_grad->dims())); + output_shape_vec.erase(output_shape_vec.begin(), 
+ output_shape_vec.begin() + 2); + + // use col_shape in the im2col calculation + // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, + // o_h, o_w} + std::vector col_shape_vec; + col_shape_vec.push_back(input->dims()[1] / groups); + col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), + filter_shape_vec.end()); + col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), + output_shape_vec.end()); + framework::DDim col_shape(framework::make_ddim(col_shape_vec)); + // use col_matrix_shape in the gemm calculation - framework::DDim col_matrix_shape = { - input_channels / groups * filter_height * filter_width, - output_height * output_width}; - Tensor col; - col.mutable_data(col_shape, context.GetPlace()); - // col_matrix shares the same piece of data with col, - // but will be reshaped into a two-dimensional matrix shape - // to call the matrix multiplication interface. - Tensor col_matrix = col; - col_matrix.Resize(col_matrix_shape); + // size: (i_c/g * k_h * k_w, o_h * o_w) + // or + // (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w) + framework::DDim col_matrix_shape = + framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); - framework::DDim input_shape = {input->dims()[1], input->dims()[2], - input->dims()[3]}; - framework::DDim output_matrix_shape = { - output_grad->dims()[1], - output_grad->dims()[2] * output_grad->dims()[3]}; + framework::DDim input_shape = framework::slice_ddim( + input->dims(), 1, static_cast(input->dims().size())); framework::DDim filter_matrix_shape = {filter.dims()[0], filter.numel() / filter.dims()[0]}; filter.Resize(filter_matrix_shape); - // convolution backward input operator: gemm + col2im - // convolution backward weight operator: im2col + gemm - int in_step = input_channels / groups; - int out_step = output_channels / groups; + framework::DDim output_matrix_shape = { + output_grad->dims()[1], + output_grad->numel() / + (output_grad->dims()[0] * output_grad->dims()[1])}; + + // 
convolution backward input operator: gemm + col2im(or col2vol) + // convolution backward weight operator: im2col(or vol2col) + gemm + int in_step = static_cast(input->dims()[1]) / groups; + int out_step = static_cast(output_grad->dims()[1]) / groups; + + Tensor col; + // col_matrix shares the same piece of data with col, + // but will be reshaped into a two-dimensional matrix shape + // to call the matrix multiplication interface. + Tensor col_matrix; + col.mutable_data(col_shape, context.GetPlace()); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + + math::SetConstant set_zero; if (input_grad) { input_grad->mutable_data(context.GetPlace()); - auto t = framework::EigenVector::Flatten(*input_grad); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); + set_zero(context.device_context(), input_grad, static_cast(0)); for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = @@ -208,13 +257,22 @@ class GemmConvGrad2DKernel : public framework::OpKernel { math::matmul(context.device_context(), filter_slice, true, out_grad_slice, false, T(1.0), &col_matrix, T(0.0)); - // col2im Tensor in_grad_slice = in_grad_batch.Slice(g * in_step, (g + 1) * in_step); - col2im(context.device_context(), in_grad_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + + if (filter_shape_vec.size() == 2) { + math::Col2ImFunctor col2im; + col2im(context.device_context(), in_grad_slice, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + + } else if (filter_shape_vec.size() == 3) { + math::Col2VolFunctor col2vol; + col2vol(context.device_context(), in_grad_slice, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } } } } @@ -223,8 +281,7 @@ class GemmConvGrad2DKernel : public framework::OpKernel { filter_grad->mutable_data(context.GetPlace()); Tensor filter_grad_ = *filter_grad; filter_grad_.Resize(filter_matrix_shape); - auto t = 
framework::EigenVector::Flatten(filter_grad_); - t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); + set_zero(context.device_context(), filter_grad, static_cast(0)); for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = @@ -235,9 +292,18 @@ class GemmConvGrad2DKernel : public framework::OpKernel { Tensor out_grad_slice = out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - im2col(context.device_context(), in_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + + if (filter_shape_vec.size() == 2) { + math::Im2ColFunctor im2col; + im2col(context.device_context(), in_slice, col, strides[0], + strides[1], paddings[0], paddings[0], paddings[1], + paddings[1]); + } else if (filter_shape_vec.size() == 3) { + math::Vol2ColFunctor vol2col; + vol2col(context.device_context(), in_slice, col, strides[0], + strides[1], strides[2], paddings[0], paddings[1], + paddings[2]); + } // gemm Tensor filter_grad_slice = @@ -250,6 +316,5 @@ class GemmConvGrad2DKernel : public framework::OpKernel { } } }; - } // namespace operators } // namespace paddle diff --git a/python/paddle/v2/framework/tests/test_conv2d_op.py b/python/paddle/v2/framework/tests/test_conv2d_op.py index f58b96463cf78103b2acb3d80652ef0aa988ad49..04ae7f294c27fdceaaff2e9a7ed854213e643945 100644 --- a/python/paddle/v2/framework/tests/test_conv2d_op.py +++ b/python/paddle/v2/framework/tests/test_conv2d_op.py @@ -61,25 +61,23 @@ class TestConv2dOp(OpTest): def test_check_grad(self): self.check_grad( - set(['Input', 'Filter']), 'Output', max_relative_error=0.05) + set(['Input', 'Filter']), 'Output', max_relative_error=0.02) def test_check_grad_no_filter(self): self.check_grad( ['Input'], 'Output', - max_relative_error=0.05, + max_relative_error=0.02, no_grad_set=set(['Filter'])) def test_check_grad_no_input(self): self.check_grad( ['Filter'], 'Output', - max_relative_error=0.05, + 
max_relative_error=0.02, no_grad_set=set(['Input'])) def init_test_case(self): - # self.groups = 1 - # self.op_type = "conv2d" self.pad = [0, 0] self.stride = [1, 1] self.dilations = [1, 1] @@ -103,6 +101,9 @@ class TestWithGroup(TestConv2dOp): self.op_type = "conv2d" +#----------------Conv2dCudnn---------------- + + class TestCudnn(TestConv2dOp): def init_group(self): self.groups = 1 diff --git a/python/paddle/v2/framework/tests/test_conv3d_op.py b/python/paddle/v2/framework/tests/test_conv3d_op.py new file mode 100644 index 0000000000000000000000000000000000000000..44c192f58d25f8ddaa38d2ba7c7c19b9a5bd7dc1 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_conv3d_op.py @@ -0,0 +1,131 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def conv3d_forward_naive(input, filter, group, conv_param): + in_n, in_c, in_d, in_h, in_w = input.shape + out_c, f_c, f_d, f_h, f_w = filter.shape + assert f_c * group == in_c + assert np.mod(out_c, group) == 0 + sub_out_c = out_c / group + + stride, pad = conv_param['stride'], conv_param['pad'] + out_d = 1 + (in_d + 2 * pad[0] - f_d) / stride[0] + out_h = 1 + (in_h + 2 * pad[1] - f_h) / stride[1] + out_w = 1 + (in_w + 2 * pad[2] - f_w) / stride[2] + out = np.zeros((in_n, out_c, out_d, out_h, out_w)) + + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], ), + (pad[2], )), + mode='constant', + constant_values=0) + for d in range(out_d): + for i in range(out_h): + for j in range(out_w): + for g in range(group): + input_pad_masked = \ + input_pad[:, g * f_c:(g + 1) * f_c, + d * stride[0]:d * stride[0] + f_d, + i * stride[1]:i * stride[1] + f_h, + j * stride[2]:j * stride[2] + f_w] + f_sub = filter[g * sub_out_c:(g + 1) * + sub_out_c, :, :, :, :] + for k in range(sub_out_c): + out[:, g * sub_out_c + k, d, i, j] = \ + np.sum(input_pad_masked * f_sub[k, :, :, :, :], + axis=(1, 2, 3, 4)) + + return out + + +class TestConv3dOp(OpTest): + def setUp(self): + self.init_group() + self.init_op_type() + 
self.init_test_case() + + conv3d_param = {'stride': self.stride, 'pad': self.pad} + input = np.random.random(self.input_size).astype("float32") + filter = np.random.random(self.filter_size).astype("float32") + output = conv3d_forward_naive(input, filter, self.groups, + conv3d_param).astype("float32") + + self.inputs = {'Input': input, 'Filter': filter} + self.attrs = { + 'strides': self.stride, + 'paddings': self.pad, + 'groups': self.groups + } + self.outputs = {'Output': output} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + set(['Input', 'Filter']), 'Output', max_relative_error=0.03) + + def test_check_grad_no_filter(self): + self.check_grad( + ['Input'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Filter'])) + + def test_check_grad_no_input(self): + self.check_grad( + ['Filter'], + 'Output', + max_relative_error=0.03, + no_grad_set=set(['Input'])) + + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 4, 4, 4] # NCDHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3, 3] + + def init_group(self): + self.groups = 1 + + def init_op_type(self): + self.op_type = "conv3d" + + +class TestCase1(TestConv3dOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 4, 4, 4] # NCDHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3, 3] + + def init_group(self): + self.groups = 1 + + def init_op_type(self): + self.op_type = "conv3d" + + +class TestWithGroup1(TestConv3dOp): + def init_group(self): + self.groups = 3 + + def init_op_type(self): + self.op_type = "conv3d" + + +class TestWithGroup2(TestCase1): + def init_group(self): + self.groups = 3 + + def init_op_type(self): + self.op_type = "conv3d" + + +if __name__ == '__main__': + 
unittest.main()