diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 97f31bf22d7072d89bd043045045dcb5bb5518b8..4c65b60d2349d2989128f4b1da705ea18391b8a3 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -22,8 +22,6 @@ class CudnnConvOpMaker : public Conv2DOpMaker { CudnnConvOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : Conv2DOpMaker(proto, op_checker) { - AddAttr>("dilations", "dilations of convolution operator.") - .SetDefault(std::vector{1, 1}); AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " "workspace is a section of GPU memory which will be " diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index a6f65f10165929316f971d195f3790fd9e7ed376..687d741cb22a081eab18c61752200b9fd48f68a7 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -30,6 +30,7 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { std::vector strides = ctx->Attrs().Get>("strides"); std::vector paddings = ctx->Attrs().Get>("paddings"); int groups = ctx->Attrs().Get("groups"); + std::vector dilations = ctx->Attrs().Get>("dilations"); int input_channels = in_dims[1]; int output_channels = filter_dims[0]; @@ -52,9 +53,15 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { "The number of output channels should be divided by groups."); std::vector output_shape({in_dims[0], filter_dims[0]}); - for (size_t i = 0; i < paddings.size(); ++i) { + for (size_t i = 0; i < strides.size(); ++i) { + PADDLE_ENFORCE(in_dims[i + 2] + 2 * paddings[i] - + (dilations[i] * (filter_dims[i + 2] - 1) + 1) > + 0, + "Due to the settings of paddings, filter_dims and " + "dilations, the output size is less than 0, please check " + "again."); output_shape.push_back(OutputSize(in_dims[i + 2], filter_dims[i + 2], - paddings[i], strides[i])); + dilations[i], paddings[i], strides[i])); } ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); } @@ -78,9 +85,15 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, AddOutput("Output", "(Tensor) The output tensor of convolution operator. " "The format of output tensor is also NCHW."); - AddAttr>("strides", "strides of convolution operator.") + AddAttr>("strides", + "(vector default:{1, 1}), the " + "strides(h_stride, w_stride) of " + "convolution operator.") .SetDefault({1, 1}); - AddAttr>("paddings", "paddings of convolution operator.") + AddAttr>("paddings", + "(vector default:{0, 0}), the " + "paddings(h_pad, w_pad) of " + "convolution operator.") .SetDefault({0, 0}); AddAttr( "groups", @@ -90,15 +103,20 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, "first half of the input channels, while the second half of the filters " "is only connected to the second half of the input channels.") .SetDefault(1); + AddAttr>("dilations", + "(vector default:{1, 1}), the " + "dilations(h_dilation, w_dilation) of " + "convolution operator.") + .SetDefault({1, 1}); AddComment(R"DOC( Convolution Operator. The convolution operation calculates the output based on the input, filter -and strides, paddings, groups parameters. The size of each dimension of the +and strides, paddings, groups, dilations parameters. The size of each dimension of the parameters is checked in the infer-shape. Input(Input, Filter) and output(Output) are in NCHW format. Where N is batch size, C is the number of channels, H is the height of the feature, and W is -the width of the feature. Parameters(ksize, strides, paddings) are two elements. +the width of the feature. Parameters(ksize, strides, paddings, dilations) are two elements. These two elements represent height and width, respectively. The input(X) size and output(Out) size may be different. @@ -109,8 +127,8 @@ Example: Output: Output shape: (N, C_out, H_out, W_out) where - H_out = (H_in - filter_size[0] + 2 * paddings[0]) / strides[0] + 1; - W_out = (W_in - filter_size[1] + 2 * paddings[1]) / strides[1] + 1; + H_out = (H_in + 2 * paddings[0] - (dilations[0]*(filter_size[0] - 1) + 1)) / strides[0] + 1; + W_out = (W_in + 2 * paddings[1] - (dilations[1]*(filter_size[1] - 1) + 1)) / strides[1] + 1; )DOC"); } @@ -135,13 +153,15 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, AddOutput("Output", "(Tensor) The output tensor of convolution operator." "The format of output tensor is also NCDHW."); - AddAttr>( - "strides", - "(vector, default:{0, 0, 0}), the strides of convolution operator.") + AddAttr>("strides", + "(vector, default:{1, 1, 1}), the " + "strides(d_stride, h_stride, w_stride) of " + "convolution operator.") .SetDefault({1, 1, 1}); - AddAttr>( - "paddings", - "(vector, default:{0, 0, 0}), the paddings of convolution operator.") + AddAttr>("paddings", + "(vector, default:{0, 0, 0}), the " + "paddings(d_pad, h_pad, w_pad) of convolution " + "operator.") .SetDefault({0, 0, 0}); AddAttr( "groups", @@ -151,6 +171,12 @@ Conv3DOpMaker::Conv3DOpMaker(framework::OpProto* proto, "first half of the input channels, while the second half of the filters " "is only connected to the second half of the input channels.") .SetDefault(1); + AddAttr>("dilations", + "(vector default:{1, 1, 1}), the " + "dilations(d_dilation, h_dilation, w_dilation) of " + "convolution operator. Currently, conv3d doesn't " + "support dilation.") + .SetDefault({1, 1, 1}); AddComment(R"DOC( Convolution3D Operator. diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index 7c1729213bf3f5f3987afbf2d51d5b5339ae521d..fac5f1d0e25fe205f89fc7eeb9fadfd8431517d5 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -27,11 +27,24 @@ using Tensor = framework::Tensor; // Base convolution operator definations for other conv // like operators to reuse the implementation. -inline int OutputSize(int input_size, int filter_size, int padding, - int stride) { - int output_size = (input_size - filter_size + 2 * padding) / stride + 1; +inline int OutputSize(int input_size, int filter_size, int dilation, + int padding, int stride) { + const int dkernel = dilation * (filter_size - 1) + 1; + const int output_size = (input_size + 2 * padding - dkernel) / stride + 1; return output_size; } +inline bool IsExpand(std::vector& filter_dim, + std::vector& strides, std::vector& paddings, + std::vector& dilations) { + bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; + for (size_t j = 0; j < strides.size(); ++j) { + filter_1 = filter_1 && (static_cast(filter_dim[j]) == 1); + strides_1 = strides_1 && (strides[j] == 1); + padding_0 = padding_0 && (paddings[j] == 0); + dilation_1 = dilation_1 && (dilations[j] == 1); + } + return !(filter_1 && strides_1 && padding_0 && dilation_1); +} // Define Op classes in .h file so that other conv // operator implementations can reuse the code. @@ -50,14 +63,12 @@ class Conv3DOpMaker : public framework::OpProtoAndCheckerMaker { class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override; }; class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override; }; @@ -73,9 +84,10 @@ class GemmConvKernel : public framework::OpKernel { Tensor* output = context.Output("Output"); output->mutable_data(context.GetPlace()); + int groups = context.Attr("groups"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); - int groups = context.Attr("groups"); + std::vector dilations = context.Attr>("dilations"); const int batch_size = static_cast(input->dims()[0]); @@ -106,14 +118,17 @@ class GemmConvKernel : public framework::OpKernel { framework::DDim col_matrix_shape = framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; - col.mutable_data(col_shape, context.GetPlace()); // col_matrix shares the same piece of data with col, // but will be reshaped into a two-dimensional matrix shape // to call the matrix multiplication interface. Tensor col_matrix; - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); + if (is_expand) { + col.mutable_data(col_shape, context.GetPlace()); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } framework::DDim input_shape = framework::slice_ddim( input->dims(), 1, static_cast(input->dims().size())); @@ -130,24 +145,30 @@ class GemmConvKernel : public framework::OpKernel { int in_step = static_cast(input->dims()[1]) / groups; int out_step = static_cast(output->dims()[1]) / groups; + math::Vol2ColFunctor vol2col; + math::Im2ColFunctor im2col; + for (int i = 0; i < batch_size; i++) { Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape); Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape); + for (int g = 0; g < groups; g++) { Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - if (filter_shape_vec.size() == 2) { + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (filter_shape_vec.size() == 2) { // im2col - math::Im2ColFunctor im2col; - im2col(context.device_context(), in_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + im2col(context.device_context(), in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); } else if (filter_shape_vec.size() == 3) { // vol2col - math::Vol2ColFunctor vol2col; - vol2col(context.device_context(), in_slice, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + vol2col(context.device_context(), in_slice, dilations, strides, + paddings, &col); } // gemm @@ -178,9 +199,10 @@ class GemmConvGradKernel : public framework::OpKernel { if (!input_grad && !filter_grad) return; + int groups = context.Attr("groups"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); - int groups = context.Attr("groups"); + std::vector dilations = context.Attr>("dilations"); const int batch_size = static_cast(input->dims()[0]); @@ -230,14 +252,17 @@ class GemmConvGradKernel : public framework::OpKernel { int in_step = static_cast(input->dims()[1]) / groups; int out_step = static_cast(output_grad->dims()[1]) / groups; + bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; // col_matrix shares the same piece of data with col, // but will be reshaped into a two-dimensional matrix shape // to call the matrix multiplication interface. Tensor col_matrix; - col.mutable_data(col_shape, context.GetPlace()); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); + if (is_expand) { + col.mutable_data(col_shape, context.GetPlace()); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } math::SetConstant set_zero; @@ -245,6 +270,9 @@ class GemmConvGradKernel : public framework::OpKernel { input_grad->mutable_data(context.GetPlace()); set_zero(context.device_context(), input_grad, static_cast(0)); + math::Col2VolFunctor col2vol; + math::Col2ImFunctor col2im; + for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = output_grad->Slice(i, i + 1).Resize(output_matrix_shape); @@ -254,24 +282,26 @@ class GemmConvGradKernel : public framework::OpKernel { Tensor out_grad_slice = out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); - math::matmul(context.device_context(), filter_slice, true, - out_grad_slice, false, T(1.0), &col_matrix, - T(0.0)); - // col2im + Tensor in_grad_slice = in_grad_batch.Slice(g * in_step, (g + 1) * in_step); - if (filter_shape_vec.size() == 2) { - math::Col2ImFunctor col2im; - col2im(context.device_context(), in_grad_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + if (!is_expand) { + col_matrix.ShareDataWith(in_grad_slice); + col_matrix.Resize(col_matrix_shape); + } + math::matmul(context.device_context(), filter_slice, true, + out_grad_slice, false, T(1.0), &col_matrix, + T(0.0)); - } else if (filter_shape_vec.size() == 3) { - math::Col2VolFunctor col2vol; - col2vol(context.device_context(), in_grad_slice, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + if (is_expand && filter_shape_vec.size() == 2) { + col2im(context.device_context(), col, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &in_grad_slice); + } else if (is_expand && filter_shape_vec.size() == 3) { + col2vol(context.device_context(), col, dilations, strides, paddings, + &in_grad_slice); } } } @@ -282,7 +312,8 @@ class GemmConvGradKernel : public framework::OpKernel { Tensor filter_grad_ = *filter_grad; filter_grad_.Resize(filter_matrix_shape); set_zero(context.device_context(), filter_grad, static_cast(0)); - + math::Im2ColFunctor im2col; + math::Vol2ColFunctor vol2col; for (int i = 0; i < batch_size; i++) { Tensor out_grad_batch = output_grad->Slice(i, i + 1).Resize(output_matrix_shape); @@ -293,16 +324,18 @@ class GemmConvGradKernel : public framework::OpKernel { out_grad_batch.Slice(g * out_step, (g + 1) * out_step); Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step); - if (filter_shape_vec.size() == 2) { - math::Im2ColFunctor im2col; - im2col(context.device_context(), in_slice, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + if (!is_expand) { + col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (filter_shape_vec.size() == 2) { + im2col(context.device_context(), in_slice, dilations, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); } else if (filter_shape_vec.size() == 3) { - math::Vol2ColFunctor vol2col; - vol2col(context.device_context(), in_slice, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + vol2col(context.device_context(), in_slice, dilations, strides, + paddings, &col); } // gemm diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 50081779a5ea3c81884007d4e4b7832dc4ea2bdd..13ac0cd54cbeb8f68c2246f7e1d02f032266a72e 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -51,7 +51,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { "as the number of filters."); std::vector output_shape({in_dims[0], filter_dims[1]}); - for (size_t i = 0; i < paddings.size(); ++i) { + for (size_t i = 0; i < strides.size(); ++i) { output_shape.push_back((in_dims[i + 2] - 1) * strides[i] + filter_dims[i + 2]); } @@ -79,11 +79,13 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( "The format of output tensor is also NCHW."); AddAttr>( "strides", - "(vector defalut:{1, 1}), strides of convolution transpose operator.") + "(vector defalut:{1, 1}), the strides(h_stride, w_stride) of " + "convolution transpose operator.") .SetDefault({1, 1}); AddAttr>( "paddings", - "(vector defalut:{0, 0}), paddings of convolution transpose operator.") + "(vector defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution " + "transpose operator.") .SetDefault({0, 0}); AddComment(R"DOC( Convolution2D Transpose Operator. @@ -132,13 +134,14 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( "Where N is batch size, C is " "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); - AddAttr>( - "strides", - "(vector defalut:{1, 1, 1}), strides of convolution transpose operator.") + AddAttr>("strides", + "(vector defalut:{1, 1, 1}), the " + "strides{d_stride, h_stride, w_stride} of " + "convolution transpose operator.") .SetDefault({1, 1, 1}); - AddAttr>( - "paddings", - "(vector defalut:{0, 0, 0}), paddings of convolution transpose operator.") + AddAttr>("paddings", + "(vector defalut:{0, 0, 0}), paddings(d_pad, " + "h_pad, w_pad) of convolution transpose operator.") .SetDefault({0, 0, 0}); AddComment(R"DOC( Convolution3D Transpose Operator. diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index 6c1a6220d784abf89ec789f94d9cff9e5414db04..4b2bd60437da8f58054d8cdd5e6ba1fdac05f0d5 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -43,16 +43,12 @@ class Conv3DTransposeOpMaker : public framework::OpProtoAndCheckerMaker { class ConvTransposeOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - - protected: void InferShape(framework::InferShapeContext* ctx) const override; }; class ConvTransposeOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - - protected: void InferShape(framework::InferShapeContext* ctx) const override; }; @@ -66,6 +62,8 @@ class GemmConvTransposeKernel : public framework::OpKernel { Tensor* output = context.Output("Output"); std::vector strides = context.Attr>("strides"); + // Actually, no paddings and groups allowed in conv transpose. + std::vector paddings = context.Attr>("paddings"); // TODO(Zhuoyuan): Paddings can be added in future. // groups will alway be disabled in conv2dtranspose. @@ -120,6 +118,10 @@ class GemmConvTransposeKernel : public framework::OpKernel { math::SetConstant set_zero; set_zero(context.device_context(), output, static_cast(0)); + math::Col2ImFunctor col2im; + math::Col2VolFunctor col2vol; + std::vector dilations({1, 1, 1}); + // convolution transpose: gemm + col2im or col2vol (similar to conv-backward // on input) for (int i = 0; i < batch_size; i++) { @@ -138,16 +140,16 @@ class GemmConvTransposeKernel : public framework::OpKernel { if (filter_shape_vec.size() == 2) { // col2im: col_matrix -> dy // from (c * k_h * k_w, h * w) to (c, o_h, o_w) - math::Col2ImFunctor col2im; - - col2im(context.device_context(), output_batch, col, strides[0], - strides[1], 0, 0, 0, 0); + col2im(context.device_context(), col, + std::vector{dilations[0], dilations[1]}, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &output_batch); } else if (filter_shape_vec.size() == 3) { // col2vol: col_matrix -> dy // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w) - math::Col2VolFunctor col2vol; - col2vol(context.device_context(), output_batch, col, strides[0], - strides[1], strides[2], 0, 0, 0); + col2vol(context.device_context(), col, dilations, strides, + std::vector{0, 0, 0}, &output_batch); } } } @@ -228,6 +230,10 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { Tensor filter_grad_; math::SetConstant set_zero; + math::Im2ColFunctor im2col; + math::Vol2ColFunctor vol2col; + std::vector dilations({1, 1, 1}); + if (input_grad) { input_grad->mutable_data(context.GetPlace()); set_zero(context.device_context(), input_grad, static_cast(0)); @@ -247,17 +253,16 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { if (filter_shape_vec.size() == 2) { // im2col: dy -> col matrix // from (c, o_h, o_w) to (c * k_h * k_w, h * w) - math::Im2ColFunctor im2col; - im2col(context.device_context(), output_grad_batch, col, strides[0], - strides[1], paddings[0], paddings[0], paddings[1], - paddings[1]); + im2col(context.device_context(), output_grad_batch, + std::vector{dilations[0], dilations[1]}, strides, + std::vector{paddings[0], paddings[1], paddings[0], + paddings[1]}, + &col); } else if (filter_shape_vec.size() == 3) { // vol2col: dy -> col_matrix // from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w) - math::Vol2ColFunctor vol2col; - vol2col(context.device_context(), output_grad_batch, col, strides[0], - strides[1], strides[2], paddings[0], paddings[1], - paddings[2]); + vol2col(context.device_context(), output_grad_batch, dilations, + strides, paddings, &col); } if (input_grad) { diff --git a/paddle/operators/math/context_project.h b/paddle/operators/math/context_project.h index e0283360414fbdfb3dae2e94b45c9c8daeed3c74..845de82bbcb33d52184f04ae1594738cb4776eca 100644 --- a/paddle/operators/math/context_project.h +++ b/paddle/operators/math/context_project.h @@ -88,13 +88,18 @@ template class ContextProjectFunctor { public: void operator()(const platform::DeviceContext& context, const LoDTensor& in, - const Tensor& padding_data, Tensor& col, - bool padding_trainable, int context_start, int context_length, - int context_stride, int up_pad, int down_pad) { + const Tensor& padding_data, bool padding_trainable, + const int context_start, const int context_length, + const int context_stride, const int up_pad, + const int down_pad, Tensor* col) { auto lod_level_0 = in.lod()[0]; math::Im2ColFunctor im2col_ocf; + std::vector dilation({1, 1}); + std::vector padding({up_pad, 0, down_pad, 0}); + std::vector stride({context_stride, 1}); + int input_row_begin, input_row_end; int sequence_height, sequence_width; sequence_width = in.dims()[1]; @@ -105,8 +110,8 @@ class ContextProjectFunctor { : static_cast(lod_level_0[i]); input_row_end = static_cast(lod_level_0[i + 1]); - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -123,17 +128,14 @@ class ContextProjectFunctor { {1, input_row_end - input_row_begin, sequence_width}); // input_channels, input_height, input_width in_t.Resize(framework::make_ddim(input_shape)); - - im2col_ocf(context, in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 1, up_pad, - down_pad, 0, 0); + im2col_ocf(context, in_t, dilation, stride, padding, &out_t); out_t.Resize({sequence_height, context_length * sequence_width}); } } if (padding_trainable) { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -196,14 +198,19 @@ class ContextProjectFunctor { template class ContextProjectGradFunctor { public: - void operator()(const platform::DeviceContext& context, LoDTensor& in, - Tensor& padding_data, Tensor& col, bool padding_trainable, - int context_start, int context_length, int context_stride, - int up_pad, int down_pad, bool input_grad, bool pad_grad) { + void operator()(const platform::DeviceContext& context, const LoDTensor& in, + bool padding_trainable, const int context_start, + const int context_length, const int context_stride, + const int up_pad, const int down_pad, bool pad_grad, + bool input_grad, Tensor* padding_data, Tensor* col) { auto lod_level_0 = in.lod()[0]; math::Col2ImFunctor col2im_ocf; + std::vector dilation({1, 1}); + std::vector padding({up_pad, 0, down_pad, 0}); + std::vector stride({context_stride, 1}); + int input_row_begin, input_row_end; int sequence_height, sequence_width; sequence_width = in.dims()[1]; @@ -215,8 +222,8 @@ class ContextProjectGradFunctor { : static_cast(lod_level_0[i]); input_row_end = static_cast(lod_level_0[i + 1]); - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -234,9 +241,7 @@ class ContextProjectGradFunctor { sequence_width}); // input_channels, input_height, input_width in_t.Resize(framework::make_ddim(input_shape)); - col2im_ocf(context, in_t, out_t, - /*stride_height*/ context_stride, /*stride_width*/ 1, - up_pad, down_pad, 0, 0); + col2im_ocf(context, out_t, dilation, stride, padding, &in_t); out_t.Resize({sequence_height, context_length * sequence_width}); } } @@ -244,8 +249,8 @@ class ContextProjectGradFunctor { if (pad_grad) { if (padding_trainable) { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { - Tensor out_t = col.Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + Tensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); out_t.Resize({sequence_height * context_length, sequence_width}); @@ -259,7 +264,7 @@ class ContextProjectGradFunctor { k + context_length < up_pad ? context_length : up_pad - k; Tensor out_t_sub = out_t.Slice(k * context_length, k * context_length + padding_size); - Tensor w_sub = padding_data.Slice(k, k + padding_size); + Tensor w_sub = padding_data->Slice(k, k + padding_size); auto out_t_sub_e = EigenMatrix::From(out_t_sub); auto w_sub_e = EigenMatrix::From(w_sub); w_sub_e.device(*context.GetEigenDevice()) = @@ -292,7 +297,7 @@ class ContextProjectGradFunctor { Tensor out_t_sub = out_t.Slice( (down_pad_begin_row + t) * context_length - padding_size, (down_pad_begin_row + t) * context_length); - Tensor w_sub = padding_data.Slice( + Tensor w_sub = padding_data->Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); auto out_t_sub_e = EigenMatrix::From(out_t_sub); auto w_sub_e = EigenMatrix::From(w_sub); diff --git a/paddle/operators/math/im2col.cc b/paddle/operators/math/im2col.cc index 3b1b0bd71dd3768b932864e185af8dc839b4653e..c10c44c52076c8ee56eee3a0d82c31df70a1c9c7 100644 --- a/paddle/operators/math/im2col.cc +++ b/paddle/operators/math/im2col.cc @@ -28,57 +28,55 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); + PADDLE_ENFORCE(col->dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[1]; - int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[1]; + int filter_width = col->dims()[2]; + int col_height = col->dims()[3]; + int col_width = col->dims()[4]; - PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " - "inconsistent."); + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + ((dilation[0] * (filter_height - 1) + 1))) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + ((dilation[1] * (filter_width - 1) + 1))) / + stride[1] + + 1, + col_width, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); - int channels_col = input_channels * filter_height * filter_width; + int channels_col = im_channels * filter_height * filter_width; const T* im_data = im.data(); - T* col_data = col.data(); + T* col_data = col->data(); for (int c = 0; c < channels_col; ++c) { int w_offset = c % filter_width; int h_offset = (c / filter_width) % filter_height; int c_im = c / filter_width / filter_height; - for (int h = 0; h < output_height; ++h) { - for (int w = 0; w < output_width; ++w) { - int im_row_idx = h * stride_height + h_offset - padding_up; - int im_col_idx = w * stride_width + w_offset - padding_left; + for (int h = 0; h < col_height; ++h) { + for (int w = 0; w < col_width; ++w) { + int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0]; + int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1]; + int col_idx = (c * col_height + h) * col_width + w; + int im_idx = (im_row_idx + c_im * im_height) * im_width + im_col_idx; - if (im_row_idx < 0 || im_row_idx >= input_height || im_col_idx < 0 || - im_col_idx >= input_width) { - col_data[(c * output_height + h) * output_width + w] = T(0); - } else { - im_row_idx += c_im * input_height; - col_data[(c * output_height + h) * output_width + w] = - im_data[im_row_idx * input_width + im_col_idx]; - } + col_data[col_idx] = (im_row_idx < 0 || im_row_idx >= im_height || + im_col_idx < 0 || im_col_idx >= im_width) + ? static_cast(0) + : im_data[im_idx]; } } } @@ -94,54 +92,55 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[1]; int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; + int col_height = col.dims()[3]; + int col_width = col.dims()[4]; - PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, - "Output_height and padding(padding_up, padding_down) are " - "inconsistent."); - PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " - "inconsistent."); + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + ((dilation[0] * (filter_height - 1) + 1))) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + ((dilation[1] * (filter_width - 1) + 1))) / + stride[1] + + 1, + col_width, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); - int channels_col = input_channels * filter_height * filter_width; + int channels_col = im_channels * filter_height * filter_width; - T* im_data = im.data(); + T* im_data = im->data(); const T* col_data = col.data(); for (int c = 0; c < channels_col; ++c) { int w_offset = c % filter_width; int h_offset = (c / filter_width) % filter_height; int c_im = c / filter_width / filter_height; - for (int h = 0; h < output_height; ++h) { - for (int w = 0; w < output_width; ++w) { - int im_row_idx = h * stride_height + h_offset - padding_up; - int im_col_idx = w * stride_width + w_offset - padding_left; + for (int h = 0; h < col_height; ++h) { + for (int w = 0; w < col_width; ++w) { + int im_row_idx = h * stride[0] - padding[0] + h_offset * dilation[0]; + int im_col_idx = w * stride[1] - padding[1] + w_offset * dilation[1]; - if ((im_row_idx) >= 0 && (im_row_idx) < input_height && - (im_col_idx) >= 0 && (im_col_idx) < input_width) { - im_row_idx += c_im * input_height; - im_data[im_row_idx * input_width + im_col_idx] += - col_data[(c * output_height + h) * output_width + w]; + if ((im_row_idx) >= 0 && (im_row_idx) < im_height && + (im_col_idx) >= 0 && (im_col_idx) < im_width) { + im_row_idx += c_im * im_height; + im_data[im_row_idx * im_width + im_col_idx] += + col_data[(c * col_height + h) * col_width + w]; } } } @@ -168,64 +167,59 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[3]; - int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; + PADDLE_ENFORCE(col->dims().size() == 5); + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[3]; + int filter_width = col->dims()[4]; + int col_height = col->dims()[0]; + int col_width = col->dims()[1]; PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, + (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1, + col_height, "Output_height and padding(padding_up, padding_down) are " "inconsistent."); PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " + (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " "inconsistent."); const T* im_data = im.data(); - T* col_data = col.data(); + T* col_data = col->data(); - for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { - for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { - for (int channel = 0; channel < input_channels; ++channel) { + for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) { + for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) { + for (int channel = 0; channel < im_channels; ++channel) { for (int filter_row_idx = 0; filter_row_idx < filter_height; ++filter_row_idx) { for (int filter_col_idx = 0; filter_col_idx < filter_width; ++filter_col_idx) { int im_row_offset = - col_row_idx * stride_height + filter_row_idx - padding_up; + col_row_idx * stride[0] + filter_row_idx - padding[0]; int im_col_offset = - col_col_idx * stride_width + filter_col_idx - padding_left; - int col_offset = ((((col_row_idx)*output_width + col_col_idx) * - input_channels + - channel) * - filter_height + - filter_row_idx) * - filter_width + - filter_col_idx; - if (im_row_offset < 0 || im_row_offset >= input_height || - im_col_offset < 0 || im_col_offset >= input_width) { - col_data[col_offset] = T(0); - } else { - int im_offset = - (channel * input_height + im_row_offset) * input_width + - im_col_offset; - col_data[col_offset] = im_data[im_offset]; - } + col_col_idx * stride[1] + filter_col_idx - padding[1]; + int col_offset = + ((((col_row_idx)*col_width + col_col_idx) * im_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; + + int im_offset = (channel * im_height + im_row_offset) * im_width + + im_col_offset; + col_data[col_offset] = + (im_row_offset < 0 || im_row_offset >= im_height || + im_col_offset < 0 || im_col_offset >= im_width) + ? static_cast(0) + : im_data[im_offset]; } } } @@ -243,60 +237,57 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[3]; int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; + int col_height = col.dims()[0]; + int col_width = col.dims()[1]; PADDLE_ENFORCE_EQ( - (input_height + padding_up + padding_down - filter_height) / - stride_height + - 1, - output_height, + (im_height + padding[0] + padding[2] - filter_height) / stride[0] + 1, + col_height, "Output_height and padding(padding_up, padding_down) are " "inconsistent."); PADDLE_ENFORCE_EQ( - (input_width + padding_left + padding_right - filter_width) / - stride_width + - 1, - output_width, - "output_width and padding(padding_left, padding_right) are " + (im_width + padding[1] + padding[3] - filter_width) / stride[1] + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " "inconsistent."); - T* im_data = im.data(); + T* im_data = im->data(); const T* col_data = col.data(); - for (int col_row_idx = 0; col_row_idx < output_height; ++col_row_idx) { - for (int col_col_idx = 0; col_col_idx < output_width; ++col_col_idx) { - for (int channel = 0; channel < input_channels; ++channel) { + for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) { + for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) { + for (int channel = 0; channel < im_channels; ++channel) { for (int filter_row_idx = 0; filter_row_idx < filter_height; ++filter_row_idx) { for (int filter_col_idx = 0; filter_col_idx < filter_width; ++filter_col_idx) { int im_row_offset = - col_row_idx * stride_height + filter_row_idx - padding_up; + col_row_idx * stride[0] + filter_row_idx - padding[0]; int im_col_offset = - col_col_idx * stride_width + filter_col_idx - padding_left; - int col_offset = (((col_row_idx * output_width + col_col_idx) * - input_channels + - channel) * - filter_height + - filter_row_idx) * - filter_width + - filter_col_idx; - if (im_row_offset >= 0 && im_row_offset < input_height && - im_col_offset >= 0 && im_col_offset < input_width) { + col_col_idx * stride[1] + filter_col_idx - padding[1]; + int col_offset = + (((col_row_idx * col_width + col_col_idx) * im_channels + + channel) * + filter_height + + filter_row_idx) * + filter_width + + filter_col_idx; + if (im_row_offset >= 0 && im_row_offset < im_height && + im_col_offset >= 0 && im_col_offset < im_width) { int im_offset = - (channel * input_height + im_row_offset) * input_width + + (channel * im_height + im_row_offset) * im_width + im_col_offset; im_data[im_offset] += col_data[col_offset]; } diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 7b201fdbf3c5dd7d336d359e00b7323cecc0231a..347df7a0ffdec163c0479a71ec775a813930ba5f 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -20,36 +20,32 @@ namespace operators { namespace math { template -__global__ void im2col(const T* data_im, int num_outs, int height, int width, +__global__ void im2col(const T* data_im, int num_outs, int im_height, + int im_width, int dilation_h, int dilation_w, int filter_height, int filter_width, int stride_height, int stride_width, int padding_height, int padding_width, - int output_height, int output_width, T* data_col) { - int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + int col_height, int col_width, T* data_col) { + const int index = + (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; if (index < num_outs) { - int w_out = index % output_width; - index /= output_width; - int h_out = index % output_height; - int channel_in = index / output_height; + int w_out = index % col_width; + int h_out = (index / col_width) % col_height; + int channel_in = index / col_width / col_height; int channel_out = channel_in * filter_height * filter_width; - int h_in = h_out * stride_height; - int w_in = w_out * stride_width; + int h_in = h_out * stride_height - padding_height; + int w_in = w_out * stride_width - padding_width; - data_col += (channel_out * output_height + h_out) * output_width + w_out; + data_col += (channel_out * col_height + h_out) * col_width + w_out; + data_im += (channel_in * im_height + h_in) * im_width + w_in; for (int i = 0; i < filter_height; ++i) { for (int j = 0; j < filter_width; ++j) { - int rIdx = int(h_in + i); - int cIdx = int(w_in + j); - if ((rIdx - (int)padding_height) >= (int)height || - (rIdx - (int)padding_height) < 0 || - (cIdx - (int)padding_width) >= (int)width || - (cIdx - (int)padding_width) < 0) { - *data_col = 0; - } else { - rIdx = rIdx + channel_in * height - padding_height; - cIdx = cIdx - padding_width; - *data_col = data_im[rIdx * width + cIdx]; - } - data_col += output_height * output_width; + int rIdx = h_in + i * dilation_h; + int cIdx = w_in + j * dilation_w; + *data_col = + (rIdx >= im_height || rIdx < 0 || cIdx >= im_width || cIdx < 0) + ? 0 + : data_im[i * dilation_h * im_width + j * dilation_w]; + data_col += col_height * col_width; } } } @@ -65,30 +61,36 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); - - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[1]; - int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); - - int num_outputs = input_channels * output_height * output_width; + PADDLE_ENFORCE(col->dims().size() == 5); + + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[1]; + int filter_width = col->dims()[2]; + int col_height = col->dims()[3]; + int col_width = col->dims()[4]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); + + int num_outputs = im_channels * col_height * col_width; int blocks = (num_outputs + 1024 - 1) / 1024; int block_x = 512; int block_y = (blocks + 512 - 1) / 512; @@ -97,56 +99,57 @@ class Im2ColFunctor<<(context) .stream()>>>( - im.data(), num_outputs, input_height, input_width, filter_height, - filter_width, stride_height, stride_width, padding_up, padding_left, - output_height, output_width, col.data()); + im.data(), num_outputs, im_height, im_width, dilation[0], + dilation[1], filter_height, filter_width, stride[0], stride[1], + padding[0], padding[1], col_height, col_width, col->data()); } }; template -__global__ void col2im(size_t n, const T* data_col, size_t height, size_t width, - size_t channels, size_t filter_height, - size_t filter_width, size_t stride_height, - size_t stride_width, size_t padding_height, - size_t padding_width, size_t output_height, - size_t output_width, T* data_im) { - size_t index = +__global__ void col2im(int n, const T* data_col, int im_height, int im_width, + int dilation_h, int dilation_w, int filter_height, + int filter_width, int stride_height, int stride_width, + int padding_height, int padding_width, int col_height, + int col_width, T* data_im) { + const int index = (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x; + + const int d_filter_height = dilation_h * (filter_height - 1) + 1; + const int d_filter_width = dilation_w * (filter_width - 1) + 1; + if (index < n) { T val = 0; - int w = int(index % width); - int h = int((index / width) % height); - int c = int(index / (width * height)); - if ((w - (int)padding_width) >= 0 && - (w - (int)padding_width) < (width - 2 * padding_width) && - (h - (int)padding_height) >= 0 && - (h - padding_height) < (height - 2 * padding_height)) { - // compute the start and end of the output - int w_col_start = (w < (int)filter_width) - ? 0 - : (w - int(filter_width)) / (int)stride_width + 1; - int w_col_end = - min((int)(w / (int)stride_width + 1), (int)(output_width)); - int h_col_start = (h < (int)filter_height) - ? 0 - : (h - (int)filter_height) / (int)stride_height + 1; - int h_col_end = min(int(h / stride_height + 1), int(output_height)); - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - // the col location: [c * width * height + h_out, w_out] - int c_col = int(c * filter_height * filter_width) + - (h - h_col * (int)stride_height) * (int)filter_width + - (w - w_col * (int)stride_width); - val += - data_col[(c_col * output_height + h_col) * output_width + w_col]; + int w = index % im_width; + int h = (index / im_width) % im_height; + int c = index / (im_width * im_height); + + // compute the start and end of the output + int w_col_start = + (w < d_filter_width) ? 0 : (w - d_filter_width) / stride_width + 1; + int w_col_end = min(w / stride_width + 1, col_width); + int h_col_start = + (h < d_filter_height) ? 0 : (h - d_filter_height) / stride_height + 1; + int h_col_end = min(h / stride_height + 1, col_height); + + for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { + for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { + int h_off = (h - h_col * stride_height); + int w_off = (w - w_col * stride_width); + if (h_off % dilation_h == 0 && w_off % dilation_w == 0) { + h_off /= dilation_h; + w_off /= dilation_w; + int data_col_index = + (((c * filter_height + h_off) * filter_width + w_off) * + col_height + + h_col) * + col_width + + w_col; + + val += data_col[data_col_index]; } } - h -= padding_height; - w -= padding_width; - data_im[c * ((width - 2 * padding_width) * - (height - 2 * padding_height)) + - h * (width - 2 * padding_width) + w] += val; } + data_im[index] = val; } } @@ -159,33 +162,38 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[1]; int filter_width = col.dims()[2]; - int output_height = col.dims()[3]; - int output_width = col.dims()[4]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); - - size_t num_kernels = input_channels * - (input_height + padding_up + padding_down) * - (input_width + padding_left + padding_right); + int col_height = col.dims()[3]; + int col_width = col.dims()[4]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); + + size_t num_kernels = im_channels * im_height * im_width; size_t blocks = (num_kernels + 1024 - 1) / 1024; size_t block_x = 512; @@ -198,10 +206,9 @@ class Col2ImFunctor<<(context) .stream()>>>( - num_kernels, col.data(), input_height + padding_up + padding_down, - input_width + padding_left + padding_left, input_channels, - filter_height, filter_width, stride_height, stride_width, padding_up, - padding_left, output_height, output_width, im.data()); + num_kernels, col.data(), im_height, im_width, dilation[0], + dilation[1], filter_height, filter_width, stride[0], stride[1], + padding[0], padding[2], col_height, col_width, im->data()); } }; @@ -215,33 +222,32 @@ template class Col2ImFunctor; template -__global__ void im2colOCF(const T* im_data, T* col_data, int input_channels, - int input_height, int input_width, int filter_height, - int filter_width, int stride_height, int stride_width, - int padding_height, int padding_width, - int output_height, int output_width) { +__global__ void im2colOCF(const T* im_data, int im_channels, int im_height, + int im_width, int filter_height, int filter_width, + int stride_height, int stride_width, + int padding_height, int padding_width, int col_height, + int col_width, T* col_data) { int swid = blockIdx.x; int shid = blockIdx.y; - for (int channelid = threadIdx.z; channelid < input_channels; + for (int channelid = threadIdx.z; channelid < im_channels; channelid += blockDim.z) { for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { int width_offset = idx + swid * stride_width - padding_width; int height_offset = idy + shid * stride_height - padding_height; - int im_offset = width_offset + height_offset * input_width + - channelid * input_height * input_width; + int im_offset = width_offset + height_offset * im_width + + channelid * im_height * im_width; int col_offset = idx + idy * filter_width + channelid * filter_height * filter_width + - (shid * output_width + swid) * - (input_channels * filter_height * filter_width); - - if (height_offset >= input_height || height_offset < 0 || - width_offset >= input_width || width_offset < 0) { - col_data[col_offset] = T(0); - } else { - col_data[col_offset] = im_data[im_offset]; - } + (shid * col_width + swid) * + (im_channels * filter_height * filter_width); + + col_data[col_offset] = + (height_offset >= im_height || height_offset < 0 || + width_offset >= im_width || width_offset < 0) + ? T(0) + : im_data[im_offset]; } } } @@ -257,27 +263,33 @@ class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right) { + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col) { PADDLE_ENFORCE(im.dims().size() == 3); - PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; - int filter_height = col.dims()[3]; - int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); + PADDLE_ENFORCE(col->dims().size() == 5); + int im_channels = im.dims()[0]; + int im_height = im.dims()[1]; + int im_width = im.dims()[2]; + int filter_height = col->dims()[3]; + int filter_width = col->dims()[4]; + int col_height = col->dims()[0]; + int col_width = col->dims()[1]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); int block_dim_x = 0; int block_dim_y = 0; @@ -296,42 +308,41 @@ class Im2ColFunctor<<(context) .stream()>>>( - im.data(), col.data(), input_channels, input_height, input_width, - filter_height, filter_width, stride_height, stride_width, padding_up, - padding_left, output_height, output_width); + im.data(), im_channels, im_height, im_width, filter_height, + filter_width, stride[0], stride[1], padding[0], padding[1], col_height, + col_width, col->data()); } }; template -__global__ void col2imOCF(T* im_data, const T* col_data, int input_channels, - int input_height, int input_width, int filter_height, - int filter_width, int stride_height, int stride_width, - int padding_height, int padding_width, - int output_height, int output_width) { +__global__ void col2imOCF(const T* col_data, int im_channels, int im_height, + int im_width, int filter_height, int filter_width, + int stride_height, int stride_width, + int padding_height, int padding_width, int col_height, + int col_width, T* im_data) { int swid = blockIdx.x; int shid = blockIdx.y; - for (int channelid = threadIdx.z; channelid < input_channels; + for (int channelid = threadIdx.z; channelid < im_channels; channelid += blockDim.z) { for (int idy = threadIdx.y; idy < filter_height; idy += blockDim.y) { for (int idx = threadIdx.x; idx < filter_width; idx += blockDim.x) { int width_offset = idx + swid * stride_width - padding_width; int height_offset = idy + shid * stride_height - padding_height; - int im_offset = width_offset + height_offset * input_width + - channelid * input_height * input_width; + int im_offset = width_offset + height_offset * im_width + + channelid * im_height * im_width; int col_offset = idx + idy * filter_width + channelid * filter_height * filter_width + - (shid * output_width + swid) * - (input_channels * filter_height * filter_width); + (shid * col_width + swid) * + (im_channels * filter_height * filter_width); - if (height_offset >= 0 && height_offset < input_height && - width_offset >= 0 && width_offset < input_width) { + if (height_offset >= 0 && height_offset < im_height && + width_offset >= 0 && width_offset < im_width) { paddle::platform::CudaAtomicAdd(im_data + im_offset, col_data[col_offset]); } @@ -349,28 +360,35 @@ template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right) { - PADDLE_ENFORCE(im.dims().size() == 3); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im) { + PADDLE_ENFORCE(im->dims().size() == 3); PADDLE_ENFORCE(col.dims().size() == 5); - int input_channels = im.dims()[0]; - int input_height = im.dims()[1]; - int input_width = im.dims()[2]; + int im_channels = im->dims()[0]; + int im_height = im->dims()[1]; + int im_width = im->dims()[2]; int filter_height = col.dims()[3]; int filter_width = col.dims()[4]; - int output_height = col.dims()[0]; - int output_width = col.dims()[1]; - - PADDLE_ENFORCE((input_height + padding_up + padding_down - filter_height) / - stride_height + - 1 == - output_height); - PADDLE_ENFORCE((input_width + padding_left + padding_right - filter_width) / - stride_width + - 1 == - output_width); + int col_height = col.dims()[0]; + int col_width = col.dims()[1]; + + PADDLE_ENFORCE_EQ((im_height + padding[0] + padding[2] - + (dilation[0] * (filter_height - 1) + 1)) / + stride[0] + + 1, + col_height, + "Output_height and padding(padding_up, padding_down) are " + "inconsistent."); + PADDLE_ENFORCE_EQ((im_width + padding[1] + padding[3] - + (dilation[1] * (filter_width - 1) + 1)) / + stride[1] + + 1, + col_width, + "col_width and padding(padding_left, padding_right) are " + "inconsistent."); int block_dim_x = 0; int block_dim_y = 0; @@ -389,15 +407,14 @@ class Col2ImFunctor<<(context) .stream()>>>( - im.data(), col.data(), input_channels, input_height, input_width, - filter_height, filter_width, stride_height, stride_width, padding_up, - padding_left, output_height, output_width); + col.data(), im_channels, im_height, im_width, filter_height, + filter_width, stride[0], stride[1], padding[0], padding[1], col_height, + col_width, im->data()); } }; diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index c736d4fa523c2af3e3dd7a11114d7f84021bc5c1..deb60051beef56437cf75f0fa2cef90bbc0a209a 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -35,6 +35,15 @@ enum class ColFormat { kCFO = 0, kOCF = 1 }; * \param colData Column data. * \param colShape The shape of colData. * + * \param dilations dilation data. + * \param 2-dimension [dilation_height, dilation_width]. + * + * \param strides stride data. + * \param 2-dimension [stride_height, stride_width]. + * + * \param paddings padding data. + * \param 4-dimension [up_pad, left_pad, down_pad, right_pad]. + * * If the template argument Format is kCFO, the shape of colData is: * [input_channels, filter_height, filter_width, output_height, output_width] * So, it is easy to reshape into a convolution matrix for convolution @@ -73,18 +82,19 @@ template class Im2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& im, framework::Tensor& col, - int stride_height, int stride_width, int padding_up, - int padding_down, int padding_left, int padding_right); + const framework::Tensor& im, const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* col); }; template class Col2ImFunctor { public: - void operator()(const platform::DeviceContext& context, framework::Tensor& im, - const framework::Tensor& col, int stride_height, - int stride_width, int padding_up, int padding_down, - int padding_left, int padding_right); + void operator()(const platform::DeviceContext& context, + const framework::Tensor& col, + const std::vector& dilation, + const std::vector& stride, + const std::vector& padding, framework::Tensor* im); }; } // namespace math diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 5763782c4edec87f44dabef2ccffe3097eeb2421..10c28da72ba9d3b94bb59c5cf00e7f5a2f28fd06 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -45,10 +45,14 @@ void testIm2col() { int input_height = 2; int input_width = 3; int filter_size = 2; - int stride = 1; - int padding = 0; - int output_height = (input_height - filter_size + 2 * padding) / stride + 1; - int output_width = (input_width - filter_size + 2 * padding) / stride + 1; + std::vector stride({1, 1}); // stride_y, stride_x + std::vector padding( + {0, 0, 0, 0}); // up_pad, left_pad, down_pad, right_pad + std::vector dilation({1, 1}); // dilation_y, dilation_x + int output_height = + (input_height - filter_size + padding[0] + padding[1]) / stride[0] + 1; + int output_width = + (input_width - filter_size + padding[2] + padding[3]) / stride[1] + 1; float* input_ptr = input_tmp.mutable_data( {1, input_height, input_width}, paddle::platform::CPUPlace()); float arr[6] = {0, 1, 2, 3, 4, 5}; @@ -85,10 +89,8 @@ void testIm2col() { paddle::operators::math::ColFormat::kOCF, Place, float> im2col_ocf; - im2col(*context, input, output_cfo, stride, stride, padding, padding, padding, - padding); - im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding, - padding, padding); + im2col(*context, input, dilation, stride, padding, &output_cfo); + im2col_ocf(*context, input, dilation, stride, padding, &output_ocf); float out_cfo_data[] = {0, 1, 1, 2, 3, 4, 4, 5}; float out_ocf_data[] = {0, 1, 3, 4, 1, 2, 4, 5}; @@ -131,8 +133,7 @@ void testIm2col() { input.CopyFrom(input_tmp, *place, *context); } - col2im(*context, input, output_cfo, stride, stride, padding, padding, padding, - padding); + col2im(*context, output_cfo, dilation, stride, padding, &input); float* in_ptr; if (paddle::platform::is_cpu_place(*place)) { @@ -153,8 +154,7 @@ void testIm2col() { input.CopyFrom(input_tmp, *place, *context); } - col2im_ocf(*context, input, output_ocf, stride, stride, padding, padding, - padding, padding); + col2im_ocf(*context, output_ocf, dilation, stride, padding, &input); if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); diff --git a/paddle/operators/math/vol2col.cc b/paddle/operators/math/vol2col.cc index e9718a047381596a1570b4b00546622968b70227..99eb7fd46de42400a915d86706580d15b08a74a2 100644 --- a/paddle/operators/math/vol2col.cc +++ b/paddle/operators/math/vol2col.cc @@ -28,28 +28,51 @@ template class Vol2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& vol, framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { + const framework::Tensor& vol, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* col) const { PADDLE_ENFORCE(vol.dims().size() == 4); - PADDLE_ENFORCE(col.dims().size() == 7); + PADDLE_ENFORCE(col->dims().size() == 7); int input_channels = vol.dims()[0]; int input_depth = vol.dims()[1]; int input_height = vol.dims()[2]; int input_width = vol.dims()[3]; - int filter_depth = col.dims()[1]; - int filter_height = col.dims()[2]; - int filter_width = col.dims()[3]; - int output_depth = col.dims()[4]; - int output_height = col.dims()[5]; - int output_width = col.dims()[6]; + int filter_depth = col->dims()[1]; + int filter_height = col->dims()[2]; + int filter_width = col->dims()[3]; + int output_depth = col->dims()[4]; + int output_height = col->dims()[5]; + int output_width = col->dims()[6]; int channels_col = input_channels * filter_depth * filter_height * filter_width; + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "mismatching."); + const T* vol_data = vol.data(); - T* col_data = col.data(); + T* col_data = col->data(); for (int c = 0; c < channels_col; ++c) { int w_offset = c % filter_width; @@ -57,24 +80,23 @@ class Vol2ColFunctor { int d_offset = (c / filter_width / filter_height) % filter_depth; int c_in = c / filter_width / filter_height / filter_depth; for (int d = 0; d < output_depth; ++d) { - int d_pad = d * stride_depth - padding_depth + d_offset; + int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0]; for (int h = 0; h < output_height; ++h) { - int h_pad = h * stride_height - padding_height + h_offset; + int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1]; for (int w = 0; w < output_width; ++w) { - int w_pad = w * stride_width - padding_width + w_offset; + int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2]; int col_idx = ((c * output_depth + d) * output_height + h) * output_width + w; - if (h_pad < 0 || h_pad >= input_height || w_pad < 0 || - w_pad >= input_width || d_pad < 0 || d_pad >= input_depth) { - col_data[col_idx] = static_cast(0); - } else { - int vol_idx = - ((c_in * input_depth + d_pad) * input_height + h_pad) * - input_width + - w_pad; - col_data[col_idx] = vol_data[vol_idx]; - } + int vol_idx = + ((c_in * input_depth + d_pad) * input_height + h_pad) * + input_width + + w_pad; + col_data[col_idx] = + (h_pad < 0 || h_pad >= input_height || w_pad < 0 || + w_pad >= input_width || d_pad < 0 || d_pad >= input_depth) + ? static_cast(0) + : vol_data[vol_idx]; } } } @@ -92,17 +114,18 @@ template class Col2VolFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& vol, const framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { - PADDLE_ENFORCE(vol.dims().size() == 4); + const framework::Tensor& col, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* vol) const { + PADDLE_ENFORCE(vol->dims().size() == 4); PADDLE_ENFORCE(col.dims().size() == 7); - int input_channels = vol.dims()[0]; - int input_depth = vol.dims()[1]; - int input_height = vol.dims()[2]; - int input_width = vol.dims()[3]; + int input_channels = vol->dims()[0]; + int input_depth = vol->dims()[1]; + int input_height = vol->dims()[2]; + int input_width = vol->dims()[3]; int filter_depth = col.dims()[1]; int filter_height = col.dims()[2]; int filter_width = col.dims()[3]; @@ -112,7 +135,28 @@ class Col2VolFunctor { int channels_col = input_channels * filter_depth * filter_height * filter_width; - T* vol_data = vol.data(); + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "mismatching."); + T* vol_data = vol->data(); const T* col_data = col.data(); for (int c = 0; c < channels_col; ++c) { @@ -121,11 +165,11 @@ class Col2VolFunctor { int d_offset = (c / filter_width / filter_height) % filter_depth; int cIm = c / filter_width / filter_height / filter_depth; for (int d = 0; d < output_depth; ++d) { - int d_pad = d * stride_depth - padding_depth + d_offset; + int d_pad = d * strides[0] - paddings[0] + d_offset * dilations[0]; for (int h = 0; h < output_height; ++h) { - int h_pad = h * stride_height - padding_height + h_offset; + int h_pad = h * strides[1] - paddings[1] + h_offset * dilations[1]; for (int w = 0; w < output_width; ++w) { - int w_pad = w * stride_width - padding_width + w_offset; + int w_pad = w * strides[2] - paddings[2] + w_offset * dilations[2]; if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 && w_pad < input_width && d_pad >= 0 && d_pad < input_depth) { @@ -133,6 +177,7 @@ class Col2VolFunctor { ((cIm * input_depth + d_pad) * input_height + h_pad) * input_width + w_pad; + int col_idx = ((c * output_depth + d) * output_height + h) * output_width + w; diff --git a/paddle/operators/math/vol2col.cu b/paddle/operators/math/vol2col.cu index 27b11fb237575fd25a789a5fcc24ed4e30607009..dae3be858e9f47d0133aa37e8a5f90a0addf1dfd 100644 --- a/paddle/operators/math/vol2col.cu +++ b/paddle/operators/math/vol2col.cu @@ -21,11 +21,12 @@ namespace math { template __global__ void vol2col(int num_kernels, const T* data_vol, int depth, - int height, int width, int filter_depth, - int filter_height, int filter_width, int stride_depth, - int stride_height, int stride_width, int padding_depth, - int padding_height, int padding_width, int output_detph, - int output_height, int output_width, T* data_col) { + int height, int width, int dilation_d, int dilation_h, + int dilation_w, int filter_depth, int filter_height, + int filter_width, int stride_depth, int stride_height, + int stride_width, int padding_depth, int padding_height, + int padding_width, int output_detph, int output_height, + int output_width, T* data_col) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; index += blockDim.x * gridDim.x) { int w_out = index % output_width; @@ -44,12 +45,14 @@ __global__ void vol2col(int num_kernels, const T* data_vol, int depth, for (int k = 0; k < filter_depth; ++k) { for (int i = 0; i < filter_height; ++i) { for (int j = 0; j < filter_width; ++j) { - int d = d_in + k; - int h = h_in + i; - int w = w_in + j; + int d = d_in + k * dilation_d; + int h = h_in + i * dilation_h; + int w = w_in + j * dilation_w; + int col_idx = (k * dilation_d * height + i * dilation_h) * width + + j * dilation_w; *data_col = (d >= 0 && d < depth && h >= 0 && h < height && w >= 0 && w < width) - ? data_vol[(k * height + i) * width + j] + ? data_vol[col_idx] : 0; data_col += output_detph * output_height * output_width; } @@ -68,23 +71,46 @@ template class Vol2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& vol, framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { + const framework::Tensor& vol, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* col) const { PADDLE_ENFORCE(vol.dims().size() == 4); - PADDLE_ENFORCE(col.dims().size() == 7); + PADDLE_ENFORCE(col->dims().size() == 7); int input_channels = vol.dims()[0]; int input_depth = vol.dims()[1]; int input_height = vol.dims()[2]; int input_width = vol.dims()[3]; - int filter_depth = col.dims()[1]; - int filter_height = col.dims()[2]; - int filter_width = col.dims()[3]; - int output_depth = col.dims()[4]; - int output_height = col.dims()[5]; - int output_width = col.dims()[6]; + int filter_depth = col->dims()[1]; + int filter_height = col->dims()[2]; + int filter_width = col->dims()[3]; + int output_depth = col->dims()[4]; + int output_height = col->dims()[5]; + int output_width = col->dims()[6]; + + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "Mismatching."); int num_outputs = input_channels * output_depth * output_height * output_width; @@ -95,19 +121,25 @@ class Vol2ColFunctor { reinterpret_cast(context) .stream()>>>( num_outputs, vol.data(), input_depth, input_height, input_width, - filter_depth, filter_height, filter_width, stride_depth, stride_height, - stride_width, padding_depth, padding_height, padding_width, - output_depth, output_height, output_width, col.data()); + dilations[0], dilations[1], dilations[2], filter_depth, filter_height, + filter_width, strides[0], strides[1], strides[2], paddings[0], + paddings[1], paddings[2], output_depth, output_height, output_width, + col->data()); } }; template __global__ void col2vol(int num_kernels, const T* data_col, int depth, - int height, int width, int filter_depth, - int filter_height, int filter_width, int stride_depth, - int stride_height, int stride_width, int padding_depth, - int padding_height, int padding_width, int output_detph, - int output_height, int output_width, T* data_vol) { + int height, int width, int dilation_d, int dilation_h, + int dilation_w, int filter_depth, int filter_height, + int filter_width, int stride_depth, int stride_height, + int stride_width, int padding_depth, int padding_height, + int padding_width, int output_detph, int output_height, + int output_width, T* data_vol) { + const int d_filter_depth = dilation_d * (filter_depth - 1) + 1; + const int d_filter_height = dilation_h * (filter_height - 1) + 1; + const int d_filter_width = dilation_w * (filter_width - 1) + 1; + for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels; index += blockDim.x * gridDim.x) { T src_val = 0; @@ -115,35 +147,41 @@ __global__ void col2vol(int num_kernels, const T* data_col, int depth, int h = (index / width) % height + padding_height; int d = (index / width / height) % depth + padding_depth; int c = index / width / height / depth; + // compute the start and end of the output int w_col_start = - (w < filter_width) ? 0 : (w - filter_width) / stride_width + 1; + (w < d_filter_width) ? 0 : (w - d_filter_width) / stride_width + 1; int w_col_end = min(w / stride_width + 1, output_width); int h_col_start = - (h < filter_height) ? 0 : (h - filter_height) / stride_height + 1; + (h < d_filter_height) ? 0 : (h - d_filter_height) / stride_height + 1; int h_col_end = min(h / stride_height + 1, output_height); int d_col_start = - (d < filter_depth) ? 0 : (d - filter_depth) / stride_depth + 1; + (d < d_filter_depth) ? 0 : (d - d_filter_depth) / stride_depth + 1; int d_col_end = min(d / stride_depth + 1, output_detph); - int offset = (c * filter_depth * filter_height * filter_width + - d * filter_width * filter_height + h * filter_width + w) * - output_detph * output_height * output_width; - - int coeff_d_col = - (1 - stride_depth * filter_width * filter_height * output_detph) * - output_height * output_width; - int coeff_h_col = - (1 - stride_height * filter_width * output_detph * output_height) * - output_width; - int coeff_w_col = - (1 - stride_width * output_detph * output_height * output_width); - for (int d_col = d_col_start; d_col < d_col_end; ++d_col) { for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - src_val += data_col[offset + d_col * coeff_d_col + - h_col * coeff_h_col + w_col * coeff_w_col]; + int d_off = (d - d_col * stride_depth); + int h_off = (h - h_col * stride_height); + int w_off = (w - w_col * stride_width); + if (d_off % dilation_d == 0 && h_off % dilation_h == 0 && + w_off % dilation_w == 0) { + d_off /= dilation_d; + h_off /= dilation_h; + w_off /= dilation_w; + + int data_col_index = + (((((c * filter_depth + d_off) * filter_height + h_off) * + filter_width + + w_off))); + data_col_index = + ((data_col_index * output_detph + d_col) * output_height + + h_col) * + output_width + + w_col; + src_val += data_col[data_col_index]; + } } } } @@ -161,17 +199,18 @@ template class Col2VolFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& vol, const framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const { - PADDLE_ENFORCE(vol.dims().size() == 4); + const framework::Tensor& col, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* vol) const { + PADDLE_ENFORCE(vol->dims().size() == 4); PADDLE_ENFORCE(col.dims().size() == 7); - int input_channels = vol.dims()[0]; - int input_depth = vol.dims()[1]; - int input_height = vol.dims()[2]; - int input_width = vol.dims()[3]; + int input_channels = vol->dims()[0]; + int input_depth = vol->dims()[1]; + int input_height = vol->dims()[2]; + int input_width = vol->dims()[3]; int filter_depth = col.dims()[1]; int filter_height = col.dims()[2]; int filter_width = col.dims()[3]; @@ -179,6 +218,28 @@ class Col2VolFunctor { int output_height = col.dims()[5]; int output_width = col.dims()[6]; + PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] - + ((dilations[0] * (filter_depth - 1) + 1))) / + strides[0] + + 1, + output_depth, + "input_depth and output_depth are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] - + ((dilations[1] * (filter_height - 1) + 1))) / + strides[1] + + 1, + output_height, + "input_height and output_height are " + "Mismatching."); + PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] - + ((dilations[2] * (filter_width - 1) + 1))) / + strides[2] + + 1, + output_width, + "input_width and output_width are " + "Mismatching."); + int num_kernels = input_channels * input_depth * input_height * input_width; const int threads = 1024; @@ -188,9 +249,10 @@ class Col2VolFunctor { reinterpret_cast(context) .stream()>>>( num_kernels, col.data(), input_depth, input_height, input_width, - filter_depth, filter_height, filter_width, stride_depth, stride_height, - stride_width, padding_depth, padding_height, padding_width, - output_depth, output_height, output_width, vol.data()); + dilations[0], dilations[1], dilations[2], filter_depth, filter_height, + filter_width, strides[0], strides[1], strides[2], paddings[0], + paddings[1], paddings[2], output_depth, output_height, output_width, + vol->data()); } }; diff --git a/paddle/operators/math/vol2col.h b/paddle/operators/math/vol2col.h index f022365a16fbf61981e94bedbd8b21a32887b235..cbc30bd754608dd6e6def1a4097d69bdf0c942c3 100644 --- a/paddle/operators/math/vol2col.h +++ b/paddle/operators/math/vol2col.h @@ -31,6 +31,15 @@ namespace math { * \param colData Column data. * \param colShape The shape of colData. * + * \param dilations dilation data. + * \param 3-dimension [dilation_depth, dilation_height, dilation_width]. + * + * \param strides stride data. + * \param 3-dimension [stride_depth, stride_height, stride_width]. + * + * \param paddings padding data. + * \param 3-dimension [d_pad, h_pad, w_pad]. + * * The shape of colData is: * [input_channels, filter_depth, filter_height, filter_width, output_depth, * output_height, output_width] @@ -57,20 +66,22 @@ template class Vol2ColFunctor { public: void operator()(const platform::DeviceContext& context, - const framework::Tensor& vol, framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const; + const framework::Tensor& vol, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* col) const; }; template class Col2VolFunctor { public: void operator()(const platform::DeviceContext& context, - framework::Tensor& vol, const framework::Tensor& col, - int stride_depth, int stride_height, int stride_width, - int padding_depth, int padding_height, - int padding_width) const; + const framework::Tensor& col, + const std::vector& dilations, + const std::vector& strides, + const std::vector& paddings, + framework::Tensor* vol) const; }; } // namespace math diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index 74590d17cd0f974f830e760d85daef8ab5318a43..c31c716842f30de67c29b803866b8c82ddcf4a41 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -62,11 +62,15 @@ void testVol2col() { int input_height = 2; int input_width = 3; int filter_size = 2; - int stride = 1; - int padding = 0; - int output_depth = (input_depth - filter_size + 2 * padding) / stride + 1; - int output_height = (input_height - filter_size + 2 * padding) / stride + 1; - int output_width = (input_width - filter_size + 2 * padding) / stride + 1; + std::vector strides({1, 1, 1}); + std::vector paddings({0, 0, 0}); + std::vector dilations({1, 1, 1}); + int output_depth = + (input_depth - filter_size + 2 * paddings[0]) / strides[0] + 1; + int output_height = + (input_height - filter_size + 2 * paddings[1]) / strides[1] + 1; + int output_width = + (input_width - filter_size + 2 * paddings[2]) / strides[2] + 1; // Vol2Col test float* input_ptr = @@ -85,8 +89,7 @@ void testVol2col() { *place); paddle::operators::math::Vol2ColFunctor vol2col; - vol2col(*context, input, output, stride, stride, stride, padding, padding, - padding); + vol2col(*context, input, dilations, strides, paddings, &output); float vol_2_col[] = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11}; float* out_cfo_ptr; @@ -111,8 +114,7 @@ void testVol2col() { } paddle::operators::math::Col2VolFunctor col2vol; - col2vol(*context, input, output, stride, stride, stride, padding, padding, - padding); + col2vol(*context, output, dilations, strides, paddings, &input); float* in_ptr; if (paddle::platform::is_cpu_place(*place)) { diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h index a57e1752bb8ed4844423f752bf0ad9f8e114486a..adee8d760e1d6bed852236ecee0951656c458901 100644 --- a/paddle/operators/sequence_conv_op.h +++ b/paddle/operators/sequence_conv_op.h @@ -62,9 +62,9 @@ class SequenceConvKernel : public framework::OpKernel { math::ContextProjectFunctor seq_project_functor; - seq_project_functor(context.device_context(), *in, *padding_data, col, + seq_project_functor(context.device_context(), *in, *padding_data, padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad); + context_stride, up_pad, down_pad, &col); math::matmul(context.device_context(), col, false, filter, false, static_cast(1.0), out, static_cast(0.0)); @@ -117,10 +117,10 @@ class SequenceConvGradKernel : public framework::OpKernel { in_g->set_lod(in->lod()); set_zero(context.device_context(), in_g, static_cast(0)); - seq_project_grad_functor(context.device_context(), *in_g, *padding_data_g, - col, padding_trainable, context_start, - context_length, context_stride, up_pad, down_pad, - true, false); + seq_project_grad_functor(context.device_context(), *in_g, + padding_trainable, context_start, context_length, + context_stride, up_pad, down_pad, false, true, + padding_data_g, &col); } if (padding_trainable && padding_data_g) { @@ -129,9 +129,9 @@ class SequenceConvGradKernel : public framework::OpKernel { LoDTensor* input = const_cast(in); seq_project_grad_functor(context.device_context(), *input, - *padding_data_g, col, padding_trainable, - context_start, context_length, context_stride, - up_pad, down_pad, false, true); + padding_trainable, context_start, context_length, + context_stride, up_pad, down_pad, true, false, + padding_data_g, &col); } if (filter_g) { @@ -146,9 +146,9 @@ class SequenceConvGradKernel : public framework::OpKernel { padding_data = context.Input("PaddingData"); } - seq_project_functor(context.device_context(), *in, *padding_data, col, + seq_project_functor(context.device_context(), *in, *padding_data, padding_trainable, context_start, context_length, - context_stride, up_pad, down_pad); + context_stride, up_pad, down_pad, &col); math::matmul(context.device_context(), col, true, out_grad, false, T(1.0), &filter_grad, T(1.0)); diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py index 04ae7f294c27fdceaaff2e9a7ed854213e643945..907b52c405d9e5c02c70f611e4c777ba21948c40 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py @@ -10,23 +10,33 @@ def conv2d_forward_naive(input, filter, group, conv_param): assert np.mod(out_c, group) == 0 sub_out_c = out_c / group - stride, pad = conv_param['stride'], conv_param['pad'] - out_h = 1 + (in_h + 2 * pad[0] - f_h) / stride[0] - out_w = 1 + (in_w + 2 * pad[1] - f_w) / stride[1] + stride, pad, dilation = conv_param['stride'], conv_param['pad'], conv_param[ + 'dilation'] + out_h = 1 + (in_h + 2 * pad[0] - (dilation[0] * (f_h - 1) + 1)) / stride[0] + out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) / stride[1] out = np.zeros((in_n, out_c, out_h, out_w)) + d_bolck_w = (dilation[0] * (f_h - 1) + 1) + d_bolck_h = (dilation[1] * (f_w - 1) + 1) + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], )), mode='constant', constant_values=0) + + filter_dilation = np.zeros((out_c, f_c, d_bolck_h, d_bolck_w)) + filter_dilation[:, :, 0:d_bolck_h:dilation[0], 0:d_bolck_w:dilation[ + 1]] = filter + for i in range(out_h): for j in range(out_w): for g in range(group): input_pad_masked = \ input_pad[:, g * f_c:(g + 1) * f_c, - i * stride[0]:i * stride[0] + f_h, - j * stride[1]:j * stride[1] + f_w] + i * stride[0]:i * stride[0] + d_bolck_h, + j * stride[1]:j * stride[1] + d_bolck_w] - f_sub = filter[g * sub_out_c:(g + 1) * sub_out_c, :, :, :] + f_sub = filter_dilation[g * sub_out_c:(g + 1) * + sub_out_c, :, :, :] for k in range(sub_out_c): out[:, g * sub_out_c + k, i, j] = \ np.sum(input_pad_masked * f_sub[k, :, :, :], @@ -39,9 +49,14 @@ class TestConv2dOp(OpTest): def setUp(self): self.init_op_type() self.init_group() + self.init_dilation() self.init_test_case() - conv2d_param = {'stride': self.stride, 'pad': self.pad} + conv2d_param = { + 'stride': self.stride, + 'pad': self.pad, + 'dilation': self.dilations + } input = np.random.random(self.input_size).astype("float32") filter = np.random.random(self.filter_size).astype("float32") output = conv2d_forward_naive(input, filter, self.groups, @@ -80,12 +95,14 @@ class TestConv2dOp(OpTest): def init_test_case(self): self.pad = [0, 0] self.stride = [1, 1] - self.dilations = [1, 1] self.input_size = [2, 3, 5, 5] # NCHW assert np.mod(self.input_size[1], self.groups) == 0 f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 3, 3] + def init_dilation(self): + self.dilations = [1, 1] + def init_group(self): self.groups = 1 @@ -101,24 +118,66 @@ class TestWithGroup(TestConv2dOp): self.op_type = "conv2d" -#----------------Conv2dCudnn---------------- +class TestWith1x1(TestConv2dOp): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 1, 1] + def init_dilation(self): + self.dilations = [1, 1] -class TestCudnn(TestConv2dOp): def init_group(self): - self.groups = 1 + self.groups = 3 def init_op_type(self): - self.op_type = "conv_cudnn" + self.op_type = "conv2d" -class TestCudnnWithGroup(TestConv2dOp): +class TestWithDilation(TestConv2dOp): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 10, 10] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_dilation(self): + self.dilations = [2, 2] + def init_group(self): self.groups = 3 + def init_op_type(self): + self.op_type = "conv2d" + + +#----------------Conv2dCudnn---------------- + + +class TestCudnn(TestConv2dOp): + def init_op_type(self): + self.op_type = "conv_cudnn" + + +class TestCudnnWithGroup(TestWithGroup): def init_op_type(self): self.op_type = "conv_cudnn" +class TestCudnnWith1x1(TestWith1x1): + def init_op_type(self): + self.op_type = "conv_cudnn" + + +# cudnn v5 does not support dilation conv. +# class TestCudnnWithDilation(TestWithDilation): +# def init_op_type(self): +# self.op_type = "conv_cudnn" + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py index 44c192f58d25f8ddaa38d2ba7c7c19b9a5bd7dc1..934ea46437d67b78309a86a2779e0c6577399136 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_op.py @@ -10,27 +10,40 @@ def conv3d_forward_naive(input, filter, group, conv_param): assert np.mod(out_c, group) == 0 sub_out_c = out_c / group - stride, pad = conv_param['stride'], conv_param['pad'] - out_d = 1 + (in_d + 2 * pad[0] - f_h) / stride[0] - out_h = 1 + (in_h + 2 * pad[1] - f_h) / stride[1] - out_w = 1 + (in_w + 2 * pad[2] - f_w) / stride[2] + stride, pad, dilation = conv_param['stride'], conv_param['pad'], conv_param[ + 'dilations'] + + out_d = 1 + (in_d + 2 * pad[0] - (dilation[0] * (f_d - 1) + 1)) / stride[0] + out_h = 1 + (in_h + 2 * pad[1] - (dilation[1] * (f_h - 1) + 1)) / stride[1] + out_w = 1 + (in_w + 2 * pad[2] - (dilation[2] * (f_w - 1) + 1)) / stride[2] + out = np.zeros((in_n, out_c, out_d, out_h, out_w)) + d_bolck_d = (dilation[0] * (f_d - 1) + 1) + d_bolck_h = (dilation[1] * (f_h - 1) + 1) + d_bolck_w = (dilation[2] * (f_w - 1) + 1) + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], ), (pad[2], )), mode='constant', constant_values=0) + + filter_dilation = np.zeros((out_c, f_c, d_bolck_d, d_bolck_h, d_bolck_w)) + filter_dilation[:, :, 0:d_bolck_d:dilation[0], 0:d_bolck_h:dilation[1], 0: + d_bolck_w:dilation[2]] = filter + for d in range(out_d): for i in range(out_h): for j in range(out_w): for g in range(group): input_pad_masked = \ input_pad[:, g * f_c:(g + 1) * f_c, - d * stride[0]:d * stride[0] + f_d, - i * stride[1]:i * stride[1] + f_h, - j * stride[2]:j * stride[2] + f_w] - f_sub = filter[g * sub_out_c:(g + 1) * - sub_out_c, :, :, :, :] + d * stride[0]:d * stride[0] + d_bolck_d, + i * stride[1]:i * stride[1] + d_bolck_h, + j * stride[2]:j * stride[2] + d_bolck_w] + + f_sub = filter_dilation[g * sub_out_c:(g + 1) * + sub_out_c, :, :, :, :] for k in range(sub_out_c): out[:, g * sub_out_c + k, d, i, j] = \ np.sum(input_pad_masked * f_sub[k, :, :, :, :], @@ -43,9 +56,14 @@ class TestConv3dOp(OpTest): def setUp(self): self.init_group() self.init_op_type() + self.init_dilation() self.init_test_case() - conv3d_param = {'stride': self.stride, 'pad': self.pad} + conv3d_param = { + 'stride': self.stride, + 'pad': self.pad, + 'dilations': self.dilations + } input = np.random.random(self.input_size).astype("float32") filter = np.random.random(self.filter_size).astype("float32") output = conv3d_forward_naive(input, filter, self.groups, @@ -55,7 +73,8 @@ class TestConv3dOp(OpTest): self.attrs = { 'strides': self.stride, 'paddings': self.pad, - 'groups': self.groups + 'groups': self.groups, + 'dilations': self.dilations } self.outputs = {'Output': output} @@ -88,6 +107,9 @@ class TestConv3dOp(OpTest): f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 3, 3, 3] + def init_dilation(self): + self.dilations = [1, 1, 1] + def init_group(self): self.groups = 1 @@ -104,27 +126,47 @@ class TestCase1(TestConv3dOp): f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 3, 3, 3] - def init_group(self): - self.groups = 1 - def init_op_type(self): - self.op_type = "conv3d" +class TestWithGroup1(TestConv3dOp): + def init_group(self): + self.groups = 3 -class TestWithGroup1(TestConv3dOp): +class TestWithGroup2(TestCase1): def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv3d" +class TestWith1x1(TestConv3dOp): + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 4, 4, 4] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 1, 1, 1] + + def init_dilation(self): + self.dilations = [1, 1, 1] -class TestWithGroup2(TestCase1): def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv3d" + +class TestWithDilation(TestConv3dOp): + def init_test_case(self): + self.pad = [0, 0, 0] + self.stride = [1, 1, 1] + self.input_size = [2, 3, 6, 6, 6] # NCDHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 2, 2, 2] + + def init_dilation(self): + self.dilations = [2, 2, 2] + + def init_group(self): + self.groups = 3 if __name__ == '__main__':