diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 6745a8da17723d663913a29f28e5ea9eedc0372a..fa2f8caacf06addb041af8ee6def82c2b822e00b 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -155,7 +155,8 @@ op_library(parallel_do_op DEPS executor) # Regist multiple Kernel to pybind if (WITH_GPU) -op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col) +op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS + vol2col depthwise_conv) op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling) op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc conv_transpose_cudnn_op.cu.cc DEPS vol2col) diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index 55a78efea1b14b15c537d2bd2c45c09e4a874f88..a53b11615c4ad7e3635bf0372503f3a9f132d08b 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -318,15 +318,20 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( namespace ops = paddle::operators; REGISTER_OP(conv2d, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad, ops::ConvOpGrad); -REGISTER_OP(depthwiseConv, ops::ConvOp, ops::Conv2DOpMaker, conv2d_grad, +REGISTER_OP(depthwiseConv, ops::ConvOp, ops::Conv2DOpMaker, depthwiseConv_grad, ops::ConvOpGrad); REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad, ops::ConvOpGrad); REGISTER_OP_CPU_KERNEL( depthwiseConv, - ops::DepthwiseConvKernel, - ops::DepthwiseConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); + +REGISTER_OP_CPU_KERNEL( + depthwiseConv_grad, + ops::GemmConvGradKernel, + ops::GemmConvGradKernel); REGISTER_OP_CPU_KERNEL( conv2d, ops::GemmConvKernel, diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index ca61f1c6e65ba4222172202b66e90b08ce65e83b..a9138dbf936f2aadd44abcd01f68f374e4d81260 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -364,18 +364,15 @@ class DepthwiseConvKernel : public framework::OpKernel { Tensor* output = context.Output("Output"); output->mutable_data(context.GetPlace()); + std::vector ksize = context.Attr>("ksize"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); std::vector dilations = context.Attr>("dilations"); - framework::DDim filter_matrix_shape = {filter.dims()[0], - filter.numel() / filter.dims()[0]}; - filter.Resize(filter_matrix_shape); - math::DepthwiseConvFunctor depthwiseConv; auto& dev_ctx = context.template device_context(); - depthwiseConv(dev_ctx, input, filter, filter_shape_vec, strides, paddings, + depthwiseConv(dev_ctx, *input, filter, ksize, strides, paddings, output); } }; diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index c607704efac86982c8c22e462381aaab488a9b69..6fb15312368cd33a55ecf497a432594171c5bcd4 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -8,6 +8,7 @@ if(WITH_GPU) nv_library(softmax SRCS softmax.cc softmax.cu DEPS device_context) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context) nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) + nv_library(depthwise_conv SRCS depthwise_conv.cu DEPS device_context) nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function) nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor) nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function) diff --git a/paddle/operators/math/depthwise_conv.cu b/paddle/operators/math/depthwise_conv.cu index 16a0037ab1994c67e08265ef14915c36db46eda7..aee052d379e3962bc7bf5571d07539b954e01143 100644 --- a/paddle/operators/math/depthwise_conv.cu +++ b/paddle/operators/math/depthwise_conv.cu @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/math/pooling.h" +#include "paddle/operators/math/depthwise_conv.h" #include "paddle/platform/cuda_helper.h" namespace paddle { @@ -195,7 +195,7 @@ __global__ void KernelDepthwiseConvFilterGrad(const int num_i, * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template +template class DepthwiseConvFunctor { public: void operator()(const platform::CUDADeviceContext& context, @@ -226,7 +226,7 @@ class DepthwiseConvFunctor { dim3 threads(1024, 1); dim3 grid(blocks, 1); - KernelDepthwiseConv<<>>( + KernelDepthwiseConv<<>>( nthreads, input_data, filter_data, batch_size, output_channels, output_height, output_width, input_channels, input_height, input_width, output_channels / input_channels, ksize_height, ksize_width, @@ -236,7 +236,6 @@ class DepthwiseConvFunctor { }; /* - template class DepthwiseConvInputGradFunctor { @@ -254,8 +253,7 @@ class DepthwiseConvInputGradFunctor const int output_height = output.dims()[2]; const int output_width = output.dims()[3]; const int ksize_height = ksize[0]; - const int ksize_width = ksize[1]; - const int stride_height = strides[0]; + const int ksize_width = ksize[1]; const int stride_height = strides[0]; const int stride_width = strides[1]; const int padding_height = paddings[0]; const int padding_width = paddings[1]; @@ -321,24 +319,20 @@ class DepthwiseConvdFilterGradFunctor { */ template class DepthwiseConvFunctor, float>; +template class DepthwiseConvFunctor; /* template class DepthwiseConvInputGradFunctor, float>; template class DepthwiseConvFilterGradFunctor, float>; template class DepthwiseConvFunctor, double>; template class DepthwiseConvInputGradFunctor, double>; template class DepthwiseConvFilterGradFunctor, double>; */