diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py index bc893bab98c4d2e07c62fbd012d51a0939db4766..a88ecac67d9e677f14f6dc24ba9a337b1245243f 100644 --- a/benchmark/paddle/image/googlenet.py +++ b/benchmark/paddle/image/googlenet.py @@ -5,6 +5,7 @@ height = 224 width = 224 num_class = 1000 batch_size = get_config_arg('batch_size', int, 128) +use_gpu = get_config_arg('use_gpu', bool, True) args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} define_py_data_sources2( @@ -16,6 +17,8 @@ settings( learning_method=MomentumOptimizer(0.9), regularization=L2Regularization(0.0005 * batch_size)) +conv_projection = conv_projection if use_gpu else img_conv_layer + def inception2(name, input, channels, \ filter1, filter3R, filter3, @@ -138,7 +141,7 @@ def inception(name, input, channels, \ cat = concat_layer( name=name, input=[cov1, cov3, cov5, covprj], - bias_attr=True, + bias_attr=True if use_gpu else False, act=ReluActivation()) return cat diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh index 3cc779b48d082985f75ab1c053fbe262bc6d58aa..f768f6c29a84b40f917e0ccfde4d8c15f65c818b 100755 --- a/benchmark/paddle/image/run_mkldnn.sh +++ b/benchmark/paddle/image/run_mkldnn.sh @@ -40,6 +40,7 @@ fi for use_mkldnn in True False; do for batchsize in 64 128 256; do train vgg 19 $batchsize $use_mkldnn - train resnet 50 $batchsize $use_mkldnn + train resnet 50 $batchsize $use_mkldnn + train googlenet v1 $batchsize $use_mkldnn done done diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 8b7b2e9b65898950e036ebc023cd28990cef303f..f5a41b66bf09a4abc5ae7b64f227ca52461408f5 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -212,6 +212,37 @@ Error __must_check backward(Argument& act) { } END_DEFINE_ACTIVATION(sequence_softmax) +/* + * @brief SoftSign Activation. + * \f[ + * f(z) = \frac{z}{1 + |z|} + * \f] + */ +BEGIN_DEFINE_ACTIVATION(softsign) +private: +MatrixPtr denominator_; + +Error __must_check forward(Argument& act) { + size_t height = act.value->getHeight(); + size_t width = act.value->getWidth(); + Matrix::resizeOrCreate( + denominator_, height, width, false, useGpu(act.deviceId)); + denominator_->assign(*act.value); + denominator_->abs2(); + denominator_->add(1.); + + act.value->dotDiv(*act.value, *denominator_); + return Error(); +} + +Error __must_check backward(Argument& act) { + denominator_->square2(); + denominator_->scalarDiv(*denominator_, 1.); + act.grad->dotMul(*act.grad, *denominator_); + return Error(); +} +END_DEFINE_ACTIVATION(softsign) + /** * @brief Relu Activation. * forward. 
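The softsign block above computes f(z) = z/(1+|z|) in `forward` and multiplies the incoming gradient by 1/(1+|z|)^2 in `backward`, which is what the `square2()` followed by `scalarDiv(..., 1.)` achieves on the saved `denominator_`. A minimal standalone check of that math in plain C++, independent of Paddle's Matrix API:

```cpp
// Illustrative check of the softsign activation defined above:
//   forward:  y  = z / (1 + |z|)
//   backward: dz = dy / (1 + |z|)^2
#include <cmath>
#include <cstdio>

int main() {
  const double z = -2.5, dy = 1.0;
  const double denom = 1.0 + std::fabs(z);  // denominator_ after abs2() + add(1.)
  const double y = z / denom;               // dotDiv in forward()
  const double dz = dy / (denom * denom);   // square2() + scalarDiv(., 1.) + dotMul
  std::printf("y = %f, dz = %f\n", y, dz);  // y = -0.714286, dz = 0.081633
  return 0;
}
```

Caching `denominator_` in the forward pass saves recomputing 1 + |z| during backward.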
y = max(0, z) diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 4c65b60d2349d2989128f4b1da705ea18391b8a3..c03dc3e4fb07ac6ecde42be93a1138d91778edf4 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -40,7 +40,8 @@ REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad, ops::ConvOpGrad); REGISTER_OP_CPU_KERNEL(conv_cudnn, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv_cudnn_grad, - ops::GemmConvGradKernel); + conv_cudnn_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc index 4900f7b086c869b496c492743c71ab7047c5f672..5eaf6b33704eb371fff4b949c6cc32a7a5dbc812 100644 --- a/paddle/operators/conv_cudnn_op.cu.cc +++ b/paddle/operators/conv_cudnn_op.cu.cc @@ -259,6 +259,8 @@ class CudnnConvGradOpKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel); +REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel, + paddle::operators::CudnnConvOpKernel); REGISTER_OP_GPU_KERNEL(conv_cudnn_grad, - paddle::operators::CudnnConvGradOpKernel); + paddle::operators::CudnnConvGradOpKernel, + paddle::operators::CudnnConvGradOpKernel); diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc index dbd1bc3c3bc2d026f13ddcf62919db6cf7d87bc5..0192178ce3a0a47196232f0723baec8324bea60b 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -61,10 +61,12 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp, REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad, @@ -72,7 +74,9 @@ REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, REGISTER_OP_CPU_KERNEL( conv3d_transpose_cudnn, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv3d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc index e2ba77086e737a07471f14e483cbd32ab1d4ee12..494904fe524ae30a5032e489a0c5f20179d8e8ce 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cu.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc @@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel); + ops::CudnnConvTransposeOpKernel, + ops::CudnnConvTransposeOpKernel); REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad, - ops::CudnnConvTransposeGradOpKernel); + ops::CudnnConvTransposeGradOpKernel, + ops::CudnnConvTransposeGradOpKernel); REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel); + ops::CudnnConvTransposeOpKernel, + ops::CudnnConvTransposeOpKernel); REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad, 
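Note that extraction stripped the template argument lists out of the `REGISTER_OP_*_KERNEL` hunks above, which is why each macro appears to list the same kernel twice; judging from the rest of the patch, the paired entries presumably differ only in a `float` versus `double` template argument. A self-contained analogue of registering one kernel template at two precisions (the registry type and names here are illustrative, not Paddle's actual API):

```cpp
// Minimal analogue of the pattern the REGISTER_OP_*_KERNEL hunks follow:
// the same kernel template is registered once per element type.
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <typeindex>
#include <utility>

using KernelFn = std::function<void()>;
static std::map<std::pair<std::string, std::type_index>, KernelFn> g_registry;

template <typename T>
void GemmConvKernel() {
  std::cout << "conv kernel, element size " << sizeof(T) << "\n";
}

template <typename T>
void Register(const std::string& op) {
  g_registry[{op, std::type_index(typeid(T))}] = &GemmConvKernel<T>;
}

int main() {
  Register<float>("conv_cudnn");   // the only registration before this patch
  Register<double>("conv_cudnn");  // the patch adds the double instantiation
  g_registry[{"conv_cudnn", std::type_index(typeid(double))}]();
  return 0;
}
```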
- ops::CudnnConvTransposeGradOpKernel); + ops::CudnnConvTransposeGradOpKernel, + ops::CudnnConvTransposeGradOpKernel); diff --git a/paddle/operators/math/pooling.cc b/paddle/operators/math/pooling.cc index ead89e146f32ef005b06f4f6f04224d691805d74..135984586a67f666425f81456148c3623ed7ef25 100644 --- a/paddle/operators/math/pooling.cc +++ b/paddle/operators/math/pooling.cc @@ -498,8 +498,8 @@ template class Pool3dGradFunctor< * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexFunctor { +template +class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor { const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor { int wend = std::min(wstart + ksize_width, input_width); wstart = std::max(wstart, 0); - T ele = static_cast(-FLT_MAX); + T1 ele = static_cast(-FLT_MAX); int index = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor { * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexGradFunctor { +template +class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor { const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor { } }; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; /* * All tensors are in NCDHW format. * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. 
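The pooling.cc refactor above splits the single template parameter `T` into `T1` for the pooled data and `T2` for the mask, so the recorded argmax indices no longer have to share the element type of the values. A minimal sketch of that two-parameter functor shape (names illustrative):

```cpp
// Sketch of the two-parameter functor introduced above: T1 is the element
// type of input/output, T2 the element type of the index mask.
#include <cstddef>
#include <iostream>
#include <vector>

template <typename T1, typename T2>
struct MaxWithIndex {
  void operator()(const std::vector<T1>& in, T1* out, T2* mask) const {
    T1 best = in[0];
    T2 best_idx = 0;
    for (std::size_t i = 1; i < in.size(); ++i) {
      if (in[i] > best) { best = in[i]; best_idx = static_cast<T2>(i); }
    }
    *out = best;
    *mask = best_idx;  // index stored in its own type, e.g. float data + int mask
  }
};

int main() {
  std::vector<float> window = {0.5f, 2.0f, 1.5f};
  float out;
  int mask;
  MaxWithIndex<float, int>()(window, &out, &mask);
  std::cout << out << " at " << mask << "\n";  // 2 at 1
  return 0;
}
```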
*/ -template -class MaxPool3dWithIndexFunctor { +template +class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor { const int input_stride = input_depth * input_height * input_width; const int output_stride = output_depth * output_height * output_width; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor { wstart = std::max(wstart, 0); int output_idx = (pd * output_height + ph) * output_width + pw; - T ele = static_cast(-FLT_MAX); + T1 ele = static_cast(-FLT_MAX); int index = -1; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { @@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor { * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexGradFunctor { +template +class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor { const int input_stride = input_depth * input_height * input_width; const int output_stride = output_depth * output_height * output_width; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor { } }; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/pooling.cu b/paddle/operators/math/pooling.cu index 6d1138ad50cb095e85b4ceb44fa81731316f10dd..ca3560f264b59057fd655084f3d43adc617c6606 100644 --- a/paddle/operators/math/pooling.cu +++ b/paddle/operators/math/pooling.cu @@ -658,13 +658,13 @@ template class Pool3dGradFunctor< template class Pool3dGradFunctor< platform::GPUPlace, paddle::operators::math::AvgPoolGrad, double>; -template +template __global__ void KernelMaxPool2dWithIdx( - const int nthreads, const T* input_data, const int channels, + const int nthreads, const T1* input_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, const int padding_height, - const int padding_width, T* output_data, T* mask_data) { + const int padding_width, T1* output_data, T2* mask_data) { for (int index = blockIdx.x * blockDim.x + 
threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx( wstart = max(wstart, 0); input_data += (batch_idx * channels + c) * input_height * input_width; - T ele = -FLT_MAX; + T1 ele = -FLT_MAX; int max_index = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx( } } -template +template __global__ void KernelMaxPool2DWithIdxGrad( - const int nthreads, const T* output_grad, const T* mask_data, + const int nthreads, const T1* output_grad, const T2* mask_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, - const int padding_height, const int padding_width, T* input_grad) { + const int padding_height, const int padding_width, T1* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad( int pw_end = min((w_offset + padding_width) / stride_width + 1, output_width); - T gradient = 0; + T1 gradient = 0; int input_current_featuremap_idx = h_offset * input_width + w_offset; int output_idx = (batch_idx * channels + c_offset) * output_height * output_width; @@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad( * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexFunctor { +template +class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor { const int padding_height = paddings[0]; const int padding_width = paddings[1]; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_height * output_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor { dim3 grid(blocks, 1); KernelMaxPool2dWithIdx< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, input_data, input_channels, input_height, input_width, output_height, output_width, ksize_height, ksize_width, stride_height, stride_width, padding_height, padding_width, output_data, mask_data); @@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor { * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. 
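Both GPU functors above size the grid with `(nthreads + 1024 - 1) / 1024` blocks of 1024 threads, the standard integer ceiling-division idiom; a quick check:

```cpp
// The launch configuration above uses integer ceiling division so that
// every element gets a thread: blocks = ceil(nthreads / 1024).
#include <cassert>

constexpr int CeilDiv(int a, int b) { return (a + b - 1) / b; }

int main() {
  assert(CeilDiv(1024, 1024) == 1);  // exact fit
  assert(CeilDiv(1025, 1024) == 2);  // one extra block for the tail
  assert(CeilDiv(1, 1024) == 1);     // never zero blocks for nonzero work
  return 0;
}
```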
*/ -template -class MaxPool2dWithIndexGradFunctor { +template +class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor { const int padding_height = paddings[0]; const int padding_width = paddings[1]; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor { dim3 grid(blocks, 1); KernelMaxPool2DWithIdxGrad< - T><<(context) - .stream()>>>(nthreads, output_grad_data, mask_data, - input_channels, input_height, input_width, - output_height, output_width, ksize_height, - ksize_width, stride_height, stride_width, - padding_height, padding_width, input_grad_data); + T1, T2><<(context) + .stream()>>>( + nthreads, output_grad_data, mask_data, input_channels, input_height, + input_width, output_height, output_width, ksize_height, ksize_width, + stride_height, stride_width, padding_height, padding_width, + input_grad_data); } }; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; -template +template __global__ void KernelMaxPool3DWithIdx( - const int nthreads, const T* input_data, const int channels, + const int nthreads, const T1* input_data, const int channels, const int input_depth, const int input_height, const int input_width, const int output_depth, const int output_height, const int output_width, const int ksize_depth, const int ksize_height, const int ksize_width, const int stride_depth, const int stride_height, const int stride_width, const int padding_depth, const int padding_height, const int padding_width, - T* output_data, T* mask_data) { + T1* output_data, T2* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx( hstart = max(hstart, 0); wstart = max(wstart, 0); - T ele = -FLT_MAX; + T1 ele = -FLT_MAX; int max_index = -1; input_data += (batch_idx * channels + c) * input_depth * input_height * input_width; @@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx( } } -template +template __global__ void KernelMaxPool3DWithIdxGrad( - const int nthreads, const T* output_grad, const T* mask, const int channels, - const int input_depth, const int input_height, const int input_width, - const int output_depth, const int output_height, const int output_width, - const int ksize_depth, const int ksize_height, const int ksize_width, - const int stride_depth, const int stride_height, const int stride_width, - const int padding_depth, const int padding_height, const int padding_width, - T* input_grad) { + const int nthreads, const T1* output_grad, const T2* mask, + const int channels, const int input_depth, const int input_height, + const int input_width, const int output_depth, 
const int output_height, + const int output_width, const int ksize_depth, const int ksize_height, + const int ksize_width, const int stride_depth, const int stride_height, + const int stride_width, const int padding_depth, const int padding_height, + const int padding_width, T1* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad( int pw_end = min((w_offset + padding_width) / stride_width + 1, output_width); - T gradient = 0; + T1 gradient = 0; int input_current_feature_map_idx = (d_offset * input_height + h_offset) * input_width + w_offset; int output_idx = (batch_idx * channels + c_offset) * output_depth * @@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad( * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexFunctor { +template +class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor { const int padding_height = paddings[1]; const int padding_width = paddings[2]; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_depth * output_height * output_width; @@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor { dim3 grid(blocks, 1); KernelMaxPool3DWithIdx< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, input_data, input_channels, input_depth, input_height, input_width, output_depth, output_height, output_width, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, stride_width, @@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor { * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. 
*/ -template -class MaxPool3dWithIndexGradFunctor { +template +class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor { const int padding_height = paddings[1]; const int padding_width = paddings[2]; - const T* output_grad_data = output_grad.data(); - const T* mask_data = mask.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T1* output_grad_data = output_grad.data(); + const T2* mask_data = mask.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_depth * input_height * input_width; @@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor { dim3 grid(blocks, 1); KernelMaxPool3DWithIdxGrad< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, output_grad_data, mask_data, input_channels, input_depth, input_height, input_width, output_depth, output_height, output_width, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, @@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor { } }; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/pooling.h b/paddle/operators/math/pooling.h index f6719e1e628cdd2cf7445ec9cd05713bc4f14c84..19fbd8b4bb2469d3ce8a139ce30a48641dbd6e0f 100644 --- a/paddle/operators/math/pooling.h +++ b/paddle/operators/math/pooling.h @@ -153,7 +153,7 @@ class MaxPool3dGradFunctor { * In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in * NCDHW format. 
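Across all of these functors the mask stores, for every output element, the flat index of the maximum inside the input feature map, and the gradient functors route `output_grad` back through those indices. A 1-D simplification of that forward/backward pair (the real functors walk 2-D/3-D windows with strides and paddings):

```cpp
// 1-D sketch of the mask semantics used by the functors above: forward
// records the argmax position, backward routes the gradient through it.
#include <cstddef>
#include <iostream>
#include <vector>

void MaxPoolWithIndexFwd(const std::vector<float>& in, int ksize,
                         std::vector<float>* out, std::vector<int>* mask) {
  for (std::size_t start = 0; start + ksize <= in.size(); start += ksize) {
    int best = static_cast<int>(start);
    for (int k = 1; k < ksize; ++k)
      if (in[start + k] > in[best]) best = static_cast<int>(start + k);
    out->push_back(in[best]);
    mask->push_back(best);  // flat index into the input feature map
  }
}

void MaxPoolWithIndexBwd(const std::vector<float>& out_grad,
                         const std::vector<int>& mask,
                         std::vector<float>* in_grad) {
  for (std::size_t i = 0; i < out_grad.size(); ++i)
    (*in_grad)[mask[i]] += out_grad[i];  // scatter through the recorded argmax
}

int main() {
  std::vector<float> x = {1, 4, 2, 3, 9, 5};
  std::vector<float> y;
  std::vector<int> m;
  MaxPoolWithIndexFwd(x, 3, &y, &m);        // y = {4, 9}, m = {1, 4}
  std::vector<float> dx(x.size(), 0.f);
  MaxPoolWithIndexBwd({1.f, 1.f}, m, &dx);  // dx[1] = dx[4] = 1
  std::cout << y[0] << " " << y[1] << " " << dx[1] << " " << dx[4] << "\n";
  return 0;
}
```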
*/ -template +template class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, @@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor { framework::Tensor* output, framework::Tensor* mask); }; -template +template class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, @@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor { framework::Tensor* input_grad); }; -template +template class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, @@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor { framework::Tensor* output, framework::Tensor* mask); }; -template +template class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, diff --git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc index f962d9e3e6abde14ce21eb0102f10d139fdb160e..be9fcc5661f420aadf908cf80cce6c963008b0e4 100644 --- a/paddle/operators/pool_cudnn_op.cc +++ b/paddle/operators/pool_cudnn_op.cc @@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool2d_cudnn, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad, - ops::PoolGradKernel) + ops::PoolGradKernel, + ops::PoolGradKernel) + +REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad, + ops::PoolOpGrad); + +REGISTER_OP_CPU_KERNEL(pool3d_cudnn, + ops::PoolKernel, + ops::PoolKernel); +REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad, + ops::PoolGradKernel, + ops::PoolGradKernel) diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc index f9d8af3e1c5db49873979fdfeb17a32d16341a1a..66dd194ccd5ed629c5861552a7c124dc911362d7 100644 --- a/paddle/operators/pool_cudnn_op.cu.cc +++ b/paddle/operators/pool_cudnn_op.cu.cc @@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; ScopedPoolingDescriptor pool_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); @@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; ScopedPoolingDescriptor pool_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); @@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel); -REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel, + ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel, + ops::PoolCudnnGradOpKernel); + +REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel, + ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel, + ops::PoolCudnnGradOpKernel); diff --git a/paddle/operators/pool_op.cc 
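The pool_cudnn_op.cu.cc change above derives the cuDNN tensor layout from the number of stride values instead of hard-coding NCHW, which is what lets the new pool3d_cudnn registrations reuse the same kernel classes; condensed:

```cpp
// The cuDNN pooling kernels above infer the layout from the stride rank:
// two strides -> 4-D NCHW (pool2d), three strides -> 5-D NCDHW (pool3d).
#include <iostream>
#include <vector>

enum class DataLayout { kNCHW, kNCDHW };

DataLayout LayoutFromStrides(const std::vector<int>& strides) {
  return strides.size() == 2U ? DataLayout::kNCHW : DataLayout::kNCDHW;
}

int main() {
  std::cout << (LayoutFromStrides({1, 1}) == DataLayout::kNCHW) << "\n";      // 1: pool2d
  std::cout << (LayoutFromStrides({1, 1, 1}) == DataLayout::kNCDHW) << "\n";  // 1: pool3d
  return 0;
}
```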
b/paddle/operators/pool_op.cc index f3963b1995ef8767786f0bf230b134afc69aa99d..d8c58618cf703d086d3cabc927ebc5eb038b1aec 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool2d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool2d_grad, - ops::PoolGradKernel) + ops::PoolGradKernel, + ops::PoolGradKernel) REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool3d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool3d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); diff --git a/paddle/operators/pool_op.cu.cc b/paddle/operators/pool_op.cu.cc index 0e3b80868f7b9d1697d619889160856d65ad59a3..1010cb762289dd39cd632c699f7528f4ba638278 100644 --- a/paddle/operators/pool_op.cu.cc +++ b/paddle/operators/pool_op.cu.cc @@ -17,11 +17,15 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(pool2d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_GPU_KERNEL(pool2d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); REGISTER_OP_GPU_KERNEL(pool3d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_GPU_KERNEL(pool3d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 1df36e965abab3549aeb88bf682b712033c4d79c..4958fa645405db0798f37165030eae95da371477 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), - "X(Input) of Pooling should not be null."); + "Input(X) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Out(Output) of Pooling should not be null."); + "Output(Out) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Mask"), - "Mask(Output) of Pooling should not be null."); + "Output(Mask) of Pooling should not be null."); auto in_x_dims = ctx->GetInputDim("X"); @@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Mask", framework::make_ddim(output_shape)); } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } }; class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { @@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { "Input(X@GRAD) should not be null."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } }; class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { @@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) 
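The `GetKernelType` overrides added to MaxPoolWithIndexOp and its grad op select the kernel from the runtime dtype of input X, which is what makes the paired float/double registrations dispatchable. A toy analogue of that dtype-keyed selection (names illustrative, not Paddle's API):

```cpp
// Analogue of the GetKernelType overrides above: the op inspects the
// runtime dtype of its input and names the matching registered kernel.
#include <iostream>
#include <string>

enum class DataType { FP32, FP64 };

std::string SelectKernel(const std::string& op, DataType t) {
  return op + (t == DataType::FP32 ? "<float>" : "<double>");
}

int main() {
  // With both precisions registered, the same op dispatches per input dtype.
  std::cout << SelectKernel("max_pool2d_with_index", DataType::FP32) << "\n";
  std::cout << SelectKernel("max_pool2d_with_index", DataType::FP64) << "\n";
  return 0;
}
```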
AddAttr( "global_pooling", - "(bool, default false) Whether to use the global pooling. " + "(bool, default:false) Whether to use the global pooling. " "If global_pooling = true, ksize and paddings will be ignored.") .SetDefault(false); AddAttr>("strides", @@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut {0, 0}), paddings(height, width) of pooling " + "(vector, defalut:{0, 0}), paddings(height, width) of pooling " "operator. " "If global_pooling = true, paddings and will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, @@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP_CPU_KERNEL( max_pool2d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_CPU_KERNEL( max_pool2d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad, @@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP_CPU_KERNEL( max_pool3d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_CPU_KERNEL( max_pool3d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) diff --git a/paddle/operators/pool_with_index_op.cu.cc b/paddle/operators/pool_with_index_op.cu.cc index 287657d4b1c57f354ef050885f71261092bdc062..335064a7eea4ec15c529db5254cbb026ba575f3d 100644 --- a/paddle/operators/pool_with_index_op.cu.cc +++ b/paddle/operators/pool_with_index_op.cu.cc @@ -18,14 +18,18 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( max_pool2d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_GPU_KERNEL( max_pool2d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) REGISTER_OP_GPU_KERNEL( max_pool3d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_GPU_KERNEL( max_pool3d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) diff --git a/paddle/operators/pool_with_index_op.h b/paddle/operators/pool_with_index_op.h index a081607edce335f0265388ab01238d584bcf3ead..40766c7e821e8b85aeda9473798a1f696d0ad719 100644 --- a/paddle/operators/pool_with_index_op.h +++ b/paddle/operators/pool_with_index_op.h @@ -24,8 +24,8 @@ namespace operators { using Tensor = framework::Tensor; -template -class MaxPoolWithIndexKernel : public framework::OpKernel { +template +class MaxPoolWithIndexKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); @@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { switch (ksize.size()) { case 2: { - paddle::operators::math::MaxPool2dWithIndexFunctor + paddle::operators::math::MaxPool2dWithIndexFunctor pool2d_forward; pool2d_forward(context.device_context(), *in_x, ksize, strides, paddings, out, mask); } break; case 3: { - paddle::operators::math::MaxPool3dWithIndexFunctor + 
paddle::operators::math::MaxPool3dWithIndexFunctor pool3d_forward; pool3d_forward(context.device_context(), *in_x, ksize, strides, paddings, out, mask); @@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { } }; -template -class MaxPoolWithIndexGradKernel : public framework::OpKernel { +template +class MaxPoolWithIndexGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* mask = context.Input("Mask"); @@ -80,19 +80,19 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel { } if (in_x_grad) { - in_x_grad->mutable_data(context.GetPlace()); + in_x_grad->mutable_data(context.GetPlace()); auto& device_ctx = context.device_context(); math::set_constant(device_ctx, in_x_grad, 0); switch (ksize.size()) { case 2: { - paddle::operators::math::MaxPool2dWithIndexGradFunctor + paddle::operators::math::MaxPool2dWithIndexGradFunctor pool2d_backward; pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides, paddings, in_x_grad); } break; case 3: { - paddle::operators::math::MaxPool3dWithIndexGradFunctor + paddle::operators::math::MaxPool3dWithIndexGradFunctor pool3d_backward; pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides, paddings, in_x_grad); diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc new file mode 100755 index 0000000000000000000000000000000000000000..cbe0b4233160dd1f3ebdf6db8b5f6df392efdfe7 --- /dev/null +++ b/paddle/operators/sequence_slice_op.cc @@ -0,0 +1,132 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/sequence_slice_op.h" + +namespace paddle { +namespace operators { + +class SequenceSliceOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Offset"), + "Input(Offset) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Length"), + "Input(Length) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SequenceSliceOp should not be null."); + auto input_dims = ctx->GetInputDim("X"); + + auto offset_dim = ctx->GetInputDim("Offset"); + auto length_dim = ctx->GetInputDim("Length"); + + PADDLE_ENFORCE_EQ( + offset_dim.size(), 2UL, + "Only support one level sequence now, The rank of offset must be 2."); + PADDLE_ENFORCE_EQ( + length_dim.size(), 2UL, + "Only support one level sequence now, The rank of Length must be 2."); + + // Initialize the output's dims to maximum, + // and re-set to real dims by the value of Offset and Length at kernel + ctx->SetOutputDim("Out", input_dims); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequenceSliceGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "The gradient of Out should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")), + "The gradient of X should not be null."); + ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SequenceSliceOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor), " + "the input of SequenceSliceOp."); + AddInput("Offset", + "(Tensor), " + "a vector to describe the offset of every input sequence for " + "sub sequence item."); + AddInput("Length", + "(Tensor), " + "a vector to describe the length of every input sequence for " + "sub sequence item."); + AddOutput("Out", + "(LoDTensor), the output of SequenceSliceOp."); + AddComment(R"DOC( +Sequence slice operator + +The operator crops a subsequence from given sequence with given start offset and subsequence length. +It only supports sequence (LoD Tensor with level number is 1). +- Case: + X = [[a1, a2; + b1, b2; + c1, c2] + [d1, d2; + e1, e2]] + LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2) + Offset = [[0], [1]]; Length = [[2], [1]] + + Out = [[a1, a2; + b1, b2] + [e1, e2]] + LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2) +NOTE: The first dimension size of input, the size of offset and Length, should be equal. The offset start from 0. 
+ )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker, + sequence_slice_grad, ops::SequenceSliceGradOp); +REGISTER_OP_CPU_KERNEL( + sequence_slice, + ops::SequenceSliceOpKernel); +REGISTER_OP_CPU_KERNEL( + sequence_slice_grad, + ops::SequenceSliceGradOpKernel); diff --git a/paddle/operators/sequence_slice_op.cu b/paddle/operators/sequence_slice_op.cu new file mode 100755 index 0000000000000000000000000000000000000000..a9f59dadba74d900fa5cc0601fb5b264ea19e34d --- /dev/null +++ b/paddle/operators/sequence_slice_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sequence_slice_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + sequence_slice, + ops::SequenceSliceOpKernel); +REGISTER_OP_GPU_KERNEL( + sequence_slice_grad, + ops::SequenceSliceGradOpKernel); diff --git a/paddle/operators/sequence_slice_op.h b/paddle/operators/sequence_slice_op.h new file mode 100755 index 0000000000000000000000000000000000000000..2c9b8464a1236a054cf1a38b9dc1d73588f8dd38 --- /dev/null +++ b/paddle/operators/sequence_slice_op.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/strided_memcpy.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using LoD = framework::LoD; + +template +inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data, + const int64_t* length_data) { + auto out_lod = in.lod(); + size_t lod_offset = 0; + + auto n = in.lod()[0].size() - 1; + out_lod[0][0] = 0; + for (size_t i = 0; i < n; ++i) { + lod_offset += length_data[i]; + out_lod[0][i + 1] = lod_offset; + } + return out_lod; +} + +template +class SequenceSliceOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); + auto* out = ctx.Output("Out"); + + auto lod = in->lod(); + auto n = lod[0].size() - 1; + + PADDLE_ENFORCE_EQ(lod.size(), 1UL, + "Only one-level sequences are supported now."); + PADDLE_ENFORCE_EQ( + n, static_cast(length->dims()[0]), + "The number of input sequences and the size of Length must be equal.") + PADDLE_ENFORCE_EQ( + n, static_cast(offset->dims()[0]), + "The number of input sequences and the size of Offset must be equal.") + + const int64_t* offset_data = offset->data(); + const int64_t* length_data = length->data(); + framework::Tensor offset_cpu; + framework::Tensor length_cpu; + + if (platform::is_gpu_place(ctx.GetPlace())) { + offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); + offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context()); + offset_data = offset_cpu.data(); + + length_cpu.mutable_data(length->dims(), platform::CPUPlace()); + length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context()); + length_data = length_cpu.data(); + } + + for (size_t i = 0; i < n; ++i) { + PADDLE_ENFORCE_LE(0, offset_data[i], + "The offset[%d] must not be negative.", i) + PADDLE_ENFORCE_LT(0, length_data[i], + "The length[%d] must be greater than zero.", i) + PADDLE_ENFORCE_LE( + lod[0][i] + offset_data[i] + length_data[i], + lod[0][i + 1], + "The sub-sequence must not pass the end of the input sequence.") + } + + out->mutable_data(ctx.GetPlace()); + auto out_lod = SequenceSliceLoD(*in, offset_data, length_data); + auto out_dims = in->dims(); + out_dims[0] = out_lod[0][out_lod[0].size() - 1]; + out->Resize(out_dims); + out->set_lod(out_lod); + + auto in_stride = framework::stride(in->dims()); + auto out_stride = framework::stride(out->dims()); + + size_t out_offset = 0; + for (size_t i = 0; i < n; ++i) { + Tensor in_t = + in->Slice(static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + + length_data[i])); + + StridedMemcpy(ctx.device_context(), in_t.data(), + in_stride, in_t.dims(), out_stride, + out->data() + out_offset); + out_offset += length_data[i] * in_stride[0]; + } + } +}; + +template +class SequenceSliceGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + + const int64_t* offset_data = offset->data(); + const int64_t* length_data = length->data(); + framework::Tensor offset_cpu; + framework::Tensor length_cpu; + + if (platform::is_gpu_place(ctx.GetPlace()))
{ + offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); + offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context()); + offset_data = offset_cpu.data(); + + length_cpu.mutable_data(length->dims(), platform::CPUPlace()); + length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context()); + length_data = length_cpu.data(); + } + + auto lod = in->lod(); + auto out_lod = out_grad->lod(); + + if (x_grad) { + x_grad->mutable_data(ctx.GetPlace()); + x_grad->set_lod(in->lod()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + auto out_grad_stride = framework::stride(out_grad->dims()); + + for (size_t i = 0; i < out_lod[0].size() - 1; ++i) { + Tensor out_grad_t = + out_grad->Slice(static_cast(out_lod[0][i]), + static_cast(out_lod[0][i + 1])); + auto out_grad_stride = framework::stride(out_grad_t.dims()); + + auto x_grad_stride = framework::stride(x_grad->dims()); + + Tensor x_grad_t = x_grad->Slice( + static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + length_data[i])); + + StridedMemcpy(ctx.device_context(), out_grad_t.data(), + out_grad_stride, out_grad_t.dims(), x_grad_stride, + x_grad_t.data()); + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index dd48605b9ed688e4656d4cd1ddf1f298d0a50a9e..c5d8a6066ef3becb601344590f977a38c2af0a63 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -224,13 +224,15 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ(pads.size(), strides.size()); PADDLE_ENFORCE_EQ(pads.size(), dilations.size()); -#if CUDNN_VERSION < 6000 +#if !CUDNN_VERSION_MIN(6, 0, 0) // cudnn v5 does not support dilation conv, the argument is called upscale // instead of dilations and it is must be one. 
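The enriched error message in the cudnn_helper.h hunk decodes `CUDNN_VERSION`, which packs the version as major * 1000 + minor * 100 + patch; the arithmetic, checked in isolation:

```cpp
// The improved dilation error message above decodes CUDNN_VERSION into
// its major.minor.patch components.
#include <cstdio>

int main() {
  const int v = 5110;  // e.g. cuDNN 5.1.10: major * 1000 + minor * 100 + patch
  std::printf("%d.%d.%d\n", v / 1000, v % 1000 / 100, v % 100);  // prints 5.1.10
  return 0;
}
```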
for (size_t i = 0; i < dilations.size(); ++i) { PADDLE_ENFORCE_EQ( dilations[i], 1, - "Dilations conv is not supported in this cuDNN version"); + "Dilations conv is not supported in this cuDNN version(%d.%d.%d).", + CUDNN_VERSION / 1000, CUDNN_VERSION % 1000 / 100, + CUDNN_VERSION % 100); } #endif diff --git a/paddle/platform/cudnn_helper_test.cc b/paddle/platform/cudnn_helper_test.cc index 6bd85ae1ca8b47b203e0321e9d9224d5cfd3a586..427359f69713b961c4730b697d3ccde5f7085838 100644 --- a/paddle/platform/cudnn_helper_test.cc +++ b/paddle/platform/cudnn_helper_test.cc @@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { EXPECT_EQ(strides[2], 6); EXPECT_EQ(strides[1], 36); EXPECT_EQ(strides[0], 144); + + // test tensor5d: ScopedTensorDescriptor + ScopedTensorDescriptor tensor5d_desc; + std::vector shape_5d = {2, 4, 6, 6, 6}; + auto desc_5d = tensor5d_desc.descriptor(DataLayout::kNCDHW, shape_5d); + + std::vector dims_5d(5); + std::vector strides_5d(5); + paddle::platform::dynload::cudnnGetTensorNdDescriptor( + desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data()); + + EXPECT_EQ(nd, 5); + for (size_t i = 0; i < dims_5d.size(); ++i) { + EXPECT_EQ(dims_5d[i], shape_5d[i]); + } + EXPECT_EQ(strides_5d[4], 1); + EXPECT_EQ(strides_5d[3], 6); + EXPECT_EQ(strides_5d[2], 36); + EXPECT_EQ(strides_5d[1], 216); + EXPECT_EQ(strides_5d[0], 864); } TEST(CudnnHelper, ScopedFilterDescriptor) { @@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { for (size_t i = 0; i < shape.size(); ++i) { EXPECT_EQ(kernel[i], shape[i]); } + + ScopedFilterDescriptor filter_desc_4d; + std::vector shape_4d = {2, 3, 3, 3}; + auto desc_4d = filter_desc.descriptor(DataLayout::kNCDHW, shape_4d); + + std::vector kernel_4d(4); + paddle::platform::dynload::cudnnGetFilterNdDescriptor( + desc_4d, 4, &type, &format, &nd, kernel_4d.data()); + + EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); + EXPECT_EQ(nd, 4); + for (size_t i = 0; i < shape_4d.size(); ++i) { + EXPECT_EQ(kernel_4d[i], shape_4d[i]); + } } TEST(CudnnHelper, ScopedConvolutionDescriptor) { diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py index c749fa827fea4a808ab715dcb3442aa24d06a4d2..00efc01c0592107314f5b23c951706d039d49a88 100644 --- a/python/paddle/trainer_config_helpers/activations.py +++ b/python/paddle/trainer_config_helpers/activations.py @@ -17,7 +17,8 @@ __all__ = [ "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation', 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation", - "LogActivation", "SqrtActivation", "ReciprocalActivation" + "LogActivation", "SqrtActivation", "ReciprocalActivation", + "SoftSignActivation" ] @@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation): Reciprocal Activation. .. math:: - f(z) = 1/z + f(z)=\\frac{1}{z} """ def __init__(self): BaseActivation.__init__(self, 'reciprocal', False) + + +class SoftSignActivation(BaseActivation): + """ + SoftSign Activation. + + .. 
math:: + f(z)=\\frac{z}{1 + |z|} + """ + + def __init__(self): + BaseActivation.__init__(self, 'softsign', False) diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 1789d2f82a8813331b3610fc69f8447925cd7501..02ad2ecd72193a2bd23e47f012aba981aaa9dc2a 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -661,7 +661,7 @@ def conv2d(input, if groups is None: num_filter_channels = num_channels else: - if num_channels % groups is not 0: + if num_channels % groups != 0: raise ValueError("num_channels must be divisible by groups.") num_filter_channels = num_channels / groups diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index b8506125501b6e533c4594b37943ec36ca8e7d30..efe63a68f0745eb728b569a03d0344877c1484f7 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -4,6 +4,7 @@ import paddle.v2.fluid.core as core import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets +import paddle.v2.fluid.evaluator as evaluator from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.initializer import XavierInitializer from paddle.v2.fluid.optimizer import AdamOptimizer @@ -103,12 +104,13 @@ net = vgg16_bn_drop(images) predict = layers.fc(input=net, size=classdim, act='softmax') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy(input=predict, label=label) # optimizer = SGDOptimizer(learning_rate=0.001) optimizer = AdamOptimizer(learning_rate=0.001) opts = optimizer.minimize(avg_cost) +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) + BATCH_SIZE = 128 PASS_NUM = 1 @@ -124,6 +126,7 @@ exe.run(framework.default_startup_program()) for pass_id in range(PASS_NUM): batch_id = 0 + accuracy.reset(exe) for data in train_reader(): img_data = np.array(map(lambda x: x[0].reshape(data_shape), data)).astype("float32") @@ -141,12 +144,14 @@ for pass_id in range(PASS_NUM): outs = exe.run(framework.default_main_program(), feed={"pixel": tensor_img, "label": tensor_y}, - fetch_list=[avg_cost, accuracy]) + fetch_list=[avg_cost, acc_out]) loss = np.array(outs[0]) acc = np.array(outs[1]) + pass_acc = accuracy.eval(exe) print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + - " loss:" + str(loss) + " acc:" + str(acc)) + " loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( + pass_acc)) batch_id = batch_id + 1 if batch_id > 1: diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index 75fbaf83e8f3e62eb0d0abef9cfa267b65e72973..8f737689609fec4d1819ae58b9665298547a3716 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -46,7 +46,6 @@ exe = Executor(place) exe.run(framework.default_startup_program()) for pass_id in range(PASS_NUM): - count = 0 accuracy.reset(exe) for data in train_reader(): img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]), @@ -66,13 +65,14 @@ for pass_id in range(PASS_NUM): loss = np.array(outs[0]) acc = np.array(outs[1]) pass_acc = accuracy.eval(exe) - print "pass id : ", pass_id, pass_acc + print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc=" + + 
str(pass_acc)) # print loss, acc - if loss < 10.0 and acc > 0.9: + if loss < 10.0 and pass_acc > 0.9: # if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. exit(0) pass_acc = accuracy.eval(exe) - print "pass id : ", pass_id, pass_acc + print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index cf10b1942e6a8243b18b0ae4586fdd7ec1a665fb..e42e4c9cc0024e193b0732df6d9ca3200df5f0b9 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -3,6 +3,7 @@ import paddle.v2 as paddle import paddle.v2.fluid.core as core import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.evaluator as evaluator from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.initializer import UniformInitializer from paddle.v2.fluid.optimizer import MomentumOptimizer @@ -30,11 +31,12 @@ label = layers.data(name='y', shape=[1], data_type='int64') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy(input=predict, label=label) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) opts = optimizer.minimize(avg_cost) +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) + train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), @@ -47,6 +49,7 @@ exe.run(framework.default_startup_program()) PASS_NUM = 100 for pass_id in range(PASS_NUM): + accuracy.reset(exe) for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") @@ -61,9 +64,13 @@ for pass_id in range(PASS_NUM): outs = exe.run(framework.default_main_program(), feed={'x': tensor_x, 'y': tensor_y}, - fetch_list=[avg_cost, accuracy]) + fetch_list=[avg_cost, acc_out]) out = np.array(outs[0]) acc = np.array(outs[1]) - if out[0] < 5.0: - exit(0) # if avg cost less than 5.0, we think our code is good. 
+ pass_acc = accuracy.eval(exe) + + if pass_acc > 0.7: + exit(0) + # print("pass_id=" + str(pass_id) + " auc=" + + # str(acc) + " pass_acc=" + str(pass_acc)) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index e69b915a9cfaf9e06075991975563a1fc1196661..4929f7cf615e61de5c4f61ef44c5340e9ac4492a 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -1,6 +1,7 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid.core as core +import paddle.v2.fluid.evaluator as evaluator import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets @@ -32,8 +33,8 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): avg_cost = layers.mean(x=cost) adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) - return avg_cost, acc + accuracy, acc_out = evaluator.accuracy(input=prediction, label=label) + return avg_cost, accuracy, acc_out def to_lodtensor(data, place): @@ -59,7 +60,8 @@ def main(): dict_dim = len(word_dict) class_dim = 2 - cost, acc = convolution_net(input_dim=dict_dim, class_dim=class_dim) + cost, accuracy, acc_out = convolution_net( + input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( @@ -71,6 +73,7 @@ def main(): exe.run(framework.default_startup_program()) for pass_id in xrange(PASS_NUM): + accuracy.reset(exe) for data in train_data(): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) @@ -83,12 +86,13 @@ def main(): outs = exe.run(framework.default_main_program(), feed={"words": tensor_words, "label": tensor_label}, - fetch_list=[cost, acc]) + fetch_list=[cost, acc_out]) cost_val = np.array(outs[0]) acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 1.0 and acc_val > 0.7: + pass_acc = accuracy.eval(exe) + print("cost=" + str(cost_val) + " acc=" + str(acc_val) + + " pass_acc=" + str(pass_acc)) + if cost_val < 1.0 and pass_acc > 0.8: exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index 65d44542501e6531fc1912cbc726a1d903b9c031..b3ee91938865afb929670a388a561b156aec1fe9 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -1,6 +1,7 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid.core as core +import paddle.v2.fluid.evaluator as evaluator import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor @@ -41,8 +42,8 @@ def stacked_lstm_net(input_dim, avg_cost = layers.mean(x=cost) adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) - return avg_cost, acc + accuracy, acc_out = evaluator.accuracy(input=prediction, label=label) + return avg_cost, accuracy, acc_out def to_lodtensor(data, place): @@ -69,7 +70,8 @@ def main(): dict_dim = len(word_dict) class_dim = 2 - cost, acc = stacked_lstm_net(input_dim=dict_dim, class_dim=class_dim) + cost, accuracy, acc_out = 
stacked_lstm_net( + input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( @@ -81,6 +83,7 @@ def main(): exe.run(framework.default_startup_program()) for pass_id in xrange(PASS_NUM): + accuracy.reset(exe) for data in train_data(): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) @@ -93,12 +96,13 @@ def main(): outs = exe.run(framework.default_main_program(), feed={"words": tensor_words, "label": tensor_label}, - fetch_list=[cost, acc]) + fetch_list=[cost, acc_out]) cost_val = np.array(outs[0]) acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 1.0 and acc_val > 0.7: + pass_acc = accuracy.eval(exe) + print("cost=" + str(cost_val) + " acc=" + str(acc_val) + + " pass_acc=" + str(pass_acc)) + if cost_val < 1.0 and acc_val > 0.8: exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py index ac3fa6aa87835b3cd6fb9bbf6fe66b1d0c577ca2..5dff6270f455395ce6ca8ae2428236f630467095 100644 --- a/python/paddle/v2/fluid/tests/test_pool2d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool2d_op.py @@ -3,8 +3,7 @@ import numpy as np from op_test import OpTest -def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, H, W = x.shape if global_pool == 1: ksize = [H, W] @@ -23,8 +22,7 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): return out -def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, H, W = x.shape if global_pool == 1: ksize = [H, W] @@ -47,6 +45,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): class TestPool2d_Op(OpTest): def setUp(self): self.init_test_case() + self.init_global_pool() self.init_op_type() self.init_pool_type() if self.global_pool: @@ -75,8 +74,6 @@ class TestPool2d_Op(OpTest): self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 5, 5] self.ksize = [3, 3] self.strides = [1, 1] @@ -87,12 +84,14 @@ class TestPool2d_Op(OpTest): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + + def init_global_pool(self): + self.global_pool = True class TestCase1(TestPool2d_Op): def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] @@ -103,12 +102,14 @@ class TestCase1(TestPool2d_Op): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + + def init_global_pool(self): + self.global_pool = False class TestCase2(TestPool2d_Op): def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] @@ -119,152 +120,69 @@ class TestCase2(TestPool2d_Op): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + def init_global_pool(self): + self.global_pool = False -class TestCase3(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 
3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCase3(TestPool2d_Op): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" - - -class TestCase4(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] + +class TestCase4(TestCase1): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" - - -class TestCase5(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] + +class TestCase5(TestCase2): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" + self.pool2D_forward_naive = max_pool2D_forward_naive #--------------------test pool2d_cudnn-------------------- -class TestCaseCudnn1(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] - +class TestCudnnCase1(TestPool2d_Op): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn2(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase2(TestCase1): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn3(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] +class TestCudnnCase3(TestCase2): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn4(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase4(TestCase3): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - - -class TestCaseCudnn5(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase5(TestCase4): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - - -class TestCaseCudnn6(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] +class TestCudnnCase6(TestCase5): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py index 
87483ae5e568c01141ff789f37e84069cb8e827d..2ba86665a7d207e61159c02643fa40daca3be080 100644 --- a/python/paddle/v2/fluid/tests/test_pool3d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool3d_op.py @@ -3,8 +3,7 @@ import numpy as np from op_test import OpTest -def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, D, H, W = x.shape if global_pool == 1: ksize = [D, H, W] @@ -27,8 +26,7 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): return out -def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, D, H, W = x.shape if global_pool == 1: ksize = [D, H, W] @@ -55,6 +53,10 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): class TestPool3d_Op(OpTest): def setUp(self): self.init_test_case() + self.init_global_pool() + self.init_op_type() + self.init_pool_type() + if self.global_pool: self.paddings = [0 for _ in range(len(self.paddings))] input = np.random.random(self.shape).astype("float32") @@ -81,74 +83,115 @@ class TestPool3d_Op(OpTest): self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.op_type = "pool3d" - self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive self.shape = [2, 3, 5, 5, 5] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] + def init_op_type(self): + self.op_type = "pool3d" + + def init_pool_type(self): + self.pool_type = "avg" + self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = True + class TestCase1(TestPool3d_Op): def init_test_case(self): - self.global_pool = False self.op_type = "pool3d" - self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive self.shape = [2, 3, 7, 7, 7] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] - -class TestCase2(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "avg" self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = False + + +class TestCase2(TestPool3d_Op): + def init_test_case(self): self.shape = [2, 3, 7, 7, 7] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] + def init_op_type(self): + self.op_type = "pool3d" + + def init_pool_type(self): + self.pool_type = "avg" + self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = False + class TestCase3(TestPool3d_Op): - def init_test_case(self): - self.global_pool = True + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [0, 0, 0] -class TestCase4(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False +class TestCase4(TestCase1): + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [0, 0, 0] -class TestCase5(TestPool3d_Op): - def 
init_test_case(self): - self.global_pool = False +class TestCase5(TestCase2): + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] + + +#--------------------test pool3d_cudnn-------------------- +class TestCudnnCase1(TestPool3d_Op): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase2(TestCase1): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase3(TestCase2): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase4(TestCase3): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase5(TestCase4): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase6(TestCase5): + def init_op_type(self): + self.op_type = "pool3d_cudnn" if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_pool_max_op.py b/python/paddle/v2/fluid/tests/test_pool_max_op.py index 04843a28ac19e076e097d1aa1034bcf9378aa495..9d2d61c43868701392e90542f3b7fb2c4ea07548 100644 --- a/python/paddle/v2/fluid/tests/test_pool_max_op.py +++ b/python/paddle/v2/fluid/tests/test_pool_max_op.py @@ -3,11 +3,13 @@ import numpy as np from op_test import OpTest -def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): +def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False): N, C, D, H, W = x.shape - if global_pool == 1: + if global_pool: ksize = [D, H, W] + paddings = [0, 0, 0] + D_out = (D - ksize[0] + 2 * paddings[0]) / strides[0] + 1 H_out = (H - ksize[1] + 2 * paddings[1]) / strides[1] + 1 W_out = (W - ksize[2] + 2 * paddings[2]) / strides[2] + 1 @@ -40,11 +42,13 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): return out, mask -def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): +def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=False): N, C, H, W = x.shape - if global_pool == 1: + if global_pool: ksize = [H, W] + paddings = [0, 0] + H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1 W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) @@ -74,13 +78,13 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): class TestMaxPoolWithIndex_Op(OpTest): def setUp(self): self.init_test_case() - if self.global_pool: - self.paddings = [0 for _ in range(len(self.paddings))] + self.init_global() + input = np.random.random(self.shape).astype("float32") output, mask = self.pool_forward_naive(input, self.ksize, self.strides, self.paddings, self.global_pool) output = output.astype("float32") - mask = mask.astype("float32") + mask = mask.astype("int32") self.attrs = { 'strides': self.strides, @@ -99,41 +103,24 @@ class TestMaxPoolWithIndex_Op(OpTest): # self.check_grad(set(['X']), ['Out'], max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.index = "max_pool3d_with_index" - self.op_type = "%s" % self.index + self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive self.shape = [2, 3, 5, 5, 5] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] + def init_global(self): + self.global_pool = False + class TestCase1(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = 
"max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] class TestCase2(TestMaxPoolWithIndex_Op): def init_test_case(self): - self.global_pool = False - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] - - -class TestCase3(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = False self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive self.shape = [2, 3, 7, 7, 7] @@ -141,32 +128,18 @@ class TestCase3(TestMaxPoolWithIndex_Op): self.strides = [2, 2, 2] self.paddings = [0, 0, 0] - -class TestCase4(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] -class TestCase5(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [2, 2, 2] - self.paddings = [0, 0, 0] +class TestCase3(TestCase2): + def init_global(self): + self.global_pool = False -class TestCase6(TestMaxPoolWithIndex_Op): +#----------------max_pool2d_with_index---------------- +class TestCase4(TestMaxPoolWithIndex_Op): def init_test_case(self): - self.global_pool = False self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive self.shape = [2, 3, 7, 7] @@ -174,10 +147,17 @@ class TestCase6(TestMaxPoolWithIndex_Op): self.strides = [1, 1] self.paddings = [1, 1] + def init_global(self): + self.global_pool = True + -class TestCase7(TestMaxPoolWithIndex_Op): - def init_test_case(self): +class TestCase5(TestCase4): + def init_global(self): self.global_pool = False + + +class TestCase6(TestMaxPoolWithIndex_Op): + def init_test_case(self): self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive self.shape = [2, 3, 7, 7] @@ -185,27 +165,13 @@ class TestCase7(TestMaxPoolWithIndex_Op): self.strides = [2, 2] self.paddings = [0, 0] - -class TestCase8(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool2d_with_index" - self.pool_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] -class TestCase9(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = True - self.op_type = "max_pool2d_with_index" - self.pool_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [2, 2] - self.paddings = [0, 0] +class TestCase7(TestCase6): + def init_global(self): + self.global_pool = False if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ccd9a05343b0c4aa05b258959665c0662f271512 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py @@ -0,0 +1,47 @@ +import unittest +import numpy as np +import sys +from 
op_test import OpTest + + +class TestSequenceSliceOp(OpTest): + def set_data(self): + self.init_test_case() + # only support one level LoD + x = np.random.random(self.x_dim).astype('float32') + lod = self.x_lod + offset = np.array(self.offset).astype("int64") + length = np.array(self.length).astype("int64") + + self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length} + outs = [] + out_lod = [[0]] + out_lod_offset = 0 + for i in range(len(offset)): + sub_x = x[lod[0][i] + offset[i, 0]:lod[0][i] + offset[i, 0] + + length[i, 0], :] + out_lod_offset = out_lod_offset + len(sub_x) + outs.append(sub_x) + out_lod[0].append(out_lod_offset) + outs = np.concatenate(outs, axis=0) + self.outputs = {'Out': (outs, out_lod)} + + def init_test_case(self): + self.x_dim = (100, 3, 2) + self.x_lod = [[0, 20, 40, 60, 80, 100]] + self.offset = [[1], [2], [3], [4], [5]] + self.length = [[10], [8], [6], [4], [2]] + + def setUp(self): + self.op_type = "sequence_slice" + self.set_data() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == '__main__': + unittest.main()
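The new `test_sequence_slice_op.py` encodes the op's reference semantics: for each sequence in a one-level LoD tensor, keep `length[i]` rows starting `offset[i]` rows into the sequence, and rebuild the output LoD from the slice lengths. The same computation as a standalone numpy sketch; `sequence_slice_ref` is an illustrative helper name, not part of the patch:

```python
import numpy as np


def sequence_slice_ref(x, lod0, offset, length):
    """Reference semantics of sequence_slice for a one-level LoD input:
    sequence i occupies rows lod0[i]:lod0[i + 1] of x; the op keeps the
    slice starting offset[i] rows into the sequence and spanning
    length[i] rows, and records the new boundaries in the output LoD."""
    outs, out_lod = [], [0]
    for i in range(len(offset)):
        start = lod0[i] + offset[i]
        outs.append(x[start:start + length[i]])
        out_lod.append(out_lod[-1] + length[i])
    return np.concatenate(outs, axis=0), [out_lod]


# The configuration from init_test_case above.
x = np.random.random((100, 3, 2)).astype('float32')
out, lod = sequence_slice_ref(x, [0, 20, 40, 60, 80, 100],
                              offset=[1, 2, 3, 4, 5],
                              length=[10, 8, 6, 4, 2])
assert out.shape == (30, 3, 2)
assert lod == [[0, 10, 18, 24, 28, 30]]
```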
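The pool2d/pool3d test rewrite earlier in this patch is a template-method refactor: the base case splits its configuration into `init_test_case`, `init_op_type`, `init_pool_type`, and `init_global_pool` hooks, so each variant overrides exactly one axis and the cuDNN cases collapse to two-line subclasses instead of restating every field. A stripped-down sketch of the pattern with plain `unittest`; the class names and the `test_config` check are illustrative, not the real OpTest harness:

```python
import unittest


class TestPoolBase(unittest.TestCase):
    def setUp(self):
        # Each axis of the configuration has its own hook, so a subclass
        # overrides only the hook it actually changes.
        self.init_test_case()    # shape / ksize / strides / paddings
        self.init_global_pool()  # global_pool flag
        self.init_op_type()      # plain vs cudnn operator
        self.init_pool_type()    # avg vs max, plus the naive reference

    def init_test_case(self):
        self.shape = [2, 3, 5, 5]

    def init_global_pool(self):
        self.global_pool = True

    def init_op_type(self):
        self.op_type = "pool2d"

    def init_pool_type(self):
        self.pool_type = "avg"

    def test_config(self):
        self.assertIn(self.pool_type, ("avg", "max"))


# A cuDNN variant inherits every setting and overrides a single hook;
# compare TestCudnnCase4(TestCase3) and friends in the patch above.
class TestCudnnCase(TestPoolBase):
    def init_op_type(self):
        self.op_type = "pool2d_cudnn"


if __name__ == '__main__':
    unittest.main()
```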