From 24010472d4287b3ae2c740e166c52178dfa58a71 Mon Sep 17 00:00:00 2001
From: liym27 <33742067+liym27@users.noreply.github.com>
Date: Sat, 28 Sep 2019 22:13:48 +0800
Subject: [PATCH] fix pool2d pool3d, support asymmetric padding and channel_last (#19739)

* fix pool2d pool3d:
  1. support asymmetric padding;
  2. support padding algorithm: "SAME" and "VALID";
  3. support channel_last: data_format NHWC and NDHWC;
  4. support inferring shape when input with negative dims in compile time;
  5. change doc of python API and c++;
  6. fix bug in cuda kernel when Attr(adaptive) is true.
  test=develop,test=document_preview

* fix 'tensors' to 'Tensors'. test=develop,test=document_preview

* add test for ValueError coverage. test=develop,test=document_preview

* resolve conflict in test_pool2d. test=develop
---
 paddle/fluid/API.spec                       |    4 +-
 paddle/fluid/operators/math/pooling.cc      |  890 +++++++++++++-
 paddle/fluid/operators/math/pooling.cu      |  605 +++++++--
 paddle/fluid/operators/math/pooling.h       |   59 +-
 paddle/fluid/operators/pool_cudnn_op.cu.cc  |  246 +++-
 paddle/fluid/operators/pool_op.cc           |  235 ++--
 paddle/fluid/operators/pool_op.h            |  140 ++-
 paddle/fluid/platform/cudnn_helper.h        |   30 +-
 paddle/fluid/platform/dynload/cudnn.h       |    3 +-
 python/paddle/fluid/layers/nn.py            |  266 +++-
 .../fluid/tests/unittests/test_pool2d_op.py |  866 ++++++++++++-
 .../fluid/tests/unittests/test_pool3d_op.py | 1086 ++++++++++++++---
 12 files changed, 3933 insertions(+), 497 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index f435a79871e..8cf96441635 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -144,8 +144,8 @@ paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size',
 paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test', 'pad_value'], varargs=None, keywords=None, defaults=(False, 0.0)), ('document', 'e90a93251c52dc4e6fb34fb3991b3f82'))
 paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'eaa9d0bbd3d4e017c8bc4ecdac483711'))
 paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', 'cee673c79e3ff4582656a24e04f841e5'))
-paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'be7e530dcbd603962e25573a63eb145e'))
-paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '053b1a855f13a066d005759171724bc6'))
+paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive', 'data_format'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True, 'NCHW')), ('document', '630cae697d46b4b575b15d56cf8be25a'))
+paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive', 'data_format'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True, 'NCDHW')), ('document', 
'db0035a3132b1dfb12e53c57591fb9f6')) paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '52343203de40afe29607397e13aaf0d2')) paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '55db6ae7275fb9678a6814aebab81a9c')) paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '9e5a9f4f6d82d34a33d9ca632379cbcc')) diff --git a/paddle/fluid/operators/math/pooling.cc b/paddle/fluid/operators/math/pooling.cc index 30873e9f87f..1e86c2e7a32 100644 --- a/paddle/fluid/operators/math/pooling.cc +++ b/paddle/fluid/operators/math/pooling.cc @@ -13,17 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/pooling.h" #include +#include #include +#include "paddle/fluid/operators/math/math_function.h" namespace paddle { namespace operators { namespace math { /* - * All tensors are in NCHW format. - * Ksize, strides, paddings are two elements. These two elements represent - * height and width, respectively. - */ +* Tensors are in NCHW or NHWC format. +* Ksize, strides are two elements. These two elements represent height +* and width, respectively. +* Paddings are four elements. These four elements represent height_up, +* height_down, width_left and width_right, respectively. +*/ template class Pool2dFunctor { public: @@ -92,12 +96,137 @@ class Pool2dFunctor { } } } + + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_process, + bool exclusive, bool adaptive, framework::Tensor* output) { + bool channel_last = (data_format == "NHWC"); + + const int batch_size = input.dims()[0]; + const int input_channels = channel_last ? input.dims()[3] : input.dims()[1]; + const int input_height = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_width = channel_last ? input.dims()[2] : input.dims()[3]; + + const int output_channels = + channel_last ? output->dims()[3] : output->dims()[1]; + const int output_height = + channel_last ? output->dims()[1] : output->dims()[2]; + const int output_width = + channel_last ? 
output->dims()[2] : output->dims()[3]; + + const int ksize_height = ksize[0]; + const int ksize_width = ksize[1]; + + const int stride_height = strides[0]; + const int stride_width = strides[1]; + + const int padding_height = paddings[0]; + const int padding_width = paddings[1]; + + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + + int hstart, hend; + int wstart, wend; + if (!channel_last) { + const int input_stride = input_height * input_width; + const int output_stride = output_height * output_width; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + + T ele = pool_process.initial(); + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + pool_process.compute(input_data[h * input_width + w], &ele); + } + } + int pool_size = (exclusive || adaptive) + ? (hend - hstart) * (wend - wstart) + : ksize_height * ksize_width; + pool_process.finalize(static_cast(pool_size), &ele); + output_data[ph * output_width + pw] = ele; + } + } + input_data += input_stride; + output_data += output_stride; + } + } + } else { + const int input_stride = input_height * input_width * input_channels; + const int output_stride = output_height * output_width * output_channels; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + T ele = pool_process.initial(); + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + pool_process.compute( + input_data[h * input_width * input_channels + + w * input_channels + c], + &ele); + } + } + int pool_size = (exclusive || adaptive) + ? (hend - hstart) * (wend - wstart) + : ksize_height * ksize_width; + + pool_process.finalize(static_cast(pool_size), &ele); + output_data[ph * output_width * output_channels + + pw * output_channels + c] = ele; + } + } + } + input_data += input_stride; + output_data += output_stride; + } + } + } }; /* -* All tensors are in NCHW format. -* Ksize, strides, paddings are two elements. These two elements represent height +* tensors are in NCHW or NHWC format. +* Ksize, strides are two elements. These two elements represent height * and width, respectively. +* Paddings are four elements. 
These four elements represent height_up, +* height_down, width_left and width_right, respectively. */ template class Pool2dGradFunctor { @@ -173,13 +302,147 @@ class Pool2dGradFunctor { } } } + + void operator()( + const platform::CPUDeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, const framework::Tensor& output_grad, + const std::vector& ksize, const std::vector& strides, + const std::vector& paddings, const std::string data_format, + PoolProcess pool_grad_process, bool exclusive, bool adaptive, + framework::Tensor* input_grad) { + bool channel_last = (data_format == "NHWC"); + + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[3] : input.dims()[1]; + const int input_height = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_width = channel_last ? input.dims()[2] : input.dims()[3]; + + const int output_channels = + channel_last ? output.dims()[3] : output.dims()[1]; + const int output_height = + channel_last ? output.dims()[1] : output.dims()[2]; + const int output_width = channel_last ? output.dims()[2] : output.dims()[3]; + + const int ksize_height = ksize[0]; + const int ksize_width = ksize[1]; + + const int stride_height = strides[0]; + const int stride_width = strides[1]; + + const int padding_height = paddings[0]; + const int padding_width = paddings[1]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + int hstart, hend; + int wstart, wend; + if (!channel_last) { + const int input_stride = input_height * input_width; + const int output_stride = output_height * output_width; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + int pool_size = (exclusive || adaptive) + ? 
(hend - hstart) * (wend - wstart) + : ksize_height * ksize_width; + float scale = 1.0 / pool_size; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + pool_grad_process.compute( + input_data[h * input_width + w], + output_data[ph * output_width + pw], + output_grad_data[ph * output_width + pw], + static_cast(scale), + input_grad_data + h * input_width + w); + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } else { + const int input_stride = input_height * input_width * input_channels; + const int output_stride = output_height * output_width * output_channels; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + int pool_size = (exclusive || adaptive) + ? (hend - hstart) * (wend - wstart) + : ksize_height * ksize_width; + float scale = 1.0 / pool_size; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + auto input_idx = + h * input_width * input_channels + w * input_channels + c; + auto output_idx = ph * output_width * output_channels + + pw * output_channels + c; + pool_grad_process.compute( + input_data[input_idx], output_data[output_idx], + output_grad_data[output_idx], static_cast(scale), + input_grad_data + input_idx); + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } }; /* - * All tensors are in NCHW format. - * Ksize, strides, paddings are two elements. These two elements represent - * height and width, respectively. - */ +* Tensors are in NCHW or NHWC format. +* Ksize, strides are two elements. These two elements represent height +* and width, respectively. +* Paddings are four elements. These four elements represent height_up, +* height_down, width_left and width_right, respectively. +*/ template class MaxPool2dGradFunctor { public: @@ -239,8 +502,112 @@ class MaxPool2dGradFunctor { } } } -}; + void operator()( + const platform::CPUDeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, const framework::Tensor& output_grad, + const std::vector& ksize, const std::vector& strides, + const std::vector& paddings, const std::string data_format, + framework::Tensor* input_grad) { + bool channel_last = (data_format == "NHWC"); + + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[3] : input.dims()[1]; + const int input_height = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_width = channel_last ? input.dims()[2] : input.dims()[3]; + + const int output_channels = + channel_last ? output.dims()[3] : output.dims()[1]; + const int output_height = + channel_last ? 
output.dims()[1] : output.dims()[2]; + const int output_width = channel_last ? output.dims()[2] : output.dims()[3]; + + const int ksize_height = ksize[0]; + const int ksize_width = ksize[1]; + + const int stride_height = strides[0]; + const int stride_width = strides[1]; + + const int padding_height = paddings[0]; + const int padding_width = paddings[1]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + if (!channel_last) { + const int input_stride = input_height * input_width; + const int output_stride = output_height * output_width; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int ph = 0; ph < output_height; ++ph) { + int hstart = ph * stride_height - padding_height; + int hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + for (int pw = 0; pw < output_width; ++pw) { + int wstart = pw * stride_width - padding_width; + int wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + + bool stop = false; + for (int h = hstart; h < hend && !stop; ++h) { + for (int w = wstart; w < wend && !stop; ++w) { + int input_idx = h * input_width + w; + int output_idx = ph * output_width + pw; + if (input_data[input_idx] == output_data[output_idx]) { + input_grad_data[input_idx] += output_grad_data[output_idx]; + stop = true; + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } else { + const int input_stride = input_height * input_width * input_channels; + const int output_stride = output_height * output_width * output_channels; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int ph = 0; ph < output_height; ++ph) { + int hstart = ph * stride_height - padding_height; + int hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + for (int pw = 0; pw < output_width; ++pw) { + int wstart = pw * stride_width - padding_width; + int wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + + bool stop = false; + for (int h = hstart; h < hend && !stop; ++h) { + for (int w = wstart; w < wend && !stop; ++w) { + int input_idx = + h * input_width * input_channels + w * input_channels + c; + int output_idx = ph * output_width * output_channels + + pw * output_channels + c; + if (input_data[input_idx] == output_data[output_idx]) { + input_grad_data[input_idx] += output_grad_data[output_idx]; + stop = true; + } + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } +}; template class MaxPool2dGradFunctor; template class MaxPool2dGradFunctor; @@ -266,10 +633,13 @@ template class Pool2dGradFunctor; /* - * All tensors are in NCDHW format. - * Ksize, strides, paddings are three elements. These three elements represent - * depth, height and width, respectively. - */ +* Tensors are in NCDHW or NDHWC format. +* Ksize, strides, paddings are three elements. These three elements represent +* depth, height and width, respectively. +* Paddings are six elements. These six elements represent depth_forth, +* depth_back, +* height_up, height_down, width_left and width_right, respectively. 
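The functors above switch between two flat-index layouts. As a minimal standalone sketch (not part of the patch; the helper names are illustrative), the NCHW/NHWC offset arithmetic they use is:

    // Flat offsets for the same logical element under both layouts.
    // NCHW: channel-major; NHWC: channel-last (channel varies fastest).
    #include <iostream>

    int IndexNCHW(int n, int c, int h, int w, int C, int H, int W) {
      return ((n * C + c) * H + h) * W + w;
    }

    int IndexNHWC(int n, int c, int h, int w, int C, int H, int W) {
      return ((n * H + h) * W + w) * C + c;
    }

    int main() {
      // Element (n=0, c=1, h=2, w=3) of a 1x4x8x8 tensor:
      std::cout << IndexNCHW(0, 1, 2, 3, 4, 8, 8) << "\n";  // 83
      std::cout << IndexNHWC(0, 1, 2, 3, 4, 8, 8) << "\n";  // 77
      return 0;
    }

This is why the channel-last branches above advance input_data by input_height * input_width * input_channels per batch rather than per channel.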
+*/ template class Pool3dFunctor { public: @@ -359,13 +729,180 @@ class Pool3dFunctor { } } } + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_process, + bool exclusive, bool adaptive, framework::Tensor* output) { + bool channel_last = (data_format == "NDHWC"); + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[4] : input.dims()[1]; + const int input_depth = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_height = channel_last ? input.dims()[2] : input.dims()[3]; + const int input_width = channel_last ? input.dims()[3] : input.dims()[4]; + + const int output_channels = + channel_last ? output->dims()[4] : output->dims()[1]; + const int output_depth = + channel_last ? output->dims()[1] : output->dims()[2]; + const int output_height = + channel_last ? output->dims()[2] : output->dims()[3]; + const int output_width = + channel_last ? output->dims()[3] : output->dims()[4]; + + const int ksize_depth = ksize[0]; + const int ksize_height = ksize[1]; + const int ksize_width = ksize[2]; + + const int stride_depth = strides[0]; + const int stride_height = strides[1]; + const int stride_width = strides[2]; + + const int padding_depth = paddings[0]; + const int padding_height = paddings[1]; + const int padding_width = paddings[2]; + + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + + int dstart, dend; + int hstart, hend; + int wstart, wend; + + if (!channel_last) { + const int input_stride = input_depth * input_height * input_width; + const int output_stride = output_depth * output_height * output_width; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int pd = 0; pd < output_depth; ++pd) { + if (adaptive) { + dstart = AdaptStartIndex(pd, input_depth, output_depth); + dend = AdaptEndIndex(pd, input_depth, output_depth); + } else { + dstart = pd * stride_depth - padding_depth; + dend = std::min(dstart + ksize_depth, input_depth); + dstart = std::max(dstart, 0); + } + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + int output_idx = (pd * output_height + ph) * output_width + pw; + T ele = pool_process.initial(); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + pool_process.compute( + input_data[(d * input_height + h) * input_width + w], + &ele); + } + } + } + int pool_size = + (exclusive || adaptive) + ? 
(dend - dstart) * (hend - hstart) * (wend - wstart) + : ksize_depth * ksize_height * ksize_width; + pool_process.finalize(static_cast(pool_size), &ele); + output_data[output_idx] = ele; + } + } + } + input_data += input_stride; + output_data += output_stride; + } + } + } else { + const int input_stride = + input_depth * input_height * input_width * input_channels; + const int output_stride = + output_depth * output_height * output_width * output_channels; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int pd = 0; pd < output_depth; ++pd) { + if (adaptive) { + dstart = AdaptStartIndex(pd, input_depth, output_depth); + dend = AdaptEndIndex(pd, input_depth, output_depth); + } else { + dstart = pd * stride_depth - padding_depth; + dend = std::min(dstart + ksize_depth, input_depth); + dstart = std::max(dstart, 0); + } + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + + T ele = pool_process.initial(); + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_idx = + ((d * input_height + h) * input_width + w) * + input_channels + + c; + pool_process.compute(input_data[input_idx], &ele); + } + } + } + int pool_size = + (exclusive || adaptive) + ? (dend - dstart) * (hend - hstart) * (wend - wstart) + : ksize_depth * ksize_height * ksize_width; + pool_process.finalize(static_cast(pool_size), &ele); + int output_idx = + ((pd * output_height + ph) * output_width + pw) * + output_channels + + c; + output_data[output_idx] = ele; + } + } + } + } + input_data += input_stride; + output_data += output_stride; + } + } + } }; /* - * All tensors are in NCDHW format. - * Ksize, strides, paddings are three elements. These three elements represent - * depth, height and width, respectively. - */ +* Tensors are in NCDHW or NDHWC format. +* Ksize, strides, paddings are three elements. These three elements represent +* depth, height and width, respectively. +* Paddings are six elements. These six elements represent depth_forth, +* depth_back, +* height_up, height_down, width_left and width_right, respectively. +*/ template class Pool3dGradFunctor { public: @@ -461,13 +998,187 @@ class Pool3dGradFunctor { } } } + void operator()( + const platform::CPUDeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, const framework::Tensor& output_grad, + const std::vector& ksize, const std::vector& strides, + const std::vector& paddings, const std::string data_format, + PoolProcess pool_grad_process, bool exclusive, bool adaptive, + framework::Tensor* input_grad) { + bool channel_last = (data_format == "NDHWC"); + + const int batch_size = input.dims()[0]; + const int input_channels = channel_last ? input.dims()[4] : input.dims()[1]; + const int input_depth = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_height = channel_last ? 
input.dims()[2] : input.dims()[3]; + const int input_width = channel_last ? input.dims()[3] : input.dims()[4]; + + const int output_channels = + channel_last ? output.dims()[4] : output.dims()[1]; + const int output_depth = channel_last ? output.dims()[1] : output.dims()[2]; + const int output_height = + channel_last ? output.dims()[2] : output.dims()[3]; + const int output_width = channel_last ? output.dims()[3] : output.dims()[4]; + + const int ksize_depth = ksize[0]; + const int ksize_height = ksize[1]; + const int ksize_width = ksize[2]; + + const int stride_depth = strides[0]; + const int stride_height = strides[1]; + const int stride_width = strides[2]; + + const int padding_depth = paddings[0]; + const int padding_height = paddings[1]; + const int padding_width = paddings[2]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + int dstart, dend; + int hstart, hend; + int wstart, wend; + if (!channel_last) { + const int input_stride = input_depth * input_height * input_width; + const int output_stride = output_depth * output_height * output_width; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int pd = 0; pd < output_depth; ++pd) { + if (adaptive) { + dstart = AdaptStartIndex(pd, input_depth, output_depth); + dend = AdaptEndIndex(pd, input_depth, output_depth); + } else { + dstart = pd * stride_depth - padding_depth; + dend = std::min(dstart + ksize_depth, input_depth); + dstart = std::max(dstart, 0); + } + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + + int pool_size = + (exclusive || adaptive) + ? 
(dend - dstart) * (hend - hstart) * (wend - wstart) + : ksize_depth * ksize_height * ksize_width; + float scale = 1.0 / pool_size; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_idx = (d * input_height + h) * input_width + w; + int output_idx = + (pd * output_height + ph) * output_width + pw; + pool_grad_process.compute( + input_data[input_idx], output_data[output_idx], + output_grad_data[output_idx], static_cast(scale), + input_grad_data + input_idx); + } + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } else { + const int input_stride = + input_depth * input_height * input_width * input_channels; + const int output_stride = + output_depth * output_height * output_width * output_channels; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int pd = 0; pd < output_depth; ++pd) { + if (adaptive) { + dstart = AdaptStartIndex(pd, input_depth, output_depth); + dend = AdaptEndIndex(pd, input_depth, output_depth); + } else { + dstart = pd * stride_depth - padding_depth; + dend = std::min(dstart + ksize_depth, input_depth); + dstart = std::max(dstart, 0); + } + for (int ph = 0; ph < output_height; ++ph) { + if (adaptive) { + hstart = AdaptStartIndex(ph, input_height, output_height); + hend = AdaptEndIndex(ph, input_height, output_height); + } else { + hstart = ph * stride_height - padding_height; + hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + } + for (int pw = 0; pw < output_width; ++pw) { + if (adaptive) { + wstart = AdaptStartIndex(pw, input_width, output_width); + wend = AdaptEndIndex(pw, input_width, output_width); + } else { + wstart = pw * stride_width - padding_width; + wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + } + + int pool_size = + (exclusive || adaptive) + ? (dend - dstart) * (hend - hstart) * (wend - wstart) + : ksize_depth * ksize_height * ksize_width; + float scale = 1.0 / pool_size; + for (int d = dstart; d < dend; ++d) { + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_idx = + ((d * input_height + h) * input_width + w) * + input_channels + + c; + int output_idx = + ((pd * output_height + ph) * output_width + pw) * + output_channels + + c; + pool_grad_process.compute( + input_data[input_idx], output_data[output_idx], + output_grad_data[output_idx], static_cast(scale), + input_grad_data + input_idx); + } + } + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } }; /* - * All tensors are in NCDHW format. - * Ksize, strides, paddings are three elements. These three elements represent - * depth, height and width, respectively. - */ +* Tensors are in NCDHW or NDHWC format. +* Ksize, strides, paddings are three elements. These three elements represent +* depth, height and width, respectively. +* Paddings are six elements. These six elements represent depth_forth, +* depth_back, +* height_up, height_down, width_left and width_right, respectively. 
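The exclusive flag above changes the averaging denominator at the borders. A minimal sketch (not part of the patch), assuming a 1-D window for brevity:

    #include <algorithm>
    #include <iostream>

    int main() {
      const int input_h = 5, ksize = 3, stride = 2, pad = 1;
      for (int ph = 0; ph < 3; ++ph) {
        int hstart = ph * stride - pad;
        int hend = std::min(hstart + ksize, input_h);
        hstart = std::max(hstart, 0);
        // exclusive: divide by the in-bounds count (hend - hstart);
        // inclusive: divide by the full window size (ksize).
        std::cout << "window " << ph << ": exclusive=" << (hend - hstart)
                  << " inclusive=" << ksize << "\n";
      }
      return 0;
    }

With exclusive averaging, padded (out-of-bounds) positions are left out of pool_size, so border windows divide by fewer elements; the gradient scale 1.0 / pool_size above follows the same rule.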
+*/ template class MaxPool3dGradFunctor { public: @@ -541,8 +1252,139 @@ class MaxPool3dGradFunctor { } } } -}; + void operator()( + const platform::CPUDeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, const framework::Tensor& output_grad, + const std::vector& ksize, const std::vector& strides, + const std::vector& paddings, const std::string data_format, + framework::Tensor* input_grad) { + bool channel_last = (data_format == "NDHWC"); + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[4] : input.dims()[1]; + const int input_depth = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_height = channel_last ? input.dims()[2] : input.dims()[3]; + const int input_width = channel_last ? input.dims()[3] : input.dims()[4]; + + const int output_channels = + channel_last ? output.dims()[4] : output.dims()[1]; + const int output_depth = channel_last ? output.dims()[1] : output.dims()[2]; + const int output_height = + channel_last ? output.dims()[2] : output.dims()[3]; + const int output_width = channel_last ? output.dims()[3] : output.dims()[4]; + + const int ksize_depth = ksize[0]; + const int ksize_height = ksize[1]; + const int ksize_width = ksize[2]; + + const int stride_depth = strides[0]; + const int stride_height = strides[1]; + const int stride_width = strides[2]; + const int padding_depth = paddings[0]; + const int padding_height = paddings[1]; + const int padding_width = paddings[2]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + if (!channel_last) { + const int input_stride = input_depth * input_height * input_width; + const int output_stride = output_depth * output_height * output_width; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int pd = 0; pd < output_depth; ++pd) { + int dstart = pd * stride_depth - padding_depth; + int dend = std::min(dstart + ksize_depth, input_depth); + dstart = std::max(dstart, 0); + for (int ph = 0; ph < output_height; ++ph) { + int hstart = ph * stride_height - padding_height; + int hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + for (int pw = 0; pw < output_width; ++pw) { + int wstart = pw * stride_width - padding_width; + int wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + bool stop = false; + for (int d = dstart; d < dend && !stop; ++d) { + for (int h = hstart; h < hend && !stop; ++h) { + for (int w = wstart; w < wend && !stop; ++w) { + int input_idx = (d * input_height + h) * input_width + w; + int output_idx = + (pd * output_height + ph) * output_width + pw; + + if (input_data[input_idx] == output_data[output_idx]) { + input_grad_data[input_idx] += + output_grad_data[output_idx]; + stop = true; + } + } + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } else { + const int input_stride = + input_depth * input_height * input_width * input_channels; + const int output_stride = + output_depth * output_height * output_width * output_channels; + for (int i = 0; i < batch_size; i++) { + for (int c = 0; c < output_channels; ++c) { + for (int pd = 0; pd < output_depth; ++pd) { + int dstart = pd * stride_depth - padding_depth; + int dend = std::min(dstart + ksize_depth, 
input_depth); + dstart = std::max(dstart, 0); + for (int ph = 0; ph < output_height; ++ph) { + int hstart = ph * stride_height - padding_height; + int hend = std::min(hstart + ksize_height, input_height); + hstart = std::max(hstart, 0); + for (int pw = 0; pw < output_width; ++pw) { + int wstart = pw * stride_width - padding_width; + int wend = std::min(wstart + ksize_width, input_width); + wstart = std::max(wstart, 0); + bool stop = false; + + for (int d = dstart; d < dend && !stop; ++d) { + for (int h = hstart; h < hend && !stop; ++h) { + for (int w = wstart; w < wend && !stop; ++w) { + int input_idx = + ((d * input_height + h) * input_width + w) * + input_channels + + c; + int output_idx = + ((pd * output_height + ph) * output_width + pw) * + output_channels + + c; + + if (input_data[input_idx] == output_data[output_idx]) { + input_grad_data[input_idx] += + output_grad_data[output_idx]; + stop = true; + } + } + } + } + } + } + } + } + input_data += input_stride; + output_data += output_stride; + input_grad_data += input_stride; + output_grad_data += output_stride; + } + } + } +}; template class MaxPool3dGradFunctor; template class MaxPool3dGradFunctor; diff --git a/paddle/fluid/operators/math/pooling.cu b/paddle/fluid/operators/math/pooling.cu index efce3f899a4..29c0a85d402 100644 --- a/paddle/fluid/operators/math/pooling.cu +++ b/paddle/fluid/operators/math/pooling.cu @@ -29,13 +29,22 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, const int ksize_width, const int stride_height, const int stride_width, const int padding_height, const int padding_width, PoolProcess pool_process, - bool exclusive, bool adaptive, T* output_data) { + bool exclusive, bool adaptive, T* output_data, + bool channel_last = false) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int pw = index % output_width; - int ph = (index / output_width) % output_height; - int c = (index / output_width / output_height) % channels; - int batch_idx = index / output_width / output_height / channels; + int pw, ph, c, batch_idx; + if (!channel_last) { /*NCHW*/ + pw = index % output_width; + ph = (index / output_width) % output_height; + c = (index / output_width / output_height) % channels; + batch_idx = index / output_width / output_height / channels; + } else { /*NHWC*/ + c = index % channels; + pw = (index / channels) % output_width; + ph = (index / channels / output_width) % output_height; + batch_idx = index / channels / output_width / output_height; + } int hstart, hend; int wstart, wend; @@ -55,11 +64,17 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, wstart = max(wstart, 0); } - input_data += (batch_idx * channels + c) * input_height * input_width; + if (!channel_last) { + input_data += (batch_idx * channels + c) * input_height * input_width; + } else { + input_data += batch_idx * input_height * input_width * channels; + } T ele = pool_process.initial(); for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - pool_process.compute(input_data[h * input_width + w], &ele); + auto input_idx = channel_last ? (h * input_width + w) * channels + c + : h * input_width + w; + pool_process.compute(input_data[input_idx], &ele); } } int pool_size = (exclusive || adaptive) ? 
(hend - hstart) * (wend - wstart) @@ -68,7 +83,6 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, output_data[index] = ele; } } - template __global__ void KernelPool2DGrad( const int nthreads, const T* input_data, const T* output_data, @@ -76,13 +90,23 @@ __global__ void KernelPool2DGrad( const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, const int padding_height, const int padding_width, - PoolProcess pool_process, bool exclusive, bool adaptive, T* input_grad) { + PoolProcess pool_process, bool exclusive, bool adaptive, T* input_grad, + bool channel_last = false) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int w_offset = index % input_width + padding_width; - int h_offset = (index / input_width) % input_height + padding_height; - int offsetC = (index / input_width / input_height) % channels; - int batch_idx = index / input_width / input_height / channels; + int w_offset, h_offset, offsetC, batch_idx; + if (!channel_last) { /* NCHW */ + w_offset = index % input_width + padding_width; + h_offset = (index / input_width) % input_height + padding_height; + offsetC = (index / input_width / input_height) % channels; + batch_idx = index / input_width / input_height / channels; + } else { /* NHWC */ + offsetC = index % channels; + w_offset = (index / channels) % input_width + padding_width; + h_offset = + (index / channels / input_width) % input_height + padding_height; + batch_idx = index / channels / input_width / input_height; + } int phstart, phend; int pwstart, pwend; @@ -105,10 +129,18 @@ __global__ void KernelPool2DGrad( } T gradient = 0; T input = input_data[index]; - int output_idx = - (batch_idx * channels + offsetC) * output_height * output_width; - output_data += output_idx; - output_grad += output_idx; + + int output_stride; + if (!channel_last) { + output_stride = + (batch_idx * channels + offsetC) * output_height * output_width; + } else { + output_stride = batch_idx * output_height * output_width * channels; + } + + output_data += output_stride; + output_grad += output_stride; + for (int ph = phstart; ph < phend; ++ph) { for (int pw = pwstart; pw < pwend; ++pw) { int pool_size; @@ -127,7 +159,9 @@ __global__ void KernelPool2DGrad( pool_size = exclusive ? (hend - hstart) * (wend - wstart) : ksize_height * ksize_width; } - int output_sub_idx = ph * output_width + pw; + int output_sub_idx = channel_last + ? 
(ph * output_width + pw) * channels + offsetC + : ph * output_width + pw; pool_process.compute(input, output_data[output_sub_idx], output_grad[output_sub_idx], static_cast(1.0 / pool_size), &gradient); @@ -144,14 +178,21 @@ __global__ void KernelMaxPool2DGrad( const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, const int padding_height, const int padding_width, - T* input_grad) { + T* input_grad, bool channel_last = false) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int pw = index % output_width; - int ph = (index / output_width) % output_height; - int c = (index / output_width / output_height) % channels; - int batch_idx = index / output_width / output_height / channels; - + int pw, ph, c, batch_idx; + if (!channel_last) { /* NCHW */ + pw = index % output_width; + ph = (index / output_width) % output_height; + c = (index / output_width / output_height) % channels; + batch_idx = index / output_width / output_height / channels; + } else { /* NHWC */ + c = index % channels; + pw = (index / channels) % output_width; + ph = (index / channels / output_width) % output_height; + batch_idx = index / channels / output_width / output_height; + } int hstart = ph * stride_height - padding_height; int hend = min(hstart + ksize_height, input_height); hstart = max(hstart, 0); @@ -160,16 +201,24 @@ __global__ void KernelMaxPool2DGrad( int wend = min(wstart + ksize_width, input_width); wstart = max(wstart, 0); - input_data += (batch_idx * channels + c) * input_height * input_width; - input_grad += (batch_idx * channels + c) * input_height * input_width; + int input_stride; + if (!channel_last) { + input_stride = (batch_idx * channels + c) * input_height * input_width; + } else { + input_stride = batch_idx * input_height * input_width * channels; + } + input_data += input_stride; + input_grad += input_stride; T ele = output_data[index]; int maxIndex = -1; bool stop = false; for (int h = hstart; h < hend && !stop; ++h) { for (int w = wstart; w < wend && !stop; ++w) { - if (ele == input_data[h * input_width + w]) { - maxIndex = h * input_width + w; + int input_data_idx = channel_last ? (h * input_width + w) * channels + c + : h * input_width + w; + if (ele == input_data[input_data_idx]) { + maxIndex = input_data_idx; stop = true; } } @@ -214,10 +263,12 @@ void Pool2dDirectCUDAFunctor::operator()( } /* - * All tensors are in NCHW format. - * Ksize, strides, paddings are two elements. These two elements represent - * height and width, respectively. - */ +* Tensors are in NCHW or NHWC format. +* Ksize, strides are two elements. These two elements represent height +* and width, respectively. +* Paddings are four elements. These four elements represent height_up, +* height_down, width_left and width_right, respectively. 
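Given the four-element padding convention above, the pooled output extent along one axis follows the usual formula. A minimal standalone sketch (not part of the patch; PoolOutputSize is an illustrative name):

    #include <iostream>

    // pad_before/pad_after correspond to height_up/height_down
    // (or width_left/width_right) in the comment above.
    int PoolOutputSize(int input, int ksize, int pad_before, int pad_after,
                       int stride, bool ceil_mode) {
      int numerator = input + pad_before + pad_after - ksize;
      return ceil_mode ? (numerator + stride - 1) / stride + 1
                       : numerator / stride + 1;
    }

    int main() {
      // 7-row input, 3x3 kernel, stride 2, asymmetric height padding {1, 2}:
      // floor((7 + 1 + 2 - 3) / 2) + 1 = 4 output rows.
      std::cout << PoolOutputSize(7, 3, 1, 2, 2, false) << "\n";  // 4
      return 0;
    }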
+*/ template class Pool2dFunctor { public: @@ -254,13 +305,57 @@ class Pool2dFunctor { stride_width, padding_height, padding_width, pool_process, exclusive, adaptive, output_data); } -}; + void operator()(const platform::CUDADeviceContext& context, + const framework::Tensor& input, const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_process, + bool exclusive, bool adaptive, framework::Tensor* output) { + bool channel_last = (data_format == "NHWC"); + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[3] : input.dims()[1]; + const int input_height = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_width = channel_last ? input.dims()[2] : input.dims()[3]; + + const int output_channels = + channel_last ? output->dims()[3] : output->dims()[1]; + const int output_height = + channel_last ? output->dims()[1] : output->dims()[2]; + const int output_width = + channel_last ? output->dims()[2] : output->dims()[3]; + + const int ksize_height = ksize[0]; + const int ksize_width = ksize[1]; + + const int stride_height = strides[0]; + const int stride_width = strides[1]; + + const int padding_height = paddings[0]; + const int padding_width = paddings[1]; + + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + int nthreads = batch_size * output_channels * output_height * output_width; + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelPool2D<<>>( + nthreads, input_data, input_channels, input_height, input_width, + output_height, output_width, ksize_height, ksize_width, stride_height, + stride_width, padding_height, padding_width, pool_process, exclusive, + adaptive, output_data, channel_last); + } +}; /* - * All tensors are in NCHW format. - * Ksize, strides, paddings are two elements. These two elements represent - * height and width, respectively. - */ +* Tensors are in NCHW or NHWC format. +* Ksize, strides are two elements. These two elements represent height +* and width, respectively. +* Paddings are four elements. These four elements represent height_up, +* height_down, width_left and width_right, respectively. +*/ template class Pool2dGradFunctor { public: @@ -302,13 +397,62 @@ class Pool2dGradFunctor { ksize_width, stride_height, stride_width, padding_height, padding_width, pool_process, exclusive, adaptive, input_grad_data); } + void operator()( + const platform::CUDADeviceContext& context, + const framework::Tensor& input, const framework::Tensor& output, + const framework::Tensor& output_grad, const std::vector& ksize, + const std::vector& strides, const std::vector& paddings, + const std::string data_format, PoolProcess pool_process, bool exclusive, + bool adaptive, framework::Tensor* input_grad) { + bool channel_last = (data_format == "NHWC"); + + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[3] : input.dims()[1]; + const int input_height = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_width = channel_last ? input.dims()[2] : input.dims()[3]; + + const int output_channels = + channel_last ? output.dims()[3] : output.dims()[1]; + const int output_height = + channel_last ? output.dims()[1] : output.dims()[2]; + const int output_width = channel_last ? 
output.dims()[2] : output.dims()[3]; + + const int ksize_height = ksize[0]; + const int ksize_width = ksize[1]; + + const int stride_height = strides[0]; + const int stride_width = strides[1]; + + const int padding_height = paddings[0]; + const int padding_width = paddings[1]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + int nthreads = batch_size * input_channels * input_height * input_width; + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelPool2DGrad<<>>( + nthreads, input_data, output_data, output_grad_data, input_channels, + input_height, input_width, output_height, output_width, ksize_height, + ksize_width, stride_height, stride_width, padding_height, padding_width, + pool_process, exclusive, adaptive, input_grad_data, channel_last); + } }; /* - * All tensors are in NCHW format. - * Ksize, strides, paddings are two elements. These two elements represent - * height and width, respectively. - */ +* Tensors are in NCHW or NHWC format. +* Ksize, strides are two elements. These two elements represent height +* and width, respectively. +* Paddings are four elements. These four elements represent height_up, +* height_down, width_left and width_right, respectively. +*/ template class MaxPool2dGradFunctor { public: @@ -350,6 +494,51 @@ class MaxPool2dGradFunctor { ksize_width, stride_height, stride_width, padding_height, padding_width, input_grad_data); } + void operator()( + const platform::CUDADeviceContext& context, + const framework::Tensor& input, const framework::Tensor& output, + const framework::Tensor& output_grad, const std::vector& ksize, + const std::vector& strides, const std::vector& paddings, + const std::string data_format, framework::Tensor* input_grad) { + bool channel_last = (data_format == "NHWC"); + + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[3] : input.dims()[1]; + const int input_height = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_width = channel_last ? input.dims()[2] : input.dims()[3]; + + const int output_channels = + channel_last ? output.dims()[3] : output.dims()[1]; + const int output_height = + channel_last ? output.dims()[1] : output.dims()[2]; + const int output_width = channel_last ? 
output.dims()[2] : output.dims()[3]; + + const int ksize_height = ksize[0]; + const int ksize_width = ksize[1]; + + const int stride_height = strides[0]; + const int stride_width = strides[1]; + + const int padding_height = paddings[0]; + const int padding_width = paddings[1]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + int nthreads = batch_size * output_channels * output_height * output_width; + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelMaxPool2DGrad<<>>( + nthreads, input_data, output_data, output_grad_data, input_channels, + input_height, input_width, output_height, output_width, ksize_height, + ksize_width, stride_height, stride_width, padding_height, padding_width, + input_grad_data, channel_last); + } }; template class Pool2dDirectCUDAFunctor, @@ -389,15 +578,26 @@ __global__ void KernelPool3D( const int ksize_depth, const int ksize_height, const int ksize_width, const int stride_depth, const int stride_height, const int stride_width, const int padding_depth, const int padding_height, const int padding_width, - PoolProcess pool_process, bool exclusive, bool adaptive, T* output_data) { + PoolProcess pool_process, bool exclusive, bool adaptive, T* output_data, + bool channel_last = false) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int pw = index % output_width; - int ph = (index / output_width) % output_height; - int pd = (index / output_width / output_height) % output_depth; - int c = (index / output_width / output_height / output_depth) % channels; - int batch_idx = - index / output_width / output_height / output_depth / channels; + int pw, ph, pd, c, batch_idx; + if (!channel_last) { + pw = index % output_width; + ph = (index / output_width) % output_height; + pd = (index / output_width / output_height) % output_depth; + c = (index / output_width / output_height / output_depth) % channels; + batch_idx = + index / output_width / output_height / output_depth / channels; + } else { + c = index % channels; + pw = (index / channels) % output_width; + ph = (index / channels / output_width) % output_height; + pd = (index / channels / output_width / output_height) % output_depth; + batch_idx = + index / channels / output_width / output_height / output_depth; + } int dstart, dend; int hstart, hend; @@ -422,14 +622,26 @@ __global__ void KernelPool3D( hstart = max(hstart, 0); wstart = max(wstart, 0); } + + int input_data_stride; + if (!channel_last) { /* NCDHW */ + input_data_stride = + (batch_idx * channels + c) * input_depth * input_height * input_width; + } else { /* NDHWC */ + input_data_stride = + batch_idx * input_depth * input_height * input_width * channels; + } + input_data += input_data_stride; + T ele = pool_process.initial(); - input_data += - (batch_idx * channels + c) * input_depth * input_height * input_width; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { - pool_process.compute( - input_data[(d * input_height + h) * input_width + w], &ele); + auto input_data_idx = + channel_last + ? 
((d * input_height + h) * input_width + w) * channels + c + : (d * input_height + h) * input_width + w; + pool_process.compute(input_data[input_data_idx], &ele); } } } @@ -450,15 +662,27 @@ __global__ void KernelPool3DGrad( const int ksize_height, const int ksize_width, const int stride_depth, const int stride_height, const int stride_width, const int padding_depth, const int padding_height, const int padding_width, PoolProcess pool_process, - bool exclusive, bool adaptive, T* input_grad) { + bool exclusive, bool adaptive, T* input_grad, bool channel_last = false) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int w_offset = index % input_width + padding_width; - int h_offset = (index / input_width) % input_height + padding_height; - int d_offset = - (index / input_width / input_height) % input_depth + padding_depth; - int offsetC = (index / input_width / input_height / input_depth) % channels; - int batch_idx = index / input_width / input_height / input_depth / channels; + int w_offset, h_offset, d_offset, offsetC, batch_idx; + if (!channel_last) { /* "NCDHW" */ + w_offset = index % input_width + padding_width; + h_offset = (index / input_width) % input_height + padding_height; + d_offset = + (index / input_width / input_height) % input_depth + padding_depth; + offsetC = (index / input_width / input_height / input_depth) % channels; + batch_idx = index / input_width / input_height / input_depth / channels; + + } else { /* "NDHWC" */ + offsetC = index % channels; + w_offset = (index / channels) % input_width + padding_width; + h_offset = + (index / channels / input_width) % input_height + padding_height; + d_offset = (index / channels / input_width / input_height) % input_depth + + padding_depth; + batch_idx = index / channels / input_width / input_height / input_depth; + } int pdstart, pdend; int phstart, phend; @@ -490,10 +714,17 @@ __global__ void KernelPool3DGrad( T gradient = 0; T input = input_data[index]; - int output_idx = (batch_idx * channels + offsetC) * output_depth * - output_height * output_width; - output_data += output_idx; - output_grad += output_idx; + + int output_stride; + if (!channel_last) { + output_stride = (batch_idx * channels + offsetC) * output_depth * + output_height * output_width; + } else { + output_stride = + batch_idx * output_depth * output_height * output_width * channels; + } + output_data += output_stride; + output_grad += output_stride; for (int pd = pdstart; pd < pdend; ++pd) { for (int ph = phstart; ph < phend; ++ph) { @@ -522,7 +753,13 @@ __global__ void KernelPool3DGrad( exclusive ? (dend - dstart) * (hend - hstart) * (wend - wstart) : ksize_depth * ksize_height * ksize_width; } - int output_sub_idx = (pd * output_height + ph) * output_width + pw; + + int output_sub_idx = + channel_last + ? 
((pd * output_height + ph) * output_width + pw) * channels + + offsetC : (pd * output_height + ph) * output_width + pw; + pool_process.compute(input, output_data[output_sub_idx], output_grad[output_sub_idx], static_cast(1.0 / pool_size), &gradient); @@ -541,38 +778,64 @@ __global__ void KernelMaxPool3DGrad( const int output_height, const int output_width, const int ksize_depth, const int ksize_height, const int ksize_width, const int stride_depth, const int stride_height, const int stride_width, const int padding_depth, - const int padding_height, const int padding_width, T* input_grad) { + const int padding_height, const int padding_width, T* input_grad, + bool channel_last = false) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { - int pw = index % output_width; - int ph = (index / output_width) % output_height; - int pd = (index / output_width / output_height) % output_depth; - int c = (index / output_width / output_height / output_depth) % channels; - int batch_idx = - index / output_width / output_height / output_depth / channels; + int pw, ph, pd, c, batch_idx; + + if (!channel_last) { /*NCDHW*/ + pw = index % output_width; + ph = (index / output_width) % output_height; + pd = (index / output_width / output_height) % output_depth; + c = (index / output_width / output_height / output_depth) % channels; + batch_idx = + index / output_width / output_height / output_depth / channels; + } else { /*NDHWC*/ + c = index % channels; + pw = (index / channels) % output_width; + ph = (index / channels / output_width) % output_height; + pd = (index / channels / output_width / output_height) % output_depth; + batch_idx = + index / channels / output_width / output_height / output_depth; + } + int dstart = pd * stride_depth - padding_depth; int hstart = ph * stride_height - padding_height; int wstart = pw * stride_width - padding_width; + int dend = min(dstart + ksize_depth, input_depth); int hend = min(hstart + ksize_height, input_height); int wend = min(wstart + ksize_width, input_width); + dstart = max(dstart, 0); hstart = max(hstart, 0); wstart = max(wstart, 0); + T ele = output_data[index]; bool stop = false; int maxIdx = -1; - input_data += - (batch_idx * channels + c) * input_depth * input_height * input_width; - input_grad += - (batch_idx * channels + c) * input_depth * input_height * input_width; + int input_stride; + if (!channel_last) { + input_stride = + (batch_idx * channels + c) * input_depth * input_height * input_width; + } else { + input_stride = + batch_idx * input_depth * input_height * input_width * channels; + } + input_data += input_stride; + input_grad += input_stride; for (int d = dstart; d < dend && !stop; ++d) { for (int h = hstart; h < hend && !stop; ++h) { for (int w = wstart; w < wend && !stop; ++w) { - if (ele == input_data[(d * input_height + h) * input_width + w]) { + int input_data_idx = + channel_last + ? ((d * input_height + h) * input_width + w) * channels + c + : (d * input_height + h) * input_width + w; + if (ele == input_data[input_data_idx]) { stop = true; - maxIdx = (d * input_height + h) * input_width + w; + maxIdx = input_data_idx; } } } @@ -585,10 +848,13 @@ } /* - * All tensors are in NCDHW format. - * Ksize, strides, paddings are three elements. These three elements represent - * depth, height and width, respectively. - */ +* Tensors are in NCDHW or NDHWC format. +* Ksize, strides are three elements. These three elements represent +* depth, height and width, respectively. +* Paddings are six elements. These six elements represent depth_front, +* depth_back, +* height_up, height_down, width_left and width_right, respectively. +*/ template class Pool3dFunctor { public: @@ -632,13 +898,67 @@ class Pool3dFunctor { padding_depth, padding_height, padding_width, pool_process, exclusive, adaptive, output_data); } + void operator()(const platform::CUDADeviceContext& context, + const framework::Tensor& input, const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_process, + bool exclusive, bool adaptive, framework::Tensor* output) { + bool channel_last = (data_format == "NDHWC"); + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[4] : input.dims()[1]; + const int input_depth = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_height = channel_last ? input.dims()[2] : input.dims()[3]; + const int input_width = channel_last ? input.dims()[3] : input.dims()[4]; + + const int output_channels = + channel_last ? output->dims()[4] : output->dims()[1]; + const int output_depth = + channel_last ? output->dims()[1] : output->dims()[2]; + const int output_height = + channel_last ? output->dims()[2] : output->dims()[3]; + const int output_width = + channel_last ? output->dims()[3] : output->dims()[4]; + + const int ksize_depth = ksize[0]; + const int ksize_height = ksize[1]; + const int ksize_width = ksize[2]; + + const int stride_depth = strides[0]; + const int stride_height = strides[1]; + const int stride_width = strides[2]; + + const int padding_depth = paddings[0]; + const int padding_height = paddings[1]; + const int padding_width = paddings[2]; + + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + + int nthreads = batch_size * output_channels * output_depth * output_height * + output_width; + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelPool3D<<>>( + nthreads, input_data, input_channels, input_depth, input_height, + input_width, output_depth, output_height, output_width, ksize_depth, + ksize_height, ksize_width, stride_depth, stride_height, stride_width, + padding_depth, padding_height, padding_width, pool_process, exclusive, + adaptive, output_data, channel_last); + } }; /* - * All tensors are in NCDHW format. - * Ksize, strides, paddings are three elements. These three elements represent - * depth, height and width, respectively. - */ +* Tensors are in NCDHW or NDHWC format. +* Ksize, strides are three elements. These three elements represent +* depth, height and width, respectively. +* Paddings are six elements. These six elements represent depth_front, +* depth_back, +* height_up, height_down, width_left and width_right, respectively. +*/ template class Pool3dGradFunctor { public: @@ -688,13 +1008,69 @@ class Pool3dGradFunctor { stride_height, stride_width, padding_depth, padding_height, padding_width, pool_process, exclusive, adaptive, input_grad_data); } + void operator()( + const platform::CUDADeviceContext& context, + const framework::Tensor& input, const framework::Tensor& output, + const framework::Tensor& output_grad, const std::vector& ksize, + const std::vector& strides, const std::vector& paddings, + const std::string data_format, PoolProcess pool_process, bool exclusive, + bool adaptive, framework::Tensor* input_grad) { + bool channel_last = (data_format == "NDHWC"); + + const int batch_size = input.dims()[0]; + const int input_channels = channel_last ? input.dims()[4] : input.dims()[1]; + const int input_depth = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_height = channel_last ? input.dims()[2] : input.dims()[3]; + const int input_width = channel_last ? input.dims()[3] : input.dims()[4]; + + const int output_channels = + channel_last ? output.dims()[4] : output.dims()[1]; + const int output_depth = channel_last ? output.dims()[1] : output.dims()[2]; + const int output_height = + channel_last ? output.dims()[2] : output.dims()[3]; + const int output_width = channel_last ? output.dims()[3] : output.dims()[4]; + + const int ksize_depth = ksize[0]; + const int ksize_height = ksize[1]; + const int ksize_width = ksize[2]; + + const int stride_depth = strides[0]; + const int stride_height = strides[1]; + const int stride_width = strides[2]; + + const int padding_depth = paddings[0]; + const int padding_height = paddings[1]; + const int padding_width = paddings[2]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + int nthreads = + batch_size * input_channels * input_depth * input_height * input_width; + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelPool3DGrad<<>>( + nthreads, input_data, output_data, output_grad_data, input_channels, + input_depth, input_height, input_width, output_depth, output_height, + output_width, ksize_depth, ksize_height, ksize_width, stride_depth, + stride_height, stride_width, padding_depth, padding_height, + padding_width, pool_process, exclusive, adaptive, input_grad_data, + channel_last); // add channel_last + } }; /* - * All tensors are in NCDHW format. - * Ksize, strides, paddings are three elements. These three elements represent - * depth, height and width, respectively. - */ +* Tensors are in NCDHW or NDHWC format. +* Ksize, strides are three elements. These three elements represent +* depth, height and width, respectively. +* Paddings are six elements. These six elements represent depth_front, +* depth_back, +* height_up, height_down, width_left and width_right, respectively. 
+*/ template class MaxPool3dGradFunctor { public: @@ -743,6 +1119,57 @@ class MaxPool3dGradFunctor { stride_height, stride_width, padding_depth, padding_height, padding_width, input_grad_data); } + void operator()( + const platform::CUDADeviceContext& context, + const framework::Tensor& input, const framework::Tensor& output, + const framework::Tensor& output_grad, const std::vector& ksize, + const std::vector& strides, const std::vector& paddings, + const std::string data_format, framework::Tensor* input_grad) { + bool channel_last = (data_format == "NDHWC"); + const int batch_size = input.dims()[0]; + + const int input_channels = channel_last ? input.dims()[4] : input.dims()[1]; + const int input_depth = channel_last ? input.dims()[1] : input.dims()[2]; + const int input_height = channel_last ? input.dims()[2] : input.dims()[3]; + const int input_width = channel_last ? input.dims()[3] : input.dims()[4]; + + const int output_channels = + channel_last ? output.dims()[4] : output.dims()[1]; + const int output_depth = channel_last ? output.dims()[1] : output.dims()[2]; + const int output_height = + channel_last ? output.dims()[2] : output.dims()[3]; + const int output_width = channel_last ? output.dims()[3] : output.dims()[4]; + + const int ksize_depth = ksize[0]; + const int ksize_height = ksize[1]; + const int ksize_width = ksize[2]; + + const int stride_depth = strides[0]; + const int stride_height = strides[1]; + const int stride_width = strides[2]; + + const int padding_depth = paddings[0]; + const int padding_height = paddings[1]; + const int padding_width = paddings[2]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + int nthreads = batch_size * output_channels * output_depth * output_height * + output_width; + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelMaxPool3DGrad<<>>( + nthreads, input_data, output_data, output_grad_data, input_channels, + input_depth, input_height, input_width, output_depth, output_height, + output_width, ksize_depth, ksize_height, ksize_width, stride_depth, + stride_height, stride_width, padding_depth, padding_height, + padding_width, input_grad_data, channel_last); // add channel_last + } }; template class MaxPool3dGradFunctor; diff --git a/paddle/fluid/operators/math/pooling.h b/paddle/fluid/operators/math/pooling.h index e1f8e6df1d1..548612e8de2 100644 --- a/paddle/fluid/operators/math/pooling.h +++ b/paddle/fluid/operators/math/pooling.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" @@ -83,10 +84,11 @@ HOSTDEVICE inline int AdaptEndIndex(int ph, int input_size, int output_size) { /* * \brief Getting pooling results, and calculating gradient. * - * In pool2d, all tensors are in NCHW format. Where N is batch size, C is the - * number of channels, H and W is the height and width of feature. - * In pool3d, all tensors are in NCDHW format. Where N is batch size, C is the - * number of channels, D, H and W is the depth, height and width of feature. + * In pool2d, all Tensors are in NCHW or NHWC format. Where N is batch size, C + * is the number of channels, H and W is the height and width of feature. + * In pool3d, all Tensors are in NCDHW or NDHWC format. 
Where N is batch size, C + * is the number of channels, D, H and W is the depth, height and width of + * feature. * * In max pooling, it is possible that the pooling region has multiple maximum * elements. In this case, we should compute the gradient of the first maximum @@ -115,6 +117,14 @@ class Pool2dFunctor { const std::vector& strides, const std::vector& paddings, PoolProcess pool_compute, bool exclusive, bool adaptive, framework::Tensor* output); + + // overload operator() to support argument data_format + void operator()(const DeviceContext& context, const framework::Tensor& input, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_compute, + bool exclusive, bool adaptive, framework::Tensor* output); }; template @@ -127,6 +137,15 @@ class Pool2dGradFunctor { const std::vector& strides, const std::vector& paddings, PoolProcess pool_compute, bool exclusive, bool adaptive, framework::Tensor* input_grad); + // overload operator() to support argument data_format + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& output_grad, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_compute, + bool exclusive, bool adaptive, framework::Tensor* input_grad); }; template @@ -139,6 +158,14 @@ class MaxPool2dGradFunctor { const std::vector& strides, const std::vector& paddings, framework::Tensor* input_grad); + // overload operator() to support argument data_format + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& output_grad, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, framework::Tensor* input_grad); }; template @@ -149,6 +176,13 @@ class Pool3dFunctor { const std::vector& strides, const std::vector& paddings, PoolProcess pool_compute, bool exclusive, bool adaptive, framework::Tensor* output); + // overload operator() to support argument data_format + void operator()(const DeviceContext& context, const framework::Tensor& input, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_compute, + bool exclusive, bool adaptive, framework::Tensor* output); }; template @@ -161,6 +195,15 @@ class Pool3dGradFunctor { const std::vector& strides, const std::vector& paddings, PoolProcess pool_compute, bool exclusive, bool adaptive, framework::Tensor* input_grad); + // overload operator() to support argument data_format + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& output_grad, + const std::vector& ksize, + const std::vector& strides, + const std::vector& paddings, + const std::string data_format, PoolProcess pool_compute, + bool exclusive, bool adaptive, framework::Tensor* input_grad); }; template @@ -173,6 +216,14 @@ class MaxPool3dGradFunctor { const std::vector& strides, const std::vector& paddings, framework::Tensor* input_grad); + // overload operator() to support argument data_format + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& output_grad, + const std::vector& ksize, + const std::vector& strides, + const 
std::vector& paddings, + const std::string data_format, framework::Tensor* input_grad); }; /* diff --git a/paddle/fluid/operators/pool_cudnn_op.cu.cc b/paddle/fluid/operators/pool_cudnn_op.cu.cc index b26f1270268..78df73ae183 100644 --- a/paddle/fluid/operators/pool_cudnn_op.cu.cc +++ b/paddle/fluid/operators/pool_cudnn_op.cu.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/pool_op.h" #include "paddle/fluid/platform/cudnn_helper.h" @@ -27,47 +29,117 @@ using PoolingMode = platform::PoolingMode; template using ScalingParamType = typename platform::CudnnDataType::ScalingParamType; +DataLayout getLayoutFromStr(std::string data_format) { + if (data_format == "NHWC") { + return DataLayout::kNHWC; + } else if (data_format == "NCHW") { + return DataLayout::kNCHW; + } else if (data_format == "NCDHW") { + return DataLayout::kNCDHW; + } else { + return DataLayout::kNCDHW; + } +} + template class PoolCUDNNOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, + "It must use CUDAPlace."); const Tensor *input = ctx.Input("X"); Tensor *output = ctx.Output("Out"); - - const T *input_data = input->data(); - T *output_data = output->mutable_data(ctx.GetPlace()); - + output->mutable_data(ctx.GetPlace()); std::string pooling_type = ctx.Attr("pooling_type"); bool exclusive = ctx.Attr("exclusive"); + bool adaptive = ctx.Attr("adaptive"); std::vector ksize = ctx.Attr>("ksize"); std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); - if (ctx.Attr("global_pooling")) { - for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; - ksize[i] = static_cast(input->dims()[i + 2]); + std::string data_format = ctx.Attr("data_format"); + bool global_pooling = ctx.Attr("global_pooling"); + std::string padding_algorithm = ctx.Attr("padding_algorithm"); + const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); + + // update paddings + auto in_x_dims = input->dims(); + framework::DDim data_dims; + if (channel_last) { + data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); + } else { + data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); + } + UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm, + data_dims, strides, ksize); + if (data_dims.size() * 2 == paddings.size()) { + for (size_t i = 0; i < data_dims.size(); ++i) { + paddings.erase(paddings.begin() + i + 1); } } - // ------------------- cudnn descriptors --------------------- - ScopedTensorDescriptor input_desc; - ScopedTensorDescriptor output_desc; - ScopedPoolingDescriptor pool_desc; + if (global_pooling) { + UpdateKsize(&ksize, data_dims); + } + + const std::string str_NCHW = "NCHW", str_NHWC = "NHWC"; + const std::string str_NCDHW = "NCDHW", str_NDHWC = "NDHWC"; + + // -----------------transformed tensor ------------------------ + + Tensor transformed_input(input->type()); + Tensor transformed_output(output->type()); DataLayout layout; - if (strides.size() == 2U) { - layout = DataLayout::kNCHW; - } else { + if (data_format == str_NDHWC) { layout = 
DataLayout::kNCDHW; + auto &dev_ctx = + ctx.template device_context(); + std::vector axis{0, 4, 1, 2, 3}; + + // input + transformed_input.Resize(input->dims()); + + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[4]; + in_dims_vec[2] = input->dims()[1]; + in_dims_vec[3] = input->dims()[2]; + in_dims_vec[4] = input->dims()[3]; + transformed_input.Resize(framework::make_ddim(in_dims_vec)); + transformed_input.mutable_data(ctx.GetPlace(), input->type()); + + math::Transpose trans5; + trans5(dev_ctx, *input, &transformed_input, axis); + + // output + transformed_output.Resize(output->dims()); + + auto out_dims_vec = framework::vectorize(output->dims()); + out_dims_vec[1] = output->dims()[4]; + out_dims_vec[2] = output->dims()[1]; + out_dims_vec[3] = output->dims()[2]; + out_dims_vec[4] = output->dims()[3]; + transformed_output.Resize(framework::make_ddim(out_dims_vec)); + + } else { + layout = getLayoutFromStr(data_format); + transformed_input = *input; + transformed_output = *output; } + const T *tranformed_input_data = transformed_input.data(); + T *tranformed_output_data = transformed_output.mutable_data( + transformed_output.dims(), ctx.GetPlace()); + + // ------------------- cudnn descriptors --------------------- + ScopedTensorDescriptor input_desc; + ScopedTensorDescriptor output_desc; + ScopedPoolingDescriptor pool_desc; + cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( - layout, framework::vectorize(input->dims())); + layout, framework::vectorize(transformed_input.dims())); cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( - layout, framework::vectorize(output->dims())); + layout, framework::vectorize(transformed_output.dims())); PoolingMode pooling_mode; if (pooling_type == "max") { @@ -83,9 +155,19 @@ class PoolCUDNNOpKernel : public framework::OpKernel { // ------------------- cudnn pool algorithm --------------------- auto handle = ctx.cuda_device_context().cudnn_handle(); ScalingParamType alpha = 1.0f, beta = 0.0f; + CUDNN_ENFORCE(platform::dynload::cudnnPoolingForward( - handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta, - cudnn_output_desc, output_data)); + handle, cudnn_pool_desc, &alpha, cudnn_input_desc, + tranformed_input_data, &beta, cudnn_output_desc, + tranformed_output_data)); + // add + if (data_format == str_NDHWC) { + auto &dev_ctx = + ctx.template device_context(); + std::vector axis{0, 2, 3, 4, 1}; + math::Transpose trans5_v2; + trans5_v2(dev_ctx, transformed_output, output, axis); + } } }; @@ -93,8 +175,8 @@ template class PoolCUDNNGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, + "It must use CUDAPlace."); const Tensor *input = ctx.Input("X"); const Tensor *output = ctx.Input("Out"); @@ -104,37 +186,109 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { std::string pooling_type = ctx.Attr("pooling_type"); bool exclusive = ctx.Attr("exclusive"); + bool adaptive = ctx.Attr("adaptive"); std::vector ksize = ctx.Attr>("ksize"); std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); - - if (ctx.Attr("global_pooling")) { - for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; - ksize[i] = static_cast(input->dims()[i + 2]); + std::string data_format = ctx.Attr("data_format"); + bool global_pooling = 
ctx.Attr("global_pooling"); + std::string padding_algorithm = ctx.Attr("padding_algorithm"); + const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); + + // update paddings + auto in_x_dims = input->dims(); + framework::DDim data_dims; + if (channel_last) { + data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); + } else { + data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); + } + UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm, + data_dims, strides, ksize); + if (data_dims.size() * 2 == paddings.size()) { + for (size_t i = 0; i < data_dims.size(); ++i) { + paddings.erase(paddings.begin() + i + 1); } } - const T *input_data = input->data(); - const T *output_data = output->data(); - const T *output_grad_data = output_grad->data(); + if (global_pooling) { + UpdateKsize(&ksize, data_dims); + } - // ------------------- cudnn descriptors --------------------- - ScopedTensorDescriptor input_desc; - ScopedTensorDescriptor output_desc; - ScopedPoolingDescriptor pool_desc; + // ------- tensor grad -------------- + Tensor transformed_input(input->type()); + Tensor transformed_output(output->type()); + Tensor transformed_output_grad(output_grad->type()); + + input_grad->mutable_data(ctx.GetPlace()); + Tensor transformed_input_grad(input_grad->type()); DataLayout layout; + const std::string str_NCHW = "NCHW", str_NHWC = "NHWC"; + const std::string str_NCDHW = "NCDHW", str_NDHWC = "NDHWC"; + if (data_format == str_NDHWC) { + layout = DataLayout::kNCDHW; + auto &dev_ctx = + ctx.template device_context(); + std::vector axis{0, 4, 1, 2, 3}; + + // input + transformed_input.Resize(input->dims()); + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[4]; + in_dims_vec[2] = input->dims()[1]; + in_dims_vec[3] = input->dims()[2]; + in_dims_vec[4] = input->dims()[3]; + transformed_input.Resize(framework::make_ddim(in_dims_vec)); + transformed_input.mutable_data(ctx.GetPlace(), input->type()); + + math::Transpose trans5; + trans5(dev_ctx, *input, &transformed_input, axis); + + // output + transformed_output.Resize(output->dims()); + auto out_dims_vec = framework::vectorize(output->dims()); + out_dims_vec[1] = output->dims()[4]; + out_dims_vec[2] = output->dims()[1]; + out_dims_vec[3] = output->dims()[2]; + out_dims_vec[4] = output->dims()[3]; + transformed_output.Resize(framework::make_ddim(out_dims_vec)); + + transformed_output.mutable_data(ctx.GetPlace(), output->type()); + + math::Transpose trans5_v2; + trans5_v2(dev_ctx, *output, &transformed_output, axis); + + // output grad + transformed_output_grad.Resize(framework::make_ddim(out_dims_vec)); + transformed_output_grad.mutable_data(ctx.GetPlace(), output_grad->type()); + + math::Transpose trans5_v3; + trans5_v3(dev_ctx, *output_grad, &transformed_output_grad, axis); + + // input grad + transformed_input_grad.Resize(framework::make_ddim(in_dims_vec)); - if (strides.size() == 2U) { - layout = DataLayout::kNCHW; } else { - layout = DataLayout::kNCDHW; + layout = getLayoutFromStr(data_format); + transformed_input = *input; + transformed_output = *output; + transformed_output_grad = *output_grad; + transformed_input_grad = *input_grad; } + const T *input_data = transformed_input.data(); + const T *output_data = transformed_output.data(); + const T *output_grad_data = transformed_output_grad.data(); + + // ------------------- cudnn descriptors --------------------- + ScopedTensorDescriptor input_desc; + ScopedTensorDescriptor output_desc; + 
ScopedPoolingDescriptor pool_desc; + cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( - layout, framework::vectorize(input->dims())); + layout, framework::vectorize(transformed_input.dims())); cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( - layout, framework::vectorize(output->dims())); + layout, framework::vectorize(transformed_output.dims())); PoolingMode pooling_mode; if (pooling_type == "max") { @@ -155,13 +309,21 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel { auto handle = ctx.cuda_device_context().cudnn_handle(); ScalingParamType alpha = 1.0f, beta = 0.0f; if (input_grad) { - T *input_grad_data = input_grad->mutable_data(ctx.GetPlace()); + T *input_grad_data = transformed_input_grad.mutable_data( + transformed_input_grad.dims(), ctx.GetPlace()); // Because beta is zero, it is unnecessary to reset input_grad. - CUDNN_ENFORCE(platform::dynload::cudnnPoolingBackward( handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data, cudnn_output_desc, output_grad_data, cudnn_input_desc, input_data, &beta, cudnn_input_desc, input_grad_data)); + + if (data_format == str_NDHWC) { + auto &dev_ctx = + ctx.template device_context(); + std::vector axis{0, 2, 3, 4, 1}; + math::Transpose trans5_v4; + trans5_v4(dev_ctx, transformed_input_grad, input_grad, axis); + } } } }; diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 073c7fe7561..bde087e080d 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -24,29 +24,32 @@ limitations under the License. */ namespace paddle { namespace operators { -int PoolOutputSize(int input_size, int filter_size, int padding, int stride, - bool ceil_mode) { +int PoolOutputSize(int input_size, int filter_size, int padding_1, + int padding_2, int stride, bool ceil_mode) { int output_size; if (!ceil_mode) { - output_size = (input_size - filter_size + 2 * padding) / stride + 1; + output_size = + (input_size - filter_size + padding_1 + padding_2) / stride + 1; } else { output_size = - (input_size - filter_size + 2 * padding + stride - 1) / stride + 1; + (input_size - filter_size + padding_1 + padding_2 + stride - 1) / + stride + + 1; } - PADDLE_ENFORCE(output_size > 0, - "Due to the settings of padding(%d), filter_size(%d) and " - "stride(%d), the output size is less than 0, please check " - "again. Input_size:%d", - padding, filter_size, stride, input_size); + PADDLE_ENFORCE_GT( + output_size, 0, + "Due to the settings of padding(%d,%d), filter_size(%d) and " + "stride(%d), the output size is less than 0, please check " + "again. 
Input_size:%d", + padding_1, padding_2, filter_size, stride, input_size); return output_size; } void PoolOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) of Pooling should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Out(Output) of Pooling should not be null."); - - auto in_x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, + "X(Input) of Pooling should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, + "Out(Output) of Pooling should not be null."); std::string pooling_type = ctx->Attrs().Get("pooling_type"); std::vector ksize = ctx->Attrs().Get>("ksize"); @@ -54,38 +57,60 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const { std::vector paddings = ctx->Attrs().Get>("paddings"); bool ceil_mode = ctx->Attrs().Get("ceil_mode"); bool adaptive = ctx->Attrs().Get("adaptive"); + bool global_pooling = ctx->Attrs().Get("global_pooling"); + std::string data_format = ctx->Attrs().Get("data_format"); + std::string padding_algorithm = + ctx->Attrs().Get("padding_algorithm"); - PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, - "Pooling intput should be 4-D or 5-D tensor."); - - if (ctx->Attrs().Get("global_pooling")) { - ksize.resize(static_cast(in_x_dims.size()) - 2); - for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; - ksize[i] = static_cast(in_x_dims[i + 2]); - } - } + auto in_x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(in_x_dims.size() == 4 || in_x_dims.size() == 5, true, + "Pooling intput should be 4-D or 5-D tensor."); - PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U, - "Input size and pooling size should be consistent."); + PADDLE_ENFORCE_EQ(in_x_dims.size() - ksize.size(), 2U, + "Input size and pooling size should be consistent."); PADDLE_ENFORCE_EQ(ksize.size(), strides.size(), "Strides size and pooling size should be the same."); - PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(), - "Paddings size and pooling size should be the same."); - std::vector output_shape({in_x_dims[0], in_x_dims[1]}); + const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); + + // update paddings if "SAME" or global_pooling + framework::DDim data_dims; + if (channel_last) { + data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); + } else { + data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); + } + UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm, + data_dims, strides, ksize); + + if (global_pooling) { + UpdateKsize(&ksize, data_dims); + } + + std::vector output_shape; if (adaptive) { output_shape.insert(output_shape.end(), ksize.begin(), ksize.end()); } else { - for (size_t i = 0; i < ksize.size(); ++i) { - if (!ctx->IsRuntime() && in_x_dims[i + 2] <= 0) { - output_shape.push_back(-1); + for (size_t i = 0; i < data_dims.size(); ++i) { + if ((!ctx->IsRuntime()) && (data_dims[i] < 0)) { + output_shape.push_back(in_x_dims[i]); } else { - output_shape.push_back(PoolOutputSize( - in_x_dims[i + 2], ksize[i], paddings[i], strides[i], ceil_mode)); + output_shape.push_back( + PoolOutputSize(data_dims[i], ksize[i], paddings[2 * i], + paddings[2 * i + 1], strides[i], ceil_mode)); } } } + + // output_N = input_N + output_shape.insert(output_shape.begin(), in_x_dims[0]); + // output_C = input_C + if (channel_last) { + output_shape.push_back(in_x_dims[in_x_dims.size() - 1]); + } else { + output_shape.insert(output_shape.begin() + 1, in_x_dims[1]); + } + ctx->SetOutputDim("Out", 
framework::make_ddim(output_shape)); ctx->ShareLoD("X", "Out"); } @@ -93,7 +118,9 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; - std::string data_format = ctx.Attr("data_format"); + // The data_format attr is handled inside the pool kernels, so the kernel + // type is chosen with AnyLayout here. + std::string data_format = "AnyLayout"; framework::DataLayout layout_ = framework::StringToDataLayout(data_format); #ifdef PADDLE_WITH_CUDA @@ -114,16 +141,18 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( } void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); - PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), - "Input(X@GRAD) should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) must not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true, + "Input(X@GRAD) should not be null."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } framework::OpKernelType PoolOpGrad::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { framework::LibraryType library_{framework::LibraryType::kPlain}; - std::string data_format = ctx.Attr("data_format"); + // The data_format attr is handled inside the pool kernels, so the kernel + // type is chosen with AnyLayout here. + std::string data_format = "AnyLayout"; framework::DataLayout layout_ = framework::StringToDataLayout(data_format); #ifdef PADDLE_WITH_CUDA @@ -186,8 +215,8 @@ void Pool2dOpMaker::Make() { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, default {0,0}), paddings(height, width) of pooling " - "operator." + "(vector, default {0,0}), paddings(height_top, height_bottom, " + "width_left, width_right) of pooling operator." "If global_pooling = true, paddings and kernel size will be ignored.") .SetDefault({0, 0}); AddAttr( "use_cudnn", - "(bool, default false) Only used in cudnn kernel, need install cudnn") + "(bool, default false) Only used in cudnn kernel; cudnn needs to be installed.") .SetDefault(false); AddAttr( "ceil_mode", "(bool, default false) Whether to use the ceil function to calculate " "output height and width. False is the default. If it is set to False, " "the floor function will be used.") .SetDefault(false); AddAttr("use_mkldnn", - "(bool, default false) Only used in mkldnn kernel" + "(bool, default false) Only used in mkldnn kernel.") .SetDefault(false); AddAttr("use_quantizer", "(bool, default false) " @@ -229,18 +258,24 @@ "An optional string from: \"NHWC\", \"NCHW\". " "Defaults to \"NHWC\". Specify the data format of the output data, " "the input will be transformed automatically. ") - .SetDefault("AnyLayout"); + .SetDefault("NCHW"); AddAttr("is_test", "(bool, default false) Set to true for inference only, false " "for training. Some layers may run faster when this is true.") .SetDefault(false); + AddAttr( + "padding_algorithm", + "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\"," + "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. " + "Set to \"SAME\" or \"VALID\" for algorithm of padding. ") + .SetDefault("EXPLICIT"); // TODO(dzhwinter): need to registered layout transform function AddComment(R"DOC( The pooling2d operation calculates the output based on the input, pooling_type and ksize, strides, paddings parameters. 
-Input(X) and output(Out) are in NCHW format, where N is batch size, C is the +Input(X) and output(Out) are in NCHW or NHWC format, where N is batch size, C is the number of channels, H is the height of the feature, and W is the width of the feature. Parameters(ksize, strides, paddings) are two elements. These two elements represent height and width, respectively. @@ -256,30 +291,47 @@ Example: Out shape: $(N, C, H_{out}, W_{out})$ + For pool_padding = "SAME": + $$ + H_{out} = \\frac{(H_{in} + strides[0] - 1)}{strides[0]} + $$ + $$ + W_{out} = \\frac{(W_{in} + strides[1] - 1)}{strides[1]} + $$ + + For pool_padding = "VALID": + $$ + H_{out} = \\frac{(H_{in} - ksize[0] + strides[0])}{strides[0]} + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[1] + strides[1])}{strides[1]} + $$ + For ceil_mode = false: $$ - H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 + H_{out} = \\frac{(H_{in} - ksize[0] + pad_height_top + pad_height_bottom)}{strides[0]} + 1 $$ $$ - W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 + W_{out} = \\frac{(W_{in} - ksize[1] + pad_width_left + pad_width_right)}{strides[1]} + 1 $$ + For ceil_mode = true: $$ - H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 + H_{out} = \\frac{(H_{in} - ksize[0] + pad_height_top + pad_height_bottom + strides[0] - 1)}{strides[0]} + 1 $$ $$ - W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 + W_{out} = \\frac{(W_{in} - ksize[1] + pad_width_left + pad_width_right + strides[1] - 1)}{strides[1]} + 1 $$ For exclusive = false: $$ - hstart = i * strides[0] - paddings[0] + hstart = i * strides[0] - pad_height_top $$ $$ hend = hstart + ksize[0] $$ $$ - wstart = j * strides[1] - paddings[1] + wstart = j * strides[1] - pad_width_left $$ $$ wend = wstart + ksize[1] @@ -290,13 +342,13 @@ Example: For exclusive = true: $$ - hstart = max(0, i * strides[0] - paddings[0]) + hstart = max(0, i * strides[0] - pad_height_top) $$ $$ hend = min(H, hstart + ksize[0]) $$ $$ - wstart = max(0, j * strides[1] - paddings[1]) + wstart = max(0, j * strides[1] - pad_width_left) $$ $$ wend = min(W, wstart + ksize[1]) @@ -319,13 +371,14 @@ class PoolOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { void Pool3dOpMaker::Make() { AddInput("X", "(Tensor) The input tensor of pooling operator. " - "The format of input tensor is NCDHW, where N is batch size, C is " + "The format of input tensor is NCDHW or NDHWC, where N is batch " + "size, C is " "the number of channels, and D, H and W is the depth, height and " "width of " "the feature, respectively."); AddOutput("Out", "(Tensor) The output tensor of pooling operator." - "The format of output tensor is also NCDHW, " + "The format of output tensor is also NCDHW or NDHWC, " "where N is batch size, C is " "the number of channels, and D, H and W is the depth, height and " "width of the feature, respectively."); @@ -355,8 +408,10 @@ void Pool3dOpMaker::Make() { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, default {0,0,0}), paddings(depth, height, " - "width) of pooling operator. " + "(vector, default {0,0,0}), paddings(pad_depth_front, " + "pad_depth_back, " + "pad_height_top, pad_height_bottom, pad_width_left, pad_width_right" + ") of pooling operator. " "If global_pooling = true, ksize and paddings will be ignored.") .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently, // TypedAttrChecker don't support vector type.) 
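The output-size rules documented above can be summarized per spatial dimension; a minimal standalone Python sketch (helper names are illustrative, not part of the operator code; integer floor division mirrors the \frac formulas):

    def pool_output_size(in_size, ksize, pad_1, pad_2, stride,
                         ceil_mode=False, padding_algorithm="EXPLICIT"):
        if padding_algorithm == "SAME":
            # Output depends only on the stride; padding is derived to fit.
            return (in_size + stride - 1) // stride
        if padding_algorithm == "VALID":
            # No padding at all.
            return (in_size - ksize + stride) // stride
        # "EXPLICIT": asymmetric pad_1 (front/top/left) and pad_2 (back/bottom/right).
        if ceil_mode:
            return (in_size - ksize + pad_1 + pad_2 + stride - 1) // stride + 1
        return (in_size - ksize + pad_1 + pad_2) // stride + 1

    def same_padding(in_size, ksize, stride):
        # Mirrors UpdatePadding (pool_op.h, below) for padding_algorithm == "SAME":
        # distribute the required total padding, with the extra unit at the end.
        out_size = (in_size + stride - 1) // stride
        pad_sum = max((out_size - 1) * stride + ksize - in_size, 0)
        return pad_sum // 2, pad_sum - pad_sum // 2

    assert pool_output_size(32, 3, 1, 0, 1) == 31  # asymmetric explicit padding
    assert pool_output_size(32, 3, 0, 0, 2, padding_algorithm="SAME") == 16
    assert same_padding(32, 3, 2) == (0, 1)        # pad_top = 0, pad_bottom = 1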
@@ -376,7 +431,7 @@ void Pool3dOpMaker::Make() { AddAttr( "use_cudnn", - "(bool, default false) Only used in cudnn kernel, need install cudnn") + "(bool, default false) Only used in cudnn kernel; cudnn needs to be installed.") .SetDefault(false); AddAttr( "ceil_mode", @@ -389,11 +444,17 @@ .SetDefault(false); AddAttr( "data_format", - "(string, default NCHW) Only used in " - "An optional string from: \"NHWC\", \"NCHW\". " - "Defaults to \"NHWC\". Specify the data format of the output data, " + "(string, default NCDHW) An optional string from: " + "\"NDHWC\", \"NCDHW\". " + "Defaults to \"NCDHW\". Specify the data format of the output data, " "the input will be transformed automatically. ") - .SetDefault("AnyLayout"); + .SetDefault("NCDHW"); + AddAttr( + "padding_algorithm", + "(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\"," + "\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. " + "Set to \"SAME\" or \"VALID\" for algorithm of padding. ") + .SetDefault("EXPLICIT"); // TODO(dzhwinter): need to registered layout transform function AddComment(R"DOC( Pool3d Operator. The pooling3d operation calculates the output based on the input, pooling_type, ksize, strides, and paddings parameters. -Input(X) and output(Out) are in NCDHW format, where N is batch +Input(X) and output(Out) are in NCDHW or NDHWC format, where N is batch size, C is the number of channels, and D, H and W are the depth, height and width of the feature, respectively. Parameters(ksize, strides, paddings) are three elements. These three elements represent depth, height and @@ -412,42 +473,65 @@ Example: X shape: $(N, C, D_{in}, H_{in}, W_{in})$ Output: Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ + + For pool_padding = "SAME": + $$ + D_{out} = \\frac{(D_{in} + strides[0] - 1)}{strides[0]} + $$ + $$ + H_{out} = \\frac{(H_{in} + strides[1] - 1)}{strides[1]} + $$ + $$ + W_{out} = \\frac{(W_{in} + strides[2] - 1)}{strides[2]} + $$ + + For pool_padding = "VALID": + $$ + D_{out} = \\frac{(D_{in} - ksize[0] + strides[0])}{strides[0]} + $$ + $$ + H_{out} = \\frac{(H_{in} - ksize[1] + strides[1])}{strides[1]} + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[2] + strides[2])}{strides[2]} + $$ + For ceil_mode = false: $$ - D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 + D_{out} = \\frac{(D_{in} - ksize[0] + pad_depth_front + pad_depth_back)}{strides[0]} + 1 $$ $$ - H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[2]} + 1 + H_{out} = \\frac{(H_{in} - ksize[1] + pad_height_top + pad_height_bottom)}{strides[1]} + 1 $$ $$ - W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 + W_{out} = \\frac{(W_{in} - ksize[2] + pad_width_left + pad_width_right)}{strides[2]} + 1 $$ For ceil_mode = true: $$ - D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1 + D_{out} = \\frac{(D_{in} - ksize[0] + pad_depth_front + pad_depth_back + strides[0] -1)}{strides[0]} + 1 $$ $$ - H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 + H_{out} = \\frac{(H_{in} - ksize[1] + pad_height_top + pad_height_bottom + strides[1] -1)}{strides[1]} + 1 $$ $$ - W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1 + W_{out} = \\frac{(W_{in} - ksize[2] + pad_width_left + pad_width_right + strides[2] -1)}{strides[2]} + 1 $$ For exclusive = false: $$ - dstart = i * strides[0] - paddings[0] + dstart = i * strides[0] - pad_depth_front $$ $$ 
dend = dstart + ksize[0] $$ $$ - hstart = j * strides[1] - paddings[1] + hstart = j * strides[1] - pad_height_top $$ $$ hend = hstart + ksize[1] $$ $$ - wstart = k * strides[2] - paddings[2] + wstart = k * strides[2] - pad_width_left $$ $$ wend = wstart + ksize[2] @@ -458,16 +542,19 @@ Example: For exclusive = true: $$ - dstart = max(0, i * strides[0] - paddings[0]) + dstart = max(0, i * strides[0] - pad_depth_front) $$ $$ dend = min(D, dstart + ksize[0]) $$ $$ + hstart = max(0, j * strides[1] - pad_height_top) + $$ + $$ hend = min(H, hstart + ksize[1]) $$ $$ - wstart = max(0, k * strides[2] - paddings[2]) + wstart = max(0, k * strides[2] - pad_width_left) $$ $$ wend = min(W, wstart + ksize[2]) diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index 6c5900bd0f5..d7c6c6230a8 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -14,13 +14,13 @@ limitations under the License. */ #pragma once +#include #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/pooling.h" - namespace paddle { namespace operators { @@ -57,6 +57,57 @@ class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override; }; +inline void UpdatePadding(std::vector* paddings, const bool global_pooling, + const bool adaptive, + const std::string padding_algorithm, + const framework::DDim data_dims, + const std::vector& strides, + const std::vector& ksize) { + // set padding size == data_dims.size() * 2 + auto data_shape = framework::vectorize(data_dims); + if (paddings->size() == data_dims.size()) { + for (size_t i = 0; i < data_dims.size(); ++i) { + int copy_pad = *(paddings->begin() + 2 * i); + paddings->insert(paddings->begin() + 2 * i + 1, copy_pad); + } + } else { + PADDLE_ENFORCE_EQ( + data_dims.size() * 2, paddings->size(), + "Paddings size should be the same or twice as the pooling size."); + } + + // when padding_algorithm is "VALID" or "SAME" + if (padding_algorithm == "SAME") { + for (size_t i = 0; i < data_dims.size(); ++i) { + int out_size = (data_dims[i] + strides[i] - 1) / strides[i]; + int pad_sum = + std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i], 0); + int pad_0 = pad_sum / 2; + int pad_1 = pad_sum - pad_0; + *(paddings->begin() + i * 2) = pad_0; + *(paddings->begin() + i * 2 + 1) = pad_1; + } + } else if (padding_algorithm == "VALID") { + for (auto it = paddings->begin(); it != paddings->end(); it++) { + *it = 0; + } + } + + // if global_pooling == true or adaptive == true, padding will be ignored + if (global_pooling || adaptive) { + for (auto it = paddings->begin(); it != paddings->end(); it++) { + *it = 0; + } + } +} + +inline void UpdateKsize(std::vector* ksize, + const framework::DDim data_dims) { + ksize->resize(static_cast(data_dims.size())); + for (size_t i = 0; i < ksize->size(); ++i) { + *(ksize->begin() + i) = static_cast(data_dims[i]); + } +} template class PoolKernel : public framework::OpKernel { @@ -69,14 +120,36 @@ class PoolKernel : public framework::OpKernel { std::vector ksize = context.Attr>("ksize"); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); + std::string data_format = context.Attr("data_format"); bool exclusive = context.Attr("exclusive"); bool adaptive = context.Attr("adaptive"); - if (context.Attr("global_pooling")) { - for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; 
- ksize[i] = static_cast(in_x->dims()[i + 2]); + bool global_pooling = context.Attr("global_pooling"); + std::string padding_algorithm = + context.Attr("padding_algorithm"); + + const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); + + // update paddings + auto in_x_dims = in_x->dims(); + framework::DDim data_dims; + if (channel_last) { + data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); + } else { + data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); + } + + UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm, + data_dims, strides, ksize); + if (data_dims.size() * 2 == paddings.size()) { + for (size_t i = 0; i < data_dims.size(); ++i) { + paddings.erase(paddings.begin() + i + 1); } } + + if (global_pooling) { + UpdateKsize(&ksize, data_dims); + } + auto& dev_ctx = context.template device_context(); switch (ksize.size()) { case 2: { @@ -85,16 +158,16 @@ class PoolKernel : public framework::OpKernel { DeviceContext, paddle::operators::math::MaxPool, T> pool2d_forward; paddle::operators::math::MaxPool pool_process; - pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, - true, false, out); + pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, + pool_process, true, false, out); } else if (pooling_type == "avg") { paddle::operators::math::Pool2dFunctor< DeviceContext, paddle::operators::math::AvgPool, T> pool2d_forward; paddle::operators::math::AvgPool pool_process; - pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, - exclusive, adaptive, out); + pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, + pool_process, exclusive, adaptive, out); } } break; case 3: { @@ -103,15 +176,16 @@ class PoolKernel : public framework::OpKernel { DeviceContext, paddle::operators::math::MaxPool, T> pool3d_forward; paddle::operators::math::MaxPool pool_process; - pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, - true, false, out); + pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, + pool_process, true, false, out); + } else if (pooling_type == "avg") { paddle::operators::math::Pool3dFunctor< DeviceContext, paddle::operators::math::AvgPool, T> pool3d_forward; paddle::operators::math::AvgPool pool_process; - pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, - exclusive, adaptive, out); + pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format, + pool_process, exclusive, adaptive, out); } } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } @@ -135,13 +209,33 @@ class PoolGradKernel : public framework::OpKernel { std::vector paddings = context.Attr>("paddings"); bool exclusive = context.Attr("exclusive"); bool adaptive = context.Attr("adaptive"); + std::string data_format = context.Attr("data_format"); + bool global_pooling = context.Attr("global_pooling"); + std::string padding_algorithm = + context.Attr("padding_algorithm"); + + const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - if (context.Attr("global_pooling")) { - for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; - ksize[i] = static_cast(in_x->dims()[i + 2]); + // update paddings + auto in_x_dims = in_x->dims(); + framework::DDim data_dims; + if (channel_last) { + data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1); + } else { + data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size()); + } + UpdatePadding(&paddings, global_pooling, adaptive, 
padding_algorithm, + data_dims, strides, ksize); + if (data_dims.size() * 2 == paddings.size()) { + for (size_t i = 0; i < data_dims.size(); ++i) { + paddings.erase(paddings.begin() + i + 1); } } + + if (global_pooling) { + UpdateKsize(&ksize, data_dims); + } + auto& dev_ctx = context.template device_context(); if (in_x_grad) { in_x_grad->mutable_data(context.GetPlace()); @@ -154,15 +248,15 @@ class PoolGradKernel : public framework::OpKernel { paddle::operators::math::MaxPool2dGradFunctor pool2d_backward; pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, - paddings, in_x_grad); + paddings, data_format, in_x_grad); } else if (pooling_type == "avg") { paddle::operators::math::Pool2dGradFunctor< DeviceContext, paddle::operators::math::AvgPoolGrad, T> pool2d_backward; paddle::operators::math::AvgPoolGrad pool_process; pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, - paddings, pool_process, exclusive, adaptive, - in_x_grad); + paddings, data_format, pool_process, exclusive, + adaptive, in_x_grad); } } break; case 3: { @@ -170,15 +264,15 @@ class PoolGradKernel : public framework::OpKernel { paddle::operators::math::MaxPool3dGradFunctor pool3d_backward; pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, - paddings, in_x_grad); + paddings, data_format, in_x_grad); } else if (pooling_type == "avg") { paddle::operators::math::Pool3dGradFunctor< DeviceContext, paddle::operators::math::AvgPoolGrad, T> pool3d_backward; paddle::operators::math::AvgPoolGrad pool_process; pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, - paddings, pool_process, exclusive, adaptive, - in_x_grad); + paddings, data_format, pool_process, exclusive, + adaptive, in_x_grad); } } break; default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h index 8c124e71583..eb312895cf1 100644 --- a/paddle/fluid/platform/cudnn_helper.h +++ b/paddle/fluid/platform/cudnn_helper.h @@ -72,6 +72,7 @@ enum class DataLayout { // Not use kNHWC, kNCHW, kNCDHW, + kNDHWC, // add, liyamei kNCHW_VECT_C, }; @@ -212,6 +213,8 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat( return CUDNN_TENSOR_NCHW; case DataLayout::kNCDHW: return CUDNN_TENSOR_NCHW; // NOTE: cudnn treat NdTensor as the same + case DataLayout::kNDHWC: + return CUDNN_TENSOR_NHWC; // add, liyamei default: PADDLE_THROW("Unknown cudnn equivalent for order"); } @@ -238,14 +241,31 @@ class ScopedTensorDescriptor { strides[i] = dims[i + 1] * strides[i + 1]; } // Update tensor descriptor dims setting if groups > 1 - // NOTE: Assume using NCHW or NCDHW order - std::vector dims_with_group(dims.begin(), dims.end()); // copy + // NOTE: Here, Assume using NCHW or NCDHW order + std::vector dims_with_group(dims.begin(), dims.end()); if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor( - desc_, type, dims_with_group.size(), dims_with_group.data(), - strides.data())); + + if (dims.size() == 4) { + if (format == CUDNN_TENSOR_NCHW) { + PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor( + desc_, type, dims_with_group.size(), dims_with_group.data(), + strides.data())); + } else { // CUDNN_TENSOR_NHWC + PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensor4dDescriptor( + desc_, format, type, dims[0], dims[3], dims[1], dims[2])); + } + } else if (dims.size() == 5) { + if (format == CUDNN_TENSOR_NCHW) { + 
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor( + desc_, type, dims_with_group.size(), dims_with_group.data(), + strides.data())); + } else { // CUDNN_TENSOR_NHWC + PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx( + desc_, format, type, dims.size(), dims.data())); + } + } return desc_; } diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 67e2a18dd37..0f743801582 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -126,7 +126,8 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(cudnnRNNBackwardWeights); \ __macro(cudnnRNNForwardInference); \ __macro(cudnnDestroyDropoutDescriptor); \ - __macro(cudnnDestroyRNNDescriptor); + __macro(cudnnDestroyRNNDescriptor); \ + __macro(cudnnSetTensorNdDescriptorEx); CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0632ad9ad2c..2cdfbf482a1 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2871,15 +2871,16 @@ def pool2d(input, use_cudnn=True, ceil_mode=False, name=None, - exclusive=True): + exclusive=True, + data_format="NCHW"): """ ${comment} Args: input (Variable): The input tensor of pooling operator. The format of - input tensor is NCHW, where N is batch size, C is - the number of channels, H is the height of the - feature, and W is the width of the feature. + input tensor is `"NCHW"` or `"NHWC"`, where `N` is batch size, `C` is + the number of channels, `H` is the height of the + feature, and `W` is the width of the feature. pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two integers, (pool_size_Height, pool_size_Width). Otherwise, the pool kernel size will be a square of an int. @@ -2887,8 +2888,13 @@ def pool2d(input, pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, it must contain two integers, (pool_stride_Height, pool_stride_Width). Otherwise, the pool stride size will be a square of an int. - pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple, - it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width). + pool_padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be in three forms: `[pad_height, pad_width]` or + `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, + `pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + When `data_format` is `"NHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Otherwise, the pool padding size will be a square of an int. global_pooling (bool): ${global_pooling_comment} use_cudnn (bool): ${use_cudnn_comment} @@ -2896,55 +2902,125 @@ def pool2d(input, name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. exclusive (bool): Whether to exclude padding points in average pooling - mode, default is true + mode, default is `true`. + data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. + The default is `"NCHW"`. 
When it is `"NCHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_height, input_width]`. Returns: Variable: The pooling result. Raises: - ValueError: If 'pool_type' is not "max" nor "avg" - ValueError: If 'global_pooling' is False and 'pool_size' is -1 - ValueError: If 'use_cudnn' is not a bool value. + ValueError: If `pool_type` is not "max" nor "avg" + ValueError: If `global_pooling` is False and `pool_size` is -1 + ValueError: If `use_cudnn` is not a bool value. Examples: .. code-block:: python import paddle.fluid as fluid + data = fluid.layers.data( - name='data', shape=[3, 32, 32], dtype='float32') - pool2d = fluid.layers.pool2d( - input=data, - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False) + name='data', shape=[10, 3, 32, 32], append_batch_size=False, dtype='float32') + + # example 1: + # Attr(pool_padding) is a list with 4 elements, Attr(data_format) is "NCHW". + out_1 = fluid.layers.pool2d( + input = data, + pool_size = 3, + pool_type = "avg", + pool_stride = 1, + pool_padding = [1, 2, 1, 0], + data_format = "NCHW") + + # example 2: + # Attr(pool_padding) is a string, Attr(data_format) is "NCHW". + out_2 = fluid.layers.pool2d( + input = data, + pool_size = 3, + pool_type = "avg", + pool_stride = 1, + pool_padding = "VALID", + data_format = "NCHW") """ if pool_type not in ["max", "avg"]: raise ValueError( - "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.", + "Unknown Attr(pool_type): '%s'. It can only be 'max' or 'avg'.", str(pool_type)) if global_pooling is False and pool_size == -1: raise ValueError( - "When the global_pooling is False, pool_size must be passed " - "and be a valid value. Received pool_size: " + str(pool_size)) + "When Attr(global_pooling) is False, Attr(pool_size) must be passed " + "and be a valid value. Received pool_size: %s." % str(pool_size)) + + if not isinstance(use_cudnn, bool): + raise ValueError("Attr(use_cudnn) should be True or False. Received " + "Attr(use_cudnn): %s." % str(use_cudnn)) + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCHW' or 'NHWC'. Received " + "Attr(data_format): %s." % str(data_format)) pool_size = utils.convert_to_list(pool_size, 2, 'pool_size') - pool_padding = utils.convert_to_list(pool_padding, 2, 'pool_padding') pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride') - if not isinstance(use_cudnn, bool): - raise ValueError("use_cudnn should be True or False") + def update_padding(padding, data_format): + def is_list_or_tuple(ele): + if isinstance(ele, list) or isinstance(ele, tuple): + return True + return False + + if is_list_or_tuple(padding) and len(padding) == 4: + if is_list_or_tuple(padding[0]) and (data_format == "NCHW"): + if not (padding[0] == [0, 0] and padding[1] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." % str(padding)) + padding = padding[2:4] + padding = [ele for a_list in padding for ele in a_list] + elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"): + if not (padding[0] == [0, 0] and padding[3] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." 
% str(padding)) + padding = padding[1:3] + padding = [ele for a_list in padding for ele in a_list] + padding = utils.convert_to_list(padding, 4, 'padding') - l_type = 'pool2d' + else: + padding = utils.convert_to_list(padding, 2, 'padding') - helper = LayerHelper(l_type, **locals()) + return padding + + padding_algorithm = "EXPLICIT" + if isinstance(pool_padding, str): + pool_padding = pool_padding.upper() + if pool_padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(pool_padding)) + if pool_padding == "VALID": + padding_algorithm = "VALID" + pool_padding = [0, 0, 0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. " + "Received ceil_mode: True.") + elif pool_padding == "SAME": + padding_algorithm = "SAME" + pool_padding = [0, 0, 0, 0] + + pool_padding = update_padding(pool_padding, data_format) + + op_type = 'pool2d' + helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) helper.append_op( - type=l_type, + type=op_type, inputs={"X": input}, outputs={"Out": pool_out}, attrs={ @@ -2953,10 +3029,12 @@ def pool2d(input, "global_pooling": global_pooling, "strides": pool_stride, "paddings": pool_padding, + "padding_algorithm": padding_algorithm, "use_cudnn": use_cudnn, "ceil_mode": ceil_mode, "use_mkldnn": False, "exclusive": exclusive, + "data_format": data_format, }) return pool_out @@ -2972,30 +3050,43 @@ def pool3d(input, use_cudnn=True, ceil_mode=False, name=None, - exclusive=True): + exclusive=True, + data_format="NCDHW"): """ ${comment} Args: input (Variable): The input tensor of pooling operator. The format of - input tensor is NCDHW, where N is batch size, C is - the number of channels, D is the depth of the feature, - H is the height of the feature, and W is the width + input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is + the number of channels, `D` is the depth of the feature, + `H` is the height of the feature, and `W` is the width of the feature. pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three integers, (pool_size_Depth, pool_size_Height, pool_size_Width). Otherwise, the pool kernel size will be the cube of an int. pool_type (string): ${pooling_type_comment} - pool_stride (int): stride of the pooling layer. - pool_padding (int): padding size. + pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, + it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. + Otherwise, the pool stride size will be a cube of an int. + pool_padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or + 'SAME' which is the padding algorithm. If pool padding size is a tuple or list, + it could be in three forms: `[pad_depth, pad_height, pad_width]` or + `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, + and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. + When `data_format` is `"NDHWC"`, `pool_padding` can be in the form + `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. 
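            For example (an illustrative editorial sketch, not part of the original
            patch), with `data_format` `"NCDHW"` the following three settings request
            the same explicit padding:

            .. code-block:: python

                # symmetric form: one pad per spatial dim (depth, height, width)
                pool_padding = [1, 2, 0]
                # explicit per-side form: front/back, top/bottom, left/right
                pool_padding = [1, 1, 2, 2, 0, 0]
                # nested form: the batch and channel entries must stay [0, 0]
                pool_padding = [[0, 0], [0, 0], [1, 1], [2, 2], [0, 0]]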
global_pooling (bool): ${global_pooling_comment} use_cudnn (bool): ${use_cudnn_comment} ceil_mode (bool): ${ceil_mode_comment} name (str): A name for this layer(optional). If set None, the layer will be named automatically. exclusive (bool): Whether to exclude padding points in average pooling - mode, default is true + mode, default is true. + data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. + The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: + `[batch_size, input_channels, input_depth, input_height, input_width]`. Returns: Variable: output of pool3d layer. @@ -3005,39 +3096,114 @@ def pool3d(input, .. code-block:: python import paddle.fluid as fluid + data = fluid.layers.data( - name='data', shape=[3, 32, 32, 32], dtype='float32') - pool3d = fluid.layers.pool3d( - input=data, - pool_size=2, - pool_type='max', - pool_stride=1, - global_pooling=False) + name='data', shape=[10, 3, 32, 32, 32], append_batch_size=False, dtype='float32') + + # example 1: + # Attr(pool_padding) is a list with 6 elements, Attr(data_format) is "NCDHW". + out_1 = fluid.layers.pool3d( + input = data, + pool_size = 2, + pool_type = "avg", + pool_stride = 1, + pool_padding = [1, 2, 1, 0, 1, 2], + global_pooling = False, + data_format = "NCDHW") + + # example 2: + # Attr(pool_padding) is a string, Attr(data_format) is "NCDHW". + out_2 = fluid.layers.pool3d( + input = data, + pool_size = 3, + pool_type = "avg", + pool_stride = 1, + pool_padding = "VALID", + global_pooling = False, + data_format = "NCDHW") + """ if pool_type not in ["max", "avg"]: raise ValueError( - "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.", + "Unknown Attr(pool_type): '%s'. It can only be 'max' or 'avg'.", str(pool_type)) if global_pooling is False and pool_size == -1: raise ValueError( - "When the global_pooling is False, pool_size must be passed " - "and be a valid value. Received pool_size: " + str(pool_size)) + "When Attr(global_pooling) is False, Attr(pool_size) must be passed " + "and be a valid value. Received Attr(pool_size): %s." % + str(pool_size)) + + if not isinstance(use_cudnn, bool): + raise ValueError("Attr(use_cudnn) should be True or False. Received " + "Attr(use_cudnn): %s. " % str(use_cudnn)) + + if data_format not in ["NCDHW", "NDHWC"]: + raise ValueError( + "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " + "Attr(data_format): %s" % str(data_format)) pool_size = utils.convert_to_list(pool_size, 3, 'pool_size') - pool_padding = utils.convert_to_list(pool_padding, 3, 'pool_padding') pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride') - if not isinstance(use_cudnn, bool): - raise ValueError("use_cudnn should be True or False") + def update_padding(padding, data_format): + def is_list_or_tuple(ele): + if isinstance(ele, (list, tuple)): + return True + return False + + if is_list_or_tuple(padding) and len(padding) == 5: + if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): + if not (padding[0] == [0, 0] and padding[1] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." % str(padding)) + padding = padding[2:5] + padding = [ele for a_list in padding for ele in a_list] + elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"): + if not (padding[0] == [0, 0] and padding[4] == [0, 0]): + raise ValueError( + "Non-zero pool_padding(%s) in the batch or channel dimensions " + "is not supported." 
% str(padding)) + padding = padding[1:4] + padding = [ele for a_list in padding for ele in a_list] + padding = utils.convert_to_list(padding, 6, 'padding') + + elif is_list_or_tuple(padding) and len(padding) == 6: + padding = utils.convert_to_list(padding, 6, 'padding') - l_type = "pool3d" - helper = LayerHelper(l_type, **locals()) + else: + padding = utils.convert_to_list(padding, 3, 'padding') + + return padding + + padding_algorithm = "EXPLICIT" + if isinstance(pool_padding, str): + pool_padding = pool_padding.upper() + if pool_padding not in ["SAME", "VALID"]: + raise ValueError( + "Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'." + % str(pool_padding)) + if pool_padding == "VALID": + padding_algorithm = "VALID" + pool_padding = [0, 0, 0, 0, 0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(pool_padding) is \"VALID\", ceil_mode must be False. " + "Received ceil_mode: True.") + elif pool_padding == "SAME": + padding_algorithm = "SAME" + pool_padding = [0, 0, 0, 0, 0, 0] + + pool_padding = update_padding(pool_padding, data_format) + + op_type = "pool3d" + helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) helper.append_op( - type=l_type, + type=op_type, inputs={"X": input}, outputs={"Out": pool_out}, attrs={ @@ -3046,10 +3212,12 @@ def pool3d(input, "global_pooling": global_pooling, "strides": pool_stride, "paddings": pool_padding, + "padding_algorithm": padding_algorithm, "use_cudnn": use_cudnn, "ceil_mode": ceil_mode, "use_mkldnn": False, "exclusive": exclusive, + "data_format": data_format, }) return pool_out diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py index 9d501b70942..b2f09b6a135 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py @@ -20,6 +20,7 @@ import numpy as np import paddle.fluid.core as core from op_test import OpTest +import paddle.fluid as fluid def adaptive_start_index(index, input_size, output_size): @@ -107,7 +108,7 @@ def avg_pool2D_forward_naive(x, x_masked = x[:, :, r_start:r_end, c_start:c_end] field_size = ((r_end - r_start) * (c_end - c_start)) \ - if (exclusive or adaptive) else (ksize[0] * ksize[1]) + if (exclusive or adaptive) else (ksize[0] * ksize[1]) if data_type == np.int8 or data_type == np.uint8: out[:, :, i, j] = (np.rint( np.sum(x_masked, axis=(2, 3)) / @@ -118,26 +119,139 @@ def avg_pool2D_forward_naive(x, return out +def pool2D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=True, + adaptive=False, + data_format='NCHW', + pool_type="max", + padding_algorithm="EXPLICIT"): + + # update paddings + def _get_padding_with_SAME(input_shape, pool_size, pool_stride): + padding = [] + for input_size, filter_size, stride_size in zip(input_shape, pool_size, + pool_stride): + out_size = int((input_size + stride_size - 1) / stride_size) + pad_sum = np.max(( + (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_0 = int(pad_sum / 2) + pad_1 = int(pad_sum - pad_0) + padding.append(pad_0) + padding.append(pad_1) + return padding + + if isinstance(padding_algorithm, str): + padding_algorithm = padding_algorithm.upper() + if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: + raise ValueError("Unknown Attr(padding_algorithm): '%s'. " + "It can only be 'SAME' or 'VALID'." 
% + str(padding_algorithm)) + + if padding_algorithm == "VALID": + paddings = [0, 0, 0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode)" + " must be False. " + "Received ceil_mode: True.") + elif padding_algorithm == "SAME": + input_data_shape = [] + if data_format == "NCHW": + input_data_shape = x.shape[2:4] + elif data_format == "NHWC": + input_data_shape = x.shape[1:3] + paddings = _get_padding_with_SAME(input_data_shape, ksize, strides) + + assert len(paddings) == 2 or len(paddings) == 4 + is_sys = True if len(paddings) == 2 else False + + N = x.shape[0] + C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \ + else [x.shape[3], x.shape[1], x.shape[2]] + + if global_pool == 1: + ksize = [H, W] + paddings = [0 for _ in range(len(paddings))] + + pad_h_up = paddings[0] if is_sys else paddings[0] + pad_h_down = paddings[0] if is_sys else paddings[1] + pad_w_left = paddings[1] if is_sys else paddings[2] + pad_w_right = paddings[1] if is_sys else paddings[3] + + if adaptive: + H_out, W_out = ksize + else: + H_out = (H - ksize[0] + pad_h_up + pad_h_down + strides[0] - 1) // strides[0] + 1 \ + if ceil_mode else (H - ksize[0] + pad_h_up + pad_h_down) // strides[0] + 1 + W_out = (W - ksize[1] + pad_w_left + pad_w_right + strides[1] - 1) // strides[1] + 1 \ + if ceil_mode else (W - ksize[1] + pad_w_left + pad_w_right) // strides[1] + 1 + + out = np.zeros((N, C, H_out, W_out)) if data_format=='NCHW' \ + else np.zeros((N, H_out, W_out, C)) + for i in range(H_out): + if adaptive: + in_h_start = adaptive_start_index(i, H, ksize[0]) + in_h_end = adaptive_end_index(i, H, ksize[0]) + else: + in_h_start = np.max((i * strides[0] - pad_h_up, 0)) + in_h_end = np.min((i * strides[0] + ksize[0] - pad_h_up, H)) + + for j in range(W_out): + if adaptive: + in_w_start = adaptive_start_index(j, W, ksize[1]) + in_w_end = adaptive_end_index(j, W, ksize[1]) + else: + in_w_start = np.max((j * strides[1] - pad_w_left, 0)) + in_w_end = np.min((j * strides[1] + ksize[1] - pad_w_left, W)) + + if data_format == 'NCHW': + x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end] + if pool_type == 'avg': + field_size = ((in_h_end - in_h_start) * (in_w_end - in_w_start)) \ + if (exclusive or adaptive) else (ksize[0] * ksize[1]) + out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size + elif pool_type == 'max': + out[:, :, i, j] = np.max(x_masked, axis=(2, 3)) + elif data_format == 'NHWC': + x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :] + if pool_type == 'avg': + field_size = ((in_h_end - in_h_start) * (in_w_end - in_w_start)) \ + if (exclusive or adaptive) else (ksize[0] * ksize[1]) + out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size + elif pool_type == 'max': + out[:, i, j, :] = np.max(x_masked, axis=(1, 2)) + return out + + class TestPool2D_Op(OpTest): def setUp(self): self.op_type = "pool2d" self.use_cudnn = False + self.init_kernel_type() self.use_mkldnn = False self.init_data_type() self.init_test_case() + self.padding_algorithm = "EXPLICIT" + self.init_paddings() self.init_global_pool() self.init_kernel_type() self.init_pool_type() self.init_ceil_mode() self.init_exclusive() self.init_adaptive() - if self.global_pool: - self.paddings = [0 for _ in range(len(self.paddings))] + self.init_data_format() + self.init_shape() + input = np.random.random(self.shape).astype(self.dtype) - output = (self.pool2D_forward_naive( + output = pool2D_forward_naive( input, self.ksize, self.strides, self.paddings, 
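            # the remaining positional arguments mirror the operator attrs set
            # below: global_pool, ceil_mode, exclusive, adaptive, data_format,
            # pool_type and padding_algorithm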
self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive, - self.dtype)).astype(self.dtype) + self.ceil_mode, self.exclusive, self.adaptive, self.data_format, + self.pool_type, self.padding_algorithm).astype(self.dtype) self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.attrs = { @@ -149,10 +263,10 @@ class TestPool2D_Op(OpTest): 'use_cudnn': self.use_cudnn, 'use_mkldnn': self.use_mkldnn, 'ceil_mode': self.ceil_mode, - 'data_format': - 'AnyLayout', # TODO(dzhwinter) : should be fix latter + 'data_format': self.data_format, 'exclusive': self.exclusive, - 'adaptive': self.adaptive + 'adaptive': self.adaptive, + "padding_algorithm": self.padding_algorithm, } self.outputs = {'Out': output} @@ -177,14 +291,22 @@ class TestPool2D_Op(OpTest): elif self.pool_type != "max": self.check_grad(set(['X']), 'Out', max_relative_error=0.07) - def init_test_case(self): + def init_data_format(self): + self.data_format = "NCHW" + + def init_shape(self): self.shape = [2, 3, 5, 5] + + def init_test_case(self): self.ksize = [3, 3] self.strides = [1, 1] + + def init_paddings(self): self.paddings = [0, 0] + self.padding_algorithm = "EXPLICIT" def init_kernel_type(self): - pass + self.use_cudnn = False def init_data_type(self): self.dtype = np.float32 @@ -208,9 +330,10 @@ class TestPool2D_Op(OpTest): class TestCase1(TestPool2D_Op): def init_test_case(self): - self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] + + def init_paddings(self): self.paddings = [0, 0] def init_pool_type(self): @@ -220,12 +343,16 @@ class TestCase1(TestPool2D_Op): def init_global_pool(self): self.global_pool = False + def init_shape(self): + self.shape = [2, 3, 7, 7] + class TestCase2(TestPool2D_Op): def init_test_case(self): - self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] + + def init_paddings(self): self.paddings = [1, 1] def init_pool_type(self): @@ -235,6 +362,9 @@ class TestCase2(TestPool2D_Op): def init_global_pool(self): self.global_pool = False + def init_shape(self): + self.shape = [2, 3, 7, 7] + class TestCase3(TestPool2D_Op): def init_pool_type(self): @@ -366,5 +496,715 @@ class TestAvgPoolAdaptive(TestCase1): self.adaptive = True +#-------test pool2d with asymmetric padding----- + + +class TestPool2D_AsyPadding(TestPool2D_Op): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 0, 1, 2] + + def init_shape(self): + self.shape = [2, 3, 5, 5] + + +class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 0, 1, 0] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 2, 1, 2] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 0, 1, 2] + + def init_shape(self): + self.shape = [2, 3, 5, 5] + + +class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 0, 1, 0] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +class TestCase5_AsyPadding((TestCase5)): + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [2, 2, 1, 2] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +create_test_cudnn_class(TestPool2D_AsyPadding) 
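# Editorial sketch (not part of the original patch): with ceil_mode=False the
# naive reference sizes each spatial dim as
#     out = (in - ksize + pad_begin + pad_end) // stride + 1
# For TestCase2_AsyPadding above (7x7 input, 3x3 kernel, stride 1, paddings
# [1, 2, 1, 2]) that gives (7 - 3 + 1 + 2) // 1 + 1 == 8 per dim:
_x = np.ones([2, 3, 7, 7]).astype("float32")
_y = pool2D_forward_naive(
    _x, ksize=[3, 3], strides=[1, 1], paddings=[1, 2, 1, 2], pool_type="avg")
assert _y.shape == (2, 3, 8, 8)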
+create_test_cudnn_class(TestCase1_AsyPadding) +create_test_cudnn_class(TestCase2_AsyPadding) +create_test_cudnn_class(TestCase3_AsyPadding) +create_test_cudnn_class(TestCase4_AsyPadding) +create_test_cudnn_class(TestCase5_AsyPadding) + +create_test_cudnn_fp16_class(TestPool2D_AsyPadding) +create_test_cudnn_fp16_class(TestCase1_AsyPadding, check_grad=False) +create_test_cudnn_fp16_class(TestCase2_AsyPadding) +create_test_cudnn_fp16_class(TestCase3_AsyPadding) +create_test_cudnn_fp16_class(TestCase4_AsyPadding) +create_test_cudnn_fp16_class(TestCase5_AsyPadding) + +create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding) +create_test_cudnn_use_ceil_class(TestCase1_AsyPadding) + +create_test_use_ceil_class(TestCase1_AsyPadding) +create_test_use_ceil_class(TestCase2_AsyPadding) + + +class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): + self.exclusive = False + + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 2, 1, 2] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +class TestCUDNNAvgInclude_AsyPadding(TestCase2): + def init_kernel_type(self): + self.use_cudnn = True + + def init_exclusive(self): + self.exclusive = False + + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [2, 1, 1, 1] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +class TestAvgPoolAdaptive_AsyPadding(TestCase1): + def init_adaptive(self): + self.adaptive = True + + def init_test_case(self): + self.ksize = [3, 3] + self.strides = [1, 1] + self.paddings = [1, 1, 0, 2] + + def init_shape(self): + self.shape = [2, 3, 7, 7] + + +#----------- test channel_last -------------- +class TestPool2D_channel_last(TestPool2D_Op): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 5, 5, 3] + + +class TestCase1_channel_last(TestCase1): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCase2_channel_last(TestCase2): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCase3_channel_last(TestCase3): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 5, 5, 3] + + +class TestCase4_channel_last(TestCase4): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCase5_channel_last(TestCase5): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +create_test_cudnn_class(TestPool2D_channel_last) +create_test_cudnn_class(TestCase1_channel_last) +create_test_cudnn_class(TestCase2_channel_last) +create_test_cudnn_class(TestCase3_channel_last) +create_test_cudnn_class(TestCase4_channel_last) +create_test_cudnn_class(TestCase5_channel_last) + +create_test_cudnn_fp16_class(TestPool2D_channel_last) +create_test_cudnn_fp16_class(TestCase1_channel_last, check_grad=False) +create_test_cudnn_fp16_class(TestCase2_channel_last) +create_test_cudnn_fp16_class(TestCase3_channel_last) +create_test_cudnn_fp16_class(TestCase4_channel_last) +create_test_cudnn_fp16_class(TestCase5_channel_last) + +create_test_cudnn_use_ceil_class(TestPool2D_channel_last) +create_test_cudnn_use_ceil_class(TestCase1_channel_last) + +create_test_use_ceil_class(TestCase1_channel_last) +create_test_use_ceil_class(TestCase2_channel_last) + + +class TestCase5_Max(TestCase2): + def 
init_pool_type(self): + self.pool_type = "max" + + def test_check_grad(self): + if self.dtype == np.float16: + return + if self.has_cudnn() and self.pool_type == "max": + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, set(['X']), 'Out', max_relative_error=1.00) + elif self.pool_type == "max": + self.check_grad(set(['X']), 'Out', max_relative_error=1.00) + + +class TestCase5_channel_last_Max(TestCase5_Max): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +create_test_cudnn_class(TestCase5_Max) +create_test_cudnn_class(TestCase5_channel_last_Max) + + +class TestAvgInclude_channel_last(TestCase2_channel_last): + def init_exclusive(self): + self.exclusive = False + + +class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last): + def init_kernel_type(self): + self.use_cudnn = True + + def init_exclusive(self): + self.exclusive = False + + +class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): + def init_adaptive(self): + self.adaptive = True + + +class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 5, 5, 3] + + +class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 5, 5, 3] + + +class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +create_test_cudnn_class(TestPool2D_AsyPadding_channel_last) +create_test_cudnn_class(TestCase1_AsyPadding_channel_last) +create_test_cudnn_class(TestCase2_AsyPadding_channel_last) +create_test_cudnn_class(TestCase3_AsyPadding_channel_last) +create_test_cudnn_class(TestCase4_AsyPadding_channel_last) +create_test_cudnn_class(TestCase5_AsyPadding_channel_last) + +create_test_cudnn_fp16_class(TestPool2D_AsyPadding_channel_last) +create_test_cudnn_fp16_class( + TestCase1_AsyPadding_channel_last, check_grad=False) +create_test_cudnn_fp16_class(TestCase2_AsyPadding_channel_last) +create_test_cudnn_fp16_class(TestCase3_AsyPadding_channel_last) +create_test_cudnn_fp16_class(TestCase4_AsyPadding_channel_last) +create_test_cudnn_fp16_class(TestCase5_AsyPadding_channel_last) + +create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding_channel_last) +create_test_cudnn_use_ceil_class(TestCase1_AsyPadding_channel_last) + +create_test_use_ceil_class(TestCase1_AsyPadding_channel_last) +create_test_use_ceil_class(TestCase2_AsyPadding_channel_last) + + +class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +class TestCUDNNAvgInclude_AsyPadding_channel_last( + TestCUDNNAvgInclude_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + 
self.shape = [2, 7, 7, 3] + + +class TestAvgPoolAdaptive_AsyPadding_channel_last( + TestAvgPoolAdaptive_AsyPadding): + def init_data_format(self): + self.data_format = "NHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 3] + + +# test paddings: SAME VALID + + +def create_test_padding_SAME_class(parent): + class TestPaddingSMAECase(parent): + def init_paddings(self): + self.paddings = [0, 0] + self.padding_algorithm = "SAME" + + cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + TestPaddingSMAECase.__name__ = cls_name + globals()[cls_name] = TestPaddingSMAECase + + +create_test_padding_SAME_class(TestPool2D_Op) +create_test_padding_SAME_class(TestCase1) +create_test_padding_SAME_class(TestCase2) +create_test_padding_SAME_class(TestCase3) +create_test_padding_SAME_class(TestCase4) +create_test_padding_SAME_class(TestCase5) + +create_test_padding_SAME_class(TestPool2D_channel_last) +create_test_padding_SAME_class(TestCase1_channel_last) +create_test_padding_SAME_class(TestCase2_channel_last) +create_test_padding_SAME_class(TestCase3_channel_last) +create_test_padding_SAME_class(TestCase4_channel_last) +create_test_padding_SAME_class(TestCase5_channel_last) + + +def create_test_cudnn_padding_SAME_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestCUDNNPaddingSMAECase(parent): + def init_kernel_type(self): + self.use_cudnn = True + + def init_paddings(self): + self.paddings = [1, 1] + self.padding_algorithm = "SAME" + + cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp") + TestCUDNNPaddingSMAECase.__name__ = cls_name + globals()[cls_name] = TestCUDNNPaddingSMAECase + + +create_test_cudnn_padding_SAME_class(TestPool2D_Op) +create_test_cudnn_padding_SAME_class(TestCase1) +create_test_cudnn_padding_SAME_class(TestCase2) +create_test_cudnn_padding_SAME_class(TestCase3) +create_test_cudnn_padding_SAME_class(TestCase4) +create_test_cudnn_padding_SAME_class(TestCase5) + +create_test_cudnn_padding_SAME_class(TestPool2D_channel_last) +create_test_cudnn_padding_SAME_class(TestCase1_channel_last) +create_test_cudnn_padding_SAME_class(TestCase2_channel_last) +create_test_cudnn_padding_SAME_class(TestCase3_channel_last) +create_test_cudnn_padding_SAME_class(TestCase4_channel_last) +create_test_cudnn_padding_SAME_class(TestCase5_channel_last) + + +def create_test_padding_VALID_class(parent): + class TestPaddingVALIDCase(parent): + def init_paddings(self): + self.paddings = [1, 1] + self.padding_algorithm = "VALID" + + cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + TestPaddingVALIDCase.__name__ = cls_name + globals()[cls_name] = TestPaddingVALIDCase + + +create_test_padding_VALID_class(TestPool2D_Op) +create_test_padding_VALID_class(TestCase1) +create_test_padding_VALID_class(TestCase2) +create_test_padding_VALID_class(TestCase3) +create_test_padding_VALID_class(TestCase4) +create_test_padding_VALID_class(TestCase5) + +create_test_padding_VALID_class(TestPool2D_channel_last) +create_test_padding_VALID_class(TestCase1_channel_last) +create_test_padding_VALID_class(TestCase2_channel_last) +create_test_padding_VALID_class(TestCase3_channel_last) +create_test_padding_VALID_class(TestCase4_channel_last) +create_test_padding_VALID_class(TestCase5_channel_last) + + +def create_test_cudnn_padding_VALID_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestCUDNNPaddingVALIDCase(parent): + def init_kernel_type(self): + self.use_cudnn = True + 
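        # Note (editorial): with Attr(padding_algorithm) "VALID" the explicit
        # paddings below are ignored (the operator resets them to zero), so
        # these generated cases exercise the algorithm switch rather than the
        # padding values themselves.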
+ def init_paddings(self): + self.paddings = [1, 1] + self.padding_algorithm = "VALID" + + cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp") + TestCUDNNPaddingVALIDCase.__name__ = cls_name + globals()[cls_name] = TestCUDNNPaddingVALIDCase + + +create_test_cudnn_padding_VALID_class(TestPool2D_Op) +create_test_cudnn_padding_VALID_class(TestCase1) +create_test_cudnn_padding_VALID_class(TestCase2) +create_test_cudnn_padding_VALID_class(TestCase3) +create_test_cudnn_padding_VALID_class(TestCase4) +create_test_cudnn_padding_VALID_class(TestCase5) + +create_test_cudnn_padding_VALID_class(TestPool2D_channel_last) +create_test_cudnn_padding_VALID_class(TestCase1_channel_last) +create_test_cudnn_padding_VALID_class(TestCase2_channel_last) +create_test_cudnn_padding_VALID_class(TestCase3_channel_last) +create_test_cudnn_padding_VALID_class(TestCase4_channel_last) +create_test_cudnn_padding_VALID_class(TestCase5_channel_last) + + +# ----- test API +class TestPool2dAPI(OpTest): + def test_api(self): + x_NHWC = np.random.random([2, 5, 5, 3]).astype("float32") + x_NCHW = np.random.random([2, 3, 5, 5]).astype("float32") + + input_NHWC = fluid.layers.data( + name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") + + input_NCHW = fluid.layers.data( + name="input_NCHW", + shape=[2, 3, 5, 5], + append_batch_size=False, + dtype="float32") + + ksize = [3, 3] + out_1 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + use_cudnn=False, + data_format="NHWC") + + out_2 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [1, 1], [1, 1], [0, 0]], + use_cudnn=False, + data_format="NHWC") + + out_3 = fluid.layers.pool2d( + input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1]], + use_cudnn=False, + data_format="NCHW") + + out_4 = fluid.layers.pool2d( + input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding=[1, 2, 1, 0], + use_cudnn=False, + data_format="NCHW") + # test VALID + out_5 = fluid.layers.pool2d( + input=input_NCHW, + pool_size=ksize, + pool_type="avg", + pool_padding="VALID", + use_cudnn=False, + data_format="NCHW") + + out_6 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + use_cudnn=False, + data_format="NHWC") + + # test SAME + out_7 = fluid.layers.pool2d( + input=input_NCHW, + pool_size=[4, 4], + pool_type="avg", + pool_padding="SAME", + use_cudnn=False, + data_format="NCHW") + + out_8 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=[4, 4], + pool_type="max", + pool_padding="SAME", + use_cudnn=False, + data_format="NHWC") + + exe = fluid.Executor(place=fluid.CPUPlace()) + [res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8] = exe.run( + fluid.default_main_program(), + feed={"input_NHWC": x_NHWC, + "input_NCHW": x_NCHW}, + fetch_list=[ + out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 + ]) + + assert np.allclose( + res_1, + pool2D_forward_naive( + x=x_NHWC, + ksize=ksize, + pool_type="max", + strides=[1, 1], + paddings=[1, 1], + data_format="NHWC")) + + assert np.allclose( + res_2, + pool2D_forward_naive( + x=x_NHWC, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 1, 1, 1], + data_format="NHWC")) + assert np.allclose( + res_3, + pool2D_forward_naive( + x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 1, 1, 1], + data_format="NCHW"), + rtol=0.07, + atol=1e-05) + + 
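        # Editorial aside: for the "SAME" checks on the 5x5 inputs further
        # below (pool_size=[4, 4], stride 1), _get_padding_with_SAME computes
        # out_size = ceil(5 / 1) = 5 and pad_sum = max((5 - 1) * 1 + 4 - 5, 0) = 3,
        # split asymmetrically into pad_begin = 1 and pad_end = 2 per spatial dim.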
assert np.allclose( + res_4, + pool2D_forward_naive( + x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[1, 2, 1, 0], + data_format="NCHW"), + rtol=0.07, + atol=1e-05) + + # VALID + assert np.allclose( + res_5, + pool2D_forward_naive( + x=x_NCHW, + ksize=ksize, + pool_type="avg", + strides=[1, 1], + paddings=[10, 20], # any ele is ok + padding_algorithm="VALID", + data_format="NCHW"), + rtol=0.07, + atol=1e-05) + assert np.allclose( + res_6, + pool2D_forward_naive( + x=x_NHWC, + ksize=ksize, + pool_type="max", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="VALID", + data_format="NHWC")) + # SAME + assert np.allclose( + res_7, + pool2D_forward_naive( + x=x_NCHW, + ksize=[4, 4], + pool_type="avg", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NCHW"), + rtol=0.07, + atol=1e-05) + + assert np.allclose( + res_8, + pool2D_forward_naive( + x=x_NHWC, + ksize=[4, 4], + pool_type="max", + strides=[1, 1], + paddings=[10, 20], + padding_algorithm="SAME", + data_format="NHWC")) + + +class TestPool2dAPI_Error(OpTest): + def test_api(self): + input_NHWC = fluid.layers.data( + name="input_NHWC", + shape=[2, 5, 5, 3], + append_batch_size=False, + dtype="float32") + ksize = [3, 3] + + # cudnn value error + def run_1(): + out_1 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + use_cudnn=[0], + data_format="NHWC") + + self.assertRaises(ValueError, run_1) + + # data_format value error + def run_2(): + out_2 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1], + use_cudnn=False, + data_format="NHWCC") + + self.assertRaises(ValueError, run_2) + + # padding str value error + def run_3(): + out_3 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALIDSAME", + use_cudnn=False, + data_format="NHWC") + + self.assertRaises(ValueError, run_3) + + # padding str valid and ceil_mode value error + def run_4(): + out_4 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + use_cudnn=False, + ceil_mode=True, + data_format="NHWC") + + self.assertRaises(ValueError, run_4) + + # padding with 8 ele. 
value error + def run_5(): + out_5 = fluid.layers.pool2d( + input=input_NHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1]], + use_cudnn=False, + data_format="NHWC") + + self.assertRaises(ValueError, run_5) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py index 5898c5a67ee..6bbc3981b1c 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py @@ -20,6 +20,7 @@ import numpy as np import paddle.fluid.core as core from op_test import OpTest +import paddle.fluid as fluid def adaptive_start_index(index, input_size, output_size): @@ -30,54 +31,155 @@ def adaptive_end_index(index, input_size, output_size): return int(np.ceil((index + 1) * input_size / output_size)) -def max_pool3D_forward_naive(x, - ksize, - strides, - paddings, - global_pool=0, - ceil_mode=False, - exclusive=True, - adaptive=False): - N, C, D, H, W = x.shape +def pool3D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=True, + adaptive=False, + data_format='NCDHW', + pool_type='max', + padding_algorithm="EXPLICIT"): + # update paddings + def _get_padding_with_SAME(input_shape, pool_size, pool_stride): + padding = [] + for input_size, filter_size, stride_size in zip(input_shape, pool_size, + pool_stride): + out_size = int((input_size + stride_size - 1) / stride_size) + pad_sum = np.max(( + (out_size - 1) * stride_size + filter_size - input_size, 0)) + pad_0 = int(pad_sum / 2) + pad_1 = int(pad_sum - pad_0) + padding.append(pad_0) + padding.append(pad_1) + return padding + + if isinstance(padding_algorithm, str): + padding_algorithm = padding_algorithm.upper() + if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]: + raise ValueError("Unknown Attr(padding_algorithm): '%s'. " + "It can only be 'SAME' or 'VALID'." % + str(padding_algorithm)) + + if padding_algorithm == "VALID": + paddings = [0, 0, 0, 0, 0, 0] + if ceil_mode != False: + raise ValueError( + "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode)" + " must be False. 
" + "Received ceil_mode: True.") + elif padding_algorithm == "SAME": + input_data_shape = [] + if data_format == "NCDHW": + input_data_shape = x.shape[2:5] + elif data_format == "NDHWC": + input_data_shape = x.shape[1:4] + paddings = _get_padding_with_SAME(input_data_shape, ksize, strides) + + assert len(paddings) == 3 or len(paddings) == 6 + is_sys = True if len(paddings) == 3 else False + + N = x.shape[0] + C,D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \ + if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2],x.shape[3]] + if global_pool == 1: ksize = [D, H, W] + paddings = [0 for _ in range(len(paddings))] + + pad_d_forth = paddings[0] if is_sys else paddings[0] + pad_d_back = paddings[0] if is_sys else paddings[1] + pad_h_up = paddings[1] if is_sys else paddings[2] + pad_h_down = paddings[1] if is_sys else paddings[3] + pad_w_left = paddings[2] if is_sys else paddings[4] + pad_w_right = paddings[2] if is_sys else paddings[5] + if adaptive: D_out, H_out, W_out = ksize else: - D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( - H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 - H_out = (H - ksize[1] + 2 * paddings[1] + strides[1] - 1 - ) // strides[1] + 1 if ceil_mode else ( - W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 - W_out = (W - ksize[2] + 2 * paddings[2] + strides[2] - 1 - ) // strides[2] + 1 if ceil_mode else ( - W - ksize[2] + 2 * paddings[2]) // strides[2] + 1 - out = np.zeros((N, C, D_out, H_out, W_out)) + + D_out = (D - ksize[0] + pad_d_forth+pad_d_back + strides[0] - 1) // strides[0] + 1 \ + if ceil_mode else (D - ksize[0] + pad_d_forth+pad_d_back) // strides[0] + 1 + + H_out = (H - ksize[1] + pad_h_up + pad_h_down + strides[1] - 1) // strides[1] + 1 \ + if ceil_mode else (H - ksize[1] + pad_h_up + pad_h_down) // strides[1] + 1 + + W_out = (W - ksize[2] + pad_w_left + pad_w_right + strides[2] - 1) // strides[2] + 1 \ + if ceil_mode else (W - ksize[2] + pad_w_left + pad_w_right) // strides[2] + 1 + + + out = np.zeros((N, C, D_out, H_out, W_out)) if data_format=='NCDHW' \ + else np.zeros((N, D_out, H_out, W_out, C)) for k in range(D_out): if adaptive: d_start = adaptive_start_index(k, D, ksize[0]) d_end = adaptive_end_index(k, D, ksize[0]) else: - d_start = np.max((k * strides[0] - paddings[0], 0)) - d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D)) + d_start = np.max((k * strides[0] - pad_d_forth, 0)) + d_end = np.min((k * strides[0] + ksize[0] - pad_d_forth, D)) + for i in range(H_out): if adaptive: h_start = adaptive_start_index(i, H, ksize[1]) h_end = adaptive_end_index(i, H, ksize[1]) else: - h_start = np.max((i * strides[1] - paddings[1], 0)) - h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H)) + h_start = np.max((i * strides[1] - pad_h_up, 0)) + h_end = np.min((i * strides[1] + ksize[1] - pad_h_up, H)) + for j in range(W_out): if adaptive: w_start = adaptive_start_index(j, W, ksize[2]) w_end = adaptive_end_index(j, W, ksize[2]) else: - w_start = np.max((j * strides[2] - paddings[2], 0)) - w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W)) - x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end] + w_start = np.max((j * strides[2] - pad_w_left, 0)) + w_end = np.min((j * strides[2] + ksize[2] - pad_w_left, W)) + + if data_format == 'NCDHW': + x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start: + w_end] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \ + if (exclusive or adaptive) else ksize[0] * 
ksize[1] * ksize[2] + out[:, :, k, i, j] = np.sum(x_masked, + axis=(2, 3, 4)) / field_size + elif pool_type == 'max': + out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) + + elif data_format == 'NDHWC': + x_masked = x[:, d_start:d_end, h_start:h_end, w_start: + w_end, :] + if pool_type == 'avg': + field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \ + if (exclusive or adaptive) else ksize[0] * ksize[1] * ksize[2] + out[:, k, i, j, :] = np.sum(x_masked, + axis=(1, 2, 3)) / field_size + elif pool_type == 'max': + out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3)) - out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) + return out + + +def max_pool3D_forward_naive(x, + ksize, + strides, + paddings, + global_pool=0, + ceil_mode=False, + exclusive=True, + adaptive=False): + out = pool3D_forward_naive( + x=x, + ksize=ksize, + strides=strides, + paddings=paddings, + global_pool=global_pool, + ceil_mode=ceil_mode, + exclusive=exclusive, + adaptive=adaptive, + data_format='NCDHW', + pool_type="max") return out @@ -89,56 +191,24 @@ def avg_pool3D_forward_naive(x, ceil_mode=False, exclusive=True, adaptive=False): - N, C, D, H, W = x.shape - if global_pool == 1: - ksize = [D, H, W] - if adaptive: - D_out, H_out, W_out = ksize - else: - D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1 - ) // strides[0] + 1 if ceil_mode else ( - H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 - H_out = (H - ksize[1] + 2 * paddings[1] + strides[1] - 1 - ) // strides[1] + 1 if ceil_mode else ( - W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 - W_out = (W - ksize[2] + 2 * paddings[2] + strides[2] - 1 - ) // strides[2] + 1 if ceil_mode else ( - W - ksize[2] + 2 * paddings[2]) // strides[2] + 1 - out = np.zeros((N, C, D_out, H_out, W_out)) - for k in range(D_out): - if adaptive: - d_start = adaptive_start_index(k, D, ksize[0]) - d_end = adaptive_end_index(k, D, ksize[0]) - else: - d_start = np.max((k * strides[0] - paddings[0], 0)) - d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D)) - for i in range(H_out): - if adaptive: - h_start = adaptive_start_index(i, H, ksize[1]) - h_end = adaptive_end_index(i, H, ksize[1]) - else: - h_start = np.max((i * strides[1] - paddings[1], 0)) - h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H)) - for j in range(W_out): - if adaptive: - w_start = adaptive_start_index(j, W, ksize[2]) - w_end = adaptive_end_index(j, W, ksize[2]) - else: - w_start = np.max((j * strides[2] - paddings[2], 0)) - w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W)) - x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end] - - field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \ - if (exclusive or adaptive) else ksize[0] * ksize[1] * ksize[2] - out[:, :, k, i, j] = np.sum(x_masked, axis=(2, 3, - 4)) / field_size + out = pool3D_forward_naive( + x=x, + ksize=ksize, + strides=strides, + paddings=paddings, + global_pool=global_pool, + ceil_mode=ceil_mode, + exclusive=exclusive, + adaptive=adaptive, + data_format='NCDHW', + pool_type="avg") return out class TestPool3d_Op(OpTest): def setUp(self): self.op_type = "pool3d" - self.use_cudnn = False + self.init_kernel_type() self.dtype = np.float32 self.init_test_case() self.init_global_pool() @@ -147,13 +217,15 @@ class TestPool3d_Op(OpTest): self.init_ceil_mode() self.init_exclusive() self.init_adaptive() + self.init_data_format() + self.init_shape() - if self.global_pool: - self.paddings = [0 for _ in range(len(self.paddings))] input = 
np.random.random(self.shape).astype(self.dtype) - output = self.pool3D_forward_naive( + output = pool3D_forward_naive( input, self.ksize, self.strides, self.paddings, self.global_pool, - self.ceil_mode, self.exclusive, self.adaptive).astype(self.dtype) + self.ceil_mode, self.exclusive, self.adaptive, self.data_format, + self.pool_type).astype(self.dtype) + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.attrs = { @@ -164,8 +236,7 @@ class TestPool3d_Op(OpTest): 'global_pooling': self.global_pool, 'use_cudnn': self.use_cudnn, 'ceil_mode': self.ceil_mode, - 'data_format': - 'AnyLayout', # TODO(dzhwinter) : should be fix latter + 'data_format': self.data_format, 'exclusive': self.exclusive, 'adaptive': self.adaptive } @@ -192,18 +263,23 @@ class TestPool3d_Op(OpTest): elif self.pool_type != "max": self.check_grad(set(['X']), 'Out', max_relative_error=0.07) - def init_test_case(self): + def init_data_format(self): + self.data_format = "NCDHW" + + def init_shape(self): self.shape = [2, 3, 5, 5, 5] + + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] def init_kernel_type(self): - pass + self.use_cudnn = False + #pass def init_pool_type(self): self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive def init_global_pool(self): self.global_pool = True @@ -219,30 +295,32 @@ class TestPool3d_Op(OpTest): class TestCase1(TestPool3d_Op): - def init_test_case(self): + def init_shape(self): self.shape = [2, 3, 7, 7, 7] + + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] def init_pool_type(self): self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive def init_global_pool(self): self.global_pool = False class TestCase2(TestPool3d_Op): - def init_test_case(self): + def init_shape(self): self.shape = [2, 3, 7, 7, 7] + + def init_test_case(self): self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] def init_pool_type(self): self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive def init_global_pool(self): self.global_pool = False @@ -251,158 +329,824 @@ class TestCase2(TestPool3d_Op): class TestCase3(TestPool3d_Op): def init_pool_type(self): self.pool_type = "max" - self.pool3D_forward_naive = max_pool3D_forward_naive class TestCase4(TestCase1): def init_pool_type(self): self.pool_type = "max" - self.pool3D_forward_naive = max_pool3D_forward_naive class TestCase5(TestCase2): def init_pool_type(self): self.pool_type = "max" - self.pool3D_forward_naive = max_pool3D_forward_naive -#--------------------test pool3d-------------------- -class TestCUDNNCase1(TestPool3d_Op): - def init_kernel_type(self): - self.use_cudnn = True +#--------------------test pool3d cudnn-------------------- -class TestFP16CUDNNCase1(TestPool3d_Op): - def init_kernel_type(self): - self.use_cudnn = True - self.dtype = np.float16 +def create_test_cudnn_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestCUDNNCase(parent): + def init_kernel_type(self): + self.use_cudnn = True - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - if core.is_float16_supported(place): - self.check_output_with_place(place, atol=1e-3) + cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOp") + TestCUDNNCase.__name__ = cls_name + globals()[cls_name] = TestCUDNNCase -class TestCUDNNCase2(TestCase1): - def init_kernel_type(self): - self.use_cudnn = True 
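# Editorial note: the factory registers each generated case through
# globals()[cls_name] because unittest's loader only discovers TestCase
# subclasses bound as module-level attributes; a class left local to the
# factory function would never be collected or run.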
+create_test_cudnn_class(TestPool3d_Op) +create_test_cudnn_class(TestCase1) +create_test_cudnn_class(TestCase2) +create_test_cudnn_class(TestCase3) +create_test_cudnn_class(TestCase4) +create_test_cudnn_class(TestCase5) -class TestFP16CUDNNCase2(TestCase1): - def init_kernel_type(self): - self.use_cudnn = True - self.dtype = np.float16 +def create_test_cudnn_fp16_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestCUDNNFp16Case(parent): + def init_kernel_type(self): + self.use_cudnn = True + self.dtype = np.float16 - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - if core.is_float16_supported(place): - self.check_output_with_place(place, atol=1e-3) + def test_check_output(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + self.check_output_with_place(place, atol=1e-3) + cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16Op") + TestCUDNNFp16Case.__name__ = cls_name + globals()[cls_name] = TestCUDNNFp16Case -class TestCUDNNCase3(TestCase2): - def init_kernel_type(self): - self.use_cudnn = True +create_test_cudnn_fp16_class(TestPool3d_Op) +create_test_cudnn_fp16_class(TestCase1) +create_test_cudnn_fp16_class(TestCase2) +create_test_cudnn_fp16_class(TestCase3) +create_test_cudnn_fp16_class(TestCase4) +create_test_cudnn_fp16_class(TestCase5) -class TestFP16CUDNNCase3(TestCase2): - def init_kernel_type(self): - self.use_cudnn = True - self.dtype = np.float16 - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - if core.is_float16_supported(place): - self.check_output_with_place(place, atol=1e-3) +# ---- test ceil mode ------ +def create_test_cudnn_use_ceil_class(parent): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestPool3DUseCeilCase(parent): + def init_kernel_type(self): + self.use_cudnn = True + def init_ceil_mode(self): + self.ceil_mode = True -class TestCUDNNCase4(TestCase3): - def init_kernel_type(self): - self.use_cudnn = True + cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOpCeilMode") + TestPool3DUseCeilCase.__name__ = cls_name + globals()[cls_name] = TestPool3DUseCeilCase -class TestFP16CUDNNCase4(TestCase3): - def init_kernel_type(self): - self.use_cudnn = True - self.dtype = np.float16 +create_test_cudnn_use_ceil_class(TestPool3d_Op) +create_test_cudnn_use_ceil_class(TestCase1) - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - if core.is_float16_supported(place): - self.check_output_with_place(place, atol=1e-3) +def create_test_use_ceil_class(parent): + class TestPool3DUseCeilCase(parent): + def init_ceil_mode(self): + self.ceil_mode = True -class TestCUDNNCase5(TestCase4): - def init_kernel_type(self): - self.use_cudnn = True + cls_name = "{0}_{1}".format(parent.__name__, "CeilModeCast") + TestPool3DUseCeilCase.__name__ = cls_name + globals()[cls_name] = TestPool3DUseCeilCase -class TestFP16CUDNNCase5(TestCase4): - def init_kernel_type(self): - self.use_cudnn = True - self.dtype = np.float16 +create_test_use_ceil_class(TestCase1) +create_test_use_ceil_class(TestCase2) - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - if core.is_float16_supported(place): - self.check_output_with_place(place, atol=1e-3) + +class TestAvgInclude(TestCase2): + def init_exclusive(self): + self.exclusive = False -class TestCUDNNCase6(TestCase5): 
+@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestCUDNNAvgInclude(TestCase2): def init_kernel_type(self): self.use_cudnn = True + def init_exclusive(self): + self.exclusive = False + + +class TestAvgPoolAdaptive(TestCase1): + def init_adaptive(self): + self.adaptive = True + + +#-------test pool3d with asymmetric padding------ + + +class TestPool3d_Op_AsyPadding(TestPool3d_Op): + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [0, 0, 0, 2, 3, 0] + + def init_shape(self): + self.shape = [2, 3, 5, 5, 5] + + +class TestCase1_AsyPadding(TestCase1): + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 0, 2, 1, 2, 1] + + def init_shape(self): + self.shape = [2, 3, 7, 7, 7] + + +class TestCase2_AsyPadding(TestCase2): + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 2, 1, 1, 1, 0] + + def init_shape(self): + self.shape = [2, 3, 7, 7, 7] + + +class TestCase3_AsyPadding(TestCase3): + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 0, 0, 0, 1, 0] + + def init_shape(self): + self.shape = [2, 3, 5, 5, 5] -class TestFP16CUDNNCase6(TestCase5): + +class TestCase4_AsyPadding(TestCase4): + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 0, 2, 1, 2, 1] + + def init_shape(self): + self.shape = [2, 3, 7, 7, 7] + + +class TestCase5_AsyPadding(TestCase5): + def init_test_case(self): + self.shape = [2, 7, 7, 7, 3] + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 2, 1, 1, 1, 0] + + def init_shape(self): + self.shape = [2, 3, 7, 7, 7] + + +create_test_cudnn_class(TestPool3d_Op_AsyPadding) +create_test_cudnn_class(TestCase1_AsyPadding) +create_test_cudnn_class(TestCase2_AsyPadding) +create_test_cudnn_class(TestCase3_AsyPadding) +create_test_cudnn_class(TestCase4_AsyPadding) +create_test_cudnn_class(TestCase5_AsyPadding) + +create_test_cudnn_fp16_class(TestPool3d_Op_AsyPadding) +create_test_cudnn_fp16_class(TestCase1_AsyPadding) +create_test_cudnn_fp16_class(TestCase2_AsyPadding) +create_test_cudnn_fp16_class(TestCase3_AsyPadding) +create_test_cudnn_fp16_class(TestCase4_AsyPadding) +create_test_cudnn_fp16_class(TestCase5_AsyPadding) + +create_test_cudnn_use_ceil_class(TestPool3d_Op_AsyPadding) +create_test_cudnn_use_ceil_class(TestCase1_AsyPadding) + +create_test_use_ceil_class(TestCase1_AsyPadding) +create_test_use_ceil_class(TestCase2_AsyPadding) + + +class TestAvgInclude_AsyPadding(TestCase2): + def init_exclusive(self): + self.exclusive = False + + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 2, 1, 1, 1, 0] + + def init_shape(self): + self.shape = [2, 3, 7, 7, 7] + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestCUDNNAvgInclude_AsyPadding(TestCase2): def init_kernel_type(self): self.use_cudnn = True - self.dtype = np.float16 - def test_check_output(self): - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - if core.is_float16_supported(place): - self.check_output_with_place(place, atol=1e-3) + def init_exclusive(self): + self.exclusive = False + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 0, 0, 0, 0, 0] -class TestCeilModeCase1(TestCUDNNCase1): - def init_ceil_mode(self): - self.ceil_mode = True + def init_shape(self): 
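        # NCDHW layout: [batch, channels, depth, height, width]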
+ self.shape = [2, 3, 5, 5, 5] -class TestCeilModeCase2(TestCUDNNCase2): - def init_ceil_mode(self): - self.ceil_mode = True +class TestAvgPoolAdaptive_AsyPadding(TestCase1): + def init_adaptive(self): + self.adaptive = True + def init_test_case(self): + self.ksize = [3, 3, 3] + self.strides = [1, 1, 1] + self.paddings = [1, 0, 2, 1, 2, 1] -class TestCeilModeCase3(TestCase1): - def init_ceil_mode(self): - self.ceil_mode = True + def init_shape(self): + self.shape = [2, 3, 7, 7, 7] -class TestCeilModeCase4(TestCase2): - def init_ceil_mode(self): - self.ceil_mode = True +# ------------ test channel_last -------------- +class TestPool3d_channel_last(TestPool3d_Op): + def init_data_format(self): + self.data_format = "NDHWC" + def init_shape(self): + self.shape = [2, 5, 5, 5, 3] -class TestAvgInclude(TestCase2): + +class TestCase1_channel_last(TestCase1): + def init_data_format(self): + self.data_format = "NDHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 7, 3] + + +class TestCase2_channel_last(TestCase2): + def init_data_format(self): + self.data_format = "NDHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 7, 3] + + +class TestCase3_channel_last(TestCase3): + def init_data_format(self): + self.data_format = "NDHWC" + + def init_shape(self): + self.shape = [2, 5, 5, 5, 3] + + +class TestCase4_channel_last(TestCase4): + def init_data_format(self): + self.data_format = "NDHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 7, 3] + + +class TestCase5_channel_last(TestCase5): + def init_data_format(self): + self.data_format = "NDHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 7, 3] + + +create_test_cudnn_class(TestPool3d_channel_last) +create_test_cudnn_class(TestCase1_channel_last) +create_test_cudnn_class(TestCase2_channel_last) +create_test_cudnn_class(TestCase3_channel_last) +create_test_cudnn_class(TestCase4_channel_last) +create_test_cudnn_class(TestCase5_channel_last) + +create_test_cudnn_use_ceil_class(TestPool3d_channel_last) +create_test_cudnn_use_ceil_class(TestCase1_channel_last) + +create_test_use_ceil_class(TestCase1_channel_last) +create_test_use_ceil_class(TestCase2_channel_last) + + +class TestCase5_Max(TestCase2): + def init_pool_type(self): + self.pool_type = "max" + + def test_check_grad(self): + if self.dtype == np.float16: + return + if self.has_cudnn() and self.pool_type == "max": + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, set(['X']), 'Out', max_relative_error=1.00) + elif self.pool_type == "max": + self.check_grad(set(['X']), 'Out', max_relative_error=1.00) + + +class TestCase5_channel_last_Max(TestCase5_Max): + def init_data_format(self): + self.data_format = "NDHWC" + + def init_shape(self): + self.shape = [2, 7, 7, 7, 3] + + +create_test_cudnn_class(TestCase5_Max) +create_test_cudnn_class(TestCase5_channel_last_Max) + + +class TestAvgInclude_channel_last(TestCase2_channel_last): def init_exclusive(self): self.exclusive = False -class TestCUDNNAvgInclude(TestCUDNNCase3): +@unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") +class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last): + def init_kernel_type(self): + self.use_cudnn = True + def init_exclusive(self): self.exclusive = False -class TestAvgPoolAdaptive(TestCase1): +class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last): def init_adaptive(self): self.adaptive = True +# --- asy padding +class TestPool3d_Op_AsyPadding_channel_last(TestPool3d_Op_AsyPadding): + def init_data_format(self): + self.data_format = "NDHWC" + + 
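    # Editorial note: in the "NDHWC" variants below the channel axis moves to
    # the end, so shapes read [batch, depth, height, width, channels].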
+
+
+# ------------ test asymmetric padding with channel_last ------------
+class TestPool3d_Op_AsyPadding_channel_last(TestPool3d_Op_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 5, 5, 5, 3]
+
+
+class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 7, 7, 7, 3]
+
+
+class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 7, 7, 7, 3]
+
+
+class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 5, 5, 5, 3]
+
+
+class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 7, 7, 7, 3]
+
+
+class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 7, 7, 7, 3]
+
+
+create_test_cudnn_class(TestPool3d_Op_AsyPadding_channel_last)
+create_test_cudnn_class(TestCase1_AsyPadding_channel_last)
+create_test_cudnn_class(TestCase2_AsyPadding_channel_last)
+create_test_cudnn_class(TestCase3_AsyPadding_channel_last)
+create_test_cudnn_class(TestCase4_AsyPadding_channel_last)
+create_test_cudnn_class(TestCase5_AsyPadding_channel_last)
+
+create_test_cudnn_use_ceil_class(TestPool3d_Op_AsyPadding_channel_last)
+create_test_cudnn_use_ceil_class(TestCase1_AsyPadding_channel_last)
+
+create_test_use_ceil_class(TestCase1_AsyPadding_channel_last)
+create_test_use_ceil_class(TestCase2_AsyPadding_channel_last)
+
+
+class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 7, 7, 7, 3]
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNAvgInclude_AsyPadding_channel_last(
+        TestCUDNNAvgInclude_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 5, 5, 5, 3]
+
+
+class TestAvgPoolAdaptive_AsyPadding_channel_last(
+        TestAvgPoolAdaptive_AsyPadding):
+    def init_data_format(self):
+        self.data_format = "NDHWC"
+
+    def init_shape(self):
+        self.shape = [2, 7, 7, 7, 3]
+
+
+# ------------ test padding: SAME and VALID ------------
+def create_test_padding_SAME_class(parent):
+    class TestPaddingSAMECase(parent):
+        def init_paddings(self):
+            self.paddings = [0, 0]
+            self.padding_algorithm = "SAME"
+
+    cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
+    TestPaddingSAMECase.__name__ = cls_name
+    globals()[cls_name] = TestPaddingSAMECase
+
+
+create_test_padding_SAME_class(TestPool3d_Op)
+create_test_padding_SAME_class(TestCase1)
+create_test_padding_SAME_class(TestCase2)
+create_test_padding_SAME_class(TestCase3)
+create_test_padding_SAME_class(TestCase4)
+create_test_padding_SAME_class(TestCase5)
+
+create_test_padding_SAME_class(TestPool3d_channel_last)
+create_test_padding_SAME_class(TestCase1_channel_last)
+create_test_padding_SAME_class(TestCase2_channel_last)
+create_test_padding_SAME_class(TestCase3_channel_last)
+create_test_padding_SAME_class(TestCase4_channel_last)
+create_test_padding_SAME_class(TestCase5_channel_last)
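+
+
+# A minimal sketch of the per-dimension arithmetic behind
+# padding_algorithm="SAME" (illustrative helper only, not used by the
+# generated tests): the output size is ceil(in / stride), and the padding
+# needed to reach it is split as evenly as possible, with any odd remainder
+# going to the end.
+def _same_pad_1d(in_size, ksize, stride):
+    out_size = (in_size + stride - 1) // stride  # ceil(in_size / stride)
+    pad = max((out_size - 1) * stride + ksize - in_size, 0)
+    return pad // 2, pad - pad // 2  # (pad_begin, pad_end)
+
+
+# e.g. _same_pad_1d(5, 3, 1) == (1, 1); _same_pad_1d(5, 4, 1) == (1, 2)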
+
+
+def create_test_cudnn_padding_SAME_class(parent):
+    @unittest.skipIf(not core.is_compiled_with_cuda(),
+                     "core is not compiled with CUDA")
+    class TestCUDNNPaddingSAMECase(parent):
+        def init_kernel_type(self):
+            self.use_cudnn = True
+
+        def init_paddings(self):
+            self.paddings = [1, 1]
+            self.padding_algorithm = "SAME"
+
+    cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp")
+    TestCUDNNPaddingSAMECase.__name__ = cls_name
+    globals()[cls_name] = TestCUDNNPaddingSAMECase
+
+
+create_test_cudnn_padding_SAME_class(TestPool3d_Op)
+create_test_cudnn_padding_SAME_class(TestCase1)
+create_test_cudnn_padding_SAME_class(TestCase2)
+create_test_cudnn_padding_SAME_class(TestCase3)
+create_test_cudnn_padding_SAME_class(TestCase4)
+create_test_cudnn_padding_SAME_class(TestCase5)
+
+create_test_cudnn_padding_SAME_class(TestPool3d_channel_last)
+create_test_cudnn_padding_SAME_class(TestCase1_channel_last)
+create_test_cudnn_padding_SAME_class(TestCase2_channel_last)
+create_test_cudnn_padding_SAME_class(TestCase3_channel_last)
+create_test_cudnn_padding_SAME_class(TestCase4_channel_last)
+create_test_cudnn_padding_SAME_class(TestCase5_channel_last)
+
+
+def create_test_padding_VALID_class(parent):
+    class TestPaddingVALIDCase(parent):
+        def init_paddings(self):
+            self.paddings = [1, 1]
+            self.padding_algorithm = "VALID"
+
+    cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
+    TestPaddingVALIDCase.__name__ = cls_name
+    globals()[cls_name] = TestPaddingVALIDCase
+
+
+create_test_padding_VALID_class(TestPool3d_Op)
+create_test_padding_VALID_class(TestCase1)
+create_test_padding_VALID_class(TestCase2)
+create_test_padding_VALID_class(TestCase3)
+create_test_padding_VALID_class(TestCase4)
+create_test_padding_VALID_class(TestCase5)
+
+create_test_padding_VALID_class(TestPool3d_channel_last)
+create_test_padding_VALID_class(TestCase1_channel_last)
+create_test_padding_VALID_class(TestCase2_channel_last)
+create_test_padding_VALID_class(TestCase3_channel_last)
+create_test_padding_VALID_class(TestCase4_channel_last)
+create_test_padding_VALID_class(TestCase5_channel_last)
+
+
+def create_test_cudnn_padding_VALID_class(parent):
+    @unittest.skipIf(not core.is_compiled_with_cuda(),
+                     "core is not compiled with CUDA")
+    class TestCUDNNPaddingVALIDCase(parent):
+        def init_kernel_type(self):
+            self.use_cudnn = True
+
+        def init_paddings(self):
+            self.paddings = [1, 1]
+            self.padding_algorithm = "VALID"
+
+    cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp")
+    TestCUDNNPaddingVALIDCase.__name__ = cls_name
+    globals()[cls_name] = TestCUDNNPaddingVALIDCase
+
+
+create_test_cudnn_padding_VALID_class(TestPool3d_Op)
+create_test_cudnn_padding_VALID_class(TestCase1)
+create_test_cudnn_padding_VALID_class(TestCase2)
+create_test_cudnn_padding_VALID_class(TestCase3)
+create_test_cudnn_padding_VALID_class(TestCase4)
+create_test_cudnn_padding_VALID_class(TestCase5)
+
+create_test_cudnn_padding_VALID_class(TestPool3d_channel_last)
+create_test_cudnn_padding_VALID_class(TestCase1_channel_last)
+create_test_cudnn_padding_VALID_class(TestCase2_channel_last)
+create_test_cudnn_padding_VALID_class(TestCase3_channel_last)
+create_test_cudnn_padding_VALID_class(TestCase4_channel_last)
+create_test_cudnn_padding_VALID_class(TestCase5_channel_last)
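+
+
+# Companion sketch for padding_algorithm="VALID" (illustrative only): the
+# paddings supplied by the generated cases are ignored and reset to zero,
+# and only windows that fit entirely inside the input contribute, so the
+# output size is ceil((in - ksize + 1) / stride).
+def _valid_out_1d(in_size, ksize, stride):
+    return (in_size - ksize + stride) // stride
+
+
+# e.g. _valid_out_1d(5, 3, 1) == 3; _valid_out_1d(7, 3, 2) == 3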
+
+
+# ------------ test API ------------
+class TestPool3dAPI(OpTest):
+    def test_api(self):
+        x_NDHWC = np.random.random([2, 5, 5, 5, 3]).astype("float32")
+        x_NCDHW = np.random.random([2, 3, 5, 5, 5]).astype("float32")
+
+        input_NDHWC = fluid.layers.data(
+            name="input_NDHWC",
+            shape=[2, 5, 5, 5, 3],
+            append_batch_size=False,
+            dtype="float32")
+
+        input_NCDHW = fluid.layers.data(
+            name="input_NCDHW",
+            shape=[2, 3, 5, 5, 5],
+            append_batch_size=False,
+            dtype="float32")
+
+        ksize = [3, 3, 3]
+        out_1 = fluid.layers.pool3d(
+            input=input_NDHWC,
+            pool_size=ksize,
+            pool_type="max",
+            pool_padding=[1, 1, 1],
+            use_cudnn=False,
+            data_format="NDHWC")
+
+        out_2 = fluid.layers.pool3d(
+            input=input_NDHWC,
+            pool_size=ksize,
+            pool_type="avg",
+            pool_padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]],
+            use_cudnn=False,
+            data_format="NDHWC")
+
+        out_3 = fluid.layers.pool3d(
+            input=input_NCDHW,
+            pool_size=ksize,
+            pool_type="avg",
+            pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]],
+            use_cudnn=False,
+            data_format="NCDHW")
+
+        out_4 = fluid.layers.pool3d(
+            input=input_NCDHW,
+            pool_size=ksize,
+            pool_type="avg",
+            pool_padding=[1, 2, 1, 0, 0, 1],
+            use_cudnn=False,
+            data_format="NCDHW")
+
+        # test VALID
+        out_5 = fluid.layers.pool3d(
+            input=input_NDHWC,
+            pool_size=ksize,
+            pool_type="avg",
+            pool_padding="VALID",
+            use_cudnn=False,
+            data_format="NDHWC")
+
+        out_6 = fluid.layers.pool3d(
+            input=input_NCDHW,
+            pool_size=ksize,
+            pool_type="avg",
+            pool_padding="VALID",
+            use_cudnn=False,
+            data_format="NCDHW")
+
+        # test SAME
+        out_7 = fluid.layers.pool3d(
+            input=input_NDHWC,
+            pool_size=ksize,
+            pool_type="avg",
+            pool_padding="SAME",
+            use_cudnn=False,
+            data_format="NDHWC")
+
+        out_8 = fluid.layers.pool3d(
+            input=input_NCDHW,
+            pool_size=[4, 4, 4],
+            pool_type="avg",
+            pool_padding="SAME",
+            use_cudnn=False,
+            data_format="NCDHW")
+
+        exe = fluid.Executor(place=fluid.CPUPlace())
+        [res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8] = exe.run(
+            fluid.default_main_program(),
+            feed={"input_NDHWC": x_NDHWC,
+                  "input_NCDHW": x_NCDHW},
+            fetch_list=[
+                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8
+            ])
+
+        assert np.allclose(
+            res_1,
+            pool3D_forward_naive(
+                x=x_NDHWC,
+                ksize=ksize,
+                pool_type="max",
+                strides=[1, 1, 1],
+                paddings=[1, 1, 1],
+                data_format="NDHWC"))
+
+        assert np.allclose(
+            res_2,
+            pool3D_forward_naive(
+                x=x_NDHWC,
+                ksize=ksize,
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[1, 1, 1, 1, 1, 1],
+                data_format="NDHWC"))
+
+        assert np.allclose(
+            res_3,
+            pool3D_forward_naive(
+                x=x_NCDHW,
+                ksize=ksize,
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[1, 1, 1, 1, 1, 1],
+                data_format="NCDHW"),
+            rtol=0.07,
+            atol=1e-05)
+
+        assert np.allclose(
+            res_4,
+            pool3D_forward_naive(
+                x=x_NCDHW,
+                ksize=ksize,
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[1, 2, 1, 0, 0, 1],
+                data_format="NCDHW"),
+            rtol=0.07,
+            atol=1e-05)
+
+        # VALID
+        assert np.allclose(
+            res_5,
+            pool3D_forward_naive(
+                x=x_NDHWC,
+                ksize=ksize,
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[10, 20],
+                padding_algorithm="VALID",
+                data_format="NDHWC"))
+
+        assert np.allclose(
+            res_6,
+            pool3D_forward_naive(
+                x=x_NCDHW,
+                ksize=ksize,
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[10, 20],
+                padding_algorithm="VALID",
+                data_format="NCDHW"),
+            rtol=0.07,
+            atol=1e-05)
+
+        # SAME
+        assert np.allclose(
+            res_7,
+            pool3D_forward_naive(
+                x=x_NDHWC,
+                ksize=ksize,
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[10, 20],
+                padding_algorithm="SAME",
+                data_format="NDHWC"))
+
+        assert np.allclose(
+            res_8,
+            pool3D_forward_naive(
+                x=x_NCDHW,
+                ksize=[4, 4, 4],
+                pool_type="avg",
+                strides=[1, 1, 1],
+                paddings=[10, 20],
+                padding_algorithm="SAME",
+                data_format="NCDHW"),
+            rtol=0.07,
+            atol=1e-05)
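+
+
+# The eight calls above cover every accepted pool_padding form. As a quick
+# reference, this hypothetical helper (illustration only, not part of the
+# fluid API) reduces the list forms to the canonical 6-element
+# [begin, end] x (D, H, W) layout that the asserts feed to
+# pool3D_forward_naive:
+def _flatten_pool3d_padding(padding, data_format="NCDHW"):
+    if isinstance(padding, int):
+        padding = [padding] * 3
+    if len(padding) == 5:  # one [begin, end] pair per axis of data_format
+        spatial = padding[1:4] if data_format == "NDHWC" else padding[2:5]
+        return [p for pair in spatial for p in pair]
+    if len(padding) == 3:  # symmetric pad per spatial dimension
+        return [p for pad in padding for p in (pad, pad)]
+    return list(padding)  # already [begin, end] per spatial dimension
+
+
+# e.g. out_2's nested NDHWC padding flattens to [1, 1, 1, 1, 1, 1], matching
+# the paddings used in the corresponding res_2 assert.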
pool_type="max", + pool_padding=[1, 1, 1], + use_cudnn=[0], + data_format="NDHWC") + + self.assertRaises(ValueError, run_1) + + # data_format value error + def run_2(): + out_2 = fluid.layers.pool3d( + input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[1, 1, 1], + use_cudnn=False, + data_format="NDHWCC") + + self.assertRaises(ValueError, run_2) + + # padding str value error + def run_3(): + out_3 = fluid.layers.pool3d( + input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALIDSAME", + use_cudnn=False, + data_format="NDHWC") + + self.assertRaises(ValueError, run_3) + + # padding str valid and ceil_mode value error + def run_4(): + out_4 = fluid.layers.pool3d( + input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding="VALID", + use_cudnn=False, + ceil_mode=True, + data_format="NDHWC") + + self.assertRaises(ValueError, run_4) + + # padding with 8 ele. value error + def run_5(): + out_5 = fluid.layers.pool3d( + input=input_NDHWC, + pool_size=ksize, + pool_type="max", + pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1], [1, 1]], + use_cudnn=False, + data_format="NDHWC") + + self.assertRaises(ValueError, run_5) + + if __name__ == '__main__': unittest.main() -- GitLab