Commit 24010472 authored by liym27, committed by Tao Luo

fix pool2d pool3d, support asymmetric padding and channel_last (#19739)

* fix pool2d pool3d:
1. support asymmetric padding;
2. support padding algorithm:"SAME" and "VALID";
3. support channel_last: data_format NHWC and NDHWC;
4. support inferring shape when the input has negative dims at compile time;
5. update the doc of the Python API and C++;
6. fix a bug in the CUDA kernel when Attr(adaptive) is true.

test=develop,test=document_preview

* fix 'tensors' to 'Tensors'. test=develop,test=document_preview

* add test for coverage of ValueError. test=develop,test=document_preview

* resolve conflict in test_pool2d. test=develop
Parent fe581b0e
......@@ -144,8 +144,8 @@ paddle.fluid.layers.conv3d (ArgSpec(args=['input', 'num_filters', 'filter_size',
paddle.fluid.layers.sequence_pool (ArgSpec(args=['input', 'pool_type', 'is_test', 'pad_value'], varargs=None, keywords=None, defaults=(False, 0.0)), ('document', 'e90a93251c52dc4e6fb34fb3991b3f82'))
paddle.fluid.layers.sequence_softmax (ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', 'eaa9d0bbd3d4e017c8bc4ecdac483711'))
paddle.fluid.layers.softmax (ArgSpec(args=['input', 'use_cudnn', 'name', 'axis'], varargs=None, keywords=None, defaults=(False, None, -1)), ('document', 'cee673c79e3ff4582656a24e04f841e5'))
paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', 'be7e530dcbd603962e25573a63eb145e'))
paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)), ('document', '053b1a855f13a066d005759171724bc6'))
paddle.fluid.layers.pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive', 'data_format'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True, 'NCHW')), ('document', '630cae697d46b4b575b15d56cf8be25a'))
paddle.fluid.layers.pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive', 'data_format'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True, 'NCDHW')), ('document', 'db0035a3132b1dfb12e53c57591fb9f6'))
paddle.fluid.layers.adaptive_pool2d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '52343203de40afe29607397e13aaf0d2'))
paddle.fluid.layers.adaptive_pool3d (ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)), ('document', '55db6ae7275fb9678a6814aebab81a9c'))
paddle.fluid.layers.batch_norm (ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)), ('document', '9e5a9f4f6d82d34a33d9ca632379cbcc'))
......
......@@ -13,17 +13,21 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/pooling.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
namespace math {
/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
*/
* Tensors are in NCHW or NHWC format.
* Ksize, strides are two elements. These two elements represent height
* and width, respectively.
* Paddings are four elements. These four elements represent height_up,
* height_down, width_left and width_right, respectively.
*/
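/*
* A hedged illustrative sketch (an assumption, not taken from this file):
* with the four-element padding order above, one spatial output dimension
* follows
*   out = (in + pad_before + pad_after - ksize) / stride + 1.
* For example, in = 5, ksize = 3, stride = 2, pad_before = 1, pad_after = 2
* gives out = (5 + 1 + 2 - 3) / 2 + 1 = 3.
*/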
template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
public:
......@@ -92,12 +96,137 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
}
}
}
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_process,
bool exclusive, bool adaptive, framework::Tensor* output) {
bool channel_last = (data_format == "NHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[3] : input.dims()[1];
const int input_height = channel_last ? input.dims()[1] : input.dims()[2];
const int input_width = channel_last ? input.dims()[2] : input.dims()[3];
const int output_channels =
channel_last ? output->dims()[3] : output->dims()[1];
const int output_height =
channel_last ? output->dims()[1] : output->dims()[2];
const int output_width =
channel_last ? output->dims()[2] : output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int hstart, hend;
int wstart, wend;
if (!channel_last) {
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
T ele = pool_process.initial();
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_process.compute(input_data[h * input_width + w], &ele);
}
}
int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[ph * output_width + pw] = ele;
}
}
input_data += input_stride;
output_data += output_stride;
}
}
} else {
const int input_stride = input_height * input_width * input_channels;
const int output_stride = output_height * output_width * output_channels;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
T ele = pool_process.initial();
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_process.compute(
input_data[h * input_width * input_channels +
w * input_channels + c],
&ele);
}
}
int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[ph * output_width * output_channels +
pw * output_channels + c] = ele;
}
}
}
input_data += input_stride;
output_data += output_stride;
}
}
}
};
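/*
* A minimal sketch of the NHWC addressing used above, assuming a row-major
* layout: element (n, h, w, c) lives at flattened offset
*   ((n * H + h) * W + w) * C + c,
* which is why the channel_last branch indexes
* input_data[h * input_width * input_channels + w * input_channels + c]
* after input_data has been advanced by n * input_stride.
*/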
/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent height
* Tensors are in NCHW or NHWC format.
* Ksize, strides are two elements. These two elements represent height
* and width, respectively.
* Paddings are four elements. These four elements represent height_up,
* height_down, width_left and width_right, respectively.
*/
template <typename PoolProcess, class T>
class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
......@@ -173,13 +302,147 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
}
}
}
void operator()(
const platform::CPUDeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::string data_format,
PoolProcess pool_grad_process, bool exclusive, bool adaptive,
framework::Tensor* input_grad) {
bool channel_last = (data_format == "NHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[3] : input.dims()[1];
const int input_height = channel_last ? input.dims()[1] : input.dims()[2];
const int input_width = channel_last ? input.dims()[2] : input.dims()[3];
const int output_channels =
channel_last ? output.dims()[3] : output.dims()[1];
const int output_height =
channel_last ? output.dims()[1] : output.dims()[2];
const int output_width = channel_last ? output.dims()[2] : output.dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int hstart, hend;
int wstart, wend;
if (!channel_last) {
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width;
float scale = 1.0 / pool_size;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_grad_process.compute(
input_data[h * input_width + w],
output_data[ph * output_width + pw],
output_grad_data[ph * output_width + pw],
static_cast<T>(scale),
input_grad_data + h * input_width + w);
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
} else {
const int input_stride = input_height * input_width * input_channels;
const int output_stride = output_height * output_width * output_channels;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width;
float scale = 1.0 / pool_size;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
auto input_idx =
h * input_width * input_channels + w * input_channels + c;
auto output_idx = ph * output_width * output_channels +
pw * output_channels + c;
pool_grad_process.compute(
input_data[input_idx], output_data[output_idx],
output_grad_data[output_idx], static_cast<T>(scale),
input_grad_data + input_idx);
}
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
}
};
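/*
* A worked example of the gradient scale above, assuming AvgPoolGrad spreads
* the gradient uniformly over the window: with exclusive = true, a 3x3 kernel
* clipped to a 2x3 window at a border gives pool_size = 6 and scale = 1/6, so
* each input cell in that window accumulates output_grad * 1/6.
*/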
/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
*/
* Tensors are in NCHW or NHWC format.
* Ksize, strides are two elements. These two elements represent height
* and width, respectively.
* Paddings are four elements. These four elements represent height_up,
* height_down, width_left and width_right, respectively.
*/
template <class T>
class MaxPool2dGradFunctor<platform::CPUDeviceContext, T> {
public:
......@@ -239,53 +502,424 @@ class MaxPool2dGradFunctor<platform::CPUDeviceContext, T> {
}
}
}
};
template class MaxPool2dGradFunctor<platform::CPUDeviceContext, float>;
template class MaxPool2dGradFunctor<platform::CPUDeviceContext, double>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPool<float>, float>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPool<float>, float>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPoolGrad<float>,
float>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPoolGrad<float>,
float>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPool<double>, double>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPool<double>, double>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPoolGrad<double>,
double>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPoolGrad<double>,
double>;
void operator()(
const platform::CPUDeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::string data_format,
framework::Tensor* input_grad) {
bool channel_last = (data_format == "NHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[3] : input.dims()[1];
const int input_height = channel_last ? input.dims()[1] : input.dims()[2];
const int input_width = channel_last ? input.dims()[2] : input.dims()[3];
const int output_channels =
channel_last ? output.dims()[3] : output.dims()[1];
const int output_height =
channel_last ? output.dims()[1] : output.dims()[2];
const int output_width = channel_last ? output.dims()[2] : output.dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
if (!channel_last) {
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
bool stop = false;
for (int h = hstart; h < hend && !stop; ++h) {
for (int w = wstart; w < wend && !stop; ++w) {
int input_idx = h * input_width + w;
int output_idx = ph * output_width + pw;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] += output_grad_data[output_idx];
stop = true;
}
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
} else {
const int input_stride = input_height * input_width * input_channels;
const int output_stride = output_height * output_width * output_channels;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
bool stop = false;
for (int h = hstart; h < hend && !stop; ++h) {
for (int w = wstart; w < wend && !stop; ++w) {
int input_idx =
h * input_width * input_channels + w * input_channels + c;
int output_idx = ph * output_width * output_channels +
pw * output_channels + c;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] += output_grad_data[output_idx];
stop = true;
}
}
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
}
};
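/*
* A short note on the max-pool gradient routing above: the whole output
* gradient is added to the first input element in the window that equals the
* pooled output (the stop flag ends the scan), so ties go to the smallest
* index and each output gradient reaches exactly one input cell.
*/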
template class MaxPool2dGradFunctor<platform::CPUDeviceContext, float>;
template class MaxPool2dGradFunctor<platform::CPUDeviceContext, double>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPool<float>, float>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPool<float>, float>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPoolGrad<float>,
float>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPoolGrad<float>,
float>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPool<double>, double>;
template class Pool2dFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPool<double>, double>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::MaxPoolGrad<double>,
double>;
template class Pool2dGradFunctor<platform::CPUDeviceContext,
paddle::operators::math::AvgPoolGrad<double>,
double>;
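// A hedged usage sketch (tensor setup, shapes, and the cpu_ctx/input/output
// names are assumed, not from this file): calling the new NHWC overload of
// the CPU Pool2dFunctor instantiated above.
//   paddle::operators::math::Pool2dFunctor<
//       platform::CPUDeviceContext, paddle::operators::math::AvgPool<float>,
//       float> pool2d;
//   pool2d(cpu_ctx, input, /*ksize=*/{2, 2}, /*strides=*/{2, 2},
//          /*paddings=*/{0, 0, 0, 0}, /*data_format=*/"NHWC",
//          paddle::operators::math::AvgPool<float>(), /*exclusive=*/true,
//          /*adaptive=*/false, &output);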
/*
* All tensors are in NCDHW format.
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
*/
* Tensors are in NCDHW or NDHWC format.
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* Paddings are six elements. These six elements represent depth_front,
* depth_back, height_up, height_down, width_left and width_right,
* respectively.
*/
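/*
* Illustrative example, assuming the six-element order listed above:
* paddings = {1, 2, 0, 0, 3, 3} pads one plane in front of the depth axis,
* two planes behind it, nothing along height, and three columns on each side
* of the width axis.
*/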
template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int dstart, dend;
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw;
T ele = pool_process.initial();
for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_process.compute(
input_data[(d * input_height + h) * input_width + w],
&ele);
}
}
}
int pool_size =
(exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[output_idx] = ele;
}
}
}
input_data += input_stride;
output_data += output_stride;
}
}
}
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_process,
bool exclusive, bool adaptive, framework::Tensor* output) {
bool channel_last = (data_format == "NDHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[4] : input.dims()[1];
const int input_depth = channel_last ? input.dims()[1] : input.dims()[2];
const int input_height = channel_last ? input.dims()[2] : input.dims()[3];
const int input_width = channel_last ? input.dims()[3] : input.dims()[4];
const int output_channels =
channel_last ? output->dims()[4] : output->dims()[1];
const int output_depth =
channel_last ? output->dims()[1] : output->dims()[2];
const int output_height =
channel_last ? output->dims()[2] : output->dims()[3];
const int output_width =
channel_last ? output->dims()[3] : output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int dstart, dend;
int hstart, hend;
int wstart, wend;
if (!channel_last) {
const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw;
T ele = pool_process.initial();
for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_process.compute(
input_data[(d * input_height + h) * input_width + w],
&ele);
}
}
}
int pool_size =
(exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[output_idx] = ele;
}
}
}
input_data += input_stride;
output_data += output_stride;
}
}
} else {
const int input_stride =
input_depth * input_height * input_width * input_channels;
const int output_stride =
output_depth * output_height * output_width * output_channels;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
T ele = pool_process.initial();
for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
int input_idx =
((d * input_height + h) * input_width + w) *
input_channels +
c;
pool_process.compute(input_data[input_idx], &ele);
}
}
}
int pool_size =
(exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele);
int output_idx =
((pd * output_height + ph) * output_width + pw) *
output_channels +
c;
output_data[output_idx] = ele;
}
}
}
}
input_data += input_stride;
output_data += output_stride;
}
}
}
};
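/*
* A minimal sketch of the NDHWC addressing used above, assuming a row-major
* layout: element (n, d, h, w, c) lives at flattened offset
*   (((n * D + d) * H + h) * W + w) * C + c,
* matching the input_idx and output_idx computations in the channel_last
* branch.
*/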
/*
* Tensors are in NCDHW or NDHWC format.
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* Paddings are six elements. These six elements represent depth_front,
* depth_back, height_up, height_down, width_left and width_right,
* respectively.
*/
template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, bool adaptive, framework::Tensor* output) {
void operator()(
const platform::CPUDeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_grad_process,
bool exclusive, bool adaptive, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output->dims()[1];
const int output_depth = output->dims()[2];
const int output_height = output->dims()[3];
const int output_width = output->dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
......@@ -295,12 +929,13 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int dstart, dend;
int hstart, hend;
......@@ -334,66 +969,68 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw;
T ele = pool_process.initial();
int pool_size =
(exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
float scale = 1.0 / pool_size;
for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_process.compute(
input_data[(d * input_height + h) * input_width + w],
&ele);
int input_idx = (d * input_height + h) * input_width + w;
int output_idx =
(pd * output_height + ph) * output_width + pw;
pool_grad_process.compute(
input_data[input_idx], output_data[output_idx],
output_grad_data[output_idx], static_cast<T>(scale),
input_grad_data + input_idx);
}
}
}
int pool_size =
(exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[output_idx] = ele;
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
}
};
/*
* All tensors are in NCDHW format.
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
*/
template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
public:
void operator()(
const platform::CPUDeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_grad_process,
bool exclusive, bool adaptive, framework::Tensor* input_grad) {
const std::vector<int>& paddings, const std::string data_format,
PoolProcess pool_grad_process, bool exclusive, bool adaptive,
framework::Tensor* input_grad) {
bool channel_last = (data_format == "NDHWC");
const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2];
const int input_height = input.dims()[3];
const int input_width = input.dims()[4];
const int output_channels = output.dims()[1];
const int output_depth = output.dims()[2];
const int output_height = output.dims()[3];
const int output_width = output.dims()[4];
const int input_channels = channel_last ? input.dims()[4] : input.dims()[1];
const int input_depth = channel_last ? input.dims()[1] : input.dims()[2];
const int input_height = channel_last ? input.dims()[2] : input.dims()[3];
const int input_width = channel_last ? input.dims()[3] : input.dims()[4];
const int output_channels =
channel_last ? output.dims()[4] : output.dims()[1];
const int output_depth = channel_last ? output.dims()[1] : output.dims()[2];
const int output_height =
channel_last ? output.dims()[2] : output.dims()[3];
const int output_width = channel_last ? output.dims()[3] : output.dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
......@@ -403,6 +1040,9 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
int dstart, dend;
int hstart, hend;
int wstart, wend;
if (!channel_last) {
const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
......@@ -460,14 +1100,85 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
output_grad_data += output_stride;
}
}
} else {
const int input_stride =
input_depth * input_height * input_width * input_channels;
const int output_stride =
output_depth * output_height * output_width * output_channels;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
}
int pool_size =
(exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
float scale = 1.0 / pool_size;
for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
int input_idx =
((d * input_height + h) * input_width + w) *
input_channels +
c;
int output_idx =
((pd * output_height + ph) * output_width + pw) *
output_channels +
c;
pool_grad_process.compute(
input_data[input_idx], output_data[output_idx],
output_grad_data[output_idx], static_cast<T>(scale),
input_grad_data + input_idx);
}
}
}
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
}
};
/*
* All tensors are in NCDHW format.
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
*/
* Tensors are in NCDHW or NDHWC format.
* Ksize, strides, paddings are three elements. These three elements represent
* depth, height and width, respectively.
* Paddings are six elements. These six elements represent depth_front,
* depth_back, height_up, height_down, width_left and width_right,
* respectively.
*/
template <class T>
class MaxPool3dGradFunctor<platform::CPUDeviceContext, T> {
public:
......@@ -541,8 +1252,139 @@ class MaxPool3dGradFunctor<platform::CPUDeviceContext, T> {
}
}
}
};
void operator()(
const platform::CPUDeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::string data_format,
framework::Tensor* input_grad) {
bool channel_last = (data_format == "NDHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[4] : input.dims()[1];
const int input_depth = channel_last ? input.dims()[1] : input.dims()[2];
const int input_height = channel_last ? input.dims()[2] : input.dims()[3];
const int input_width = channel_last ? input.dims()[3] : input.dims()[4];
const int output_channels =
channel_last ? output.dims()[4] : output.dims()[1];
const int output_depth = channel_last ? output.dims()[1] : output.dims()[2];
const int output_height =
channel_last ? output.dims()[2] : output.dims()[3];
const int output_width = channel_last ? output.dims()[3] : output.dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
if (!channel_last) {
const int input_stride = input_depth * input_height * input_width;
const int output_stride = output_depth * output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
int dstart = pd * stride_depth - padding_depth;
int dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0);
for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
bool stop = false;
for (int d = dstart; d < dend && !stop; ++d) {
for (int h = hstart; h < hend && !stop; ++h) {
for (int w = wstart; w < wend && !stop; ++w) {
int input_idx = (d * input_height + h) * input_width + w;
int output_idx =
(pd * output_height + ph) * output_width + pw;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] +=
output_grad_data[output_idx];
stop = true;
}
}
}
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
} else {
const int input_stride =
input_depth * input_height * input_width * input_channels;
const int output_stride =
output_depth * output_height * output_width * output_channels;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) {
int dstart = pd * stride_depth - padding_depth;
int dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0);
for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
bool stop = false;
for (int d = dstart; d < dend && !stop; ++d) {
for (int h = hstart; h < hend && !stop; ++h) {
for (int w = wstart; w < wend && !stop; ++w) {
int input_idx =
((d * input_height + h) * input_width + w) *
input_channels +
c;
int output_idx =
((pd * output_height + ph) * output_width + pw) *
output_channels +
c;
if (input_data[input_idx] == output_data[output_idx]) {
input_grad_data[input_idx] +=
output_grad_data[output_idx];
stop = true;
}
}
}
}
}
}
}
}
input_data += input_stride;
output_data += output_stride;
input_grad_data += input_stride;
output_grad_data += output_stride;
}
}
}
};
template class MaxPool3dGradFunctor<platform::CPUDeviceContext, float>;
template class MaxPool3dGradFunctor<platform::CPUDeviceContext, double>;
......
......@@ -29,13 +29,22 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
const int ksize_width, const int stride_height,
const int stride_width, const int padding_height,
const int padding_width, PoolProcess pool_process,
bool exclusive, bool adaptive, T* output_data) {
bool exclusive, bool adaptive, T* output_data,
bool channel_last = false) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
int ph = (index / output_width) % output_height;
int c = (index / output_width / output_height) % channels;
int batch_idx = index / output_width / output_height / channels;
int pw, ph, c, batch_idx;
if (!channel_last) { /*NCHW*/
pw = index % output_width;
ph = (index / output_width) % output_height;
c = (index / output_width / output_height) % channels;
batch_idx = index / output_width / output_height / channels;
} else { /*NHWC*/
c = index % channels;
pw = (index / channels) % output_width;
ph = (index / channels / output_width) % output_height;
batch_idx = index / channels / output_width / output_height;
}
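// Worked example of the index decomposition above (values assumed for
// illustration): channels = 3, output_width = 4, output_height = 2,
// index = 29 in NHWC gives c = 29 % 3 = 2, pw = (29 / 3) % 4 = 1,
// ph = (29 / 3 / 4) % 2 = 0 and batch_idx = 29 / 3 / 4 / 2 = 1.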
int hstart, hend;
int wstart, wend;
......@@ -55,11 +64,17 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
wstart = max(wstart, 0);
}
if (!channel_last) {
input_data += (batch_idx * channels + c) * input_height * input_width;
} else {
input_data += batch_idx * input_height * input_width * channels;
}
T ele = pool_process.initial();
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
pool_process.compute(input_data[h * input_width + w], &ele);
auto input_idx = channel_last ? (h * input_width + w) * channels + c
: h * input_width + w;
pool_process.compute(input_data[input_idx], &ele);
}
}
int pool_size = (exclusive || adaptive) ? (hend - hstart) * (wend - wstart)
......@@ -68,7 +83,6 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
output_data[index] = ele;
}
}
template <typename PoolProcess, typename T>
__global__ void KernelPool2DGrad(
const int nthreads, const T* input_data, const T* output_data,
......@@ -76,13 +90,23 @@ __global__ void KernelPool2DGrad(
const int input_width, const int output_height, const int output_width,
const int ksize_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int padding_width,
PoolProcess pool_process, bool exclusive, bool adaptive, T* input_grad) {
PoolProcess pool_process, bool exclusive, bool adaptive, T* input_grad,
bool channel_last = false) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int w_offset = index % input_width + padding_width;
int h_offset = (index / input_width) % input_height + padding_height;
int offsetC = (index / input_width / input_height) % channels;
int batch_idx = index / input_width / input_height / channels;
int w_offset, h_offset, offsetC, batch_idx;
if (!channel_last) { /* NCHW */
w_offset = index % input_width + padding_width;
h_offset = (index / input_width) % input_height + padding_height;
offsetC = (index / input_width / input_height) % channels;
batch_idx = index / input_width / input_height / channels;
} else { /* NHWC */
offsetC = index % channels;
w_offset = (index / channels) % input_width + padding_width;
h_offset =
(index / channels / input_width) % input_height + padding_height;
batch_idx = index / channels / input_width / input_height;
}
int phstart, phend;
int pwstart, pwend;
......@@ -105,10 +129,18 @@ __global__ void KernelPool2DGrad(
}
T gradient = 0;
T input = input_data[index];
int output_idx =
(batch_idx * channels + offsetC) * output_height * output_width;
output_data += output_idx;
output_grad += output_idx;
int output_stride;
if (!channel_last) {
output_stride =
(batch_idx * channels + offsetC) * output_height * output_width;
} else {
output_stride = batch_idx * output_height * output_width * channels;
}
output_data += output_stride;
output_grad += output_stride;
for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) {
int pool_size;
......@@ -127,7 +159,9 @@ __global__ void KernelPool2DGrad(
pool_size = exclusive ? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width;
}
int output_sub_idx = ph * output_width + pw;
int output_sub_idx = channel_last
? (ph * output_width + pw) * channels + offsetC
: ph * output_width + pw;
pool_process.compute(input, output_data[output_sub_idx],
output_grad[output_sub_idx],
static_cast<T>(1.0 / pool_size), &gradient);
......@@ -144,14 +178,21 @@ __global__ void KernelMaxPool2DGrad(
const int input_width, const int output_height, const int output_width,
const int ksize_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int padding_width,
T* input_grad) {
T* input_grad, bool channel_last = false) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
int ph = (index / output_width) % output_height;
int c = (index / output_width / output_height) % channels;
int batch_idx = index / output_width / output_height / channels;
int pw, ph, c, batch_idx;
if (!channel_last) { /* NCHW */
pw = index % output_width;
ph = (index / output_width) % output_height;
c = (index / output_width / output_height) % channels;
batch_idx = index / output_width / output_height / channels;
} else { /* NHWC */
c = index % channels;
pw = (index / channels) % output_width;
ph = (index / channels / output_width) % output_height;
batch_idx = index / channels / output_width / output_height;
}
int hstart = ph * stride_height - padding_height;
int hend = min(hstart + ksize_height, input_height);
hstart = max(hstart, 0);
......@@ -160,16 +201,24 @@ __global__ void KernelMaxPool2DGrad(
int wend = min(wstart + ksize_width, input_width);
wstart = max(wstart, 0);
input_data += (batch_idx * channels + c) * input_height * input_width;
input_grad += (batch_idx * channels + c) * input_height * input_width;
int input_stride;
if (!channel_last) {
input_stride = (batch_idx * channels + c) * input_height * input_width;
} else {
input_stride = batch_idx * input_height * input_width * channels;
}
input_data += input_stride;
input_grad += input_stride;
T ele = output_data[index];
int maxIndex = -1;
bool stop = false;
for (int h = hstart; h < hend && !stop; ++h) {
for (int w = wstart; w < wend && !stop; ++w) {
if (ele == input_data[h * input_width + w]) {
maxIndex = h * input_width + w;
int input_data_idx = channel_last ? (h * input_width + w) * channels + c
: h * input_width + w;
if (ele == input_data[input_data_idx]) {
maxIndex = input_data_idx;
stop = true;
}
}
......@@ -214,10 +263,12 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
}
/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
*/
* Tensors are in NCHW or NHWC format.
* Ksize, strides are two elements. These two elements represent height
* and width, respectively.
* Paddings are four elements. These four elements represent height_up,
* height_down, width_left and width_right, respectively.
*/
template <typename PoolProcess, typename T>
class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
public:
......@@ -254,13 +305,57 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
stride_width, padding_height, padding_width, pool_process, exclusive,
adaptive, output_data);
}
};
void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_process,
bool exclusive, bool adaptive, framework::Tensor* output) {
bool channel_last = (data_format == "NHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[3] : input.dims()[1];
const int input_height = channel_last ? input.dims()[1] : input.dims()[2];
const int input_width = channel_last ? input.dims()[2] : input.dims()[3];
const int output_channels =
channel_last ? output->dims()[3] : output->dims()[1];
const int output_height =
channel_last ? output->dims()[1] : output->dims()[2];
const int output_width =
channel_last ? output->dims()[2] : output->dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelPool2D<PoolProcess, T><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width, pool_process, exclusive,
adaptive, output_data, channel_last);
}
};
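/*
* A brief note on the launch configuration above: one thread handles one
* output element, so nthreads = N * C * H_out * W_out and the kernel runs in
* ceil(nthreads / 1024) blocks of 1024 threads; e.g. nthreads = 5000 gives
* blocks = (5000 + 1024 - 1) / 1024 = 5.
*/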
/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
*/
* Tensors are in NCHW or NHWC format.
* Ksize, strides are two elements. These two elements represent height
* and width, respectively.
* Paddings are four elements. These four elements represent height_up,
* height_down, width_left and width_right, respectively.
*/
template <typename PoolProcess, typename T>
class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
public:
......@@ -302,13 +397,62 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
ksize_width, stride_height, stride_width, padding_height, padding_width,
pool_process, exclusive, adaptive, input_grad_data);
}
void operator()(
const platform::CUDADeviceContext& context,
const framework::Tensor& input, const framework::Tensor& output,
const framework::Tensor& output_grad, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_process, bool exclusive,
bool adaptive, framework::Tensor* input_grad) {
bool channel_last = (data_format == "NHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[3] : input.dims()[1];
const int input_height = channel_last ? input.dims()[1] : input.dims()[2];
const int input_width = channel_last ? input.dims()[2] : input.dims()[3];
const int output_channels =
channel_last ? output.dims()[3] : output.dims()[1];
const int output_height =
channel_last ? output.dims()[1] : output.dims()[2];
const int output_width = channel_last ? output.dims()[2] : output.dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * input_channels * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelPool2DGrad<PoolProcess, T><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_channels,
input_height, input_width, output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width, padding_height, padding_width,
pool_process, exclusive, adaptive, input_grad_data, channel_last);
}
};
/*
* All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent
* height and width, respectively.
*/
* Tensors are in NCHW or NHWC format.
* Ksize, strides are two elements. These two elements represent height
* and width, respectively.
* Paddings are four elements. These four elements represent height_up,
* height_down, width_left and width_right, respectively.
*/
template <typename T>
class MaxPool2dGradFunctor<platform::CUDADeviceContext, T> {
public:
......@@ -350,6 +494,51 @@ class MaxPool2dGradFunctor<platform::CUDADeviceContext, T> {
ksize_width, stride_height, stride_width, padding_height, padding_width,
input_grad_data);
}
void operator()(
const platform::CUDADeviceContext& context,
const framework::Tensor& input, const framework::Tensor& output,
const framework::Tensor& output_grad, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& paddings,
const std::string data_format, framework::Tensor* input_grad) {
bool channel_last = (data_format == "NHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[3] : input.dims()[1];
const int input_height = channel_last ? input.dims()[1] : input.dims()[2];
const int input_width = channel_last ? input.dims()[2] : input.dims()[3];
const int output_channels =
channel_last ? output.dims()[3] : output.dims()[1];
const int output_height =
channel_last ? output.dims()[1] : output.dims()[2];
const int output_width = channel_last ? output.dims()[2] : output.dims()[3];
const int ksize_height = ksize[0];
const int ksize_width = ksize[1];
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_height * output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelMaxPool2DGrad<T><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_channels,
input_height, input_width, output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width, padding_height, padding_width,
input_grad_data, channel_last);
}
};
template class Pool2dDirectCUDAFunctor<paddle::operators::math::MaxPool<float>,
......@@ -389,15 +578,26 @@ __global__ void KernelPool3D(
const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width,
PoolProcess pool_process, bool exclusive, bool adaptive, T* output_data) {
PoolProcess pool_process, bool exclusive, bool adaptive, T* output_data,
bool channel_last = false) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw = index % output_width;
int ph = (index / output_width) % output_height;
int pd = (index / output_width / output_height) % output_depth;
int c = (index / output_width / output_height / output_depth) % channels;
int batch_idx =
index / output_width / output_height / output_depth / channels;
int pw, ph, pd, c, batch_idx;
if (!channel_last) {
pw = index % output_width;
ph = (index / output_width) % output_height;
pd = (index / output_width / output_height) % output_depth;
c = (index / output_width / output_height / output_depth) % channels;
batch_idx =
index / output_width / output_height / output_depth / channels;
} else {
c = index % channels;
pw = (index / channels) % output_width;
ph = (index / channels / output_width) % output_height;
pd = (index / channels / output_width / output_height) % output_depth;
batch_idx =
index / channels / output_width / output_height / output_depth;
}
int dstart, dend;
int hstart, hend;
......@@ -422,14 +622,26 @@ __global__ void KernelPool3D(
hstart = max(hstart, 0);
wstart = max(wstart, 0);
}
int input_data_stride;
if (!channel_last) { /* NCDHW */
input_data_stride =
(batch_idx * channels + c) * input_depth * input_height * input_width;
} else { /* NDHWC */
input_data_stride =
batch_idx * input_depth * input_height * input_width * channels;
}
input_data += input_data_stride;
T ele = pool_process.initial();
for (int d = dstart; d < dend; ++d) {
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
auto input_data_idx =
channel_last
? ((d * input_height + h) * input_width + w) * channels + c
: (d * input_height + h) * input_width + w;
pool_process.compute(input_data[input_data_idx], &ele);
}
}
}
......@@ -450,15 +662,27 @@ __global__ void KernelPool3DGrad(
const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_height, const int stride_width, const int padding_depth,
const int padding_height, const int padding_width, PoolProcess pool_process,
bool exclusive, bool adaptive, T* input_grad, bool channel_last = false) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int w_offset, h_offset, d_offset, offsetC, batch_idx;
if (!channel_last) { /* "NCDHW" */
w_offset = index % input_width + padding_width;
h_offset = (index / input_width) % input_height + padding_height;
d_offset =
(index / input_width / input_height) % input_depth + padding_depth;
offsetC = (index / input_width / input_height / input_depth) % channels;
batch_idx = index / input_width / input_height / input_depth / channels;
} else { /* "NDHWC" */
offsetC = index % channels;
w_offset = (index / channels) % input_width + padding_width;
h_offset =
(index / channels / input_width) % input_height + padding_height;
d_offset = (index / channels / input_width / input_height) % input_depth +
padding_depth;
batch_idx = index / channels / input_width / input_height / input_depth;
}
int pdstart, pdend;
int phstart, phend;
......@@ -490,10 +714,17 @@ __global__ void KernelPool3DGrad(
T gradient = 0;
T input = input_data[index];
int output_stride;
if (!channel_last) {
output_stride = (batch_idx * channels + offsetC) * output_depth *
output_height * output_width;
} else {
output_stride =
batch_idx * output_depth * output_height * output_width * channels;
}
output_data += output_stride;
output_grad += output_stride;
for (int pd = pdstart; pd < pdend; ++pd) {
for (int ph = phstart; ph < phend; ++ph) {
......@@ -522,7 +753,13 @@ __global__ void KernelPool3DGrad(
exclusive ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width;
}
int output_sub_idx =
channel_last
? ((pd * output_height + ph) * output_width + pw) * channels +
offsetC
: (pd * output_height + ph) * output_width + pw;
pool_process.compute(input, output_data[output_sub_idx],
output_grad[output_sub_idx],
static_cast<T>(1.0 / pool_size), &gradient);
......@@ -541,38 +778,64 @@ __global__ void KernelMaxPool3DGrad(
const int output_height, const int output_width, const int ksize_depth,
const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_height, const int stride_width, const int padding_depth,
const int padding_height, const int padding_width, T* input_grad,
bool channel_last = false) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) {
int pw, ph, pd, c, batch_idx;
if (!channel_last) { /*NCDHW*/
pw = index % output_width;
ph = (index / output_width) % output_height;
pd = (index / output_width / output_height) % output_depth;
c = (index / output_width / output_height / output_depth) % channels;
batch_idx =
index / output_width / output_height / output_depth / channels;
} else { /*NDHWC*/
c = index % channels;
pw = (index / channels) % output_width;
ph = (index / channels / output_width) % output_height;
pd = (index / channels / output_width / output_height) % output_depth;
batch_idx =
index / channels / output_width / output_height / output_depth;
}
int dstart = pd * stride_depth - padding_depth;
int hstart = ph * stride_height - padding_height;
int wstart = pw * stride_width - padding_width;
int dend = min(dstart + ksize_depth, input_depth);
int hend = min(hstart + ksize_height, input_height);
int wend = min(wstart + ksize_width, input_width);
dstart = max(dstart, 0);
hstart = max(hstart, 0);
wstart = max(wstart, 0);
T ele = output_data[index];
bool stop = false;
int maxIdx = -1;
int input_stride;
if (!channel_last) {
input_stride =
(batch_idx * channels + c) * input_depth * input_height * input_width;
} else {
input_stride =
batch_idx * input_depth * input_height * input_width * channels;
}
input_data += input_stride;
input_grad += input_stride;
for (int d = dstart; d < dend && !stop; ++d) {
for (int h = hstart; h < hend && !stop; ++h) {
for (int w = wstart; w < wend && !stop; ++w) {
int input_data_idx =
channel_last
? ((d * input_height + h) * input_width + w) * channels + c
: (d * input_height + h) * input_width + w;
if (ele == input_data[input_data_idx]) {
stop = true;
maxIdx = input_data_idx;
}
}
}
......@@ -585,10 +848,13 @@ __global__ void KernelMaxPool3DGrad(
}
/*
 * Tensors are in NCDHW or NDHWC format.
 * Ksize, strides are three elements. These three elements represent
 * depth, height and width, respectively.
 * Paddings are six elements. These six elements represent depth_front,
 * depth_back, height_up, height_down, width_left and width_right,
 * respectively.
 */
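For example, paddings = {1, 2, 0, 0, 3, 3} pads the input volume with one plane before the depth axis and two planes after it, nothing along the height axis, and three columns on each side of the width axis.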
template <typename PoolProcess, class T>
class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
public:
......@@ -632,13 +898,67 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
padding_depth, padding_height, padding_width, pool_process, exclusive,
adaptive, output_data);
}
void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_process,
bool exclusive, bool adaptive, framework::Tensor* output) {
bool channel_last = (data_format == "NDHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[4] : input.dims()[1];
const int input_depth = channel_last ? input.dims()[1] : input.dims()[2];
const int input_height = channel_last ? input.dims()[2] : input.dims()[3];
const int input_width = channel_last ? input.dims()[3] : input.dims()[4];
const int output_channels =
channel_last ? output->dims()[4] : output->dims()[1];
const int output_depth =
channel_last ? output->dims()[1] : output->dims()[2];
const int output_height =
channel_last ? output->dims()[2] : output->dims()[3];
const int output_width =
channel_last ? output->dims()[3] : output->dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height *
output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelPool3D<PoolProcess, T><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, input_channels, input_depth, input_height,
input_width, output_depth, output_height, output_width, ksize_depth,
ksize_height, ksize_width, stride_depth, stride_height, stride_width,
padding_depth, padding_height, padding_width, pool_process, exclusive,
adaptive, output_data, channel_last);
}
};
/*
 * Tensors are in NCDHW or NDHWC format.
 * Ksize, strides are three elements. These three elements represent
 * depth, height and width, respectively.
 * Paddings are six elements. These six elements represent depth_front,
 * depth_back, height_up, height_down, width_left and width_right,
 * respectively.
 */
template <typename PoolProcess, class T>
class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
public:
......@@ -688,13 +1008,69 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
stride_height, stride_width, padding_depth, padding_height,
padding_width, pool_process, exclusive, adaptive, input_grad_data);
}
void operator()(
const platform::CUDADeviceContext& context,
const framework::Tensor& input, const framework::Tensor& output,
const framework::Tensor& output_grad, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_process, bool exclusive,
bool adaptive, framework::Tensor* input_grad) {
bool channel_last = (data_format == "NDHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[4] : input.dims()[1];
const int input_depth = channel_last ? input.dims()[1] : input.dims()[2];
const int input_height = channel_last ? input.dims()[2] : input.dims()[3];
const int input_width = channel_last ? input.dims()[3] : input.dims()[4];
const int output_channels =
channel_last ? output.dims()[4] : output.dims()[1];
const int output_depth = channel_last ? output.dims()[1] : output.dims()[2];
const int output_height =
channel_last ? output.dims()[2] : output.dims()[3];
const int output_width = channel_last ? output.dims()[3] : output.dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads =
batch_size * input_channels * input_depth * input_height * input_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelPool3DGrad<PoolProcess, T><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_channels,
input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height,
padding_width, pool_process, exclusive, adaptive, input_grad_data,
        channel_last);
}
};
/*
 * Tensors are in NCDHW or NDHWC format.
 * Ksize, strides are three elements. These three elements represent
 * depth, height and width, respectively.
 * Paddings are six elements. These six elements represent depth_front,
 * depth_back, height_up, height_down, width_left and width_right,
 * respectively.
 */
template <class T>
class MaxPool3dGradFunctor<platform::CUDADeviceContext, T> {
public:
......@@ -743,6 +1119,57 @@ class MaxPool3dGradFunctor<platform::CUDADeviceContext, T> {
stride_height, stride_width, padding_depth, padding_height,
padding_width, input_grad_data);
}
void operator()(
const platform::CUDADeviceContext& context,
const framework::Tensor& input, const framework::Tensor& output,
const framework::Tensor& output_grad, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& paddings,
const std::string data_format, framework::Tensor* input_grad) {
bool channel_last = (data_format == "NDHWC");
const int batch_size = input.dims()[0];
const int input_channels = channel_last ? input.dims()[4] : input.dims()[1];
const int input_depth = channel_last ? input.dims()[1] : input.dims()[2];
const int input_height = channel_last ? input.dims()[2] : input.dims()[3];
const int input_width = channel_last ? input.dims()[3] : input.dims()[4];
const int output_channels =
channel_last ? output.dims()[4] : output.dims()[1];
const int output_depth = channel_last ? output.dims()[1] : output.dims()[2];
const int output_height =
channel_last ? output.dims()[2] : output.dims()[3];
const int output_width = channel_last ? output.dims()[3] : output.dims()[4];
const int ksize_depth = ksize[0];
const int ksize_height = ksize[1];
const int ksize_width = ksize[2];
const int stride_depth = strides[0];
const int stride_height = strides[1];
const int stride_width = strides[2];
const int padding_depth = paddings[0];
const int padding_height = paddings[1];
const int padding_width = paddings[2];
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int nthreads = batch_size * output_channels * output_depth * output_height *
output_width;
int blocks = (nthreads + 1024 - 1) / 1024;
dim3 threads(1024, 1);
dim3 grid(blocks, 1);
KernelMaxPool3DGrad<T><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, output_data, output_grad_data, input_channels,
input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height,
        padding_width, input_grad_data, channel_last);
}
};
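Note that the two 3-D gradient kernels map threads differently: KernelPool3DGrad launches one thread per input element (nthreads spans batch_size * input_channels * input_depth * input_height * input_width) and gathers contributions from every pooling window covering that element, whereas KernelMaxPool3DGrad launches one thread per output element and writes the gradient back only to the input position that produced the maximum (the actual update to input_grad falls in a part of the kernel elided from this diff).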
template class MaxPool3dGradFunctor<platform::CUDADeviceContext, float>;
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/tensor.h"
......@@ -83,10 +84,11 @@ HOSTDEVICE inline int AdaptEndIndex(int ph, int input_size, int output_size) {
/*
* \brief Getting pooling results, and calculating gradient.
*
 * In pool2d, all Tensors are in NCHW or NHWC format, where N is batch size, C
 * is the number of channels, and H and W are the height and width of the
 * feature.
 * In pool3d, all Tensors are in NCDHW or NDHWC format, where N is batch size,
 * C is the number of channels, and D, H and W are the depth, height and width
 * of the feature.
*
* In max pooling, it is possible that the pooling region has multiple maximum
* elements. In this case, we should compute the gradient of the first maximum
......@@ -115,6 +117,14 @@ class Pool2dFunctor {
const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* output);
// overload operator() to support argument data_format
void operator()(const DeviceContext& context, const framework::Tensor& input,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* output);
};
template <typename DeviceContext, typename PoolProcess, typename T>
......@@ -127,6 +137,15 @@ class Pool2dGradFunctor {
const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* input_grad);
// overload operator() to support argument data_format
void operator()(const DeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* input_grad);
};
template <typename DeviceContext, class T>
......@@ -139,6 +158,14 @@ class MaxPool2dGradFunctor {
const std::vector<int>& strides,
const std::vector<int>& paddings,
framework::Tensor* input_grad);
// overload operator() to support argument data_format
void operator()(const DeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, framework::Tensor* input_grad);
};
template <typename DeviceContext, typename PoolProcess, typename T>
......@@ -149,6 +176,13 @@ class Pool3dFunctor {
const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* output);
// overload operator() to support argument data_format
void operator()(const DeviceContext& context, const framework::Tensor& input,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* output);
};
template <typename DeviceContext, typename PoolProcess, typename T>
......@@ -161,6 +195,15 @@ class Pool3dGradFunctor {
const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* input_grad);
// overload operator() to support argument data_format
void operator()(const DeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, PoolProcess pool_compute,
bool exclusive, bool adaptive, framework::Tensor* input_grad);
};
template <typename DeviceContext, class T>
......@@ -173,6 +216,14 @@ class MaxPool3dGradFunctor {
const std::vector<int>& strides,
const std::vector<int>& paddings,
framework::Tensor* input_grad);
// overload operator() to support argument data_format
void operator()(const DeviceContext& context, const framework::Tensor& input,
const framework::Tensor& output,
const framework::Tensor& output_grad,
const std::vector<int>& ksize,
const std::vector<int>& strides,
const std::vector<int>& paddings,
const std::string data_format, framework::Tensor* input_grad);
};
/*
......
......@@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/cudnn_helper.h"
......@@ -27,47 +29,117 @@ using PoolingMode = platform::PoolingMode;
template <typename T>
using ScalingParamType = typename platform::CudnnDataType<T>::ScalingParamType;
DataLayout getLayoutFromStr(std::string data_format) {
if (data_format == "NHWC") {
return DataLayout::kNHWC;
} else if (data_format == "NCHW") {
return DataLayout::kNCHW;
} else if (data_format == "NCDHW") {
return DataLayout::kNCDHW;
} else {
return DataLayout::kNCDHW;
}
}
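Note that getLayoutFromStr maps any unrecognized string to DataLayout::kNCDHW; data_format is assumed to have been validated before reaching this point.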
template <typename T>
class PoolCUDNNOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
"It must use CUDAPlace.");
const Tensor *input = ctx.Input<Tensor>("X");
Tensor *output = ctx.Output<Tensor>("Out");
output->mutable_data<T>(ctx.GetPlace());
std::string pooling_type = ctx.Attr<std::string>("pooling_type");
bool exclusive = ctx.Attr<bool>("exclusive");
bool adaptive = ctx.Attr<bool>("adaptive");
std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
if (ctx.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(input->dims()[i + 2]);
std::string data_format = ctx.Attr<std::string>("data_format");
bool global_pooling = ctx.Attr<bool>("global_pooling");
std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// update paddings
auto in_x_dims = input->dims();
framework::DDim data_dims;
if (channel_last) {
data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == paddings.size()) {
for (size_t i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
const std::string str_NCHW = "NCHW", str_NHWC = "NHWC";
const std::string str_NCDHW = "NCDHW", str_NDHWC = "NDHWC";
// -----------------transformed tensor ------------------------
Tensor transformed_input(input->type());
Tensor transformed_output(output->type());
DataLayout layout;
if (data_format == str_NDHWC) {
layout = DataLayout::kNCDHW;
auto &dev_ctx =
ctx.template device_context<paddle::platform::CUDADeviceContext>();
std::vector<int> axis{0, 4, 1, 2, 3};
// input
transformed_input.Resize(input->dims());
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[1] = input->dims()[4];
in_dims_vec[2] = input->dims()[1];
in_dims_vec[3] = input->dims()[2];
in_dims_vec[4] = input->dims()[3];
transformed_input.Resize(framework::make_ddim(in_dims_vec));
transformed_input.mutable_data(ctx.GetPlace(), input->type());
math::Transpose<paddle::platform::CUDADeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, &transformed_input, axis);
// output
transformed_output.Resize(output->dims());
auto out_dims_vec = framework::vectorize(output->dims());
out_dims_vec[1] = output->dims()[4];
out_dims_vec[2] = output->dims()[1];
out_dims_vec[3] = output->dims()[2];
out_dims_vec[4] = output->dims()[3];
transformed_output.Resize(framework::make_ddim(out_dims_vec));
} else {
layout = getLayoutFromStr(data_format);
transformed_input = *input;
transformed_output = *output;
}
    const T *transformed_input_data = transformed_input.data<T>();
    T *transformed_output_data = transformed_output.mutable_data<T>(
        transformed_output.dims(), ctx.GetPlace());
// ------------------- cudnn descriptors ---------------------
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc;
ScopedPoolingDescriptor pool_desc;
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize<int>(transformed_input.dims()));
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
layout, framework::vectorize<int>(transformed_output.dims()));
PoolingMode pooling_mode;
if (pooling_type == "max") {
......@@ -83,9 +155,19 @@ class PoolCUDNNOpKernel : public framework::OpKernel<T> {
// ------------------- cudnn pool algorithm ---------------------
auto handle = ctx.cuda_device_context().cudnn_handle();
ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
CUDNN_ENFORCE(platform::dynload::cudnnPoolingForward(
        handle, cudnn_pool_desc, &alpha, cudnn_input_desc,
        transformed_input_data, &beta, cudnn_output_desc,
        transformed_output_data));

    // Transpose the result back to NDHWC if the input was channel_last.
if (data_format == str_NDHWC) {
auto &dev_ctx =
ctx.template device_context<paddle::platform::CUDADeviceContext>();
std::vector<int> axis{0, 2, 3, 4, 1};
math::Transpose<paddle::platform::CUDADeviceContext, T, 5> trans5_v2;
trans5_v2(dev_ctx, transformed_output, output, axis);
}
}
};
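The NDHWC path above transposes the input to a channel-first layout, runs the cuDNN kernel, and transposes the result back. A minimal standalone sketch (standard library only, illustrative) checking that the two axis vectors used above are inverse permutations:

#include <array>
#include <cassert>

int main() {
  // NDHWC -> NCDHW gather permutation and its inverse, matching the
  // axis vectors {0, 4, 1, 2, 3} and {0, 2, 3, 4, 1} used in the kernel.
  const std::array<int, 5> fwd = {0, 4, 1, 2, 3};
  const std::array<int, 5> inv = {0, 2, 3, 4, 1};
  std::array<int, 5> dims = {8, 7, 6, 5, 4};  // N, D, H, W, C
  std::array<int, 5> t{}, back{};
  for (int i = 0; i < 5; ++i) t[i] = dims[fwd[i]];  // {8, 4, 7, 6, 5}: N, C, D, H, W
  for (int i = 0; i < 5; ++i) back[i] = t[inv[i]];  // recovers the original order
  assert(back == dims);
  return 0;
}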
......@@ -93,7 +175,7 @@ template <typename T>
class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
"It must use CUDAPlace.");
const Tensor *input = ctx.Input<Tensor>("X");
......@@ -104,37 +186,109 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
std::string pooling_type = ctx.Attr<std::string>("pooling_type");
bool exclusive = ctx.Attr<bool>("exclusive");
bool adaptive = ctx.Attr<bool>("adaptive");
std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::string data_format = ctx.Attr<std::string>("data_format");
bool global_pooling = ctx.Attr<bool>("global_pooling");
std::string padding_algorithm = ctx.Attr<std::string>("padding_algorithm");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// update paddings
auto in_x_dims = input->dims();
framework::DDim data_dims;
if (channel_last) {
data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == paddings.size()) {
for (size_t i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (ctx.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(input->dims()[i + 2]);
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
// ------- tensor grad --------------
Tensor transformed_input(input->type());
Tensor transformed_output(output->type());
Tensor transformed_output_grad(output_grad->type());
input_grad->mutable_data<T>(ctx.GetPlace());
Tensor transformed_input_grad(input_grad->type());
DataLayout layout;
const std::string str_NCHW = "NCHW", str_NHWC = "NHWC";
const std::string str_NCDHW = "NCDHW", str_NDHWC = "NDHWC";
if (data_format == str_NDHWC) {
layout = DataLayout::kNCDHW;
auto &dev_ctx =
ctx.template device_context<paddle::platform::CUDADeviceContext>();
std::vector<int> axis{0, 4, 1, 2, 3};
// input
transformed_input.Resize(input->dims());
auto in_dims_vec = framework::vectorize(input->dims());
in_dims_vec[1] = input->dims()[4];
in_dims_vec[2] = input->dims()[1];
in_dims_vec[3] = input->dims()[2];
in_dims_vec[4] = input->dims()[3];
transformed_input.Resize(framework::make_ddim(in_dims_vec));
transformed_input.mutable_data(ctx.GetPlace(), input->type());
math::Transpose<paddle::platform::CUDADeviceContext, T, 5> trans5;
trans5(dev_ctx, *input, &transformed_input, axis);
// output
transformed_output.Resize(output->dims());
auto out_dims_vec = framework::vectorize(output->dims());
out_dims_vec[1] = output->dims()[4];
out_dims_vec[2] = output->dims()[1];
out_dims_vec[3] = output->dims()[2];
out_dims_vec[4] = output->dims()[3];
transformed_output.Resize(framework::make_ddim(out_dims_vec));
transformed_output.mutable_data(ctx.GetPlace(), output->type());
math::Transpose<paddle::platform::CUDADeviceContext, T, 5> trans5_v2;
trans5_v2(dev_ctx, *output, &transformed_output, axis);
// output grad
transformed_output_grad.Resize(framework::make_ddim(out_dims_vec));
transformed_output_grad.mutable_data(ctx.GetPlace(), output_grad->type());
math::Transpose<paddle::platform::CUDADeviceContext, T, 5> trans5_v3;
trans5_v3(dev_ctx, *output_grad, &transformed_output_grad, axis);
// input grad
transformed_input_grad.Resize(framework::make_ddim(in_dims_vec));
} else {
layout = getLayoutFromStr(data_format);
transformed_input = *input;
transformed_output = *output;
transformed_output_grad = *output_grad;
transformed_input_grad = *input_grad;
}
const T *input_data = transformed_input.data<T>();
const T *output_data = transformed_output.data<T>();
const T *output_grad_data = transformed_output_grad.data<T>();
// ------------------- cudnn descriptors ---------------------
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc;
ScopedPoolingDescriptor pool_desc;
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize<int>(transformed_input.dims()));
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
layout, framework::vectorize<int>(transformed_output.dims()));
PoolingMode pooling_mode;
if (pooling_type == "max") {
......@@ -155,13 +309,21 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
auto handle = ctx.cuda_device_context().cudnn_handle();
ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
if (input_grad) {
T *input_grad_data = transformed_input_grad.mutable_data<T>(
transformed_input_grad.dims(), ctx.GetPlace());
// Because beta is zero, it is unnecessary to reset input_grad.
CUDNN_ENFORCE(platform::dynload::cudnnPoolingBackward(
handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data,
cudnn_output_desc, output_grad_data, cudnn_input_desc, input_data,
&beta, cudnn_input_desc, input_grad_data));
if (data_format == str_NDHWC) {
auto &dev_ctx =
ctx.template device_context<paddle::platform::CUDADeviceContext>();
std::vector<int> axis{0, 2, 3, 4, 1};
math::Transpose<paddle::platform::CUDADeviceContext, T, 5> trans5_v4;
trans5_v4(dev_ctx, transformed_input_grad, input_grad, axis);
}
}
}
};
......
......@@ -24,68 +24,93 @@ limitations under the License. */
namespace paddle {
namespace operators {
int PoolOutputSize(int input_size, int filter_size, int padding_1,
int padding_2, int stride, bool ceil_mode) {
int output_size;
if (!ceil_mode) {
output_size =
(input_size - filter_size + padding_1 + padding_2) / stride + 1;
} else {
output_size =
(input_size - filter_size + padding_1 + padding_2 + stride - 1) /
stride +
1;
}
PADDLE_ENFORCE_GT(
output_size, 0,
"Due to the settings of padding(%d,%d), filter_size(%d) and "
"stride(%d), the output size is less than 0, please check "
"again. Input_size:%d",
padding_1, padding_2, filter_size, stride, input_size);
return output_size;
}
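A quick sanity check of the formula (hypothetical calls, assuming the function above is in scope):

  // Asymmetric padding (0, 1) with input 5, filter 3, stride 2:
  int a = PoolOutputSize(5, 3, 0, 1, 2, false);  // (5 - 3 + 0 + 1) / 2 + 1 == 2
  int b = PoolOutputSize(5, 3, 0, 1, 2, true);   // the extra (stride - 1) rounds up: 3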
void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
"X(Input) of Pooling should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
"Out(Output) of Pooling should not be null.");
auto in_x_dims = ctx->GetInputDim("X");
std::string pooling_type = ctx->Attrs().Get<std::string>("pooling_type");
std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
bool ceil_mode = ctx->Attrs().Get<bool>("ceil_mode");
bool adaptive = ctx->Attrs().Get<bool>("adaptive");
bool global_pooling = ctx->Attrs().Get<bool>("global_pooling");
std::string data_format = ctx->Attrs().Get<std::string>("data_format");
std::string padding_algorithm =
ctx->Attrs().Get<std::string>("padding_algorithm");
auto in_x_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(in_x_dims.size() == 4 || in_x_dims.size() == 5, true,
"Pooling intput should be 4-D or 5-D tensor.");
if (ctx->Attrs().Get<bool>("global_pooling")) {
ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x_dims[i + 2]);
}
}
PADDLE_ENFORCE_EQ(in_x_dims.size() - ksize.size(), 2U,
"Input size and pooling size should be consistent.");
PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
"Strides size and pooling size should be the same.");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// update paddings if "SAME" or global_pooling
framework::DDim data_dims;
if (channel_last) {
data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
std::vector<int64_t> output_shape;
if (adaptive) {
output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
} else {
for (size_t i = 0; i < data_dims.size(); ++i) {
if ((!ctx->IsRuntime()) && (data_dims[i] < 0)) {
output_shape.push_back(in_x_dims[i]);
} else {
output_shape.push_back(
PoolOutputSize(data_dims[i], ksize[i], paddings[2 * i],
paddings[2 * i + 1], strides[i], ceil_mode));
}
}
}
// output_N = input_N
output_shape.insert(output_shape.begin(), in_x_dims[0]);
// output_C = input_C
if (channel_last) {
output_shape.push_back(in_x_dims[in_x_dims.size() - 1]);
} else {
output_shape.insert(output_shape.begin() + 1, in_x_dims[1]);
}
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->ShareLoD("X", "Out");
}
......@@ -93,7 +118,9 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
framework::OpKernelType PoolOp::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
// std::string data_format = ctx.Attr<std::string>("data_format"); // change:
// delete
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_CUDA
......@@ -114,8 +141,8 @@ framework::OpKernelType PoolOp::GetExpectedKernelType(
}
void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) must not be null.");
PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true,
"Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
......@@ -123,7 +150,9 @@ void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const {
framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
const framework::ExecutionContext& ctx) const {
framework::LibraryType library_{framework::LibraryType::kPlain};
std::string data_format = ctx.Attr<std::string>("data_format");
// std::string data_format = ctx.Attr<std::string>("data_format"); //
// change:delete
std::string data_format = "AnyLayout";
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
#ifdef PADDLE_WITH_CUDA
......@@ -186,8 +215,8 @@ void Pool2dOpMaker::Make() {
// TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>(
"paddings",
"(vector<int>, default {0,0}), paddings(height, width) of pooling "
"operator."
"(vector<int>, default {0,0}), paddings(height_top, height_bottom, "
"width_left, wifth_right) of pooling operator."
"If global_pooling = true, paddings and kernel size will be ignored.")
.SetDefault({0, 0});
AddAttr<bool>(
......@@ -206,7 +235,7 @@ void Pool2dOpMaker::Make() {
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
"(bool, default false) Only used in cudnn kernel, need install cudnn.")
.SetDefault(false);
AddAttr<bool>(
"ceil_mode",
......@@ -215,7 +244,7 @@ void Pool2dOpMaker::Make() {
"the floor function will be used.")
.SetDefault(false);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
"(bool, default false) Only used in mkldnn kernel.")
.SetDefault(false);
AddAttr<bool>("use_quantizer",
"(bool, default false) "
......@@ -229,18 +258,24 @@ void Pool2dOpMaker::Make() {
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
.SetDefault("NCHW");
AddAttr<bool>("is_test",
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true.")
.SetDefault(false);
AddAttr<std::string>(
"padding_algorithm",
"(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
"\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
"Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
.SetDefault("EXPLICIT");
// TODO(dzhwinter): need to registered layout transform function
AddComment(R"DOC(
The pooling2d operation calculates the output based on
the input, pooling_type and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCHW or NHWC format, where N is batch size, C is the
number of channels, H is the height of the feature, and W is the width of the feature.
Parameters(ksize, strides) are two elements. These two elements represent
height and width, respectively. Parameters(paddings) are two or four elements:
(pad_height, pad_width) or (pad_height_top, pad_height_bottom, pad_width_left,
pad_width_right).
......@@ -256,30 +291,47 @@ Example:
Out shape: $(N, C, H_{out}, W_{out})$
For pool_padding = "SAME":
$$
H_{out} = \\frac{(H_{in} + strides[0] - 1)}{strides[0]}
$$
$$
W_{out} = \\frac{(W_{in} + strides[1] - 1)}{strides[1]}
$$
For pool_padding = "VALID":
$$
H_{out} = \\frac{(H_{in} - ksize[0] + strides[0])}{strides[0]}
$$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + strides[1])}{strides[1]}
$$
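For example, with H_in = 5, ksize[0] = 3 and strides[0] = 2: "SAME" gives H_out = (5 + 2 - 1) / 2 = 3, while "VALID" gives H_out = (5 - 3 + 2) / 2 = 2.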
For ceil_mode = false:
$$
H_{out} = \\frac{(H_{in} - ksize[0] + pad_height_top + pad_height_bottom)}{strides[0]} + 1
$$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + pad_width_left + pad_width_right)}{strides[1]} + 1
$$
For ceil_mode = true:
$$
H_{out} = \\frac{(H_{in} - ksize[0] + pad_height_top + pad_height_bottom + strides[0] - 1)}{strides[0]} + 1
$$
$$
W_{out} = \\frac{(W_{in} - ksize[1] + pad_width_left + pad_width_right + strides[1] - 1)}{strides[1]} + 1
$$
For exclusive = false:
$$
hstart = i * strides[0] - pad_height_top
$$
$$
hend = hstart + ksize[0]
$$
$$
wstart = j * strides[1] - pad_width_left
$$
$$
wend = wstart + ksize[1]
......@@ -290,13 +342,13 @@ Example:
For exclusive = true:
$$
hstart = max(0, i * strides[0] - pad_height_top)
$$
$$
hend = min(H, hstart + ksize[0])
$$
$$
wstart = max(0, j * strides[1] - pad_width_left)
$$
$$
wend = min(W, wstart + ksize[1])
......@@ -319,13 +371,14 @@ class PoolOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput {
void Pool3dOpMaker::Make() {
AddInput("X",
"(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCDHW, where N is batch size, C is "
"The format of input tensor is NCDHW or NDHWC, where N is batch "
"size, C is "
"the number of channels, and D, H and W is the depth, height and "
"width of "
"the feature, respectively.");
AddOutput("Out",
"(Tensor) The output tensor of pooling operator."
"The format of output tensor is also NCDHW, "
"The format of output tensor is also NCDHW or NDHWC, "
"where N is batch size, C is "
"the number of channels, and D, H and W is the depth, height and "
"width of the feature, respectively.");
......@@ -355,8 +408,10 @@ void Pool3dOpMaker::Make() {
// TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>(
"paddings",
"(vector<int>, default {0,0,0}), paddings(depth, height, "
"width) of pooling operator. "
"(vector<int>, default {0,0,0}), paddings(pad_depth_front, "
"pad_depth_back, "
"pad_height_top, pad_height_bottom, pad_width_left, pad_width_right"
") of pooling operator. "
"If global_pooling = true, ksize and paddings will be ignored.")
.SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
......@@ -376,7 +431,7 @@ void Pool3dOpMaker::Make() {
AddAttr<bool>(
"use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn")
"(bool, default false) Only used in cudnn kernel, need install cudnn.")
.SetDefault(false);
AddAttr<bool>(
"ceil_mode",
......@@ -389,11 +444,17 @@ void Pool3dOpMaker::Make() {
.SetDefault(false);
AddAttr<std::string>(
"data_format",
"(string, default NCHW) Only used in "
"An optional string from: \"NHWC\", \"NCHW\". "
"Defaults to \"NHWC\". Specify the data format of the output data, "
"(string, default NCDHW) Only used in "
"An optional string from: \"NDHWC\", \"NCDHW\". "
"Defaults to \"NDHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ")
.SetDefault("AnyLayout");
.SetDefault("NCDHW");
AddAttr<std::string>(
"padding_algorithm",
"(string, default \"EXPLICIT\") An optional string from: \"EXPLICIT\","
"\"SAME\",\"VALID\". Set to \"EXPLICIT\" for explicit padding. "
"Set to \"SAME\" or \"VALID\" for algorithm of padding. ")
.SetDefault("EXPLICIT");
// TODO(dzhwinter): need to registered layout transform function
AddComment(R"DOC(
......@@ -401,7 +462,7 @@ Pool3d Operator.
The pooling3d operation calculates the output based on
the input, pooling_type, ksize, strides, and paddings parameters.
Input(X) and output(Out) are in NCDHW or NDHWC format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively. Parameters(ksize, strides, paddings)
are three elements. These three elements represent depth, height and
......@@ -412,42 +473,65 @@ Example:
X shape: $(N, C, D_{in}, H_{in}, W_{in})$
Output:
Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
For pool_padding = "SAME":
$$
D_{out} = \\frac{(D_{in} + strides[0] - 1)}{strides[0]}
$$
$$
H_{out} = \\frac{(H_{in} + strides[1] - 1)}{strides[1]}
$$
$$
W_{out} = \\frac{(W_{in} + strides[2] - 1)}{strides[2]}
$$
For pool_padding = "VALID":
$$
D_{out} = \\frac{(D_{in} - ksize[0] + strides[0])}{strides[0]}
$$
$$
H_{out} = \\frac{(H_{in} - ksize[1] + strides[1])}{strides[1]}
$$
$$
W_{out} = \\frac{(W_{in} - ksize[2] + strides[2])}{strides[2]}
$$
For ceil_mode = false:
$$
D_{out} = \\frac{(D_{in} - ksize[0] + pad_depth_front + pad_depth_back)}{strides[0]} + 1
$$
$$
H_{out} = \\frac{(H_{in} - ksize[1] + pad_height_top + pad_height_bottom)}{strides[1]} + 1
$$
$$
W_{out} = \\frac{(W_{in} - ksize[2] + pad_width_left + pad_width_right)}{strides[2]} + 1
$$
For ceil_mode = true:
$$
D_{out} = \\frac{(D_{in} - ksize[0] + pad_depth_front + pad_depth_back + strides[0] -1)}{strides[0]} + 1
$$
$$
H_{out} = \\frac{(H_{in} - ksize[1] + pad_height_top + pad_height_bottom + strides[1] -1)}{strides[1]} + 1
$$
$$
W_{out} = \\frac{(W_{in} - ksize[2] + pad_width_left + pad_width_right + strides[2] -1)}{strides[2]} + 1
$$
For exclusive = false:
$$
dstart = i * strides[0] - pad_depth_front
$$
$$
dend = dstart + ksize[0]
$$
$$
hstart = j * strides[1] - pad_height_top
$$
$$
hend = hstart + ksize[1]
$$
$$
wstart = k * strides[2] - pad_width_left
$$
$$
wend = wstart + ksize[2]
......@@ -458,16 +542,19 @@ Example:
For exclusive = true:
$$
dstart = max(0, i * strides[0] - pad_depth_front)
$$
$$
dend = min(D, dstart + ksize[0])
$$
$$
hstart = max(0, j * strides[1] - pad_height_top)
$$
$$
hend = min(H, hstart + ksize[1])
$$
$$
wstart = max(0, k * strides[2] - pad_width_left)
$$
$$
wend = min(W, wstart + ksize[2])
......
......@@ -14,13 +14,13 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/pooling.h"
namespace paddle {
namespace operators {
......@@ -57,6 +57,57 @@ class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override;
};
inline void UpdatePadding(std::vector<int>* paddings, const bool global_pooling,
const bool adaptive,
const std::string padding_algorithm,
const framework::DDim data_dims,
const std::vector<int>& strides,
const std::vector<int>& ksize) {
// set padding size == data_dims.size() * 2
auto data_shape = framework::vectorize<int>(data_dims);
if (paddings->size() == data_dims.size()) {
for (size_t i = 0; i < data_dims.size(); ++i) {
int copy_pad = *(paddings->begin() + 2 * i);
paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
}
} else {
PADDLE_ENFORCE_EQ(
data_dims.size() * 2, paddings->size(),
"Paddings size should be the same or twice as the pooling size.");
}
// when padding_desc is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (size_t i = 0; i < data_dims.size(); ++i) {
      int out_size = (data_dims[i] + strides[i] - 1) / strides[i];
int pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_shape[i], 0);
int pad_0 = pad_sum / 2;
int pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
  // if global_pooling == true or adaptive == true, paddings will be ignored
if (global_pooling || adaptive) {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
inline void UpdateKsize(std::vector<int>* ksize,
const framework::DDim data_dims) {
ksize->resize(static_cast<size_t>(data_dims.size()));
for (size_t i = 0; i < ksize->size(); ++i) {
*(ksize->begin() + i) = static_cast<int>(data_dims[i]);
}
}
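For intuition, here is a small framework-free sketch of the "SAME" rule implemented by UpdatePadding (the standalone form and names are illustrative, not Paddle API): each spatial dimension receives just enough total padding that out_size == ceil(in_size / stride), with the odd pixel going to the trailing side.

#include <algorithm>
#include <cassert>
#include <vector>

// Illustrative re-statement of the "SAME" branch above on plain vectors.
std::vector<int> SamePaddings(const std::vector<int>& dims,
                              const std::vector<int>& strides,
                              const std::vector<int>& ksize) {
  std::vector<int> pads(dims.size() * 2, 0);  // {pad_0, pad_1} per dimension
  for (size_t i = 0; i < dims.size(); ++i) {
    int out_size = (dims[i] + strides[i] - 1) / strides[i];
    int pad_sum = std::max((out_size - 1) * strides[i] + ksize[i] - dims[i], 0);
    pads[2 * i] = pad_sum / 2;
    pads[2 * i + 1] = pad_sum - pad_sum / 2;
  }
  return pads;
}

int main() {
  // H = W = 5, kernel 3, stride 2: out_size = 3, pad_sum = 2, one pixel per side.
  assert((SamePaddings({5, 5}, {2, 2}, {3, 3}) == std::vector<int>{1, 1, 1, 1}));
  return 0;
}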
template <typename DeviceContext, typename T>
class PoolKernel : public framework::OpKernel<T> {
......@@ -69,14 +120,36 @@ class PoolKernel : public framework::OpKernel<T> {
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::string data_format = context.Attr<std::string>("data_format");
bool exclusive = context.Attr<bool>("exclusive");
bool adaptive = context.Attr<bool>("adaptive");
if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
bool global_pooling = context.Attr<bool>("global_pooling");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
// update paddings
auto in_x_dims = in_x->dims();
framework::DDim data_dims;
if (channel_last) {
data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == paddings.size()) {
for (size_t i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
auto& dev_ctx = context.template device_context<DeviceContext>();
switch (ksize.size()) {
case 2: {
......@@ -85,16 +158,16 @@ class PoolKernel : public framework::OpKernel<T> {
DeviceContext, paddle::operators::math::MaxPool<T>, T>
pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
true, false, out);
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
pool_process, true, false, out);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dFunctor<
DeviceContext, paddle::operators::math::AvgPool<T>, T>
pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
exclusive, adaptive, out);
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
pool_process, exclusive, adaptive, out);
}
} break;
case 3: {
......@@ -103,15 +176,16 @@ class PoolKernel : public framework::OpKernel<T> {
DeviceContext, paddle::operators::math::MaxPool<T>, T>
pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
true, false, out);
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
pool_process, true, false, out);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor<
DeviceContext, paddle::operators::math::AvgPool<T>, T>
pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
exclusive, adaptive, out);
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, data_format,
pool_process, exclusive, adaptive, out);
}
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......@@ -135,13 +209,33 @@ class PoolGradKernel : public framework::OpKernel<T> {
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool exclusive = context.Attr<bool>("exclusive");
bool adaptive = context.Attr<bool>("adaptive");
std::string data_format = context.Attr<std::string>("data_format");
bool global_pooling = context.Attr<bool>("global_pooling");
std::string padding_algorithm =
context.Attr<std::string>("padding_algorithm");
const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_x->dims()[i + 2]);
// update paddings
auto in_x_dims = in_x->dims();
framework::DDim data_dims;
if (channel_last) {
data_dims = framework::slice_ddim(in_x_dims, 1, in_x_dims.size() - 1);
} else {
data_dims = framework::slice_ddim(in_x_dims, 2, in_x_dims.size());
}
UpdatePadding(&paddings, global_pooling, adaptive, padding_algorithm,
data_dims, strides, ksize);
if (data_dims.size() * 2 == paddings.size()) {
for (size_t i = 0; i < data_dims.size(); ++i) {
paddings.erase(paddings.begin() + i + 1);
}
}
if (global_pooling) {
UpdateKsize(&ksize, data_dims);
}
auto& dev_ctx = context.template device_context<DeviceContext>();
if (in_x_grad) {
in_x_grad->mutable_data<T>(context.GetPlace());
......@@ -154,15 +248,15 @@ class PoolGradKernel : public framework::OpKernel<T> {
paddle::operators::math::MaxPool2dGradFunctor<DeviceContext, T>
pool2d_backward;
pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, in_x_grad);
paddings, data_format, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool2dGradFunctor<
DeviceContext, paddle::operators::math::AvgPoolGrad<T>, T>
pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, pool_process, exclusive, adaptive,
in_x_grad);
paddings, data_format, pool_process, exclusive,
adaptive, in_x_grad);
}
} break;
case 3: {
......@@ -170,15 +264,15 @@ class PoolGradKernel : public framework::OpKernel<T> {
paddle::operators::math::MaxPool3dGradFunctor<DeviceContext, T>
pool3d_backward;
pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, in_x_grad);
paddings, data_format, in_x_grad);
} else if (pooling_type == "avg") {
paddle::operators::math::Pool3dGradFunctor<
DeviceContext, paddle::operators::math::AvgPoolGrad<T>, T>
pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, pool_process, exclusive, adaptive,
in_x_grad);
paddings, data_format, pool_process, exclusive,
adaptive, in_x_grad);
}
} break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......
......@@ -72,6 +72,7 @@ enum class DataLayout { // Not use
kNHWC,
kNCHW,
kNCDHW,
  kNDHWC,
kNCHW_VECT_C,
};
......@@ -212,6 +213,8 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
return CUDNN_TENSOR_NCHW;
case DataLayout::kNCDHW:
return CUDNN_TENSOR_NCHW; // NOTE: cudnn treat NdTensor as the same
case DataLayout::kNDHWC:
      return CUDNN_TENSOR_NHWC;
default:
PADDLE_THROW("Unknown cudnn equivalent for order");
}
......@@ -238,14 +241,31 @@ class ScopedTensorDescriptor {
strides[i] = dims[i + 1] * strides[i + 1];
}
// Update tensor descriptor dims setting if groups > 1
// NOTE: Assume using NCHW or NCDHW order
std::vector<int> dims_with_group(dims.begin(), dims.end()); // copy
// NOTE: Here, assume NCHW or NCDHW order
std::vector<int> dims_with_group(dims.begin(), dims.end());
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
if (dims.size() == 4) {
if (format == CUDNN_TENSOR_NCHW) {
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
desc_, type, dims_with_group.size(), dims_with_group.data(),
strides.data()));
} else { // CUDNN_TENSOR_NHWC
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensor4dDescriptor(
desc_, format, type, dims[0], dims[3], dims[1], dims[2]));
}
} else if (dims.size() == 5) {
if (format == CUDNN_TENSOR_NCHW) {
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
desc_, type, dims_with_group.size(), dims_with_group.data(),
strides.data()));
} else { // CUDNN_TENSOR_NHWC
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptorEx(
desc_, format, type, dims.size(), dims.data()));
}
}
return desc_;
}
......
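The descriptor hunk above computes plain row-major strides, which describe NCHW/NCDHW memory; for the channel-last formats it instead passes an explicit CUDNN_TENSOR_NHWC format to cudnnSetTensor4dDescriptor (4-D) or to the newly loaded cudnnSetTensorNdDescriptorEx (5-D). A quick sketch of the stride computation, for intuition:

def row_major_strides(dims):
    strides = [1] * len(dims)
    for i in range(len(dims) - 2, -1, -1):
        strides[i] = dims[i + 1] * strides[i + 1]
    return strides

print(row_major_strides([2, 3, 5, 5]))  # [75, 25, 5, 1], i.e. NCHW memory;
# an NHWC tensor cannot be described by these strides, hence the
# format-aware setter calls in the branch above.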
......@@ -126,7 +126,8 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
__macro(cudnnRNNBackwardWeights); \
__macro(cudnnRNNForwardInference); \
__macro(cudnnDestroyDropoutDescriptor); \
__macro(cudnnDestroyRNNDescriptor);
__macro(cudnnDestroyRNNDescriptor); \
__macro(cudnnSetTensorNdDescriptorEx);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
......
......@@ -2871,15 +2871,16 @@ def pool2d(input,
use_cudnn=True,
ceil_mode=False,
name=None,
exclusive=True):
exclusive=True,
data_format="NCHW"):
"""
${comment}
Args:
input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the
feature, and W is the width of the feature.
input tensor is `"NCHW"` or `"NHWC"`, where `N` is batch size, `C` is
the number of channels, `H` is the height of the
feature, and `W` is the width of the feature.
pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int.
......@@ -2887,8 +2888,13 @@ def pool2d(input,
pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int.
pool_padding (int|list|tuple): The pool padding size. If pool padding size is a tuple,
it must contain two integers, (pool_padding_on_Height, pool_padding_on_Width).
pool_padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, it must be
'VALID' or 'SAME', which specifies the padding algorithm. If pool padding is a tuple or list,
it can take three forms: `[pad_height, pad_width]`,
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, or, when `data_format` is `"NCHW"`,
`[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`; when
`data_format` is `"NHWC"`, the nested form is
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Otherwise, the pool padding will be a square of an int.
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
......@@ -2896,55 +2902,125 @@ def pool2d(input,
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
exclusive (bool): Whether to exclude padding points in average pooling
mode, default is true
mode, default is `True`.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
Returns:
Variable: The pooling result.
Raises:
ValueError: If 'pool_type' is not "max" nor "avg"
ValueError: If 'global_pooling' is False and 'pool_size' is -1
ValueError: If 'use_cudnn' is not a bool value.
ValueError: If `pool_type` is not "max" nor "avg"
ValueError: If `global_pooling` is False and `pool_size` is -1
ValueError: If `use_cudnn` is not a bool value.
Examples:
.. code-block:: python
import paddle.fluid as fluid
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
pool2d = fluid.layers.pool2d(
input=data,
pool_size=2,
pool_type='max',
pool_stride=1,
global_pooling=False)
name='data', shape=[10, 3, 32, 32], append_batch_size=False, dtype='float32')
# example 1:
# Attr(pool_padding) is a list with 4 elements, Attr(data_format) is "NCHW".
out_1 = fluid.layers.pool2d(
input = data,
pool_size = 3,
pool_type = "avg",
pool_stride = 1,
pool_padding = [1, 2, 1, 0],
data_format = "NCHW")
# example 2:
# Attr(pool_padding) is a string, Attr(data_format) is "NCHW".
out_2 = fluid.layers.pool2d(
input = data,
pool_size = 3,
pool_type = "avg",
pool_stride = 1,
pool_padding = "VALID",
data_format = "NCHW")
"""
if pool_type not in ["max", "avg"]:
raise ValueError(
"Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
"Unknown Attr(pool_type): '%s'. It can only be 'max' or 'avg'.",
str(pool_type))
if global_pooling is False and pool_size == -1:
raise ValueError(
"When the global_pooling is False, pool_size must be passed "
"and be a valid value. Received pool_size: " + str(pool_size))
"When Attr(global_pooling) is False, Attr(pool_size) must be passed "
"and be a valid value. Received pool_size: %s." % str(pool_size))
if not isinstance(use_cudnn, bool):
raise ValueError("Attr(use_cudnn) should be True or False. Received "
"Attr(use_cudnn): %s." % str(use_cudnn))
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format))
pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
pool_padding = utils.convert_to_list(pool_padding, 2, 'pool_padding')
pool_stride = utils.convert_to_list(pool_stride, 2, 'pool_stride')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
def update_padding(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
l_type = 'pool2d'
if is_list_or_tuple(padding) and len(padding) == 4:
if is_list_or_tuple(padding[0]) and (data_format == "NCHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[2:4]
padding = [ele for a_list in padding for ele in a_list]
elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"):
if not (padding[0] == [0, 0] and padding[3] == [0, 0]):
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[1:3]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 4, 'padding')
helper = LayerHelper(l_type, **locals())
else:
padding = utils.convert_to_list(padding, 2, 'padding')
return padding
padding_algorithm = "EXPLICIT"
if isinstance(pool_padding, str):
pool_padding = pool_padding.upper()
if pool_padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(pool_padding))
if pool_padding == "VALID":
padding_algorithm = "VALID"
pool_padding = [0, 0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif pool_padding == "SAME":
padding_algorithm = "SAME"
pool_padding = [0, 0, 0, 0]
pool_padding = update_padding(pool_padding, data_format)
op_type = 'pool2d'
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type=l_type,
type=op_type,
inputs={"X": input},
outputs={"Out": pool_out},
attrs={
......@@ -2953,10 +3029,12 @@ def pool2d(input,
"global_pooling": global_pooling,
"strides": pool_stride,
"paddings": pool_padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": use_cudnn,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": exclusive,
"data_format": data_format,
})
return pool_out
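For an NCHW input the three accepted `pool_padding` spellings are equivalent; `update_padding` reduces each of them to the same flat attribute before `append_op`. A usage sketch (shapes chosen arbitrarily):

import paddle.fluid as fluid

data = fluid.layers.data(
    name='x', shape=[2, 3, 8, 8], append_batch_size=False, dtype='float32')
# All three spell pad_top = pad_bottom = 1, pad_left = pad_right = 2:
a = fluid.layers.pool2d(data, pool_size=3, pool_padding=[1, 2])
b = fluid.layers.pool2d(data, pool_size=3, pool_padding=[1, 1, 2, 2])
c = fluid.layers.pool2d(
    data, pool_size=3, pool_padding=[[0, 0], [0, 0], [1, 1], [2, 2]])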
......@@ -2972,30 +3050,43 @@ def pool3d(input,
use_cudnn=True,
ceil_mode=False,
name=None,
exclusive=True):
exclusive=True,
data_format="NCDHW"):
"""
${comment}
Args:
input (Variable): The input tensor of pooling operator. The format of
input tensor is NCDHW, where N is batch size, C is
the number of channels, D is the depth of the feature,
H is the height of the feature, and W is the width
input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is
the number of channels, `D` is the depth of the feature,
`H` is the height of the feature, and `W` is the width
of the feature.
pool_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
pool_type (string): ${pooling_type_comment}
pool_stride (int): stride of the pooling layer.
pool_padding (int): padding size.
pool_stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int.
pool_padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, it must be
'VALID' or 'SAME', which specifies the padding algorithm. If pool padding is a tuple or list,
it can take three forms: `[pad_depth, pad_height, pad_width]`,
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
or, when `data_format` is `"NCDHW"`,
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`;
when `data_format` is `"NDHWC"`, the nested form is
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
global_pooling (bool): ${global_pooling_comment}
use_cudnn (bool): ${use_cudnn_comment}
ceil_mode (bool): ${ceil_mode_comment}
name (str): A name for this layer(optional). If set None, the layer
will be named automatically.
exclusive (bool): Whether to exclude padding points in average pooling
mode, default is true
mode, default is `True`.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
Returns:
Variable: output of pool3d layer.
......@@ -3005,39 +3096,114 @@ def pool3d(input,
.. code-block:: python
import paddle.fluid as fluid
data = fluid.layers.data(
name='data', shape=[3, 32, 32, 32], dtype='float32')
pool3d = fluid.layers.pool3d(
input=data,
pool_size=2,
pool_type='max',
pool_stride=1,
global_pooling=False)
name='data', shape=[10, 3, 32, 32, 32], append_batch_size=False, dtype='float32')
# example 1:
# Attr(pool_padding) is a list with 6 elements, Attr(data_format) is "NCDHW".
out_1 = fluid.layers.pool3d(
input = data,
pool_size = 2,
pool_type = "avg",
pool_stride = 1,
pool_padding = [1, 2, 1, 0, 1, 2],
global_pooling = False,
data_format = "NCDHW")
# example 2:
# Attr(pool_padding) is a string, Attr(data_format) is "NCDHW".
out_2 = fluid.layers.pool3d(
input = data,
pool_size = 3,
pool_type = "avg",
pool_stride = 1,
pool_padding = "VALID",
global_pooling = False,
data_format = "NCDHW")
"""
if pool_type not in ["max", "avg"]:
raise ValueError(
"Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
"Unknown Attr(pool_type): '%s'. It can only be 'max' or 'avg'.",
str(pool_type))
if global_pooling is False and pool_size == -1:
raise ValueError(
"When the global_pooling is False, pool_size must be passed "
"and be a valid value. Received pool_size: " + str(pool_size))
"When Attr(global_pooling) is False, Attr(pool_size) must be passed "
"and be a valid value. Received Attr(pool_size): %s." %
str(pool_size))
if not isinstance(use_cudnn, bool):
raise ValueError("Attr(use_cudnn) should be True or False. Received "
"Attr(use_cudnn): %s. " % str(use_cudnn))
if data_format not in ["NCDHW", "NDHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
pool_size = utils.convert_to_list(pool_size, 3, 'pool_size')
pool_padding = utils.convert_to_list(pool_padding, 3, 'pool_padding')
pool_stride = utils.convert_to_list(pool_stride, 3, 'pool_stride')
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
def update_padding(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, (list, tuple)):
return True
return False
l_type = "pool3d"
helper = LayerHelper(l_type, **locals())
if is_list_or_tuple(padding) and len(padding) == 5:
if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[2:5]
padding = [ele for a_list in padding for ele in a_list]
elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"):
if not (padding[0] == [0, 0] and padding[4] == [0, 0]):
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[1:4]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 6, 'padding')
elif is_list_or_tuple(padding) and len(padding) == 6:
padding = utils.convert_to_list(padding, 6, 'padding')
else:
padding = utils.convert_to_list(padding, 3, 'padding')
return padding
padding_algorithm = "EXPLICIT"
if isinstance(pool_padding, str):
pool_padding = pool_padding.upper()
if pool_padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(pool_padding))
if pool_padding == "VALID":
padding_algorithm = "VALID"
pool_padding = [0, 0, 0, 0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", ceil_mode must be False. "
"Received ceil_mode: True.")
elif pool_padding == "SAME":
padding_algorithm = "SAME"
pool_padding = [0, 0, 0, 0, 0, 0]
pool_padding = update_padding(pool_padding, data_format)
op_type = "pool3d"
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type=l_type,
type=op_type,
inputs={"X": input},
outputs={"Out": pool_out},
attrs={
......@@ -3046,10 +3212,12 @@ def pool3d(input,
"global_pooling": global_pooling,
"strides": pool_stride,
"paddings": pool_padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": use_cudnn,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": exclusive,
"data_format": data_format,
})
return pool_out
......
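pool3d applies the same normalization with a depth axis added; in the 5-D nested form the batch and channel pairs must be [0, 0] and are stripped by update_padding. A usage sketch for a channel-last input:

import paddle.fluid as fluid

vol = fluid.layers.data(
    name='vol', shape=[2, 8, 8, 8, 3], append_batch_size=False, dtype='float32')
out = fluid.layers.pool3d(
    vol,
    pool_size=3,
    pool_padding=[[0, 0], [1, 0], [1, 1], [2, 2], [0, 0]],  # -> [1, 0, 1, 1, 2, 2]
    data_format="NDHWC")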
......@@ -20,6 +20,7 @@ import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
import paddle.fluid as fluid
def adaptive_start_index(index, input_size, output_size):
......@@ -118,26 +119,139 @@ def avg_pool2D_forward_naive(x,
return out
def pool2D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=True,
adaptive=False,
data_format='NCHW',
pool_type="max",
padding_algorithm="EXPLICIT"):
# update paddings
def _get_padding_with_SAME(input_shape, pool_size, pool_stride):
padding = []
for input_size, filter_size, stride_size in zip(input_shape, pool_size,
pool_stride):
out_size = int((input_size + stride_size - 1) / stride_size)
pad_sum = np.max((
(out_size - 1) * stride_size + filter_size - input_size, 0))
pad_0 = int(pad_sum / 2)
pad_1 = int(pad_sum - pad_0)
padding.append(pad_0)
padding.append(pad_1)
return padding
if isinstance(padding_algorithm, str):
padding_algorithm = padding_algorithm.upper()
if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]:
raise ValueError("Unknown Attr(padding_algorithm): '%s'. "
"It can only be 'SAME' or 'VALID'." %
str(padding_algorithm))
if padding_algorithm == "VALID":
paddings = [0, 0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode)"
" must be False. "
"Received ceil_mode: True.")
elif padding_algorithm == "SAME":
input_data_shape = []
if data_format == "NCHW":
input_data_shape = x.shape[2:4]
elif data_format == "NHWC":
input_data_shape = x.shape[1:3]
paddings = _get_padding_with_SAME(input_data_shape, ksize, strides)
assert len(paddings) == 2 or len(paddings) == 4
is_sys = True if len(paddings) == 2 else False
N = x.shape[0]
C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \
else [x.shape[3], x.shape[1], x.shape[2]]
if global_pool == 1:
ksize = [H, W]
paddings = [0 for _ in range(len(paddings))]
pad_h_up = paddings[0] if is_sys else paddings[0]
pad_h_down = paddings[0] if is_sys else paddings[1]
pad_w_left = paddings[1] if is_sys else paddings[2]
pad_w_right = paddings[1] if is_sys else paddings[3]
if adaptive:
H_out, W_out = ksize
else:
H_out = (H - ksize[0] + pad_h_up + pad_h_down + strides[0] - 1) // strides[0] + 1 \
if ceil_mode else (H - ksize[0] + pad_h_up + pad_h_down) // strides[0] + 1
W_out = (W - ksize[1] + pad_w_left + pad_w_right + strides[1] - 1) // strides[1] + 1 \
if ceil_mode else (W - ksize[1] + pad_w_left + pad_w_right) // strides[1] + 1
out = np.zeros((N, C, H_out, W_out)) if data_format == 'NCHW' \
else np.zeros((N, H_out, W_out, C))
for i in range(H_out):
if adaptive:
in_h_start = adaptive_start_index(i, H, ksize[0])
in_h_end = adaptive_end_index(i, H, ksize[0])
else:
in_h_start = np.max((i * strides[0] - pad_h_up, 0))
in_h_end = np.min((i * strides[0] + ksize[0] - pad_h_up, H))
for j in range(W_out):
if adaptive:
in_w_start = adaptive_start_index(j, W, ksize[1])
in_w_end = adaptive_end_index(j, W, ksize[1])
else:
in_w_start = np.max((j * strides[1] - pad_w_left, 0))
in_w_end = np.min((j * strides[1] + ksize[1] - pad_w_left, W))
if data_format == 'NCHW':
x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end]
if pool_type == 'avg':
field_size = ((in_h_end - in_h_start) * (in_w_end - in_w_start)) \
if (exclusive or adaptive) else (ksize[0] * ksize[1])
out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size
elif pool_type == 'max':
out[:, :, i, j] = np.max(x_masked, axis=(2, 3))
elif data_format == 'NHWC':
x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :]
if pool_type == 'avg':
field_size = ((in_h_end - in_h_start) * (in_w_end - in_w_start)) \
if (exclusive or adaptive) else (ksize[0] * ksize[1])
out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size
elif pool_type == 'max':
out[:, i, j, :] = np.max(x_masked, axis=(1, 2))
return out
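A quick numeric check of _get_padding_with_SAME and the output-size formula used above:

H, k, s = 5, 3, 2
out = (H + s - 1) // s                               # SAME keeps out = ceil(5/2) = 3
pad_sum = max((out - 1) * s + k - H, 0)              # (3-1)*2 + 3 - 5 = 2
pad_0, pad_1 = pad_sum // 2, pad_sum - pad_sum // 2  # 1, 1
H_out = (H - k + pad_0 + pad_1) // s + 1             # (5 - 3 + 2) // 2 + 1 = 3
assert H_out == out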
class TestPool2D_Op(OpTest):
def setUp(self):
self.op_type = "pool2d"
self.use_cudnn = False
self.init_kernel_type()
self.use_mkldnn = False
self.init_data_type()
self.init_test_case()
self.padding_algorithm = "EXPLICIT"
self.init_paddings()
self.init_global_pool()
self.init_kernel_type()
self.init_pool_type()
self.init_ceil_mode()
self.init_exclusive()
self.init_adaptive()
if self.global_pool:
self.paddings = [0 for _ in range(len(self.paddings))]
self.init_data_format()
self.init_shape()
input = np.random.random(self.shape).astype(self.dtype)
output = (self.pool2D_forward_naive(
output = pool2D_forward_naive(
input, self.ksize, self.strides, self.paddings, self.global_pool,
self.ceil_mode, self.exclusive, self.adaptive,
self.dtype)).astype(self.dtype)
self.ceil_mode, self.exclusive, self.adaptive, self.data_format,
self.pool_type, self.padding_algorithm).astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
self.attrs = {
......@@ -149,10 +263,10 @@ class TestPool2D_Op(OpTest):
'use_cudnn': self.use_cudnn,
'use_mkldnn': self.use_mkldnn,
'ceil_mode': self.ceil_mode,
'data_format':
'AnyLayout', # TODO(dzhwinter) : should be fix latter
'data_format': self.data_format,
'exclusive': self.exclusive,
'adaptive': self.adaptive
'adaptive': self.adaptive,
"padding_algorithm": self.padding_algorithm,
}
self.outputs = {'Out': output}
......@@ -177,14 +291,22 @@ class TestPool2D_Op(OpTest):
elif self.pool_type != "max":
self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
def init_test_case(self):
def init_data_format(self):
self.data_format = "NCHW"
def init_shape(self):
self.shape = [2, 3, 5, 5]
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
def init_paddings(self):
self.paddings = [0, 0]
self.padding_algorithm = "EXPLICIT"
def init_kernel_type(self):
pass
self.use_cudnn = False
def init_data_type(self):
self.dtype = np.float32
......@@ -208,9 +330,10 @@ class TestPool2D_Op(OpTest):
class TestCase1(TestPool2D_Op):
def init_test_case(self):
self.shape = [2, 3, 7, 7]
self.ksize = [3, 3]
self.strides = [1, 1]
def init_paddings(self):
self.paddings = [0, 0]
def init_pool_type(self):
......@@ -220,12 +343,16 @@ class TestCase1(TestPool2D_Op):
def init_global_pool(self):
self.global_pool = False
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestCase2(TestPool2D_Op):
def init_test_case(self):
self.shape = [2, 3, 7, 7]
self.ksize = [3, 3]
self.strides = [1, 1]
def init_paddings(self):
self.paddings = [1, 1]
def init_pool_type(self):
......@@ -235,6 +362,9 @@ class TestCase2(TestPool2D_Op):
def init_global_pool(self):
self.global_pool = False
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestCase3(TestPool2D_Op):
def init_pool_type(self):
......@@ -366,5 +496,715 @@ class TestAvgPoolAdaptive(TestCase1):
self.adaptive = True
#-------test pool2d with asymmetric padding-----
class TestPool2D_AsyPadding(TestPool2D_Op):
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 0, 1, 2]
def init_shape(self):
self.shape = [2, 3, 5, 5]
class TestCase1_AsyPadding(TestCase1):
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 0, 1, 0]
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestCase2_AsyPadding(TestCase2):
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 2, 1, 2]
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestCase3_AsyPadding(TestCase3):
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 0, 1, 2]
def init_shape(self):
self.shape = [2, 3, 5, 5]
class TestCase4_AsyPadding(TestCase4):
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 0, 1, 0]
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestCase5_AsyPadding((TestCase5)):
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [2, 2, 1, 2]
def init_shape(self):
self.shape = [2, 3, 7, 7]
create_test_cudnn_class(TestPool2D_AsyPadding)
create_test_cudnn_class(TestCase1_AsyPadding)
create_test_cudnn_class(TestCase2_AsyPadding)
create_test_cudnn_class(TestCase3_AsyPadding)
create_test_cudnn_class(TestCase4_AsyPadding)
create_test_cudnn_class(TestCase5_AsyPadding)
create_test_cudnn_fp16_class(TestPool2D_AsyPadding)
create_test_cudnn_fp16_class(TestCase1_AsyPadding, check_grad=False)
create_test_cudnn_fp16_class(TestCase2_AsyPadding)
create_test_cudnn_fp16_class(TestCase3_AsyPadding)
create_test_cudnn_fp16_class(TestCase4_AsyPadding)
create_test_cudnn_fp16_class(TestCase5_AsyPadding)
create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding)
create_test_cudnn_use_ceil_class(TestCase1_AsyPadding)
create_test_use_ceil_class(TestCase1_AsyPadding)
create_test_use_ceil_class(TestCase2_AsyPadding)
class TestAvgInclude_AsyPadding(TestCase2):
def init_exclusive(self):
self.exclusive = False
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 2, 1, 2]
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestCUDNNAvgInclude_AsyPadding(TestCase2):
def init_kernel_type(self):
self.use_cudnn = True
def init_exclusive(self):
self.exclusive = False
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [2, 1, 1, 1]
def init_shape(self):
self.shape = [2, 3, 7, 7]
class TestAvgPoolAdaptive_AsyPadding(TestCase1):
def init_adaptive(self):
self.adaptive = True
def init_test_case(self):
self.ksize = [3, 3]
self.strides = [1, 1]
self.paddings = [1, 1, 0, 2]
def init_shape(self):
self.shape = [2, 3, 7, 7]
#----------- test channel_last --------------
class TestPool2D_channel_last(TestPool2D_Op):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 5, 5, 3]
class TestCase1_channel_last(TestCase1):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCase2_channel_last(TestCase2):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCase3_channel_last(TestCase3):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 5, 5, 3]
class TestCase4_channel_last(TestCase4):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCase5_channel_last(TestCase5):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
create_test_cudnn_class(TestPool2D_channel_last)
create_test_cudnn_class(TestCase1_channel_last)
create_test_cudnn_class(TestCase2_channel_last)
create_test_cudnn_class(TestCase3_channel_last)
create_test_cudnn_class(TestCase4_channel_last)
create_test_cudnn_class(TestCase5_channel_last)
create_test_cudnn_fp16_class(TestPool2D_channel_last)
create_test_cudnn_fp16_class(TestCase1_channel_last, check_grad=False)
create_test_cudnn_fp16_class(TestCase2_channel_last)
create_test_cudnn_fp16_class(TestCase3_channel_last)
create_test_cudnn_fp16_class(TestCase4_channel_last)
create_test_cudnn_fp16_class(TestCase5_channel_last)
create_test_cudnn_use_ceil_class(TestPool2D_channel_last)
create_test_cudnn_use_ceil_class(TestCase1_channel_last)
create_test_use_ceil_class(TestCase1_channel_last)
create_test_use_ceil_class(TestCase2_channel_last)
class TestCase5_Max(TestCase2):
def init_pool_type(self):
self.pool_type = "max"
def test_check_grad(self):
if self.dtype == np.float16:
return
if self.has_cudnn() and self.pool_type == "max":
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, set(['X']), 'Out', max_relative_error=1.00)
elif self.pool_type == "max":
self.check_grad(set(['X']), 'Out', max_relative_error=1.00)
class TestCase5_channel_last_Max(TestCase5_Max):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
create_test_cudnn_class(TestCase5_Max)
create_test_cudnn_class(TestCase5_channel_last_Max)
class TestAvgInclude_channel_last(TestCase2_channel_last):
def init_exclusive(self):
self.exclusive = False
class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last):
def init_kernel_type(self):
self.use_cudnn = True
def init_exclusive(self):
self.exclusive = False
class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last):
def init_adaptive(self):
self.adaptive = True
class TestPool2D_AsyPadding_channel_last(TestPool2D_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 5, 5, 3]
class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 5, 5, 3]
class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
create_test_cudnn_class(TestPool2D_AsyPadding_channel_last)
create_test_cudnn_class(TestCase1_AsyPadding_channel_last)
create_test_cudnn_class(TestCase2_AsyPadding_channel_last)
create_test_cudnn_class(TestCase3_AsyPadding_channel_last)
create_test_cudnn_class(TestCase4_AsyPadding_channel_last)
create_test_cudnn_class(TestCase5_AsyPadding_channel_last)
create_test_cudnn_fp16_class(TestPool2D_AsyPadding_channel_last)
create_test_cudnn_fp16_class(
TestCase1_AsyPadding_channel_last, check_grad=False)
create_test_cudnn_fp16_class(TestCase2_AsyPadding_channel_last)
create_test_cudnn_fp16_class(TestCase3_AsyPadding_channel_last)
create_test_cudnn_fp16_class(TestCase4_AsyPadding_channel_last)
create_test_cudnn_fp16_class(TestCase5_AsyPadding_channel_last)
create_test_cudnn_use_ceil_class(TestPool2D_AsyPadding_channel_last)
create_test_cudnn_use_ceil_class(TestCase1_AsyPadding_channel_last)
create_test_use_ceil_class(TestCase1_AsyPadding_channel_last)
create_test_use_ceil_class(TestCase2_AsyPadding_channel_last)
class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestCUDNNAvgInclude_AsyPadding_channel_last(
TestCUDNNAvgInclude_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
class TestAvgPoolAdaptive_AsyPadding_channel_last(
TestAvgPoolAdaptive_AsyPadding):
def init_data_format(self):
self.data_format = "NHWC"
def init_shape(self):
self.shape = [2, 7, 7, 3]
# test paddings: SAME VALID
def create_test_padding_SAME_class(parent):
class TestPaddingSAMECase(parent):
def init_paddings(self):
self.paddings = [0, 0]
self.padding_algorithm = "SAME"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
TestPaddingSAMECase.__name__ = cls_name
globals()[cls_name] = TestPaddingSAMECase
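All of the create_test_* helpers in this file follow the same pattern: subclass the parent, override one or two init_* hooks, rename the class, and publish it in globals() so unittest discovery picks up the variant. A generic sketch of the idiom (the helper name is hypothetical):

def make_padding_variant(parent, algorithm):
    class Variant(parent):
        def init_paddings(self):
            self.paddings = [0, 0]
            self.padding_algorithm = algorithm

    Variant.__name__ = "{0}_Padding{1}Op".format(parent.__name__, algorithm)
    globals()[Variant.__name__] = Variant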
create_test_padding_SAME_class(TestPool2D_Op)
create_test_padding_SAME_class(TestCase1)
create_test_padding_SAME_class(TestCase2)
create_test_padding_SAME_class(TestCase3)
create_test_padding_SAME_class(TestCase4)
create_test_padding_SAME_class(TestCase5)
create_test_padding_SAME_class(TestPool2D_channel_last)
create_test_padding_SAME_class(TestCase1_channel_last)
create_test_padding_SAME_class(TestCase2_channel_last)
create_test_padding_SAME_class(TestCase3_channel_last)
create_test_padding_SAME_class(TestCase4_channel_last)
create_test_padding_SAME_class(TestCase5_channel_last)
def create_test_cudnn_padding_SAME_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNPaddingSAMECase(parent):
def init_kernel_type(self):
self.use_cudnn = True
def init_paddings(self):
self.paddings = [1, 1]
self.padding_algorithm = "SAME"
cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp")
TestCUDNNPaddingSAMECase.__name__ = cls_name
globals()[cls_name] = TestCUDNNPaddingSAMECase
create_test_cudnn_padding_SAME_class(TestPool2D_Op)
create_test_cudnn_padding_SAME_class(TestCase1)
create_test_cudnn_padding_SAME_class(TestCase2)
create_test_cudnn_padding_SAME_class(TestCase3)
create_test_cudnn_padding_SAME_class(TestCase4)
create_test_cudnn_padding_SAME_class(TestCase5)
create_test_cudnn_padding_SAME_class(TestPool2D_channel_last)
create_test_cudnn_padding_SAME_class(TestCase1_channel_last)
create_test_cudnn_padding_SAME_class(TestCase2_channel_last)
create_test_cudnn_padding_SAME_class(TestCase3_channel_last)
create_test_cudnn_padding_SAME_class(TestCase4_channel_last)
create_test_cudnn_padding_SAME_class(TestCase5_channel_last)
def create_test_padding_VALID_class(parent):
class TestPaddingVALIDCase(parent):
def init_paddings(self):
self.paddings = [1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
TestPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestPaddingVALIDCase
create_test_padding_VALID_class(TestPool2D_Op)
create_test_padding_VALID_class(TestCase1)
create_test_padding_VALID_class(TestCase2)
create_test_padding_VALID_class(TestCase3)
create_test_padding_VALID_class(TestCase4)
create_test_padding_VALID_class(TestCase5)
create_test_padding_VALID_class(TestPool2D_channel_last)
create_test_padding_VALID_class(TestCase1_channel_last)
create_test_padding_VALID_class(TestCase2_channel_last)
create_test_padding_VALID_class(TestCase3_channel_last)
create_test_padding_VALID_class(TestCase4_channel_last)
create_test_padding_VALID_class(TestCase5_channel_last)
def create_test_cudnn_padding_VALID_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNPaddingVALIDCase(parent):
def init_kernel_type(self):
self.use_cudnn = True
def init_paddings(self):
self.paddings = [1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp")
TestCUDNNPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestCUDNNPaddingVALIDCase
create_test_cudnn_padding_VALID_class(TestPool2D_Op)
create_test_cudnn_padding_VALID_class(TestCase1)
create_test_cudnn_padding_VALID_class(TestCase2)
create_test_cudnn_padding_VALID_class(TestCase3)
create_test_cudnn_padding_VALID_class(TestCase4)
create_test_cudnn_padding_VALID_class(TestCase5)
create_test_cudnn_padding_VALID_class(TestPool2D_channel_last)
create_test_cudnn_padding_VALID_class(TestCase1_channel_last)
create_test_cudnn_padding_VALID_class(TestCase2_channel_last)
create_test_cudnn_padding_VALID_class(TestCase3_channel_last)
create_test_cudnn_padding_VALID_class(TestCase4_channel_last)
create_test_cudnn_padding_VALID_class(TestCase5_channel_last)
# ----- test API
class TestPool2dAPI(OpTest):
def test_api(self):
x_NHWC = np.random.random([2, 5, 5, 3]).astype("float32")
x_NCHW = np.random.random([2, 3, 5, 5]).astype("float32")
input_NHWC = fluid.layers.data(
name="input_NHWC",
shape=[2, 5, 5, 3],
append_batch_size=False,
dtype="float32")
input_NCHW = fluid.layers.data(
name="input_NCHW",
shape=[2, 3, 5, 5],
append_batch_size=False,
dtype="float32")
ksize = [3, 3]
out_1 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[1, 1],
use_cudnn=False,
data_format="NHWC")
out_2 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="avg",
pool_padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
use_cudnn=False,
data_format="NHWC")
out_3 = fluid.layers.pool2d(
input=input_NCHW,
pool_size=ksize,
pool_type="avg",
pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1]],
use_cudnn=False,
data_format="NCHW")
out_4 = fluid.layers.pool2d(
input=input_NCHW,
pool_size=ksize,
pool_type="avg",
pool_padding=[1, 2, 1, 0],
use_cudnn=False,
data_format="NCHW")
# test VALID
out_5 = fluid.layers.pool2d(
input=input_NCHW,
pool_size=ksize,
pool_type="avg",
pool_padding="VALID",
use_cudnn=False,
data_format="NCHW")
out_6 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding="VALID",
use_cudnn=False,
data_format="NHWC")
# test SAME
out_7 = fluid.layers.pool2d(
input=input_NCHW,
pool_size=[4, 4],
pool_type="avg",
pool_padding="SAME",
use_cudnn=False,
data_format="NCHW")
out_8 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=[4, 4],
pool_type="max",
pool_padding="SAME",
use_cudnn=False,
data_format="NHWC")
exe = fluid.Executor(place=fluid.CPUPlace())
[res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8] = exe.run(
fluid.default_main_program(),
feed={"input_NHWC": x_NHWC,
"input_NCHW": x_NCHW},
fetch_list=[
out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8
])
assert np.allclose(
res_1,
pool2D_forward_naive(
x=x_NHWC,
ksize=ksize,
pool_type="max",
strides=[1, 1],
paddings=[1, 1],
data_format="NHWC"))
assert np.allclose(
res_2,
pool2D_forward_naive(
x=x_NHWC,
ksize=ksize,
pool_type="avg",
strides=[1, 1],
paddings=[1, 1, 1, 1],
data_format="NHWC"))
assert np.allclose(
res_3,
pool2D_forward_naive(
x=x_NCHW,
ksize=ksize,
pool_type="avg",
strides=[1, 1],
paddings=[1, 1, 1, 1],
data_format="NCHW"),
rtol=0.07,
atol=1e-05)
assert np.allclose(
res_4,
pool2D_forward_naive(
x=x_NCHW,
ksize=ksize,
pool_type="avg",
strides=[1, 1],
paddings=[1, 2, 1, 0],
data_format="NCHW"),
rtol=0.07,
atol=1e-05)
# VALID
assert np.allclose(
res_5,
pool2D_forward_naive(
x=x_NCHW,
ksize=ksize,
pool_type="avg",
strides=[1, 1],
paddings=[10, 20],  # any value is OK: "VALID" overrides paddings
padding_algorithm="VALID",
data_format="NCHW"),
rtol=0.07,
atol=1e-05)
assert np.allclose(
res_6,
pool2D_forward_naive(
x=x_NHWC,
ksize=ksize,
pool_type="max",
strides=[1, 1],
paddings=[10, 20],
padding_algorithm="VALID",
data_format="NHWC"))
# SAME
assert np.allclose(
res_7,
pool2D_forward_naive(
x=x_NCHW,
ksize=[4, 4],
pool_type="avg",
strides=[1, 1],
paddings=[10, 20],
padding_algorithm="SAME",
data_format="NCHW"),
rtol=0.07,
atol=1e-05)
assert np.allclose(
res_8,
pool2D_forward_naive(
x=x_NHWC,
ksize=[4, 4],
pool_type="max",
strides=[1, 1],
paddings=[10, 20],
padding_algorithm="SAME",
data_format="NHWC"))
class TestPool2dAPI_Error(OpTest):
def test_api(self):
input_NHWC = fluid.layers.data(
name="input_NHWC",
shape=[2, 5, 5, 3],
append_batch_size=False,
dtype="float32")
ksize = [3, 3]
# cudnn value error
def run_1():
out_1 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[1, 1],
use_cudnn=[0],
data_format="NHWC")
self.assertRaises(ValueError, run_1)
# data_format value error
def run_2():
out_2 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[1, 1],
use_cudnn=False,
data_format="NHWCC")
self.assertRaises(ValueError, run_2)
# padding str value error
def run_3():
out_3 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding="VALIDSAME",
use_cudnn=False,
data_format="NHWC")
self.assertRaises(ValueError, run_3)
# padding str valid and ceil_mode value error
def run_4():
out_4 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding="VALID",
use_cudnn=False,
ceil_mode=True,
data_format="NHWC")
self.assertRaises(ValueError, run_4)
# padding with 8 ele. value error
def run_5():
out_5 = fluid.layers.pool2d(
input=input_NHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1]],
use_cudnn=False,
data_format="NHWC")
self.assertRaises(ValueError, run_5)
if __name__ == '__main__':
unittest.main()
......@@ -20,6 +20,7 @@ import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
import paddle.fluid as fluid
def adaptive_start_index(index, input_size, output_size):
......@@ -30,58 +31,137 @@ def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def max_pool3D_forward_naive(x,
def pool3D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=True,
adaptive=False):
N, C, D, H, W = x.shape
adaptive=False,
data_format='NCDHW',
pool_type='max',
padding_algorithm="EXPLICIT"):
# update paddings
def _get_padding_with_SAME(input_shape, pool_size, pool_stride):
padding = []
for input_size, filter_size, stride_size in zip(input_shape, pool_size,
pool_stride):
out_size = int((input_size + stride_size - 1) / stride_size)
pad_sum = np.max((
(out_size - 1) * stride_size + filter_size - input_size, 0))
pad_0 = int(pad_sum / 2)
pad_1 = int(pad_sum - pad_0)
padding.append(pad_0)
padding.append(pad_1)
return padding
if isinstance(padding_algorithm, str):
padding_algorithm = padding_algorithm.upper()
if padding_algorithm not in ["SAME", "VALID", "EXPLICIT"]:
raise ValueError("Unknown Attr(padding_algorithm): '%s'. "
"It can only be 'SAME' or 'VALID'." %
str(padding_algorithm))
if padding_algorithm == "VALID":
paddings = [0, 0, 0, 0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode)"
" must be False. "
"Received ceil_mode: True.")
elif padding_algorithm == "SAME":
input_data_shape = []
if data_format == "NCDHW":
input_data_shape = x.shape[2:5]
elif data_format == "NDHWC":
input_data_shape = x.shape[1:4]
paddings = _get_padding_with_SAME(input_data_shape, ksize, strides)
assert len(paddings) == 3 or len(paddings) == 6
is_sys = True if len(paddings) == 3 else False
N = x.shape[0]
C, D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \
if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2], x.shape[3]]
if global_pool == 1:
ksize = [D, H, W]
paddings = [0 for _ in range(len(paddings))]
pad_d_forth = paddings[0] if is_sys else paddings[0]
pad_d_back = paddings[0] if is_sys else paddings[1]
pad_h_up = paddings[1] if is_sys else paddings[2]
pad_h_down = paddings[1] if is_sys else paddings[3]
pad_w_left = paddings[2] if is_sys else paddings[4]
pad_w_right = paddings[2] if is_sys else paddings[5]
if adaptive:
D_out, H_out, W_out = ksize
else:
D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else (
H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
H_out = (H - ksize[1] + 2 * paddings[1] + strides[1] - 1
) // strides[1] + 1 if ceil_mode else (
W - ksize[1] + 2 * paddings[1]) // strides[1] + 1
W_out = (W - ksize[2] + 2 * paddings[2] + strides[2] - 1
) // strides[2] + 1 if ceil_mode else (
W - ksize[2] + 2 * paddings[2]) // strides[2] + 1
out = np.zeros((N, C, D_out, H_out, W_out))
D_out = (D - ksize[0] + pad_d_forth + pad_d_back + strides[0] - 1) // strides[0] + 1 \
if ceil_mode else (D - ksize[0] + pad_d_forth + pad_d_back) // strides[0] + 1
H_out = (H - ksize[1] + pad_h_up + pad_h_down + strides[1] - 1) // strides[1] + 1 \
if ceil_mode else (H - ksize[1] + pad_h_up + pad_h_down) // strides[1] + 1
W_out = (W - ksize[2] + pad_w_left + pad_w_right + strides[2] - 1) // strides[2] + 1 \
if ceil_mode else (W - ksize[2] + pad_w_left + pad_w_right) // strides[2] + 1
out = np.zeros((N, C, D_out, H_out, W_out)) if data_format == 'NCDHW' \
else np.zeros((N, D_out, H_out, W_out, C))
for k in range(D_out):
if adaptive:
d_start = adaptive_start_index(k, D, ksize[0])
d_end = adaptive_end_index(k, D, ksize[0])
else:
d_start = np.max((k * strides[0] - paddings[0], 0))
d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D))
d_start = np.max((k * strides[0] - pad_d_forth, 0))
d_end = np.min((k * strides[0] + ksize[0] - pad_d_forth, D))
for i in range(H_out):
if adaptive:
h_start = adaptive_start_index(i, H, ksize[1])
h_end = adaptive_end_index(i, H, ksize[1])
else:
h_start = np.max((i * strides[1] - paddings[1], 0))
h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H))
h_start = np.max((i * strides[1] - pad_h_up, 0))
h_end = np.min((i * strides[1] + ksize[1] - pad_h_up, H))
for j in range(W_out):
if adaptive:
w_start = adaptive_start_index(j, W, ksize[2])
w_end = adaptive_end_index(j, W, ksize[2])
else:
w_start = np.max((j * strides[2] - paddings[2], 0))
w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W))
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end]
w_start = np.max((j * strides[2] - pad_w_left, 0))
w_end = np.min((j * strides[2] + ksize[2] - pad_w_left, W))
if data_format == 'NCDHW':
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:
w_end]
if pool_type == 'avg':
field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \
if (exclusive or adaptive) else ksize[0] * ksize[1] * ksize[2]
out[:, :, k, i, j] = np.sum(x_masked,
axis=(2, 3, 4)) / field_size
elif pool_type == 'max':
out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4))
elif data_format == 'NDHWC':
x_masked = x[:, d_start:d_end, h_start:h_end, w_start:
w_end, :]
if pool_type == 'avg':
field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \
if (exclusive or adaptive) else ksize[0] * ksize[1] * ksize[2]
out[:, k, i, j, :] = np.sum(x_masked,
axis=(1, 2, 3)) / field_size
elif pool_type == 'max':
out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3))
return out
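The same output-size rule holds per spatial dimension in 3-D; a quick check with the asymmetric padding [1, 0, 2, 1, 2, 1] used by the tests below (ksize 3, stride 1, ceil_mode False, D = H = W = 7):

D = H = W = 7
D_out = (D - 3 + 1 + 0) // 1 + 1  # pad_d_forth=1, pad_d_back=0  -> 6
H_out = (H - 3 + 2 + 1) // 1 + 1  # pad_h_up=2, pad_h_down=1     -> 8
W_out = (W - 3 + 2 + 1) // 1 + 1  # pad_w_left=2, pad_w_right=1  -> 8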
def avg_pool3D_forward_naive(x,
def max_pool3D_forward_naive(x,
ksize,
strides,
paddings,
......@@ -89,56 +169,46 @@ def avg_pool3D_forward_naive(x,
ceil_mode=False,
exclusive=True,
adaptive=False):
N, C, D, H, W = x.shape
if global_pool == 1:
ksize = [D, H, W]
if adaptive:
D_out, H_out, W_out = ksize
else:
D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else (
H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
H_out = (H - ksize[1] + 2 * paddings[1] + strides[1] - 1
) // strides[1] + 1 if ceil_mode else (
W - ksize[1] + 2 * paddings[1]) // strides[1] + 1
W_out = (W - ksize[2] + 2 * paddings[2] + strides[2] - 1
) // strides[2] + 1 if ceil_mode else (
W - ksize[2] + 2 * paddings[2]) // strides[2] + 1
out = np.zeros((N, C, D_out, H_out, W_out))
for k in range(D_out):
if adaptive:
d_start = adaptive_start_index(k, D, ksize[0])
d_end = adaptive_end_index(k, D, ksize[0])
else:
d_start = np.max((k * strides[0] - paddings[0], 0))
d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D))
for i in range(H_out):
if adaptive:
h_start = adaptive_start_index(i, H, ksize[1])
h_end = adaptive_end_index(i, H, ksize[1])
else:
h_start = np.max((i * strides[1] - paddings[1], 0))
h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H))
for j in range(W_out):
if adaptive:
w_start = adaptive_start_index(j, W, ksize[2])
w_end = adaptive_end_index(j, W, ksize[2])
else:
w_start = np.max((j * strides[2] - paddings[2], 0))
w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W))
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end]
out = pool3D_forward_naive(
x=x,
ksize=ksize,
strides=strides,
paddings=paddings,
global_pool=global_pool,
ceil_mode=ceil_mode,
exclusive=exclusive,
adaptive=adaptive,
data_format='NCDHW',
pool_type="max")
return out
field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \
if (exclusive or adaptive) else ksize[0] * ksize[1] * ksize[2]
out[:, :, k, i, j] = np.sum(x_masked, axis=(2, 3,
4)) / field_size
def avg_pool3D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=True,
adaptive=False):
out = pool3D_forward_naive(
x=x,
ksize=ksize,
strides=strides,
paddings=paddings,
global_pool=global_pool,
ceil_mode=ceil_mode,
exclusive=exclusive,
adaptive=adaptive,
data_format='NCDHW',
pool_type="avg")
return out
class TestPool3d_Op(OpTest):
def setUp(self):
self.op_type = "pool3d"
self.use_cudnn = False
self.init_kernel_type()
self.dtype = np.float32
self.init_test_case()
self.init_global_pool()
......@@ -147,13 +217,15 @@ class TestPool3d_Op(OpTest):
self.init_ceil_mode()
self.init_exclusive()
self.init_adaptive()
self.init_data_format()
self.init_shape()
if self.global_pool:
self.paddings = [0 for _ in range(len(self.paddings))]
input = np.random.random(self.shape).astype(self.dtype)
output = self.pool3D_forward_naive(
output = pool3D_forward_naive(
input, self.ksize, self.strides, self.paddings, self.global_pool,
self.ceil_mode, self.exclusive, self.adaptive).astype(self.dtype)
self.ceil_mode, self.exclusive, self.adaptive, self.data_format,
self.pool_type).astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
self.attrs = {
......@@ -164,8 +236,7 @@ class TestPool3d_Op(OpTest):
'global_pooling': self.global_pool,
'use_cudnn': self.use_cudnn,
'ceil_mode': self.ceil_mode,
'data_format':
'AnyLayout', # TODO(dzhwinter) : should be fix latter
'data_format': self.data_format,
'exclusive': self.exclusive,
'adaptive': self.adaptive
}
......@@ -192,18 +263,23 @@ class TestPool3d_Op(OpTest):
elif self.pool_type != "max":
self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
def init_test_case(self):
def init_data_format(self):
self.data_format = "NCDHW"
def init_shape(self):
self.shape = [2, 3, 5, 5, 5]
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [0, 0, 0]
def init_kernel_type(self):
pass
self.use_cudnn = False
def init_pool_type(self):
self.pool_type = "avg"
self.pool3D_forward_naive = avg_pool3D_forward_naive
def init_global_pool(self):
self.global_pool = True
......@@ -219,30 +295,32 @@ class TestPool3d_Op(OpTest):
class TestCase1(TestPool3d_Op):
def init_test_case(self):
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [0, 0, 0]
def init_pool_type(self):
self.pool_type = "avg"
self.pool3D_forward_naive = avg_pool3D_forward_naive
def init_global_pool(self):
self.global_pool = False
class TestCase2(TestPool3d_Op):
def init_test_case(self):
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 1, 1]
def init_pool_type(self):
self.pool_type = "avg"
self.pool3D_forward_naive = avg_pool3D_forward_naive
def init_global_pool(self):
self.global_pool = False
......@@ -251,45 +329,45 @@ class TestCase2(TestPool3d_Op):
class TestCase3(TestPool3d_Op):
def init_pool_type(self):
self.pool_type = "max"
self.pool3D_forward_naive = max_pool3D_forward_naive
class TestCase4(TestCase1):
def init_pool_type(self):
self.pool_type = "max"
self.pool3D_forward_naive = max_pool3D_forward_naive
class TestCase5(TestCase2):
def init_pool_type(self):
self.pool_type = "max"
self.pool3D_forward_naive = max_pool3D_forward_naive
#--------------------test pool3d--------------------
class TestCUDNNCase1(TestPool3d_Op):
def init_kernel_type(self):
self.use_cudnn = True
#--------------------test pool3d cudnn--------------------
class TestFP16CUDNNCase1(TestPool3d_Op):
def create_test_cudnn_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNCase(parent):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOp")
TestCUDNNCase.__name__ = cls_name
globals()[cls_name] = TestCUDNNCase
class TestCUDNNCase2(TestCase1):
def init_kernel_type(self):
self.use_cudnn = True
create_test_cudnn_class(TestPool3d_Op)
create_test_cudnn_class(TestCase1)
create_test_cudnn_class(TestCase2)
create_test_cudnn_class(TestCase3)
create_test_cudnn_class(TestCase4)
create_test_cudnn_class(TestCase5)
class TestFP16CUDNNCase2(TestCase1):
def create_test_cudnn_fp16_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNFp16Case(parent):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
......@@ -300,109 +378,775 @@ class TestFP16CUDNNCase2(TestCase1):
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16Op")
TestCUDNNFp16Case.__name__ = cls_name
globals()[cls_name] = TestCUDNNFp16Case
class TestCUDNNCase3(TestCase2):
def init_kernel_type(self):
self.use_cudnn = True
create_test_cudnn_fp16_class(TestPool3d_Op)
create_test_cudnn_fp16_class(TestCase1)
create_test_cudnn_fp16_class(TestCase2)
create_test_cudnn_fp16_class(TestCase3)
create_test_cudnn_fp16_class(TestCase4)
create_test_cudnn_fp16_class(TestCase5)
class TestFP16CUDNNCase3(TestCase2):
# ---- test ceil mode ------
def create_test_cudnn_use_ceil_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestPool3DUseCeilCase(parent):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
def init_ceil_mode(self):
self.ceil_mode = True
cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOpCeilMode")
TestPool3DUseCeilCase.__name__ = cls_name
globals()[cls_name] = TestPool3DUseCeilCase
class TestCUDNNCase4(TestCase3):
def init_kernel_type(self):
self.use_cudnn = True
create_test_cudnn_use_ceil_class(TestPool3d_Op)
create_test_cudnn_use_ceil_class(TestCase1)
class TestFP16CUDNNCase4(TestCase3):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
def create_test_use_ceil_class(parent):
class TestPool3DUseCeilCase(parent):
def init_ceil_mode(self):
self.ceil_mode = True
cls_name = "{0}_{1}".format(parent.__name__, "CeilModeCast")
TestPool3DUseCeilCase.__name__ = cls_name
globals()[cls_name] = TestPool3DUseCeilCase
class TestCUDNNCase5(TestCase4):
def init_kernel_type(self):
self.use_cudnn = True
create_test_use_ceil_class(TestCase1)
create_test_use_ceil_class(TestCase2)
class TestFP16CUDNNCase5(TestCase4):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
class TestAvgInclude(TestCase2):
def init_exclusive(self):
self.exclusive = False
class TestCUDNNCase6(TestCase5):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNAvgInclude(TestCase2):
def init_kernel_type(self):
self.use_cudnn = True
def init_exclusive(self):
self.exclusive = False
class TestAvgPoolAdaptive(TestCase1):
def init_adaptive(self):
self.adaptive = True
#-------test pool3d with asymmetric padding------
class TestPool3d_Op_AsyPadding(TestPool3d_Op):
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [0, 0, 0, 2, 3, 0]
def init_shape(self):
self.shape = [2, 3, 5, 5, 5]
class TestCase1_AsyPadding(TestCase1):
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 0, 2, 1, 2, 1]
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
class TestCase2_AsyPadding(TestCase2):
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 2, 1, 1, 1, 0]
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
class TestCase3_AsyPadding(TestCase3):
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 0, 0, 0, 1, 0]
def init_shape(self):
self.shape = [2, 3, 5, 5, 5]
class TestCase4_AsyPadding(TestCase4):
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 0, 2, 1, 2, 1]
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
class TestCase5_AsyPadding(TestCase5):
def init_test_case(self):
self.shape = [2, 7, 7, 7, 3]
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 2, 1, 1, 1, 0]
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
create_test_cudnn_class(TestPool3d_Op_AsyPadding)
create_test_cudnn_class(TestCase1_AsyPadding)
create_test_cudnn_class(TestCase2_AsyPadding)
create_test_cudnn_class(TestCase3_AsyPadding)
create_test_cudnn_class(TestCase4_AsyPadding)
create_test_cudnn_class(TestCase5_AsyPadding)
create_test_cudnn_fp16_class(TestPool3d_Op_AsyPadding)
create_test_cudnn_fp16_class(TestCase1_AsyPadding)
create_test_cudnn_fp16_class(TestCase2_AsyPadding)
create_test_cudnn_fp16_class(TestCase3_AsyPadding)
create_test_cudnn_fp16_class(TestCase4_AsyPadding)
create_test_cudnn_fp16_class(TestCase5_AsyPadding)
create_test_cudnn_use_ceil_class(TestPool3d_Op_AsyPadding)
create_test_cudnn_use_ceil_class(TestCase1_AsyPadding)
create_test_use_ceil_class(TestCase1_AsyPadding)
create_test_use_ceil_class(TestCase2_AsyPadding)
class TestAvgInclude_AsyPadding(TestCase2):
def init_exclusive(self):
self.exclusive = False
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 2, 1, 1, 1, 0]
def init_shape(self):
self.shape = [2, 3, 7, 7, 7]
class TestFP16CUDNNCase6(TestCase5):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNAvgInclude_AsyPadding(TestCase2):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)
def init_exclusive(self):
self.exclusive = False
def init_test_case(self):
self.ksize = [3, 3, 3]
self.strides = [1, 1, 1]
self.paddings = [1, 0, 0, 0, 0, 0]
def init_shape(self):
self.shape = [2, 3, 5, 5, 5]
class TestAvgPoolAdaptive_AsyPadding(TestCase1):
    def init_adaptive(self):
        self.adaptive = True

    def init_test_case(self):
        self.ksize = [3, 3, 3]
        self.strides = [1, 1, 1]
        self.paddings = [1, 0, 2, 1, 2, 1]

    def init_shape(self):
        self.shape = [2, 3, 7, 7, 7]
# ------------ test channel_last --------------
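# "NDHWC" stores tensors as [N, D, H, W, C] rather than the default "NCDHW"
# [N, C, D, H, W]; the pooling math is unchanged, only the data layout (and
# hence the expected `shape` in each case below) differs.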
class TestPool3d_channel_last(TestPool3d_Op):
def init_data_format(self):
self.data_format = "NDHWC"
    def init_shape(self):
        self.shape = [2, 5, 5, 5, 3]

class TestCase1_channel_last(TestCase1):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
class TestCase2_channel_last(TestCase2):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
class TestCase3_channel_last(TestCase3):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 5, 5, 5, 3]
class TestCase4_channel_last(TestCase4):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
class TestCase5_channel_last(TestCase5):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
create_test_cudnn_class(TestPool3d_channel_last)
create_test_cudnn_class(TestCase1_channel_last)
create_test_cudnn_class(TestCase2_channel_last)
create_test_cudnn_class(TestCase3_channel_last)
create_test_cudnn_class(TestCase4_channel_last)
create_test_cudnn_class(TestCase5_channel_last)
create_test_cudnn_use_ceil_class(TestPool3d_channel_last)
create_test_cudnn_use_ceil_class(TestCase1_channel_last)
create_test_use_ceil_class(TestCase1_channel_last)
create_test_use_ceil_class(TestCase2_channel_last)
class TestCase5_Max(TestCase2):
def init_pool_type(self):
self.pool_type = "max"
def test_check_grad(self):
if self.dtype == np.float16:
return
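        # max pooling propagates the gradient only to the argmax element, so
        # the numeric gradient check runs with a very loose tolerance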
if self.has_cudnn() and self.pool_type == "max":
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, set(['X']), 'Out', max_relative_error=1.00)
elif self.pool_type == "max":
self.check_grad(set(['X']), 'Out', max_relative_error=1.00)
class TestCase5_channel_last_Max(TestCase5_Max):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
create_test_cudnn_class(TestCase5_Max)
create_test_cudnn_class(TestCase5_channel_last_Max)
class TestAvgInclude_channel_last(TestCase2_channel_last):
def init_exclusive(self):
self.exclusive = False
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNAvgInclude_channel_last(TestCase2_channel_last):
def init_kernel_type(self):
self.use_cudnn = True
def init_exclusive(self):
self.exclusive = False
class TestAvgPoolAdaptive_channel_last(TestCase1_channel_last):
def init_adaptive(self):
self.adaptive = True
# --- asymmetric padding combined with channel_last
class TestPool3d_Op_AsyPadding_channel_last(TestPool3d_Op_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 5, 5, 5, 3]
class TestCase1_AsyPadding_channel_last(TestCase1_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
class TestCase2_AsyPadding_channel_last(TestCase2_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
class TestCase3_AsyPadding_channel_last(TestCase3_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 5, 5, 5, 3]
class TestCase4_AsyPadding_channel_last(TestCase4_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
class TestCase5_AsyPadding_channel_last(TestCase5_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
create_test_cudnn_class(TestPool3d_Op_AsyPadding_channel_last)
create_test_cudnn_class(TestCase1_AsyPadding_channel_last)
create_test_cudnn_class(TestCase2_AsyPadding_channel_last)
create_test_cudnn_class(TestCase3_AsyPadding_channel_last)
create_test_cudnn_class(TestCase4_AsyPadding_channel_last)
create_test_cudnn_class(TestCase5_AsyPadding_channel_last)
create_test_cudnn_use_ceil_class(TestPool3d_Op_AsyPadding_channel_last)
create_test_cudnn_use_ceil_class(TestCase1_AsyPadding_channel_last)
create_test_use_ceil_class(TestCase1_AsyPadding_channel_last)
create_test_use_ceil_class(TestCase2_AsyPadding_channel_last)
class TestAvgInclude_AsyPadding_channel_last(TestAvgInclude_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNAvgInclude_AsyPadding_channel_last(
TestCUDNNAvgInclude_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 5, 5, 5, 3]
class TestAvgPoolAdaptive_AsyPadding_channel_last(
TestAvgPoolAdaptive_AsyPadding):
def init_data_format(self):
self.data_format = "NDHWC"
def init_shape(self):
self.shape = [2, 7, 7, 7, 3]
#-------test padding algorithm: "SAME" and "VALID"------
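# "SAME" chooses pads so that out = ceil(in / stride) per spatial dim; the
# total pad, max((out - 1) * stride + ksize - in, 0), is split between begin
# and end, with the odd row/column going to the end side. "VALID" uses no
# padding: out = (in - ksize) // stride + 1. In both modes any explicit
# `paddings` value is ignored, which is why the factories below can leave
# arbitrary values such as [0, 0] or [1, 1] in place.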
def create_test_padding_SAME_class(parent):
    class TestPaddingSAMECase(parent):
        def init_paddings(self):
            self.paddings = [0, 0]
            self.padding_algorithm = "SAME"

    cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp")
    TestPaddingSAMECase.__name__ = cls_name
    globals()[cls_name] = TestPaddingSAMECase
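# Registering the generated class in globals() puts it at module level, where
# unittest test discovery can find it; the other create_test_* factories in
# this file follow the same pattern.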
create_test_padding_SAME_class(TestPool3d_Op)
create_test_padding_SAME_class(TestCase1)
create_test_padding_SAME_class(TestCase2)
create_test_padding_SAME_class(TestCase3)
create_test_padding_SAME_class(TestCase4)
create_test_padding_SAME_class(TestCase5)
create_test_padding_SAME_class(TestPool3d_channel_last)
create_test_padding_SAME_class(TestCase1_channel_last)
create_test_padding_SAME_class(TestCase2_channel_last)
create_test_padding_SAME_class(TestCase3_channel_last)
create_test_padding_SAME_class(TestCase4_channel_last)
create_test_padding_SAME_class(TestCase5_channel_last)
def create_test_cudnn_padding_SAME_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
    class TestCUDNNPaddingSAMECase(parent):
        def init_kernel_type(self):
            self.use_cudnn = True

        def init_paddings(self):
            self.paddings = [1, 1]
            self.padding_algorithm = "SAME"

    cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp")
    TestCUDNNPaddingSAMECase.__name__ = cls_name
    globals()[cls_name] = TestCUDNNPaddingSAMECase
create_test_cudnn_padding_SAME_class(TestPool3d_Op)
create_test_cudnn_padding_SAME_class(TestCase1)
create_test_cudnn_padding_SAME_class(TestCase2)
create_test_cudnn_padding_SAME_class(TestCase3)
create_test_cudnn_padding_SAME_class(TestCase4)
create_test_cudnn_padding_SAME_class(TestCase5)
create_test_cudnn_padding_SAME_class(TestPool3d_channel_last)
create_test_cudnn_padding_SAME_class(TestCase1_channel_last)
create_test_cudnn_padding_SAME_class(TestCase2_channel_last)
create_test_cudnn_padding_SAME_class(TestCase3_channel_last)
create_test_cudnn_padding_SAME_class(TestCase4_channel_last)
create_test_cudnn_padding_SAME_class(TestCase5_channel_last)
def create_test_padding_VALID_class(parent):
class TestPaddingVALIDCase(parent):
def init_paddings(self):
self.paddings = [1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp")
TestPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestPaddingVALIDCase
create_test_padding_VALID_class(TestPool3d_Op)
create_test_padding_VALID_class(TestCase1)
create_test_padding_VALID_class(TestCase2)
create_test_padding_VALID_class(TestCase3)
create_test_padding_VALID_class(TestCase4)
create_test_padding_VALID_class(TestCase5)
create_test_padding_VALID_class(TestPool3d_channel_last)
create_test_padding_VALID_class(TestCase1_channel_last)
create_test_padding_VALID_class(TestCase2_channel_last)
create_test_padding_VALID_class(TestCase3_channel_last)
create_test_padding_VALID_class(TestCase4_channel_last)
create_test_padding_VALID_class(TestCase5_channel_last)
def create_test_cudnn_padding_VALID_class(parent):
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNPaddingVALIDCase(parent):
def init_kernel_type(self):
self.use_cudnn = True
def init_paddings(self):
self.paddings = [1, 1]
self.padding_algorithm = "VALID"
cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp")
TestCUDNNPaddingVALIDCase.__name__ = cls_name
globals()[cls_name] = TestCUDNNPaddingVALIDCase
create_test_cudnn_padding_VALID_class(TestPool3d_Op)
create_test_cudnn_padding_VALID_class(TestCase1)
create_test_cudnn_padding_VALID_class(TestCase2)
create_test_cudnn_padding_VALID_class(TestCase3)
create_test_cudnn_padding_VALID_class(TestCase4)
create_test_cudnn_padding_VALID_class(TestCase5)
create_test_cudnn_padding_VALID_class(TestPool3d_channel_last)
create_test_cudnn_padding_VALID_class(TestCase1_channel_last)
create_test_cudnn_padding_VALID_class(TestCase2_channel_last)
create_test_cudnn_padding_VALID_class(TestCase3_channel_last)
create_test_cudnn_padding_VALID_class(TestCase4_channel_last)
create_test_cudnn_padding_VALID_class(TestCase5_channel_last)
#test API
class TestPool3dAPI(OpTest):
def test_api(self):
x_NDHWC = np.random.random([2, 5, 5, 5, 3]).astype("float32")
x_NCDHW = np.random.random([2, 3, 5, 5, 5]).astype("float32")
input_NDHWC = fluid.layers.data(
name="input_NDHWC",
shape=[2, 5, 5, 5, 3],
append_batch_size=False,
dtype="float32")
input_NCDHW = fluid.layers.data(
name="input_NCDHW",
shape=[2, 3, 5, 5, 5],
append_batch_size=False,
dtype="float32")
ksize = [3, 3, 3]
out_1 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[1, 1, 1],
use_cudnn=False,
data_format="NDHWC")
out_2 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="avg",
pool_padding=[[0, 0], [1, 1], [1, 1], [1, 1], [0, 0]],
use_cudnn=False,
data_format="NDHWC")
out_3 = fluid.layers.pool3d(
input=input_NCDHW,
pool_size=ksize,
pool_type="avg",
pool_padding=[[0, 0], [0, 0], [1, 1], [1, 1], [1, 1]],
use_cudnn=False,
data_format="NCDHW")
out_4 = fluid.layers.pool3d(
input=input_NCDHW,
pool_size=ksize,
pool_type="avg",
pool_padding=[1, 2, 1, 0, 0, 1],
use_cudnn=False,
data_format="NCDHW")
# test VALID
out_5 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="avg",
pool_padding="VALID",
use_cudnn=False,
data_format="NDHWC")
out_6 = fluid.layers.pool3d(
input=input_NCDHW,
pool_size=ksize,
pool_type="avg",
pool_padding="VALID",
use_cudnn=False,
data_format="NCDHW")
# test SAME
out_7 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="avg",
pool_padding="SAME",
use_cudnn=False,
data_format="NDHWC")
out_8 = fluid.layers.pool3d(
input=input_NCDHW,
pool_size=[4, 4, 4],
pool_type="avg",
pool_padding="SAME",
use_cudnn=False,
data_format="NCDHW")
exe = fluid.Executor(place=fluid.CPUPlace())
[res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8] = exe.run(
fluid.default_main_program(),
feed={"input_NDHWC": x_NDHWC,
"input_NCDHW": x_NCDHW},
fetch_list=[
out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8
])
assert np.allclose(
res_1,
pool3D_forward_naive(
x=x_NDHWC,
ksize=ksize,
pool_type="max",
strides=[1, 1, 1],
paddings=[1, 1, 1],
data_format="NDHWC"))
assert np.allclose(
res_2,
pool3D_forward_naive(
x=x_NDHWC,
ksize=ksize,
pool_type="avg",
strides=[1, 1, 1],
paddings=[1, 1, 1, 1, 1, 1],
data_format="NDHWC"))
assert np.allclose(
res_3,
pool3D_forward_naive(
x=x_NCDHW,
ksize=ksize,
pool_type="avg",
strides=[1, 1, 1],
paddings=[1, 1, 1, 1, 1, 1],
data_format="NCDHW"),
rtol=0.07,
atol=1e-05)
assert np.allclose(
res_4,
pool3D_forward_naive(
x=x_NCDHW,
ksize=ksize,
pool_type="avg",
strides=[1, 1, 1],
paddings=[1, 2, 1, 0, 0, 1],
data_format="NCDHW"),
rtol=0.07,
atol=1e-05)
# VALID
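        # the [10, 20] value passed for `paddings` below is deliberately
        # meaningless: the naive reference derives its own pads once a
        # padding_algorithm is given, mirroring the op's behavior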
assert np.allclose(
res_5,
pool3D_forward_naive(
x=x_NDHWC,
ksize=ksize,
pool_type="avg",
strides=[1, 1, 1],
paddings=[10, 20],
padding_algorithm="VALID",
data_format="NDHWC"))
assert np.allclose(
res_6,
pool3D_forward_naive(
x=x_NCDHW,
ksize=ksize,
pool_type="avg",
strides=[1, 1, 1],
paddings=[10, 20],
padding_algorithm="VALID",
data_format="NCDHW"),
rtol=0.07,
atol=1e-05)
# SAME
assert np.allclose(
res_7,
pool3D_forward_naive(
x=x_NDHWC,
ksize=ksize,
pool_type="avg",
strides=[1, 1, 1],
paddings=[10, 20],
padding_algorithm="SAME",
data_format="NDHWC"))
assert np.allclose(
res_8,
pool3D_forward_naive(
x=x_NCDHW,
ksize=[4, 4, 4],
pool_type="avg",
strides=[1, 1, 1],
paddings=[10, 20],
padding_algorithm="SAME",
data_format="NCDHW"),
rtol=0.07,
atol=1e-05)
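# TestPool3dAPI above exercises every accepted `pool_padding` form: a 3-int
# list, a per-axis [begin, end] pair list (zero pads required on the batch and
# channel axes), a 6-int asymmetric list, and the "SAME"/"VALID" strings. The
# cases below check that malformed arguments raise ValueError.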
class TestPool3dAPI_Error(OpTest):
def test_api(self):
input_NDHWC = fluid.layers.data(
name="input_NDHWC",
shape=[2, 5, 5, 5, 3],
append_batch_size=False,
dtype="float32")
ksize = [3, 3, 3]
        # use_cudnn must be a bool: a non-bool value raises ValueError
def run_1():
out_1 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[1, 1, 1],
use_cudnn=[0],
data_format="NDHWC")
self.assertRaises(ValueError, run_1)
        # data_format must be "NCDHW" or "NDHWC": ValueError otherwise
def run_2():
out_2 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[1, 1, 1],
use_cudnn=False,
data_format="NDHWCC")
self.assertRaises(ValueError, run_2)
        # pool_padding string must be "SAME" or "VALID": ValueError otherwise
def run_3():
out_3 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="max",
pool_padding="VALIDSAME",
use_cudnn=False,
data_format="NDHWC")
self.assertRaises(ValueError, run_3)
        # pool_padding="VALID" combined with ceil_mode=True raises ValueError
def run_4():
out_4 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="max",
pool_padding="VALID",
use_cudnn=False,
ceil_mode=True,
data_format="NDHWC")
self.assertRaises(ValueError, run_4)
        # non-zero padding on the batch and channel axes raises ValueError
def run_5():
out_5 = fluid.layers.pool3d(
input=input_NDHWC,
pool_size=ksize,
pool_type="max",
pool_padding=[[1, 1], [0, 0], [0, 0], [1, 1], [1, 1]],
use_cudnn=False,
data_format="NDHWC")
self.assertRaises(ValueError, run_5)
if __name__ == '__main__':
unittest.main()