“068bfbb817611c856acd8c535de2b33a6126786c”上不存在“paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp”
提交 38895302 编写于 作者: N nhzlx

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_conv_elementwise_pass

test=develop
...@@ -77,6 +77,8 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'] ...@@ -77,6 +77,8 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name']
paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None)) paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
paddle.fluid.layers.adaptive_pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)) paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
......
...@@ -31,7 +31,7 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -31,7 +31,7 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* output) { bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -51,16 +51,28 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -51,16 +51,28 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height; if (adaptive) {
int hend = std::min(hstart + ksize_height, input_height); hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width; if (adaptive) {
int wend = std::min(wstart + ksize_width, input_width); wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
T ele = pool_process.initial(); T ele = pool_process.initial();
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -68,7 +80,8 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -68,7 +80,8 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
pool_process.compute(input_data[h * input_width + w], &ele); pool_process.compute(input_data[h * input_width + w], &ele);
} }
} }
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[ph * output_width + pw] = ele; output_data[ph * output_width + pw] = ele;
...@@ -94,7 +107,7 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -94,7 +107,7 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& output, const framework::Tensor& output_grad, const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides, const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_grad_process, const std::vector<int>& paddings, PoolProcess pool_grad_process,
bool exclusive, framework::Tensor* input_grad) { bool exclusive, bool adaptive, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -115,17 +128,30 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -115,17 +128,30 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height; if (adaptive) {
int hend = std::min(hstart + ksize_height, input_height); hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width; if (adaptive) {
int wend = std::min(wstart + ksize_width, input_width); wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) }
int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
float scale = 1.0 / pool_size; float scale = 1.0 / pool_size;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -251,7 +277,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -251,7 +277,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* output) { bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -276,20 +302,38 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -276,20 +302,38 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const T* input_data = input.data<T>(); const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(context.GetPlace()); T* output_data = output->mutable_data<T>(context.GetPlace());
int dstart, dend;
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) { for (int pd = 0; pd < output_depth; ++pd) {
int dstart = pd * stride_depth - padding_depth; if (adaptive) {
int dend = std::min(dstart + ksize_depth, input_depth); dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height; if (adaptive) {
int hend = std::min(hstart + ksize_height, input_height); hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width; if (adaptive) {
int wend = std::min(wstart + ksize_width, input_width); wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw; int output_idx = (pd * output_height + ph) * output_width + pw;
T ele = pool_process.initial(); T ele = pool_process.initial();
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
...@@ -302,7 +346,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -302,7 +346,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
} }
} }
int pool_size = int pool_size =
exclusive (exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart) ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width; : ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
...@@ -330,7 +374,7 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -330,7 +374,7 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& output, const framework::Tensor& output_grad, const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides, const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_grad_process, const std::vector<int>& paddings, PoolProcess pool_grad_process,
bool exclusive, framework::Tensor* input_grad) { bool exclusive, bool adaptive, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -356,24 +400,41 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -356,24 +400,41 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const T* output_grad_data = output_grad.data<T>(); const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace()); T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
int dstart, dend;
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) { for (int pd = 0; pd < output_depth; ++pd) {
int dstart = pd * stride_depth - padding_depth; if (adaptive) {
int dend = std::min(dstart + ksize_depth, input_depth); dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height; if (adaptive) {
int hend = std::min(hstart + ksize_height, input_height); hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width; if (adaptive) {
int wend = std::min(wstart + ksize_width, input_width); wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
int pool_size = int pool_size =
exclusive (exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart) ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width; : ksize_depth * ksize_height * ksize_width;
float scale = 1.0 / pool_size; float scale = 1.0 / pool_size;
...@@ -517,8 +578,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -517,8 +578,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
void operator()(const platform::CPUDeviceContext& context, void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -538,16 +599,28 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -538,16 +599,28 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
T1* output_data = output->mutable_data<T1>(context.GetPlace()); T1* output_data = output->mutable_data<T1>(context.GetPlace());
T2* mask_data = mask->mutable_data<T2>(context.GetPlace()); T2* mask_data = mask->mutable_data<T2>(context.GetPlace());
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height; if (adaptive) {
int hend = std::min(hstart + ksize_height, input_height); hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width; if (adaptive) {
int wend = std::min(wstart + ksize_width, input_width); wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
T1 ele = static_cast<T1>(-FLT_MAX); T1 ele = static_cast<T1>(-FLT_MAX);
int index = -1; int index = -1;
...@@ -584,7 +657,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -584,7 +657,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, const std::vector<int>& ksize, const framework::Tensor& mask, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* input_grad) { framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0]; const int batch_size = input_grad->dims()[0];
const int input_height = input_grad->dims()[2]; const int input_height = input_grad->dims()[2];
...@@ -637,8 +710,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -637,8 +710,8 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
void operator()(const platform::CPUDeviceContext& context, void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -663,20 +736,38 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -663,20 +736,38 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
T1* output_data = output->mutable_data<T1>(context.GetPlace()); T1* output_data = output->mutable_data<T1>(context.GetPlace());
T2* mask_data = mask->mutable_data<T2>(context.GetPlace()); T2* mask_data = mask->mutable_data<T2>(context.GetPlace());
int dstart, dend;
int hstart, hend;
int wstart, wend;
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) { for (int pd = 0; pd < output_depth; ++pd) {
int dstart = pd * stride_depth - padding_depth; if (adaptive) {
int dend = std::min(dstart + ksize_depth, input_depth); dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
} else {
dstart = pd * stride_depth - padding_depth;
dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height; if (adaptive) {
int hend = std::min(hstart + ksize_height, input_height); hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
} else {
hstart = ph * stride_height - padding_height;
hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width; if (adaptive) {
int wend = std::min(wstart + ksize_width, input_width); wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
wstart = pw * stride_width - padding_width;
wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw; int output_idx = (pd * output_height + ph) * output_width + pw;
T1 ele = static_cast<T1>(-FLT_MAX); T1 ele = static_cast<T1>(-FLT_MAX);
...@@ -718,7 +809,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -718,7 +809,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, T1, T2> {
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, const std::vector<int>& ksize, const framework::Tensor& mask, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* input_grad) { framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0]; const int batch_size = input_grad->dims()[0];
const int input_depth = input_grad->dims()[2]; const int input_depth = input_grad->dims()[2];
......
...@@ -29,7 +29,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -29,7 +29,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
const int ksize_width, const int stride_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int stride_width, const int padding_height,
const int padding_width, PoolProcess pool_process, const int padding_width, PoolProcess pool_process,
bool exclusive, T* output_data) { bool exclusive, bool adaptive, T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -37,13 +37,23 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -37,13 +37,23 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
int c = (index / output_width / output_height) % channels; int c = (index / output_width / output_height) % channels;
int batch_idx = index / output_width / output_height / channels; int batch_idx = index / output_width / output_height / channels;
int hstart = ph * stride_height - padding_height; int hstart, hend;
int hend = min(hstart + ksize_height, input_height); int wstart, wend;
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
hstart = ph * stride_height - padding_height;
hend = min(hstart + ksize_height, input_height);
hstart = max(hstart, 0); hstart = max(hstart, 0);
int wstart = pw * stride_width - padding_width; wstart = pw * stride_width - padding_width;
int wend = min(wstart + ksize_width, input_width); wend = min(wstart + ksize_width, input_width);
wstart = max(wstart, 0); wstart = max(wstart, 0);
}
input_data += (batch_idx * channels + c) * input_height * input_width; input_data += (batch_idx * channels + c) * input_height * input_width;
T ele = pool_process.initial(); T ele = pool_process.initial();
...@@ -52,7 +62,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -52,7 +62,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
pool_process.compute(input_data[h * input_width + w], &ele); pool_process.compute(input_data[h * input_width + w], &ele);
} }
} }
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) int pool_size = (exclusive || adaptive) ? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[index] = ele; output_data[index] = ele;
...@@ -66,22 +76,33 @@ __global__ void KernelPool2DGrad( ...@@ -66,22 +76,33 @@ __global__ void KernelPool2DGrad(
const int input_width, const int output_height, const int output_width, const int input_width, const int output_height, const int output_width,
const int ksize_height, const int ksize_width, const int stride_height, const int ksize_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int padding_width, const int stride_width, const int padding_height, const int padding_width,
PoolProcess pool_process, bool exclusive, T* input_grad) { PoolProcess pool_process, bool exclusive, bool adaptive, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width; int w_offset = index % input_width + padding_width;
int offsetH = (index / input_width) % input_height + padding_height; int h_offset = (index / input_width) % input_height + padding_height;
int offsetC = (index / input_width / input_height) % channels; int offsetC = (index / input_width / input_height) % channels;
int batch_idx = index / input_width / input_height / channels; int batch_idx = index / input_width / input_height / channels;
int phstart = (offsetH < ksize_height) int phstart, phend;
int pwstart, pwend;
if (adaptive) {
phstart = h_offset * output_height / input_height;
phend =
min((h_offset + 1) * output_height / input_height + 1, output_height);
pwstart = w_offset * output_width / input_width;
pwend =
min((w_offset + 1) * output_width / input_width + 1, output_width);
} else {
phstart = (h_offset < ksize_height)
? 0 ? 0
: (offsetH - ksize_height) / stride_height + 1; : (h_offset - ksize_height) / stride_height + 1;
int pwstart = (offsetW < ksize_width) pwstart = (w_offset < ksize_width)
? 0 ? 0
: (offsetW - ksize_width) / stride_width + 1; : (w_offset - ksize_width) / stride_width + 1;
int phend = min(offsetH / stride_height + 1, output_height); phend = min(h_offset / stride_height + 1, output_height);
int pwend = min(offsetW / stride_width + 1, output_width); pwend = min(w_offset / stride_width + 1, output_width);
}
T gradient = 0; T gradient = 0;
T input = input_data[index]; T input = input_data[index];
int output_idx = int output_idx =
...@@ -90,14 +111,22 @@ __global__ void KernelPool2DGrad( ...@@ -90,14 +111,22 @@ __global__ void KernelPool2DGrad(
output_grad += output_idx; output_grad += output_idx;
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
int pool_size;
if (adaptive) {
pool_size = static_cast<int>(ceil(static_cast<double>(input_height) /
ksize_height)) *
static_cast<int>(
ceil(static_cast<double>(input_width) / ksize_width));
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int hend = min(hstart + ksize_height, input_height); int hend = min(hstart + ksize_height, input_height);
int wend = min(wstart + ksize_width, input_width); int wend = min(wstart + ksize_width, input_width);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) pool_size = exclusive ? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
}
int output_sub_idx = ph * output_width + pw; int output_sub_idx = ph * output_width + pw;
pool_process.compute(input, output_data[output_sub_idx], pool_process.compute(input, output_data[output_sub_idx],
output_grad[output_sub_idx], output_grad[output_sub_idx],
...@@ -181,7 +210,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()( ...@@ -181,7 +210,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
KernelPool2D<PoolProcess, T><<<grid, threads, 0, stream>>>( KernelPool2D<PoolProcess, T><<<grid, threads, 0, stream>>>(
nthreads, input, input_channels, input_height, input_width, output_height, nthreads, input, input_channels, input_height, input_width, output_height,
output_width, ksize_height, ksize_width, stride_height, stride_width, output_width, ksize_height, ksize_width, stride_height, stride_width,
padding_height, padding_width, pool_compute, exclusive, output); padding_height, padding_width, pool_compute, exclusive, false, output);
} }
/* /*
...@@ -196,7 +225,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -196,7 +225,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* output) { bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -223,7 +252,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -223,7 +252,7 @@ class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
nthreads, input_data, input_channels, input_height, input_width, nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, ksize_width, stride_height, output_height, output_width, ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width, pool_process, exclusive, stride_width, padding_height, padding_width, pool_process, exclusive,
output_data); adaptive, output_data);
} }
}; };
...@@ -242,7 +271,8 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -242,7 +271,8 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* input_grad) { bool exclusive, bool adaptive,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -270,7 +300,7 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -270,7 +300,7 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
nthreads, input_data, output_data, output_grad_data, input_channels, nthreads, input_data, output_data, output_grad_data, input_channels,
input_height, input_width, output_height, output_width, ksize_height, input_height, input_width, output_height, output_width, ksize_height,
ksize_width, stride_height, stride_width, padding_height, padding_width, ksize_width, stride_height, stride_width, padding_height, padding_width,
pool_process, exclusive, input_grad_data); pool_process, exclusive, adaptive, input_grad_data);
} }
}; };
...@@ -359,7 +389,7 @@ __global__ void KernelPool3D( ...@@ -359,7 +389,7 @@ __global__ void KernelPool3D(
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width, const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width, const int padding_depth, const int padding_height, const int padding_width,
PoolProcess pool_process, bool exclusive, T* output_data) { PoolProcess pool_process, bool exclusive, bool adaptive, T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -368,15 +398,30 @@ __global__ void KernelPool3D( ...@@ -368,15 +398,30 @@ __global__ void KernelPool3D(
int c = (index / output_width / output_height / output_depth) % channels; int c = (index / output_width / output_height / output_depth) % channels;
int batch_idx = int batch_idx =
index / output_width / output_height / output_depth / channels; index / output_width / output_height / output_depth / channels;
int dstart = pd * stride_depth - padding_depth;
int hstart = ph * stride_height - padding_height; int dstart, dend;
int wstart = pw * stride_width - padding_width; int hstart, hend;
int dend = min(dstart + ksize_depth, input_depth); int wstart, wend;
int hend = min(hstart + ksize_height, input_height); if (adaptive) {
int wend = min(wstart + ksize_width, input_width); dstart = AdaptStartIndex(pd, input_depth, output_depth);
dend = AdaptEndIndex(pd, input_depth, output_depth);
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
dstart = pd * stride_depth - padding_depth;
hstart = ph * stride_height - padding_height;
wstart = pw * stride_width - padding_width;
dend = min(dstart + ksize_depth, input_depth);
hend = min(hstart + ksize_height, input_height);
wend = min(wstart + ksize_width, input_width);
dstart = max(dstart, 0); dstart = max(dstart, 0);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
}
T ele = pool_process.initial(); T ele = pool_process.initial();
input_data += input_data +=
(batch_idx * channels + c) * input_depth * input_height * input_width; (batch_idx * channels + c) * input_depth * input_height * input_width;
...@@ -388,7 +433,7 @@ __global__ void KernelPool3D( ...@@ -388,7 +433,7 @@ __global__ void KernelPool3D(
} }
} }
} }
int pool_size = exclusive int pool_size = (exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart) ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width; : ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
...@@ -405,28 +450,43 @@ __global__ void KernelPool3DGrad( ...@@ -405,28 +450,43 @@ __global__ void KernelPool3DGrad(
const int ksize_height, const int ksize_width, const int stride_depth, const int ksize_height, const int ksize_width, const int stride_depth,
const int stride_height, const int stride_width, const int padding_depth, const int stride_height, const int stride_width, const int padding_depth,
const int padding_height, const int padding_width, PoolProcess pool_process, const int padding_height, const int padding_width, PoolProcess pool_process,
bool exclusive, T* input_grad) { bool exclusive, bool adaptive, T* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int offsetW = index % input_width + padding_width; int w_offset = index % input_width + padding_width;
int offsetH = (index / input_width) % input_height + padding_height; int h_offset = (index / input_width) % input_height + padding_height;
int offsetD = int d_offset =
(index / input_width / input_height) % input_depth + padding_depth; (index / input_width / input_height) % input_depth + padding_depth;
int offsetC = (index / input_width / input_height / input_depth) % channels; int offsetC = (index / input_width / input_height / input_depth) % channels;
int batch_idx = index / input_width / input_height / input_depth / channels; int batch_idx = index / input_width / input_height / input_depth / channels;
int pdstart = (offsetD < ksize_depth) int pdstart, pdend;
int phstart, phend;
int pwstart, pwend;
if (adaptive) {
pdstart = d_offset * output_depth / input_depth;
pdend =
min((d_offset + 1) * output_depth / input_depth + 1, output_depth);
phstart = h_offset * output_height / input_height;
phend =
min((h_offset + 1) * output_height / input_height + 1, output_height);
pwstart = w_offset * output_width / input_width;
pwend =
min((w_offset + 1) * output_width / input_width + 1, output_width);
} else {
pdstart = (d_offset < ksize_depth)
? 0 ? 0
: (offsetD - ksize_depth) / stride_depth + 1; : (d_offset - ksize_depth) / stride_depth + 1;
int phstart = (offsetH < ksize_height) phstart = (h_offset < ksize_height)
? 0 ? 0
: (offsetH - ksize_height) / stride_height + 1; : (h_offset - ksize_height) / stride_height + 1;
int pwstart = (offsetW < ksize_width) pwstart = (w_offset < ksize_width)
? 0 ? 0
: (offsetW - ksize_width) / stride_width + 1; : (w_offset - ksize_width) / stride_width + 1;
int pdend = min((offsetD) / stride_depth + 1, output_depth); pdend = min((d_offset) / stride_depth + 1, output_depth);
int phend = min((offsetH) / stride_height + 1, output_height); phend = min((h_offset) / stride_height + 1, output_height);
int pwend = min((offsetW) / stride_width + 1, output_width); pwend = min((w_offset) / stride_width + 1, output_width);
}
T gradient = 0; T gradient = 0;
T input = input_data[index]; T input = input_data[index];
...@@ -439,6 +499,16 @@ __global__ void KernelPool3DGrad( ...@@ -439,6 +499,16 @@ __global__ void KernelPool3DGrad(
for (int ph = phstart; ph < phend; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pwstart; pw < pwend; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
// figure out the pooling size // figure out the pooling size
int pool_size;
if (adaptive) {
pool_size =
static_cast<int>(
ceil(static_cast<double>(input_depth) / ksize_depth)) *
static_cast<int>(
ceil(static_cast<double>(input_height) / ksize_height)) *
static_cast<int>(
ceil(static_cast<double>(input_width) / ksize_width));
} else {
int dstart = pd * stride_depth - padding_depth; int dstart = pd * stride_depth - padding_depth;
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
...@@ -448,9 +518,10 @@ __global__ void KernelPool3DGrad( ...@@ -448,9 +518,10 @@ __global__ void KernelPool3DGrad(
dstart = max(dstart, 0); dstart = max(dstart, 0);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
int pool_size = pool_size =
exclusive ? (dend - dstart) * (hend - hstart) * (wend - wstart) exclusive ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width; : ksize_depth * ksize_height * ksize_width;
}
int output_sub_idx = (pd * output_height + ph) * output_width + pw; int output_sub_idx = (pd * output_height + ph) * output_width + pw;
pool_process.compute(input, output_data[output_sub_idx], pool_process.compute(input, output_data[output_sub_idx],
output_grad[output_sub_idx], output_grad[output_sub_idx],
...@@ -525,7 +596,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -525,7 +596,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* output) { bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -559,7 +630,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -559,7 +630,7 @@ class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
input_width, output_depth, output_height, output_width, ksize_depth, input_width, output_depth, output_height, output_width, ksize_depth,
ksize_height, ksize_width, stride_depth, stride_height, stride_width, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
padding_depth, padding_height, padding_width, pool_process, exclusive, padding_depth, padding_height, padding_width, pool_process, exclusive,
output_data); adaptive, output_data);
} }
}; };
...@@ -578,7 +649,8 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -578,7 +649,8 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* input_grad) { bool exclusive, bool adaptive,
framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -614,7 +686,7 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> { ...@@ -614,7 +686,7 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
input_depth, input_height, input_width, output_depth, output_height, input_depth, input_height, input_width, output_depth, output_height,
output_width, ksize_depth, ksize_height, ksize_width, stride_depth, output_width, ksize_depth, ksize_height, ksize_width, stride_depth,
stride_height, stride_width, padding_depth, padding_height, stride_height, stride_width, padding_depth, padding_height,
padding_width, pool_process, exclusive, input_grad_data); padding_width, pool_process, exclusive, adaptive, input_grad_data);
} }
}; };
...@@ -703,7 +775,7 @@ __global__ void KernelMaxPool2dWithIdx( ...@@ -703,7 +775,7 @@ __global__ void KernelMaxPool2dWithIdx(
const int input_height, const int input_width, const int output_height, const int input_height, const int input_width, const int output_height,
const int output_width, const int ksize_height, const int ksize_width, const int output_width, const int ksize_height, const int ksize_width,
const int stride_height, const int stride_width, const int padding_height, const int stride_height, const int stride_width, const int padding_height,
const int padding_width, T1* output_data, T2* mask_data) { const int padding_width, bool adaptive, T1* output_data, T2* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -711,13 +783,23 @@ __global__ void KernelMaxPool2dWithIdx( ...@@ -711,13 +783,23 @@ __global__ void KernelMaxPool2dWithIdx(
int c = (index / output_width / output_height) % channels; int c = (index / output_width / output_height) % channels;
int batch_idx = index / output_width / output_height / channels; int batch_idx = index / output_width / output_height / channels;
int hstart = ph * stride_height - padding_height; int hstart, hend;
int hend = min(hstart + ksize_height, input_height); int wstart, wend;
if (adaptive) {
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
hstart = ph * stride_height - padding_height;
hend = min(hstart + ksize_height, input_height);
hstart = max(hstart, 0); hstart = max(hstart, 0);
int wstart = pw * stride_width - padding_width; wstart = pw * stride_width - padding_width;
int wend = min(wstart + ksize_width, input_width); wend = min(wstart + ksize_width, input_width);
wstart = max(wstart, 0); wstart = max(wstart, 0);
}
input_data += (batch_idx * channels + c) * input_height * input_width; input_data += (batch_idx * channels + c) * input_height * input_width;
T1 ele = -FLT_MAX; T1 ele = -FLT_MAX;
...@@ -742,36 +824,47 @@ __global__ void KernelMaxPool2DWithIdxGrad( ...@@ -742,36 +824,47 @@ __global__ void KernelMaxPool2DWithIdxGrad(
const int channels, const int input_height, const int input_width, const int channels, const int input_height, const int input_width,
const int output_height, const int output_width, const int ksize_height, const int output_height, const int output_width, const int ksize_height,
const int ksize_width, const int stride_height, const int stride_width, const int ksize_width, const int stride_height, const int stride_width,
const int padding_height, const int padding_width, T1* input_grad) { const int padding_height, const int padding_width, bool adaptive,
T1* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
int h_offset = (index / input_width) % input_height; int h_offset = (index / input_width) % input_height;
int c_offset = (index / input_width / input_height) % channels; int offsetC = (index / input_width / input_height) % channels;
int batch_idx = index / input_width / input_height / channels; int batch_idx = index / input_width / input_height / channels;
int ph_start = int phstart, phend;
int pwstart, pwend;
if (adaptive) {
phstart = h_offset * output_height / input_height;
phend =
min((h_offset + 1) * output_height / input_height + 1, output_height);
pwstart = w_offset * output_width / input_width;
pwend =
min((w_offset + 1) * output_width / input_width + 1, output_width);
} else {
phstart =
(h_offset + padding_height < ksize_height) (h_offset + padding_height < ksize_height)
? 0 ? 0
: (h_offset + padding_height - ksize_height) / stride_height + 1; : (h_offset + padding_height - ksize_height) / stride_height + 1;
int pw_start = pwstart =
(w_offset + padding_width < ksize_width) (w_offset + padding_width < ksize_width)
? 0 ? 0
: (w_offset + padding_width - ksize_width) / stride_width + 1; : (w_offset + padding_width - ksize_width) / stride_width + 1;
int ph_end = phend =
min((h_offset + padding_height) / stride_height + 1, output_height); min((h_offset + padding_height) / stride_height + 1, output_height);
int pw_end = pwend = min((w_offset + padding_width) / stride_width + 1, output_width);
min((w_offset + padding_width) / stride_width + 1, output_width); }
T1 gradient = 0; T1 gradient = 0;
int input_current_featuremap_idx = h_offset * input_width + w_offset; int input_current_featuremap_idx = h_offset * input_width + w_offset;
int output_idx = int output_idx =
(batch_idx * channels + c_offset) * output_height * output_width; (batch_idx * channels + offsetC) * output_height * output_width;
mask_data += output_idx; mask_data += output_idx;
output_grad += output_idx; output_grad += output_idx;
for (int ph = ph_start; ph < ph_end; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pw_start; pw < pw_end; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
if (mask_data[ph * output_width + pw] == input_current_featuremap_idx) if (mask_data[ph * output_width + pw] == input_current_featuremap_idx)
gradient += output_grad[ph * output_width + pw]; gradient += output_grad[ph * output_width + pw];
} }
...@@ -791,8 +884,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -791,8 +884,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
void operator()(const platform::CUDADeviceContext& context, void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
...@@ -819,7 +912,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -819,7 +912,8 @@ class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
KernelMaxPool2dWithIdx<T1, T2><<<grid, threads, 0, context.stream()>>>( KernelMaxPool2dWithIdx<T1, T2><<<grid, threads, 0, context.stream()>>>(
nthreads, input_data, input_channels, input_height, input_width, nthreads, input_data, input_channels, input_height, input_width,
output_height, output_width, ksize_height, ksize_width, stride_height, output_height, output_width, ksize_height, ksize_width, stride_height,
stride_width, padding_height, padding_width, output_data, mask_data); stride_width, padding_height, padding_width, adaptive, output_data,
mask_data);
} }
}; };
...@@ -835,7 +929,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -835,7 +929,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, const std::vector<int>& ksize, const framework::Tensor& mask, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* input_grad) { framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0]; const int batch_size = input_grad->dims()[0];
const int input_channels = input_grad->dims()[1]; const int input_channels = input_grad->dims()[1];
...@@ -862,7 +956,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -862,7 +956,7 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
KernelMaxPool2DWithIdxGrad<T1, T2><<<grid, threads, 0, context.stream()>>>( KernelMaxPool2DWithIdxGrad<T1, T2><<<grid, threads, 0, context.stream()>>>(
nthreads, output_grad_data, mask_data, input_channels, input_height, nthreads, output_grad_data, mask_data, input_channels, input_height,
input_width, output_height, output_width, ksize_height, ksize_width, input_width, output_height, output_width, ksize_height, ksize_width,
stride_height, stride_width, padding_height, padding_width, stride_height, stride_width, padding_height, padding_width, adaptive,
input_grad_data); input_grad_data);
} }
}; };
...@@ -884,7 +978,7 @@ __global__ void KernelMaxPool3DWithIdx( ...@@ -884,7 +978,7 @@ __global__ void KernelMaxPool3DWithIdx(
const int ksize_depth, const int ksize_height, const int ksize_width, const int ksize_depth, const int ksize_height, const int ksize_width,
const int stride_depth, const int stride_height, const int stride_width, const int stride_depth, const int stride_height, const int stride_width,
const int padding_depth, const int padding_height, const int padding_width, const int padding_depth, const int padding_height, const int padding_width,
T1* output_data, T2* mask_data) { bool adaptive, T1* output_data, T2* mask_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -894,15 +988,29 @@ __global__ void KernelMaxPool3DWithIdx( ...@@ -894,15 +988,29 @@ __global__ void KernelMaxPool3DWithIdx(
int batch_idx = int batch_idx =
index / output_width / output_height / output_depth / channels; index / output_width / output_height / output_depth / channels;
int dstart = pd * stride_depth - padding_depth; int dstart, dend;
int hstart = ph * stride_height - padding_height; int hstart, hend;
int wstart = pw * stride_width - padding_width; int wstart, wend;
int dend = min(dstart + ksize_depth, input_depth); if (adaptive) {
int hend = min(hstart + ksize_height, input_height); dstart = AdaptStartIndex(pd, input_depth, output_depth);
int wend = min(wstart + ksize_width, input_width); dend = AdaptEndIndex(pd, input_depth, output_depth);
hstart = AdaptStartIndex(ph, input_height, output_height);
hend = AdaptEndIndex(ph, input_height, output_height);
wstart = AdaptStartIndex(pw, input_width, output_width);
wend = AdaptEndIndex(pw, input_width, output_width);
} else {
dstart = pd * stride_depth - padding_depth;
hstart = ph * stride_height - padding_height;
wstart = pw * stride_width - padding_width;
dend = min(dstart + ksize_depth, input_depth);
hend = min(hstart + ksize_height, input_height);
wend = min(wstart + ksize_width, input_width);
dstart = max(dstart, 0); dstart = max(dstart, 0);
hstart = max(hstart, 0); hstart = max(hstart, 0);
wstart = max(wstart, 0); wstart = max(wstart, 0);
}
T1 ele = -FLT_MAX; T1 ele = -FLT_MAX;
int max_index = -1; int max_index = -1;
...@@ -932,46 +1040,58 @@ __global__ void KernelMaxPool3DWithIdxGrad( ...@@ -932,46 +1040,58 @@ __global__ void KernelMaxPool3DWithIdxGrad(
const int output_width, const int ksize_depth, const int ksize_height, const int output_width, const int ksize_depth, const int ksize_height,
const int ksize_width, const int stride_depth, const int stride_height, const int ksize_width, const int stride_depth, const int stride_height,
const int stride_width, const int padding_depth, const int padding_height, const int stride_width, const int padding_depth, const int padding_height,
const int padding_width, T1* input_grad) { const int padding_width, bool adaptive, T1* input_grad) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int w_offset = index % input_width; int w_offset = index % input_width;
int h_offset = (index / input_width) % input_height; int h_offset = (index / input_width) % input_height;
int d_offset = (index / input_width / input_height) % input_depth; int d_offset = (index / input_width / input_height) % input_depth;
int c_offset = int offsetC = (index / input_width / input_height / input_depth) % channels;
(index / input_width / input_height / input_depth) % channels;
int batch_idx = index / input_width / input_height / input_depth / channels; int batch_idx = index / input_width / input_height / input_depth / channels;
int pd_start = int pdstart, pdend;
int phstart, phend;
int pwstart, pwend;
if (adaptive) {
pdstart = d_offset * output_depth / input_depth;
pdend =
min((d_offset + 1) * output_depth / input_depth + 1, output_depth);
phstart = h_offset * output_height / input_height;
phend =
min((h_offset + 1) * output_height / input_height + 1, output_height);
pwstart = w_offset * output_width / input_width;
pwend =
min((w_offset + 1) * output_width / input_width + 1, output_width);
} else {
pdstart =
(d_offset + padding_depth < ksize_depth) (d_offset + padding_depth < ksize_depth)
? 0 ? 0
: (d_offset + padding_depth - ksize_depth) / stride_depth + 1; : (d_offset + padding_depth - ksize_depth) / stride_depth + 1;
int ph_start = phstart =
(h_offset + padding_height < ksize_height) (h_offset + padding_height < ksize_height)
? 0 ? 0
: (h_offset + padding_height - ksize_height) / stride_height + 1; : (h_offset + padding_height - ksize_height) / stride_height + 1;
int pw_start = pwstart =
(w_offset + padding_width < ksize_width) (w_offset + padding_width < ksize_width)
? 0 ? 0
: (w_offset + padding_width - ksize_width) / stride_width + 1; : (w_offset + padding_width - ksize_width) / stride_width + 1;
int pd_end = pdend = min((d_offset + padding_depth) / stride_depth + 1, output_depth);
min((d_offset + padding_depth) / stride_depth + 1, output_depth); phend =
int ph_end =
min((h_offset + padding_height) / stride_height + 1, output_height); min((h_offset + padding_height) / stride_height + 1, output_height);
int pw_end = pwend = min((w_offset + padding_width) / stride_width + 1, output_width);
min((w_offset + padding_width) / stride_width + 1, output_width); }
T1 gradient = 0; T1 gradient = 0;
int input_current_feature_map_idx = int input_current_feature_map_idx =
(d_offset * input_height + h_offset) * input_width + w_offset; (d_offset * input_height + h_offset) * input_width + w_offset;
int output_idx = (batch_idx * channels + c_offset) * output_depth * int output_idx = (batch_idx * channels + offsetC) * output_depth *
output_height * output_width; output_height * output_width;
mask += output_idx; mask += output_idx;
output_grad += output_idx; output_grad += output_idx;
for (int pd = pd_start; pd < pd_end; ++pd) { for (int pd = pdstart; pd < pdend; ++pd) {
for (int ph = ph_start; ph < ph_end; ++ph) { for (int ph = phstart; ph < phend; ++ph) {
for (int pw = pw_start; pw < pw_end; ++pw) { for (int pw = pwstart; pw < pwend; ++pw) {
if (mask[(pd * output_height + ph) * output_width + pw] == if (mask[(pd * output_height + ph) * output_width + pw] ==
input_current_feature_map_idx) input_current_feature_map_idx)
gradient += gradient +=
...@@ -994,8 +1114,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -994,8 +1114,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
void operator()(const platform::CUDADeviceContext& context, void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_channels = input.dims()[1]; const int input_channels = input.dims()[1];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
...@@ -1029,7 +1149,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -1029,7 +1149,8 @@ class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T1, T2> {
nthreads, input_data, input_channels, input_depth, input_height, nthreads, input_data, input_channels, input_depth, input_height,
input_width, output_depth, output_height, output_width, ksize_depth, input_width, output_depth, output_height, output_width, ksize_depth,
ksize_height, ksize_width, stride_depth, stride_height, stride_width, ksize_height, ksize_width, stride_depth, stride_height, stride_width,
padding_depth, padding_height, padding_width, output_data, mask_data); padding_depth, padding_height, padding_width, adaptive, output_data,
mask_data);
} }
}; };
...@@ -1045,7 +1166,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -1045,7 +1166,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, const std::vector<int>& ksize, const framework::Tensor& mask, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* input_grad) { framework::Tensor* input_grad) {
const int batch_size = input_grad->dims()[0]; const int batch_size = input_grad->dims()[0];
const int input_channels = input_grad->dims()[1]; const int input_channels = input_grad->dims()[1];
...@@ -1079,7 +1200,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> { ...@@ -1079,7 +1200,7 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T1, T2> {
nthreads, output_grad_data, mask_data, input_channels, input_depth, nthreads, output_grad_data, mask_data, input_channels, input_depth,
input_height, input_width, output_depth, output_height, output_width, input_height, input_width, output_depth, output_height, output_width,
ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height,
stride_width, padding_depth, padding_height, padding_width, stride_width, padding_depth, padding_height, padding_width, adaptive,
input_grad_data); input_grad_data);
} }
}; };
......
...@@ -68,6 +68,18 @@ class AvgPoolGrad { ...@@ -68,6 +68,18 @@ class AvgPoolGrad {
} }
}; };
/*
 * Used by adaptive pooling to locate the start index of output cell `ph`
 * when an axis of `input_size` elements is divided into `output_size` cells:
 *   start = floor(ph * input_size / output_size)
 */
HOSTDEVICE inline int AdaptStartIndex(int ph, int input_size, int output_size) {
  // Widen to double *before* multiplying: ph * input_size evaluated in int
  // can overflow for large extents, which is undefined behaviour.
  return static_cast<int>(
      floor(static_cast<double>(ph) * input_size / output_size));
}
/*
 * Used by adaptive pooling to locate the end index of output cell `ph`
 * when an axis of `input_size` elements is divided into `output_size` cells:
 *   end = ceil((ph + 1) * input_size / output_size)
 */
HOSTDEVICE inline int AdaptEndIndex(int ph, int input_size, int output_size) {
  // Widen to double *before* multiplying: (ph + 1) * input_size evaluated in
  // int can overflow for large extents, which is undefined behaviour.
  return static_cast<int>(
      ceil(static_cast<double>(ph + 1) * input_size / output_size));
}
/* /*
* \brief Getting pooling results, and calculating gradient. * \brief Getting pooling results, and calculating gradient.
* *
...@@ -102,7 +114,7 @@ class Pool2dFunctor { ...@@ -102,7 +114,7 @@ class Pool2dFunctor {
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute, const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, framework::Tensor* output); bool exclusive, bool adaptive, framework::Tensor* output);
}; };
template <typename DeviceContext, typename PoolProcess, typename T> template <typename DeviceContext, typename PoolProcess, typename T>
...@@ -114,7 +126,7 @@ class Pool2dGradFunctor { ...@@ -114,7 +126,7 @@ class Pool2dGradFunctor {
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute, const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, framework::Tensor* input_grad); bool exclusive, bool adaptive, framework::Tensor* input_grad);
}; };
template <typename DeviceContext, class T> template <typename DeviceContext, class T>
...@@ -136,7 +148,7 @@ class Pool3dFunctor { ...@@ -136,7 +148,7 @@ class Pool3dFunctor {
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute, const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, framework::Tensor* output); bool exclusive, bool adaptive, framework::Tensor* output);
}; };
template <typename DeviceContext, typename PoolProcess, typename T> template <typename DeviceContext, typename PoolProcess, typename T>
...@@ -148,7 +160,7 @@ class Pool3dGradFunctor { ...@@ -148,7 +160,7 @@ class Pool3dGradFunctor {
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_compute, const std::vector<int>& paddings, PoolProcess pool_compute,
bool exclusive, framework::Tensor* input_grad); bool exclusive, bool adaptive, framework::Tensor* input_grad);
}; };
template <typename DeviceContext, class T> template <typename DeviceContext, class T>
...@@ -176,8 +188,8 @@ class MaxPool2dWithIndexFunctor { ...@@ -176,8 +188,8 @@ class MaxPool2dWithIndexFunctor {
void operator()(const DeviceContext& context, const framework::Tensor& input, void operator()(const DeviceContext& context, const framework::Tensor& input,
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename DeviceContext, typename T1, typename T2> template <typename DeviceContext, typename T1, typename T2>
...@@ -187,7 +199,7 @@ class MaxPool2dWithIndexGradFunctor { ...@@ -187,7 +199,7 @@ class MaxPool2dWithIndexGradFunctor {
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, const std::vector<int>& ksize, const framework::Tensor& mask, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* input_grad); framework::Tensor* input_grad);
}; };
...@@ -197,8 +209,8 @@ class MaxPool3dWithIndexFunctor { ...@@ -197,8 +209,8 @@ class MaxPool3dWithIndexFunctor {
void operator()(const DeviceContext& context, const framework::Tensor& input, void operator()(const DeviceContext& context, const framework::Tensor& input,
const std::vector<int>& ksize, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask); framework::Tensor* output, framework::Tensor* mask);
}; };
template <typename DeviceContext, typename T1, typename T2> template <typename DeviceContext, typename T1, typename T2>
...@@ -208,7 +220,7 @@ class MaxPool3dWithIndexGradFunctor { ...@@ -208,7 +220,7 @@ class MaxPool3dWithIndexGradFunctor {
const framework::Tensor& output_grad, const framework::Tensor& output_grad,
const framework::Tensor& mask, const std::vector<int>& ksize, const framework::Tensor& mask, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* input_grad); framework::Tensor* input_grad);
}; };
......
...@@ -52,6 +52,7 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -52,6 +52,7 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
bool ceil_mode = ctx->Attrs().Get<bool>("ceil_mode"); bool ceil_mode = ctx->Attrs().Get<bool>("ceil_mode");
bool adaptive = ctx->Attrs().Get<bool>("adaptive");
PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5,
"Pooling intput should be 4-D or 5-D tensor."); "Pooling intput should be 4-D or 5-D tensor.");
...@@ -72,9 +73,13 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -72,9 +73,13 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
"Paddings size and pooling size should be the same."); "Paddings size and pooling size should be the same.");
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]}); std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
if (adaptive) {
output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
} else {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(in_x_dims[i + 2], ksize[i], output_shape.push_back(PoolOutputSize(
paddings[i], strides[i], ceil_mode)); in_x_dims[i + 2], ksize[i], paddings[i], strides[i], ceil_mode));
}
} }
ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Out");
...@@ -186,6 +191,14 @@ void Pool2dOpMaker::Make() { ...@@ -186,6 +191,14 @@ void Pool2dOpMaker::Make() {
"averaging calculating, otherwise, include the zero-padding. Note, it " "averaging calculating, otherwise, include the zero-padding. Note, it "
"is only used when pooling_type is avg. The defalut is True.") "is only used when pooling_type is avg. The defalut is True.")
.SetDefault(true); .SetDefault(true);
AddAttr<bool>(
"adaptive",
"(bool, default False) When true, will perform adaptive pooling instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value.")
.SetDefault(false);
AddAttr<bool>( AddAttr<bool>(
"use_cudnn", "use_cudnn",
"(bool, default false) Only used in cudnn kernel, need install cudnn") "(bool, default false) Only used in cudnn kernel, need install cudnn")
...@@ -264,6 +277,14 @@ Example: ...@@ -264,6 +277,14 @@ Example:
Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
$$ $$
For adaptive = true:
$$
hstart = floor(i * H_{in} / H_{out})
hend = ceil((i + 1) * H_{in} / H_{out})
wstart = floor(j * W_{in} / W_{out})
wend = ceil((j + 1) * W_{in} / W_{out})
Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
$$
)DOC"); )DOC");
} }
...@@ -325,6 +346,13 @@ void Pool3dOpMaker::Make() { ...@@ -325,6 +346,13 @@ void Pool3dOpMaker::Make() {
"averaging calculating, otherwise, include the zero-padding. Note, it " "averaging calculating, otherwise, include the zero-padding. Note, it "
"is only used when pooling_type is avg. The defalut is True.") "is only used when pooling_type is avg. The defalut is True.")
.SetDefault(true); .SetDefault(true);
AddAttr<bool>(
"adaptive",
"(bool, default False) When true, will perform adaptive pooling instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value.")
.SetDefault(false);
AddAttr<bool>( AddAttr<bool>(
"use_cudnn", "use_cudnn",
...@@ -376,6 +404,37 @@ Example: ...@@ -376,6 +404,37 @@ Example:
H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 \\ H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 \\
W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1 W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1
$$ $$
For exclusive = false:
$$
dstart = i * strides[0] - paddings[0]
dend = dstart + ksize[0]
hstart = j * strides[1] - paddings[1]
hend = hstart + ksize[1]
wstart = k * strides[2] - paddings[2]
wend = wstart + ksize[2]
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]}
$$
For exclusive = true:
$$
dstart = max(0, i * strides[0] - paddings[0])
dend = min(D, dstart + ksize[0])
hstart = max(0, j * strides[1] - paddings[1])
hend = min(H, hstart + ksize[1])
wstart = max(0, k * strides[2] - paddings[2])
wend = min(W, wstart + ksize[2])
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
$$
For adaptive = true:
$$
dstart = floor(i * D_{in} / D_{out})
dend = ceil((i + 1) * D_{in} / D_{out})
hstart = floor(j * H_{in} / H_{out})
hend = ceil((j + 1) * H_{in} / H_{out})
wstart = floor(k * W_{in} / W_{out})
wend = ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
$$
)DOC"); )DOC");
} }
......
...@@ -70,6 +70,7 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -70,6 +70,7 @@ class PoolKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool exclusive = context.Attr<bool>("exclusive"); bool exclusive = context.Attr<bool>("exclusive");
bool adaptive = context.Attr<bool>("adaptive");
if (context.Attr<bool>("global_pooling")) { if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0; paddings[i] = 0;
...@@ -85,7 +86,7 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -85,7 +86,7 @@ class PoolKernel : public framework::OpKernel<T> {
pool2d_forward; pool2d_forward;
paddle::operators::math::MaxPool<T> pool_process; paddle::operators::math::MaxPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
true, out); true, false, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool2dFunctor< paddle::operators::math::Pool2dFunctor<
...@@ -93,7 +94,7 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -93,7 +94,7 @@ class PoolKernel : public framework::OpKernel<T> {
pool2d_forward; pool2d_forward;
paddle::operators::math::AvgPool<T> pool_process; paddle::operators::math::AvgPool<T> pool_process;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
exclusive, out); exclusive, adaptive, out);
} }
} break; } break;
case 3: { case 3: {
...@@ -103,14 +104,14 @@ class PoolKernel : public framework::OpKernel<T> { ...@@ -103,14 +104,14 @@ class PoolKernel : public framework::OpKernel<T> {
pool3d_forward; pool3d_forward;
paddle::operators::math::MaxPool<T> pool_process; paddle::operators::math::MaxPool<T> pool_process;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
true, out); true, false, out);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
paddle::operators::math::Pool3dFunctor< paddle::operators::math::Pool3dFunctor<
DeviceContext, paddle::operators::math::AvgPool<T>, T> DeviceContext, paddle::operators::math::AvgPool<T>, T>
pool3d_forward; pool3d_forward;
paddle::operators::math::AvgPool<T> pool_process; paddle::operators::math::AvgPool<T> pool_process;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process, pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, pool_process,
exclusive, out); exclusive, adaptive, out);
} }
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
...@@ -133,6 +134,7 @@ class PoolGradKernel : public framework::OpKernel<T> { ...@@ -133,6 +134,7 @@ class PoolGradKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool exclusive = context.Attr<bool>("exclusive"); bool exclusive = context.Attr<bool>("exclusive");
bool adaptive = context.Attr<bool>("adaptive");
if (context.Attr<bool>("global_pooling")) { if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
...@@ -159,7 +161,8 @@ class PoolGradKernel : public framework::OpKernel<T> { ...@@ -159,7 +161,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
pool2d_backward; pool2d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process; paddle::operators::math::AvgPoolGrad<T> pool_process;
pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, pool2d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, pool_process, exclusive, in_x_grad); paddings, pool_process, exclusive, adaptive,
in_x_grad);
} }
} break; } break;
case 3: { case 3: {
...@@ -174,7 +177,8 @@ class PoolGradKernel : public framework::OpKernel<T> { ...@@ -174,7 +177,8 @@ class PoolGradKernel : public framework::OpKernel<T> {
pool3d_backward; pool3d_backward;
paddle::operators::math::AvgPoolGrad<T> pool_process; paddle::operators::math::AvgPoolGrad<T> pool_process;
pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides, pool3d_backward(dev_ctx, *in_x, *out, *out_grad, ksize, strides,
paddings, pool_process, exclusive, in_x_grad); paddings, pool_process, exclusive, adaptive,
in_x_grad);
} }
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
......
...@@ -40,6 +40,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ...@@ -40,6 +40,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize"); std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
bool adaptive = ctx->Attrs().Get<bool>("adaptive");
PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5,
"Pooling intput should be 4-D or 5-D tensor."); "Pooling intput should be 4-D or 5-D tensor.");
...@@ -60,10 +61,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ...@@ -60,10 +61,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
"Paddings size and pooling size should be the same."); "Paddings size and pooling size should be the same.");
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]}); std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
if (adaptive) {
output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
} else {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
output_shape.push_back(MaxPoolOutputSize(in_x_dims[i + 2], ksize[i], output_shape.push_back(MaxPoolOutputSize(in_x_dims[i + 2], ksize[i],
paddings[i], strides[i])); paddings[i], strides[i]));
} }
}
ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
ctx->SetOutputDim("Mask", framework::make_ddim(output_shape)); ctx->SetOutputDim("Mask", framework::make_ddim(output_shape));
} }
...@@ -133,6 +138,14 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -133,6 +138,14 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, default:false) Whether to use the global pooling. " "(bool, default:false) Whether to use the global pooling. "
"If global_pooling = true, ksize and paddings will be ignored.") "If global_pooling = true, ksize and paddings will be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>(
"adaptive",
"(bool, default False) When true, will perform adaptive pooling "
"instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value.")
.SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"(vector<int>, default {1, 1}), strides(height, " "(vector<int>, default {1, 1}), strides(height, "
"width) of pooling operator.") "width) of pooling operator.")
...@@ -170,6 +183,12 @@ Example: ...@@ -170,6 +183,12 @@ Example:
W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
$$ $$
For adaptive = true:
$$
H_{out} = ksize[0] W_{out} = ksize[1]
$$
)DOC"); )DOC");
} }
}; };
...@@ -209,6 +228,14 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -209,6 +228,14 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
"(bool, default false) Whether to use the global pooling. " "(bool, default false) Whether to use the global pooling. "
"If global_pooling = true, ksize and paddings will be ignored.") "If global_pooling = true, ksize and paddings will be ignored.")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>(
"adaptive",
"(bool, default False) When true, will perform adaptive pooling "
"instead, "
"output shape in H and W dimensions will be same as ksize, input data "
"will be divided into grids specify by ksize averagely and perform "
"pooling in each grid area to get output pooling value.")
.SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"(vector<int>, default {1,1,1}), strides(depth, " "(vector<int>, default {1,1,1}), strides(depth, "
"height, width) of pooling operator.") "height, width) of pooling operator.")
...@@ -247,6 +274,11 @@ Example: ...@@ -247,6 +274,11 @@ Example:
W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
$$ $$
For adaptive = true:
$$
D_{out} = ksize[0] H_{out} = ksize[1] W_{out} = ksize[2]
$$
)DOC"); )DOC");
} }
}; };
......
...@@ -36,6 +36,7 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> { ...@@ -36,6 +36,7 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize"); std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool adaptive = context.Attr<bool>("adaptive");
auto& dev_ctx = context.template device_context<DeviceContext>(); auto& dev_ctx = context.template device_context<DeviceContext>();
if (context.Attr<bool>("global_pooling")) { if (context.Attr<bool>("global_pooling")) {
...@@ -50,13 +51,15 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> { ...@@ -50,13 +51,15 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
paddle::operators::math::MaxPool2dWithIndexFunctor<DeviceContext, T1, paddle::operators::math::MaxPool2dWithIndexFunctor<DeviceContext, T1,
T2> T2>
pool2d_forward; pool2d_forward;
pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, out, mask); pool2d_forward(dev_ctx, *in_x, ksize, strides, paddings, adaptive, out,
mask);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexFunctor<DeviceContext, T1, paddle::operators::math::MaxPool3dWithIndexFunctor<DeviceContext, T1,
T2> T2>
pool3d_forward; pool3d_forward;
pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, out, mask); pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, adaptive, out,
mask);
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
} }
...@@ -75,6 +78,7 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> { ...@@ -75,6 +78,7 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
std::vector<int> ksize = context.Attr<std::vector<int>>("ksize"); std::vector<int> ksize = context.Attr<std::vector<int>>("ksize");
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
bool adaptive = context.Attr<bool>("adaptive");
if (context.Attr<bool>("global_pooling")) { if (context.Attr<bool>("global_pooling")) {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0; paddings[i] = 0;
...@@ -93,14 +97,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> { ...@@ -93,14 +97,14 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
T1, T2> T1, T2>
pool2d_backward; pool2d_backward;
pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides, pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides,
paddings, in_x_grad); paddings, adaptive, in_x_grad);
} break; } break;
case 3: { case 3: {
paddle::operators::math::MaxPool3dWithIndexGradFunctor<DeviceContext, paddle::operators::math::MaxPool3dWithIndexGradFunctor<DeviceContext,
T1, T2> T1, T2>
pool3d_backward; pool3d_backward;
pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides, pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides,
paddings, in_x_grad); paddings, adaptive, in_x_grad);
} break; } break;
default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); } default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
} }
......
...@@ -56,13 +56,13 @@ class SppKernel : public framework::OpKernel<T> { ...@@ -56,13 +56,13 @@ class SppKernel : public framework::OpKernel<T> {
math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward; math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
math::MaxPool<T> max_process; math::MaxPool<T> max_process;
pool_forward(context.template device_context<DeviceContext>(), *in_x, pool_forward(context.template device_context<DeviceContext>(), *in_x,
kernel_size, strides, paddings, max_process, true, kernel_size, strides, paddings, max_process, true, false,
&out_level); &out_level);
} else if (pooling_type == "avg") { } else if (pooling_type == "avg") {
math::Pool2dFunctor<DeviceContext, math::AvgPool<T>, T> pool_forward; math::Pool2dFunctor<DeviceContext, math::AvgPool<T>, T> pool_forward;
math::AvgPool<T> avg_process; math::AvgPool<T> avg_process;
pool_forward(context.template device_context<DeviceContext>(), *in_x, pool_forward(context.template device_context<DeviceContext>(), *in_x,
kernel_size, strides, paddings, avg_process, true, kernel_size, strides, paddings, avg_process, true, false,
&out_level); &out_level);
} }
// flatten pooling output shape // flatten pooling output shape
...@@ -156,7 +156,7 @@ class SppGradKernel : public framework::OpKernel<T> { ...@@ -156,7 +156,7 @@ class SppGradKernel : public framework::OpKernel<T> {
math::AvgPoolGrad<T> avg_process; math::AvgPoolGrad<T> avg_process;
pool_backward(context.template device_context<DeviceContext>(), *in_x, pool_backward(context.template device_context<DeviceContext>(), *in_x,
*&out_level, *&outgrad_level, kernel_size, strides, *&out_level, *&outgrad_level, kernel_size, strides,
paddings, avg_process, true, in_x_grad); paddings, avg_process, true, false, in_x_grad);
} }
} }
} }
......
...@@ -52,6 +52,8 @@ __all__ = [ ...@@ -52,6 +52,8 @@ __all__ = [
'softmax', 'softmax',
'pool2d', 'pool2d',
'pool3d', 'pool3d',
'adaptive_pool2d',
'adaptive_pool3d',
'batch_norm', 'batch_norm',
'beam_search_decode', 'beam_search_decode',
'conv2d_transpose', 'conv2d_transpose',
...@@ -2500,6 +2502,204 @@ def pool3d(input, ...@@ -2500,6 +2502,204 @@ def pool3d(input,
return pool_out return pool_out
@templatedoc(op_type="pool2d")
def adaptive_pool2d(input,
                    pool_size,
                    pool_type="max",
                    require_index=False,
                    name=None):
    """
    ${comment}

    Args:
        input (Variable): The input tensor of pooling operator. The format of
                          input tensor is NCHW, where N is batch size, C is
                          the number of channels, H is the height of the
                          feature, and W is the width of the feature.
        pool_size (list|tuple): The pool kernel size. It must be a list or
            tuple containing two integers, (pool_size_Height, pool_size_Width).
        pool_type: ${pooling_type_comment}
        require_index (bool): If true, the index of the max pooling point is
            returned along with the output. It cannot be set in average
            pooling type.
        name (str|None): A name for this layer(optional). If set None, the
                        layer will be named automatically.

    Returns:
        Variable: The pooling result. If `require_index` is true, a tuple
        `(pool_out, mask)` is returned instead.

    Raises:
        ValueError: 'pool_type' is not 'max' nor 'avg'.
        ValueError: invalid setting 'require_index' true when 'pool_type' is 'avg'.
        ValueError: 'pool_size' should be a list or tuple with length as 2.

    Examples:
        .. code-block:: python

          # suppose input data in shape of [N, C, H, W], `pool_size` is [m, n],
          # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
          # of input data into m * n grids averagely and performs poolings in each
          # grid to get output.
          # adaptive average pool performs calculations as follow:
          #
          #     for i in range(m):
          #         for j in range(n):
          #             hstart = floor(i * H / m)
          #             hend = ceil((i + 1) * H / m)
          #             wstart = floor(j * W / n)
          #             wend = ceil((j + 1) * W / n)
          #             output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
          #
          data = fluid.layers.data(
              name='data', shape=[3, 32, 32], dtype='float32')
          pool_out = fluid.layers.adaptive_pool2d(
                            input=data,
                            pool_size=[3, 3],
                            pool_type='avg')
    """
    if pool_type not in ["max", "avg"]:
        # Interpolate the offending value into the message; passing it as a
        # second exception argument would leave the '%s' unformatted.
        raise ValueError(
            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
            str(pool_type))

    if pool_type == "avg" and require_index:
        raise ValueError(
            "invalid setting 'require_index' true when 'pool_type' is 'avg'.")

    def _is_list_or_tuple_(data):
        return (isinstance(data, list) or isinstance(data, tuple))

    if not _is_list_or_tuple_(pool_size) or len(pool_size) != 2:
        raise ValueError(
            "'pool_size' should be a list or tuple with length as 2.")

    # Max pooling is dispatched to the with-index op so that the Mask output
    # is available; average pooling uses the plain pool2d op.
    if pool_type == "max":
        l_type = 'max_pool2d_with_index'
    else:
        l_type = "pool2d"

    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()
    pool_out = helper.create_variable_for_type_inference(dtype)

    outputs = {"Out": pool_out}
    if pool_type == "max":
        # NOTE(review): Mask is declared with the input dtype here; confirm
        # whether the op's index output should be declared as an integer type.
        mask = helper.create_variable_for_type_inference(dtype)
        outputs["Mask"] = mask

    helper.append_op(
        type=l_type,
        inputs={"X": input},
        outputs=outputs,
        attrs={
            "pooling_type": pool_type,
            "ksize": pool_size,
            "adaptive": True,
        })

    return (pool_out, mask) if require_index else pool_out
@templatedoc(op_type="pool3d")
def adaptive_pool3d(input,
                    pool_size,
                    pool_type="max",
                    require_index=False,
                    name=None):
    """
    ${comment}

    Args:
        input (Variable): The input tensor of pooling operator. The format of
                          input tensor is NCDHW, where N is batch size, C is
                          the number of channels, D is the depth of the
                          feature, H is the height of the feature, and W is
                          the width of the feature.
        pool_size (list|tuple): The pool kernel size. It must be a list or
            tuple containing three integers, (Depth, Height, Width).
        pool_type: ${pooling_type_comment}
        require_index (bool): If true, the index of the max pooling point is
            returned along with the output. It cannot be set in average
            pooling type.
        name (str|None): A name for this layer(optional). If set None, the
                        layer will be named automatically.

    Returns:
        Variable: The pooling result. If `require_index` is true, a tuple
        `(pool_out, mask)` is returned instead.

    Raises:
        ValueError: 'pool_type' is not 'max' nor 'avg'.
        ValueError: invalid setting 'require_index' true when 'pool_type' is 'avg'.
        ValueError: 'pool_size' should be a list or tuple with length as 3.

    Examples:
        .. code-block:: python

          # suppose input data in shape of [N, C, D, H, W], `pool_size` is [l, m, n],
          # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
          # of input data into l * m * n grids averagely and performs poolings in each
          # grid to get output.
          # adaptive average pool performs calculations as follow:
          #
          #     for i in range(l):
          #         for j in range(m):
          #             for k in range(n):
          #                 dstart = floor(i * D / l)
          #                 dend = ceil((i + 1) * D / l)
          #                 hstart = floor(j * H / m)
          #                 hend = ceil((j + 1) * H / m)
          #                 wstart = floor(k * W / n)
          #                 wend = ceil((k + 1) * W / n)
          #                 output[:, :, i, j, k] =
          #                     avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
          #
          data = fluid.layers.data(
              name='data', shape=[3, 32, 32, 32], dtype='float32')
          pool_out = fluid.layers.adaptive_pool3d(
                            input=data,
                            pool_size=[3, 3, 3],
                            pool_type='avg')
    """
    if pool_type not in ["max", "avg"]:
        # Interpolate the offending value into the message; passing it as a
        # second exception argument would leave the '%s' unformatted.
        raise ValueError(
            "Unknown pool_type: '%s'. It can only be 'max' or 'avg'." %
            str(pool_type))

    if pool_type == "avg" and require_index:
        raise ValueError(
            "invalid setting 'require_index' true when 'pool_type' is 'avg'.")

    def _is_list_or_tuple_(data):
        return (isinstance(data, list) or isinstance(data, tuple))

    if not _is_list_or_tuple_(pool_size) or len(pool_size) != 3:
        raise ValueError(
            "'pool_size' should be a list or tuple with length as 3.")

    # Max pooling is dispatched to the with-index op so that the Mask output
    # is available; average pooling uses the plain pool3d op.
    if pool_type == "max":
        l_type = 'max_pool3d_with_index'
    else:
        l_type = "pool3d"

    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()
    pool_out = helper.create_variable_for_type_inference(dtype)

    outputs = {"Out": pool_out}
    if pool_type == "max":
        # NOTE(review): Mask is declared with the input dtype here; confirm
        # whether the op's index output should be declared as an integer type.
        mask = helper.create_variable_for_type_inference(dtype)
        outputs["Mask"] = mask

    helper.append_op(
        type=l_type,
        inputs={"X": input},
        outputs=outputs,
        attrs={
            "pooling_type": pool_type,
            "ksize": pool_size,
            "adaptive": True,
        })

    return (pool_out, mask) if require_index else pool_out
def batch_norm(input, def batch_norm(input,
act=None, act=None,
is_test=False, is_test=False,
......
...@@ -233,6 +233,29 @@ class TestBook(unittest.TestCase): ...@@ -233,6 +233,29 @@ class TestBook(unittest.TestCase):
pool_stride=[1, 2], pool_stride=[1, 2],
pool_padding=(2, 1))) pool_padding=(2, 1)))
def test_adaptive_pool2d(self):
    """Build adaptive 2-D pooling layers inside a fresh program.

    Checks that the average-pooling call yields a result and that the call
    with ``require_index=True`` yields both an output and a mask.
    """
    main_program = Program()
    with program_guard(main_program):
        image = layers.data(name='x', shape=[3, 224, 224], dtype='float32')

        # Average pooling: a single output variable is expected.
        avg_out = layers.adaptive_pool2d(image, [3, 3], pool_type='avg')
        self.assertIsNotNone(avg_out)

        # With require_index=True the layer returns an (output, mask) pair.
        pooled, index_mask = layers.adaptive_pool2d(
            image, [3, 3], require_index=True)
        self.assertIsNotNone(pooled)
        self.assertIsNotNone(index_mask)
def test_adaptive_pool3d(self):
    """Build adaptive 3-D pooling layers inside a fresh program.

    Checks that the average-pooling call yields a result and that the call
    with ``require_index=True`` yields both an output and a mask.
    """
    main_program = Program()
    with program_guard(main_program):
        volume = layers.data(
            name='x', shape=[3, 244, 224, 224], dtype='float32')

        # Average pooling: a single output variable is expected.
        avg_out = layers.adaptive_pool3d(volume, [3, 3, 3], pool_type='avg')
        self.assertIsNotNone(avg_out)

        # With require_index=True the layer returns an (output, mask) pair.
        pooled, index_mask = layers.adaptive_pool3d(
            volume, [3, 3, 3], require_index=True)
        self.assertIsNotNone(pooled)
        self.assertIsNotNone(index_mask)
def test_lstm_unit(self): def test_lstm_unit(self):
program = Program() program = Program()
with program_guard(program): with program_guard(program):
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from __future__ import division
import unittest import unittest
import numpy as np import numpy as np
...@@ -21,16 +22,28 @@ import paddle.fluid.core as core ...@@ -21,16 +22,28 @@ import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
def adaptive_start_index(index, input_size, output_size):
    """Return the first input position covered by adaptive-pooling bin ``index``.

    The value is floor(index * input_size / output_size), converted to a
    Python int.
    """
    scaled_position = index * input_size / output_size
    lower_bound = np.floor(scaled_position)
    return int(lower_bound)
def adaptive_end_index(index, input_size, output_size):
    """Return the exclusive end position of adaptive-pooling bin ``index``.

    The value is ceil((index + 1) * input_size / output_size), converted to a
    Python int.
    """
    next_bin = index + 1
    scaled_position = next_bin * input_size / output_size
    upper_bound = np.ceil(scaled_position)
    return int(upper_bound)
def max_pool2D_forward_naive(x, def max_pool2D_forward_naive(x,
ksize, ksize,
strides, strides,
paddings, paddings,
global_pool=0, global_pool=0,
ceil_mode=False, ceil_mode=False,
exclusive=True): exclusive=True,
adaptive=False):
N, C, H, W = x.shape N, C, H, W = x.shape
if global_pool == 1: if global_pool == 1:
ksize = [H, W] ksize = [H, W]
if adaptive:
H_out, W_out = ksize
else:
H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1 H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else ( ) // strides[0] + 1 if ceil_mode else (
H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
...@@ -40,6 +53,12 @@ def max_pool2D_forward_naive(x, ...@@ -40,6 +53,12 @@ def max_pool2D_forward_naive(x,
out = np.zeros((N, C, H_out, W_out)) out = np.zeros((N, C, H_out, W_out))
for i in range(H_out): for i in range(H_out):
for j in range(W_out): for j in range(W_out):
if adaptive:
r_start = adaptive_start_index(i, H, ksize[0])
r_end = adaptive_end_index(i, H, ksize[0])
c_start = adaptive_start_index(j, W, ksize[1])
c_end = adaptive_end_index(j, W, ksize[1])
else:
r_start = np.max((i * strides[0] - paddings[0], 0)) r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H))
c_start = np.max((j * strides[1] - paddings[1], 0)) c_start = np.max((j * strides[1] - paddings[1], 0))
...@@ -56,10 +75,14 @@ def avg_pool2D_forward_naive(x, ...@@ -56,10 +75,14 @@ def avg_pool2D_forward_naive(x,
paddings, paddings,
global_pool=0, global_pool=0,
ceil_mode=False, ceil_mode=False,
exclusive=True): exclusive=True,
adaptive=False):
N, C, H, W = x.shape N, C, H, W = x.shape
if global_pool == 1: if global_pool == 1:
ksize = [H, W] ksize = [H, W]
if adaptive:
H_out, W_out = ksize
else:
H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1 H_out = (H - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else ( ) // strides[0] + 1 if ceil_mode else (
H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
...@@ -69,14 +92,20 @@ def avg_pool2D_forward_naive(x, ...@@ -69,14 +92,20 @@ def avg_pool2D_forward_naive(x,
out = np.zeros((N, C, H_out, W_out)) out = np.zeros((N, C, H_out, W_out))
for i in range(H_out): for i in range(H_out):
for j in range(W_out): for j in range(W_out):
if adaptive:
r_start = adaptive_start_index(i, H, ksize[0])
r_end = adaptive_end_index(i, H, ksize[0])
c_start = adaptive_start_index(j, W, ksize[1])
c_end = adaptive_end_index(j, W, ksize[1])
else:
r_start = np.max((i * strides[0] - paddings[0], 0)) r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H))
c_start = np.max((j * strides[1] - paddings[1], 0)) c_start = np.max((j * strides[1] - paddings[1], 0))
c_end = np.min((j * strides[1] + ksize[1] - paddings[1], W)) c_end = np.min((j * strides[1] + ksize[1] - paddings[1], W))
x_masked = x[:, :, r_start:r_end, c_start:c_end] x_masked = x[:, :, r_start:r_end, c_start:c_end]
field_size = ((r_end - r_start) * (c_end - c_start)) if exclusive \ field_size = ((r_end - r_start) * (c_end - c_start)) \
else (ksize[0] * ksize[1]) if (exclusive or adaptive) else (ksize[0] * ksize[1])
out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size
return out return out
...@@ -93,12 +122,13 @@ class TestPool2D_Op(OpTest): ...@@ -93,12 +122,13 @@ class TestPool2D_Op(OpTest):
self.init_pool_type() self.init_pool_type()
self.init_ceil_mode() self.init_ceil_mode()
self.init_exclusive() self.init_exclusive()
self.init_adaptive()
if self.global_pool: if self.global_pool:
self.paddings = [0 for _ in range(len(self.paddings))] self.paddings = [0 for _ in range(len(self.paddings))]
input = np.random.random(self.shape).astype(self.dtype) input = np.random.random(self.shape).astype(self.dtype)
output = self.pool2D_forward_naive( output = self.pool2D_forward_naive(
input, self.ksize, self.strides, self.paddings, self.global_pool, input, self.ksize, self.strides, self.paddings, self.global_pool,
self.ceil_mode, self.exclusive).astype(self.dtype) self.ceil_mode, self.exclusive, self.adaptive).astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
self.attrs = { self.attrs = {
...@@ -112,7 +142,8 @@ class TestPool2D_Op(OpTest): ...@@ -112,7 +142,8 @@ class TestPool2D_Op(OpTest):
'ceil_mode': self.ceil_mode, 'ceil_mode': self.ceil_mode,
'data_format': 'data_format':
'AnyLayout', # TODO(dzhwinter) : should be fix latter 'AnyLayout', # TODO(dzhwinter) : should be fix latter
'exclusive': self.exclusive 'exclusive': self.exclusive,
'adaptive': self.adaptive
} }
self.outputs = {'Out': output} self.outputs = {'Out': output}
...@@ -159,6 +190,9 @@ class TestPool2D_Op(OpTest): ...@@ -159,6 +190,9 @@ class TestPool2D_Op(OpTest):
def init_exclusive(self): def init_exclusive(self):
self.exclusive = True self.exclusive = True
def init_adaptive(self):
    """Set the default for the ``adaptive`` flag: adaptive pooling is off."""
    adaptive_enabled = False
    self.adaptive = adaptive_enabled
class TestCase1(TestPool2D_Op): class TestCase1(TestPool2D_Op):
def init_test_case(self): def init_test_case(self):
...@@ -315,5 +349,10 @@ class TestCUDNNAvgInclude(TestCase2): ...@@ -315,5 +349,10 @@ class TestCUDNNAvgInclude(TestCase2):
self.exclusive = False self.exclusive = False
class TestAvgPoolAdaptive(TestCase1):
    """Variant of TestCase1 that runs with the ``adaptive`` flag set to True."""

    def init_adaptive(self):
        """Turn the ``adaptive`` flag on for this case."""
        adaptive_enabled = True
        self.adaptive = adaptive_enabled
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from __future__ import division
import unittest import unittest
import numpy as np import numpy as np
...@@ -21,16 +22,28 @@ import paddle.fluid.core as core ...@@ -21,16 +22,28 @@ import paddle.fluid.core as core
from op_test import OpTest from op_test import OpTest
def adaptive_start_index(index, input_size, output_size):
    """Return the first input position covered by adaptive-pooling bin ``index``.

    The value is floor(index * input_size / output_size), converted to a
    Python int.
    """
    scaled_position = index * input_size / output_size
    lower_bound = np.floor(scaled_position)
    return int(lower_bound)
def adaptive_end_index(index, input_size, output_size):
    """Return the exclusive end position of adaptive-pooling bin ``index``.

    The value is ceil((index + 1) * input_size / output_size), converted to a
    Python int.
    """
    next_bin = index + 1
    scaled_position = next_bin * input_size / output_size
    upper_bound = np.ceil(scaled_position)
    return int(upper_bound)
def max_pool3D_forward_naive(x, def max_pool3D_forward_naive(x,
ksize, ksize,
strides, strides,
paddings, paddings,
global_pool=0, global_pool=0,
ceil_mode=False, ceil_mode=False,
exclusive=True): exclusive=True,
adaptive=False):
N, C, D, H, W = x.shape N, C, D, H, W = x.shape
if global_pool == 1: if global_pool == 1:
ksize = [D, H, W] ksize = [D, H, W]
if adaptive:
D_out, H_out, W_out = ksize
else:
D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1 D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else ( ) // strides[0] + 1 if ceil_mode else (
H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
...@@ -42,14 +55,26 @@ def max_pool3D_forward_naive(x, ...@@ -42,14 +55,26 @@ def max_pool3D_forward_naive(x,
W - ksize[2] + 2 * paddings[2]) // strides[2] + 1 W - ksize[2] + 2 * paddings[2]) // strides[2] + 1
out = np.zeros((N, C, D_out, H_out, W_out)) out = np.zeros((N, C, D_out, H_out, W_out))
for k in range(D_out): for k in range(D_out):
if adaptive:
d_start = adaptive_start_index(k, D, ksize[0])
d_end = adaptive_end_index(k, D, ksize[0])
else:
d_start = np.max((k * strides[0] - paddings[0], 0)) d_start = np.max((k * strides[0] - paddings[0], 0))
d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D)) d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D))
for i in range(H_out): for i in range(H_out):
h_start = np.max((i * strides[0] - paddings[0], 0)) if adaptive:
h_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) h_start = adaptive_start_index(i, H, ksize[1])
h_end = adaptive_end_index(i, H, ksize[1])
else:
h_start = np.max((i * strides[1] - paddings[1], 0))
h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H))
for j in range(W_out): for j in range(W_out):
w_start = np.max((j * strides[1] - paddings[1], 0)) if adaptive:
w_end = np.min((j * strides[1] + ksize[1] - paddings[1], W)) w_start = adaptive_start_index(j, W, ksize[2])
w_end = adaptive_end_index(j, W, ksize[2])
else:
w_start = np.max((j * strides[2] - paddings[2], 0))
w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W))
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end] x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end]
out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4))
...@@ -62,10 +87,14 @@ def avg_pool3D_forward_naive(x, ...@@ -62,10 +87,14 @@ def avg_pool3D_forward_naive(x,
paddings, paddings,
global_pool=0, global_pool=0,
ceil_mode=False, ceil_mode=False,
exclusive=True): exclusive=True,
adaptive=False):
N, C, D, H, W = x.shape N, C, D, H, W = x.shape
if global_pool == 1: if global_pool == 1:
ksize = [D, H, W] ksize = [D, H, W]
if adaptive:
D_out, H_out, W_out = ksize
else:
D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1 D_out = (D - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else ( ) // strides[0] + 1 if ceil_mode else (
H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
...@@ -77,18 +106,30 @@ def avg_pool3D_forward_naive(x, ...@@ -77,18 +106,30 @@ def avg_pool3D_forward_naive(x,
W - ksize[2] + 2 * paddings[2]) // strides[2] + 1 W - ksize[2] + 2 * paddings[2]) // strides[2] + 1
out = np.zeros((N, C, D_out, H_out, W_out)) out = np.zeros((N, C, D_out, H_out, W_out))
for k in range(D_out): for k in range(D_out):
if adaptive:
d_start = adaptive_start_index(k, D, ksize[0])
d_end = adaptive_end_index(k, D, ksize[0])
else:
d_start = np.max((k * strides[0] - paddings[0], 0)) d_start = np.max((k * strides[0] - paddings[0], 0))
d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D)) d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D))
for i in range(H_out): for i in range(H_out):
h_start = np.max((i * strides[0] - paddings[0], 0)) if adaptive:
h_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) h_start = adaptive_start_index(i, H, ksize[1])
h_end = adaptive_end_index(i, H, ksize[1])
else:
h_start = np.max((i * strides[1] - paddings[1], 0))
h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H))
for j in range(W_out): for j in range(W_out):
w_start = np.max((j * strides[1] - paddings[1], 0)) if adaptive:
w_end = np.min((j * strides[1] + ksize[1] - paddings[1], W)) w_start = adaptive_start_index(j, W, ksize[2])
w_end = adaptive_end_index(j, W, ksize[2])
else:
w_start = np.max((j * strides[2] - paddings[2], 0))
w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W))
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end] x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end]
field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \ field_size = (d_end - d_start) * (h_end - h_start) * (w_end - w_start) \
if exclusive else ksize[0] * ksize[1] * ksize[2] if (exclusive or adaptive) else ksize[0] * ksize[1] * ksize[2]
out[:, :, k, i, j] = np.sum(x_masked, axis=(2, 3, out[:, :, k, i, j] = np.sum(x_masked, axis=(2, 3,
4)) / field_size 4)) / field_size
return out return out
...@@ -105,13 +146,14 @@ class TestPool3d_Op(OpTest): ...@@ -105,13 +146,14 @@ class TestPool3d_Op(OpTest):
self.init_pool_type() self.init_pool_type()
self.init_ceil_mode() self.init_ceil_mode()
self.init_exclusive() self.init_exclusive()
self.init_adaptive()
if self.global_pool: if self.global_pool:
self.paddings = [0 for _ in range(len(self.paddings))] self.paddings = [0 for _ in range(len(self.paddings))]
input = np.random.random(self.shape).astype(self.dtype) input = np.random.random(self.shape).astype(self.dtype)
output = self.pool3D_forward_naive( output = self.pool3D_forward_naive(
input, self.ksize, self.strides, self.paddings, self.global_pool, input, self.ksize, self.strides, self.paddings, self.global_pool,
self.ceil_mode, self.exclusive).astype(self.dtype) self.ceil_mode, self.exclusive, self.adaptive).astype(self.dtype)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)} self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(input)}
self.attrs = { self.attrs = {
...@@ -124,7 +166,8 @@ class TestPool3d_Op(OpTest): ...@@ -124,7 +166,8 @@ class TestPool3d_Op(OpTest):
'ceil_mode': self.ceil_mode, 'ceil_mode': self.ceil_mode,
'data_format': 'data_format':
'AnyLayout', # TODO(dzhwinter) : should be fix latter 'AnyLayout', # TODO(dzhwinter) : should be fix latter
'exclusive': self.exclusive 'exclusive': self.exclusive,
'adaptive': self.adaptive
} }
self.outputs = {'Out': output} self.outputs = {'Out': output}
...@@ -171,6 +214,9 @@ class TestPool3d_Op(OpTest): ...@@ -171,6 +214,9 @@ class TestPool3d_Op(OpTest):
def init_exclusive(self): def init_exclusive(self):
self.exclusive = True self.exclusive = True
def init_adaptive(self):
    """Set the default for the ``adaptive`` flag: adaptive pooling is off."""
    adaptive_enabled = False
    self.adaptive = adaptive_enabled
class TestCase1(TestPool3d_Op): class TestCase1(TestPool3d_Op):
def init_test_case(self): def init_test_case(self):
...@@ -353,5 +399,10 @@ class TestCUDNNAvgInclude(TestCUDNNCase3): ...@@ -353,5 +399,10 @@ class TestCUDNNAvgInclude(TestCUDNNCase3):
self.exclusive = False self.exclusive = False
class TestAvgPoolAdaptive(TestCase1):
    """Variant of TestCase1 that runs with the ``adaptive`` flag set to True."""

    def init_adaptive(self):
        """Turn the ``adaptive`` flag on for this case."""
        adaptive_enabled = True
        self.adaptive = adaptive_enabled
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -13,33 +13,62 @@ ...@@ -13,33 +13,62 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from __future__ import division
import unittest import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False): def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
    """Return the exclusive end position of adaptive-pooling bin ``index``.

    The value is ceil((index + 1) * input_size / output_size), converted to a
    Python int.
    """
    next_bin = index + 1
    scaled_position = next_bin * input_size / output_size
    upper_bound = np.ceil(scaled_position)
    return int(upper_bound)
def max_pool3D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=False,
adaptive=False):
N, C, D, H, W = x.shape N, C, D, H, W = x.shape
if global_pool: if global_pool:
ksize = [D, H, W] ksize = [D, H, W]
paddings = [0, 0, 0] paddings = [0, 0, 0]
if adaptive:
D_out, H_out, W_out = ksize
else:
D_out = (D - ksize[0] + 2 * paddings[0]) // strides[0] + 1 D_out = (D - ksize[0] + 2 * paddings[0]) // strides[0] + 1
H_out = (H - ksize[1] + 2 * paddings[1]) // strides[1] + 1 H_out = (H - ksize[1] + 2 * paddings[1]) // strides[1] + 1
W_out = (W - ksize[2] + 2 * paddings[2]) // strides[2] + 1 W_out = (W - ksize[2] + 2 * paddings[2]) // strides[2] + 1
out = np.zeros((N, C, D_out, H_out, W_out)) out = np.zeros((N, C, D_out, H_out, W_out))
mask = np.zeros((N, C, D_out, H_out, W_out)) mask = np.zeros((N, C, D_out, H_out, W_out))
for k in range(D_out): for k in range(D_out):
if adaptive:
d_start = adaptive_start_index(k, D, ksize[0])
d_end = adaptive_end_index(k, D, ksize[0])
else:
d_start = np.max((k * strides[0] - paddings[0], 0)) d_start = np.max((k * strides[0] - paddings[0], 0))
d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D)) d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D))
for i in range(H_out): for i in range(H_out):
h_start = np.max((i * strides[0] - paddings[0], 0)) if adaptive:
h_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) h_start = adaptive_start_index(i, H, ksize[1])
h_end = adaptive_end_index(i, H, ksize[1])
else:
h_start = np.max((i * strides[1] - paddings[1], 0))
h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H))
for j in range(W_out): for j in range(W_out):
w_start = np.max((j * strides[1] - paddings[1], 0)) if adaptive:
w_end = np.min((j * strides[1] + ksize[1] - paddings[1], W)) w_start = adaptive_start_index(j, W, ksize[2])
w_end = adaptive_end_index(j, W, ksize[2])
else:
w_start = np.max((j * strides[2] - paddings[2], 0))
w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W))
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end] x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:w_end]
out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4))
...@@ -58,19 +87,33 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False): ...@@ -58,19 +87,33 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False):
return out, mask return out, mask
def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=False): def max_pool2D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=False,
adaptive=False):
N, C, H, W = x.shape N, C, H, W = x.shape
if global_pool: if global_pool:
ksize = [H, W] ksize = [H, W]
paddings = [0, 0] paddings = [0, 0]
if adaptive:
H_out, W_out = ksize
else:
H_out = (H - ksize[0] + 2 * paddings[0]) // strides[0] + 1 H_out = (H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
W_out = (W - ksize[1] + 2 * paddings[1]) // strides[1] + 1 W_out = (W - ksize[1] + 2 * paddings[1]) // strides[1] + 1
out = np.zeros((N, C, H_out, W_out)) out = np.zeros((N, C, H_out, W_out))
mask = np.zeros((N, C, H_out, W_out)) mask = np.zeros((N, C, H_out, W_out))
for i in range(H_out): for i in range(H_out):
for j in range(W_out): for j in range(W_out):
if adaptive:
r_start = adaptive_start_index(i, H, ksize[0])
r_end = adaptive_end_index(i, H, ksize[0])
c_start = adaptive_start_index(j, W, ksize[1])
c_end = adaptive_end_index(j, W, ksize[1])
else:
r_start = np.max((i * strides[0] - paddings[0], 0)) r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H)) r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H))
c_start = np.max((j * strides[1] - paddings[1], 0)) c_start = np.max((j * strides[1] - paddings[1], 0))
...@@ -95,10 +138,12 @@ class TestMaxPoolWithIndex_Op(OpTest): ...@@ -95,10 +138,12 @@ class TestMaxPoolWithIndex_Op(OpTest):
def setUp(self): def setUp(self):
self.init_test_case() self.init_test_case()
self.init_global() self.init_global()
self.init_adaptive()
input = np.random.random(self.shape).astype("float32") input = np.random.random(self.shape).astype("float32")
output, mask = self.pool_forward_naive(input, self.ksize, self.strides, output, mask = self.pool_forward_naive(input, self.ksize, self.strides,
self.paddings, self.global_pool) self.paddings, self.global_pool,
self.adaptive)
output = output.astype("float32") output = output.astype("float32")
mask = mask.astype("int32") mask = mask.astype("int32")
...@@ -107,6 +152,7 @@ class TestMaxPoolWithIndex_Op(OpTest): ...@@ -107,6 +152,7 @@ class TestMaxPoolWithIndex_Op(OpTest):
'paddings': self.paddings, 'paddings': self.paddings,
'ksize': self.ksize, 'ksize': self.ksize,
'global_pooling': self.global_pool, 'global_pooling': self.global_pool,
'adaptive': self.adaptive,
} }
self.inputs = {'X': input} self.inputs = {'X': input}
...@@ -129,6 +175,9 @@ class TestMaxPoolWithIndex_Op(OpTest): ...@@ -129,6 +175,9 @@ class TestMaxPoolWithIndex_Op(OpTest):
def init_global(self): def init_global(self):
self.global_pool = False self.global_pool = False
def init_adaptive(self):
    """Set the default for the ``adaptive`` flag: adaptive pooling is off."""
    adaptive_enabled = False
    self.adaptive = adaptive_enabled
class TestCase1(TestMaxPoolWithIndex_Op): class TestCase1(TestMaxPoolWithIndex_Op):
def init_global(self): def init_global(self):
...@@ -190,5 +239,15 @@ class TestCase7(TestCase6): ...@@ -190,5 +239,15 @@ class TestCase7(TestCase6):
self.global_pool = False self.global_pool = False
class TestCastAdaptive2d(TestCase6):
    """Variant of TestCase6 that runs with the ``adaptive`` flag set to True."""

    def init_adaptive(self):
        """Turn the ``adaptive`` flag on for this case."""
        adaptive_enabled = True
        self.adaptive = adaptive_enabled
class TestCastAdaptive3d(TestMaxPoolWithIndex_Op):
    """Variant of TestMaxPoolWithIndex_Op that runs with ``adaptive`` set to True."""

    def init_adaptive(self):
        """Turn the ``adaptive`` flag on for this case."""
        adaptive_enabled = True
        self.adaptive = adaptive_enabled
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册