提交 eab47459 编写于 作者: D dengkaipeng

add adaptive mode for pool.

上级 1213e283
...@@ -19,6 +19,16 @@ namespace paddle { ...@@ -19,6 +19,16 @@ namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
static inline int ADAPT_START_INDEX(int ph, int input_size, int output_size) {
return static_cast<int>(
floor(static_cast<float>(ph * input_size) / output_size));
}
static inline int ADAPT_END_INDEX(int ph, int input_size, int output_size) {
return static_cast<int>(
ceil(static_cast<float>((ph + 1) * input_size) / output_size));
}
/* /*
* All tensors are in NCHW format. * All tensors are in NCHW format.
* Ksize, strides, paddings are two elements. These two elements represent * Ksize, strides, paddings are two elements. These two elements represent
...@@ -31,7 +41,7 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -31,7 +41,7 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* output) { bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -54,13 +64,23 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -54,13 +64,23 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height); int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
T ele = pool_process.initial(); T ele = pool_process.initial();
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -68,7 +88,8 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -68,7 +88,8 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
pool_process.compute(input_data[h * input_width + w], &ele); pool_process.compute(input_data[h * input_width + w], &ele);
} }
} }
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[ph * output_width + pw] = ele; output_data[ph * output_width + pw] = ele;
...@@ -94,7 +115,7 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -94,7 +115,7 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& output, const framework::Tensor& output_grad, const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides, const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_grad_process, const std::vector<int>& paddings, PoolProcess pool_grad_process,
bool exclusive, framework::Tensor* input_grad) { bool exclusive, bool adaptive, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -118,14 +139,25 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -118,14 +139,25 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height); int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) }
int pool_size = (exclusive || adaptive)
? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
float scale = 1.0 / pool_size; float scale = 1.0 / pool_size;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
...@@ -251,7 +283,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -251,7 +283,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_process, const std::vector<int>& paddings, PoolProcess pool_process,
bool exclusive, framework::Tensor* output) { bool exclusive, bool adaptive, framework::Tensor* output) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -279,17 +311,32 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -279,17 +311,32 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) { for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
int dstart = ADAPT_START_INDEX(pd, input_depth, output_depth);
int dend = ADAPT_END_INDEX(pd, input_depth, output_depth);
} else {
int dstart = pd * stride_depth - padding_depth; int dstart = pd * stride_depth - padding_depth;
int dend = std::min(dstart + ksize_depth, input_depth); int dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height); int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw; int output_idx = (pd * output_height + ph) * output_width + pw;
T ele = pool_process.initial(); T ele = pool_process.initial();
for (int d = dstart; d < dend; ++d) { for (int d = dstart; d < dend; ++d) {
...@@ -302,7 +349,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -302,7 +349,7 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
} }
} }
int pool_size = int pool_size =
exclusive (exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart) ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width; : ksize_depth * ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
...@@ -330,7 +377,7 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -330,7 +377,7 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
const framework::Tensor& output, const framework::Tensor& output_grad, const framework::Tensor& output, const framework::Tensor& output_grad,
const std::vector<int>& ksize, const std::vector<int>& strides, const std::vector<int>& ksize, const std::vector<int>& strides,
const std::vector<int>& paddings, PoolProcess pool_grad_process, const std::vector<int>& paddings, PoolProcess pool_grad_process,
bool exclusive, framework::Tensor* input_grad) { bool exclusive, bool adaptive, framework::Tensor* input_grad) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_depth = input.dims()[2]; const int input_depth = input.dims()[2];
const int input_height = input.dims()[3]; const int input_height = input.dims()[3];
...@@ -359,21 +406,35 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> { ...@@ -359,21 +406,35 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) { for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
int dstart = ADAPT_START_INDEX(pd, input_depth, output_depth);
int dend = ADAPT_END_INDEX(pd, input_depth, output_depth);
} else {
int dstart = pd * stride_depth - padding_depth; int dstart = pd * stride_depth - padding_depth;
int dend = std::min(dstart + ksize_depth, input_depth); int dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height); int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
int pool_size = int pool_size =
exclusive (exclusive || adaptive)
? (dend - dstart) * (hend - hstart) * (wend - wstart) ? (dend - dstart) * (hend - hstart) * (wend - wstart)
: ksize_depth * ksize_height * ksize_width; : ksize_depth * ksize_height * ksize_width;
float scale = 1.0 / pool_size; float scale = 1.0 / pool_size;
...@@ -517,8 +578,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -517,8 +578,8 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
void operator()(const platform::CPUDeviceContext& context, void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, const std::vector<int>& ksize, const framework::Tensor& input, const std::vector<int>& ksize,
const std::vector<int>& strides, const std::vector<int>& strides,
const std::vector<int>& paddings, framework::Tensor* output, const std::vector<int>& paddings, bool adaptive,
framework::Tensor* mask) { framework::Tensor* output, framework::Tensor* mask) {
const int batch_size = input.dims()[0]; const int batch_size = input.dims()[0];
const int input_height = input.dims()[2]; const int input_height = input.dims()[2];
const int input_width = input.dims()[3]; const int input_width = input.dims()[3];
...@@ -541,13 +602,23 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -541,13 +602,23 @@ class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height); int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
T1 ele = static_cast<T1>(-FLT_MAX); T1 ele = static_cast<T1>(-FLT_MAX);
int index = -1; int index = -1;
...@@ -666,17 +737,32 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> { ...@@ -666,17 +737,32 @@ class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T1, T2> {
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) { for (int c = 0; c < output_channels; ++c) {
for (int pd = 0; pd < output_depth; ++pd) { for (int pd = 0; pd < output_depth; ++pd) {
if (adaptive) {
int dstart = ADAPT_START_INDEX(pd, input_depth, output_depth);
int dend = ADAPT_END_INDEX(pd, input_depth, output_depth);
} else {
int dstart = pd * stride_depth - padding_depth; int dstart = pd * stride_depth - padding_depth;
int dend = std::min(dstart + ksize_depth, input_depth); int dend = std::min(dstart + ksize_depth, input_depth);
dstart = std::max(dstart, 0); dstart = std::max(dstart, 0);
}
for (int ph = 0; ph < output_height; ++ph) { for (int ph = 0; ph < output_height; ++ph) {
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height); int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0); hstart = std::max(hstart, 0);
}
for (int pw = 0; pw < output_width; ++pw) { for (int pw = 0; pw < output_width; ++pw) {
if (adaptive) {
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width); int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
}
int output_idx = (pd * output_height + ph) * output_width + pw; int output_idx = (pd * output_height + ph) * output_width + pw;
T1 ele = static_cast<T1>(-FLT_MAX); T1 ele = static_cast<T1>(-FLT_MAX);
......
...@@ -29,7 +29,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -29,7 +29,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
const int ksize_width, const int stride_height, const int ksize_width, const int stride_height,
const int stride_width, const int padding_height, const int stride_width, const int padding_height,
const int padding_width, PoolProcess pool_process, const int padding_width, PoolProcess pool_process,
bool exclusive, T* output_data) { bool exclusive, bool adaptive, T* output_data) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
index += blockDim.x * gridDim.x) { index += blockDim.x * gridDim.x) {
int pw = index % output_width; int pw = index % output_width;
...@@ -37,6 +37,13 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -37,6 +37,13 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
int c = (index / output_width / output_height) % channels; int c = (index / output_width / output_height) % channels;
int batch_idx = index / output_width / output_height / channels; int batch_idx = index / output_width / output_height / channels;
if (adaptive) {
int hstart = ADAPT_START_INDEX(ph, input_height, output_height);
int hend = ADAPT_END_INDEX(ph, input_height, output_height);
int wstart = ADAPT_START_INDEX(pw, input_width, output_width);
int wend = ADAPT_END_INDEX(pw, input_width, output_width);
} else {
int hstart = ph * stride_height - padding_height; int hstart = ph * stride_height - padding_height;
int hend = min(hstart + ksize_height, input_height); int hend = min(hstart + ksize_height, input_height);
hstart = max(hstart, 0); hstart = max(hstart, 0);
...@@ -44,6 +51,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -44,6 +51,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
int wstart = pw * stride_width - padding_width; int wstart = pw * stride_width - padding_width;
int wend = min(wstart + ksize_width, input_width); int wend = min(wstart + ksize_width, input_width);
wstart = max(wstart, 0); wstart = max(wstart, 0);
}
input_data += (batch_idx * channels + c) * input_height * input_width; input_data += (batch_idx * channels + c) * input_height * input_width;
T ele = pool_process.initial(); T ele = pool_process.initial();
...@@ -52,7 +60,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data, ...@@ -52,7 +60,7 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
pool_process.compute(input_data[h * input_width + w], &ele); pool_process.compute(input_data[h * input_width + w], &ele);
} }
} }
int pool_size = exclusive ? (hend - hstart) * (wend - wstart) int pool_size = (exclusive || adaptive) ? (hend - hstart) * (wend - wstart)
: ksize_height * ksize_width; : ksize_height * ksize_width;
pool_process.finalize(static_cast<T>(pool_size), &ele); pool_process.finalize(static_cast<T>(pool_size), &ele);
output_data[index] = ele; output_data[index] = ele;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册