diff --git a/paddle/fluid/operators/math/pooling.cc b/paddle/fluid/operators/math/pooling.cc
index 97a2e81c84c060a8be57db6274839ee39edf466c..b871851798e48e6b598cb4ab8e2e42db478a3820 100644
--- a/paddle/fluid/operators/math/pooling.cc
+++ b/paddle/fluid/operators/math/pooling.cc
@@ -11,8 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-
 #include "paddle/fluid/operators/math/pooling.h"
+#include <algorithm>
+#include <vector>

 namespace paddle {
 namespace operators {
@@ -27,9 +28,10 @@ template <typename PoolProcess, class T>
 class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_process, framework::Tensor* output) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_process,
+                  framework::Tensor* output) {
     const int batch_size = input.dims()[0];
     const int input_height = input.dims()[2];
     const int input_width = input.dims()[3];
@@ -63,11 +65,11 @@ class Pool2dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
           T ele = pool_process.initial();
           for (int h = hstart; h < hend; ++h) {
             for (int w = wstart; w < wend; ++w) {
-              pool_process.compute(ele, input_data[h * input_width + w]);
+              pool_process.compute(input_data[h * input_width + w], &ele);
             }
           }
           int pool_size = (hend - hstart) * (wend - wstart);
-          pool_process.finalize(ele, (static_cast<T>(pool_size)));
+          pool_process.finalize(static_cast<T>(pool_size), &ele);
           output_data[ph * output_width + pw] = ele;
         }
       }
@@ -86,13 +88,12 @@ template <typename PoolProcess, class T>
 class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input,
-                  const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_grad_process,
-                  framework::Tensor* input_grad) {
+  void operator()(
+      const platform::CPUDeviceContext& context, const framework::Tensor& input,
+      const framework::Tensor& output, const framework::Tensor& output_grad,
+      const std::vector<int>& ksize, const std::vector<int>& strides,
+      const std::vector<int>& paddings, PoolProcess pool_grad_process,
+      framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_height = input.dims()[2];
     const int input_width = input.dims()[3];
@@ -131,8 +132,8 @@ class Pool2dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
                     input_data[h * input_width + w],
                     output_data[ph * output_width + pw],
                     output_grad_data[ph * output_width + pw],
-                    input_grad_data[h * input_width + w],
-                    static_cast<T>(scale));
+                    static_cast<T>(scale),
+                    input_grad_data + h * input_width + w);
               }
             }
           }
@@ -154,12 +155,11 @@ template <class T>
 class MaxPool2dGradFunctor<platform::CPUDeviceContext, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input,
-                  const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  framework::Tensor* input_grad) {
+  void operator()(
+      const platform::CPUDeviceContext& context, const framework::Tensor& input,
+      const framework::Tensor& output, const framework::Tensor& output_grad,
+      const std::vector<int>& ksize, const std::vector<int>& strides,
+      const std::vector<int>& paddings, framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_height = input.dims()[2];
     const int input_width = input.dims()[3];
@@ -246,9 +246,10 @@ template <typename PoolProcess, class T>
 class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_process, framework::Tensor* output) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_process,
+                  framework::Tensor* output) {
     const int batch_size = input.dims()[0];
     const int input_depth = input.dims()[2];
     const int input_height = input.dims()[3];
@@ -293,14 +294,14 @@ class Pool3dFunctor<platform::CPUDeviceContext, PoolProcess, T> {
             for (int h = hstart; h < hend; ++h) {
               for (int w = wstart; w < wend; ++w) {
                 pool_process.compute(
-                    ele,
-                    input_data[(d * input_height + h) * input_width + w]);
+                    input_data[(d * input_height + h) * input_width + w],
+                    &ele);
               }
             }
           }
           int pool_size =
               (dend - dstart) * (hend - hstart) * (wend - wstart);
-          pool_process.finalize(ele, static_cast<T>(pool_size));
+          pool_process.finalize(static_cast<T>(pool_size), &ele);
           output_data[output_idx] = ele;
         }
       }
@@ -320,13 +321,12 @@ template <typename PoolProcess, class T>
 class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input,
-                  const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_grad_process,
-                  framework::Tensor* input_grad) {
+  void operator()(
+      const platform::CPUDeviceContext& context, const framework::Tensor& input,
+      const framework::Tensor& output, const framework::Tensor& output_grad,
+      const std::vector<int>& ksize, const std::vector<int>& strides,
+      const std::vector<int>& paddings, PoolProcess pool_grad_process,
+      framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_depth = input.dims()[2];
     const int input_height = input.dims()[3];
@@ -379,8 +379,8 @@ class Pool3dGradFunctor<platform::CPUDeviceContext, PoolProcess, T> {
                       (pd * output_height + ph) * output_width + pw;
                   pool_grad_process.compute(
                       input_data[input_idx], output_data[output_idx],
-                      output_grad_data[output_idx],
-                      input_grad_data[input_idx], static_cast<T>(scale));
+                      output_grad_data[output_idx], static_cast<T>(scale),
+                      input_grad_data + input_idx);
                 }
               }
             }
@@ -404,12 +404,11 @@ template <class T>
 class MaxPool3dGradFunctor<platform::CPUDeviceContext, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input,
-                  const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  framework::Tensor* input_grad) {
+  void operator()(
+      const platform::CPUDeviceContext& context, const framework::Tensor& input,
+      const framework::Tensor& output, const framework::Tensor& output_grad,
+      const std::vector<int>& ksize, const std::vector<int>& strides,
+      const std::vector<int>& paddings, framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_depth = input.dims()[2];
     const int input_height = input.dims()[3];
@@ -510,9 +509,10 @@ template <typename T>
 class MaxPool2dWithIndexFunctor<platform::CPUDeviceContext, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  framework::Tensor* output, framework::Tensor* mask) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, framework::Tensor* output,
+                  framework::Tensor* mask) {
     const int batch_size = input.dims()[0];
     const int input_height = input.dims()[2];
     const int input_width = input.dims()[3];
@@ -576,8 +576,9 @@ class MaxPool2dWithIndexGradFunctor<platform::CPUDeviceContext, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& mask, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_height = input_grad->dims()[2];
@@ -628,9 +629,10 @@ template <typename T>
 class MaxPool3dWithIndexFunctor<platform::CPUDeviceContext, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  framework::Tensor* output, framework::Tensor* mask) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, framework::Tensor* output,
+                  framework::Tensor* mask) {
     const int batch_size = input.dims()[0];
     const int input_depth = input.dims()[2];
     const int input_height = input.dims()[3];
@@ -708,8 +710,9 @@ class MaxPool3dWithIndexGradFunctor<platform::CPUDeviceContext, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& mask, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_depth = input_grad->dims()[2];
diff --git a/paddle/fluid/operators/math/pooling.cu b/paddle/fluid/operators/math/pooling.cu
index 267f8c409df301f9b1a8c68f337473198cf827f4..b1c76350d1724629bae175abf47e6671a1532242 100644
--- a/paddle/fluid/operators/math/pooling.cu
+++ b/paddle/fluid/operators/math/pooling.cu
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

+#include <algorithm>
+#include <vector>
 #include "paddle/fluid/operators/math/pooling.h"
 #include "paddle/fluid/platform/cuda_primitives.h"

@@ -47,11 +49,11 @@ __global__ void KernelPool2D(const int nthreads, const T* input_data,
     T ele = pool_process.initial();
     for (int h = hstart; h < hend; ++h) {
       for (int w = wstart; w < wend; ++w) {
-        pool_process.compute(ele, input_data[h * input_width + w]);
+        pool_process.compute(input_data[h * input_width + w], &ele);
       }
     }
     int pool_size = (hend - hstart) * (wend - wstart);
-    pool_process.finalize(ele, (static_cast<T>(pool_size)));
+    pool_process.finalize(static_cast<T>(pool_size), &ele);
     output_data[index] = ele;
   }
 }
@@ -96,8 +98,8 @@ __global__ void KernelPool2DGrad(
         int pool_size = (hend - hstart) * (wend - wstart);
         int output_sub_idx = ph * output_width + pw;
         pool_process.compute(input, output_data[output_sub_idx],
-                             output_grad[output_sub_idx], gradient,
-                             static_cast<T>(1.0 / pool_size));
+                             output_grad[output_sub_idx],
+                             static_cast<T>(1.0 / pool_size), &gradient);
       }
     }
     input_grad[index] = gradient;
@@ -158,9 +160,10 @@ template <typename PoolProcess, class T>
 class Pool2dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_process, framework::Tensor* output) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_process,
+                  framework::Tensor* output) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_height = input.dims()[2];
@@ -201,9 +204,11 @@ class Pool2dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_process, framework::Tensor* input_grad) {
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_process,
+                  framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_height = input.dims()[2];
@@ -246,8 +251,10 @@ class MaxPool2dGradFunctor<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -340,12 +347,12 @@ __global__ void KernelPool3D(const int nthreads, const T* input_data,
       for (int h = hstart; h < hend; ++h) {
         for (int w = wstart; w < wend; ++w) {
           pool_process.compute(
-              ele, input_data[(d * input_height + h) * input_width + w]);
+              input_data[(d * input_height + h) * input_width + w], &ele);
         }
       }
     }
     int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
-    pool_process.finalize(ele, static_cast<T>(pool_size));
+    pool_process.finalize(static_cast<T>(pool_size), &ele);
     output_data[index] = ele;
   }
 }
@@ -405,8 +412,8 @@ __global__ void KernelPool3DGrad(
           int pool_size = (dend - dstart) * (hend - hstart) * (wend - wstart);
           int output_sub_idx = (pd * output_height + ph) * output_width + pw;
           pool_process.compute(input, output_data[output_sub_idx],
-                               output_grad[output_sub_idx], gradient,
-                               static_cast<T>(1.0 / pool_size));
+                               output_grad[output_sub_idx],
+                               static_cast<T>(1.0 / pool_size), &gradient);
         }
       }
     }
@@ -474,9 +481,10 @@ template <typename PoolProcess, class T>
 class Pool3dFunctor<platform::CUDADeviceContext, PoolProcess, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_process, framework::Tensor* output) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_process,
+                  framework::Tensor* output) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_depth = input.dims()[2];
@@ -525,9 +533,11 @@ class Pool3dGradFunctor<platform::CUDADeviceContext, PoolProcess, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_process, framework::Tensor* input_grad) {
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_process,
+                  framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_depth = input.dims()[2];
@@ -578,8 +588,10 @@ class MaxPool3dGradFunctor<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -736,9 +748,10 @@ template <typename T>
 class MaxPool2dWithIndexFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  framework::Tensor* output, framework::Tensor* mask) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, framework::Tensor* output,
+                  framework::Tensor* mask) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_height = input.dims()[2];
@@ -779,8 +792,9 @@ class MaxPool2dWithIndexGradFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& mask, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_channels = input_grad->dims()[1];
@@ -937,9 +951,10 @@ template <typename T>
 class MaxPool3dWithIndexFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
-                  const framework::Tensor& input, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  framework::Tensor* output, framework::Tensor* mask) {
+                  const framework::Tensor& input, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, framework::Tensor* output,
+                  framework::Tensor* mask) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_depth = input.dims()[2];
@@ -987,8 +1002,9 @@ class MaxPool3dWithIndexGradFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& mask, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_channels = input_grad->dims()[1];
diff --git a/paddle/fluid/operators/math/pooling.h b/paddle/fluid/operators/math/pooling.h
index 74cb42f0d02086a6776b22d57832757ae3ffc470..2538d739cce95d1b2fc5b3f905af5e6d94cf7af5 100644
--- a/paddle/fluid/operators/math/pooling.h
+++ b/paddle/fluid/operators/math/pooling.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
@@ -23,8 +24,8 @@ namespace operators {
 namespace math {

 #define FLT_MAX \
-  __FLT_MAX__  // It might need to be placed in another file, but I'm still
-               // wondering where to put it.
+  __FLT_MAX__  // TODO(zcd) :It might need to be placed in another file, but I'm
+               // still wondering where to put it.

 /*
  * \brief Extracting simple operations from pooling.
@@ -40,33 +41,33 @@ template <class T>
 class MaxPool {
  public:
   DEVICE inline T initial() { return static_cast<T>(-FLT_MAX); }
-  DEVICE inline void compute(T& y, const T& x) { y = y > x ? y : x; }
-  DEVICE inline void finalize(T& y, const T& pool_field) {}
+  DEVICE inline void compute(const T& x, T* y) { *y = *y > x ? *y : x; }
+  DEVICE inline void finalize(const T& pool_field, T* y) {}
 };

 template <class T>
 class AvgPool {
  public:
   DEVICE inline T initial() { return static_cast<T>(0); }
-  DEVICE inline void compute(T& y, const T& x) { y += x; }
-  DEVICE inline void finalize(T& y, const T& pool_field) { y /= pool_field; }
+  DEVICE inline void compute(const T& x, T* y) { *y += x; }
+  DEVICE inline void finalize(const T& pool_field, T* y) { *y /= pool_field; }
 };

 template <class T>
 class MaxPoolGrad {
  public:
-  DEVICE inline void compute(const T& x, const T& y, const T& dy, T& dx,
-                             T scale) {
-    dx += dy * (x == y);
+  DEVICE inline void compute(const T& x, const T& y, const T& dy, T scale,
+                             T* dx) {
+    *dx += dy * (x == y);
   }
 };

 template <class T>
 class AvgPoolGrad {
  public:
-  DEVICE inline void compute(const T& x, const T& y, const T& dy, T& dx,
-                             T scale) {
-    dx += (scale * dy);
+  DEVICE inline void compute(const T& x, const T& y, const T& dy, T scale,
+                             T* dx) {
+    *dx += (scale * dy);
   }
 };

@@ -88,8 +89,9 @@ template <typename DeviceContext, typename PoolProcess, typename T>
 class Pool2dFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  std::vector<int>& ksize, std::vector<int>& strides,
-                  std::vector<int>& paddings, PoolProcess pool_compute,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_compute,
                   framework::Tensor* output);
 };

@@ -98,9 +100,11 @@ class Pool2dGradFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_compute, framework::Tensor* input_grad);
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_compute,
+                  framework::Tensor* input_grad);
 };

 template <typename DeviceContext, class T>
@@ -108,8 +112,10 @@ class MaxPool2dGradFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad);
 };

@@ -117,8 +123,9 @@ template <typename DeviceContext, typename PoolProcess, typename T>
 class Pool3dFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  std::vector<int>& ksize, std::vector<int>& strides,
-                  std::vector<int>& paddings, PoolProcess pool_compute,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_compute,
                   framework::Tensor* output);
 };

@@ -127,9 +134,11 @@ class Pool3dGradFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
-                  PoolProcess pool_compute, framework::Tensor* input_grad);
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, PoolProcess pool_compute,
+                  framework::Tensor* input_grad);
 };

 template <typename DeviceContext, class T>
@@ -137,8 +146,10 @@ class MaxPool3dGradFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& output_grad,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad);
 };

@@ -153,8 +164,9 @@ template <typename DeviceContext, typename T>
 class MaxPool2dWithIndexFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  std::vector<int>& ksize, std::vector<int>& strides,
-                  std::vector<int>& paddings, framework::Tensor* output,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, framework::Tensor* output,
                   framework::Tensor* mask);
 };

@@ -163,8 +175,9 @@ class MaxPool2dWithIndexGradFunctor {
  public:
   void operator()(const DeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& mask, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad);
 };

@@ -172,8 +185,9 @@ template <typename DeviceContext, typename T>
 class MaxPool3dWithIndexFunctor {
  public:
   void operator()(const DeviceContext& context, const framework::Tensor& input,
-                  std::vector<int>& ksize, std::vector<int>& strides,
-                  std::vector<int>& paddings, framework::Tensor* output,
+                  const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings, framework::Tensor* output,
                   framework::Tensor* mask);
 };

@@ -182,8 +196,9 @@ class MaxPool3dWithIndexGradFunctor {
  public:
   void operator()(const DeviceContext& context,
                   const framework::Tensor& output_grad,
-                  const framework::Tensor& mask, std::vector<int>& ksize,
-                  std::vector<int>& strides, std::vector<int>& paddings,
+                  const framework::Tensor& mask, const std::vector<int>& ksize,
+                  const std::vector<int>& strides,
+                  const std::vector<int>& paddings,
                   framework::Tensor* input_grad);
 };
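
Note (not part of the patch): after this change the element-wise pooling helpers take the input element by const reference and write the running result through a pointer (compute(const T& x, T* y), finalize(const T& pool_field, T* y)), and ksize/strides/paddings are passed as const std::vector<int>&. The sketch below is a minimal, host-only illustration of that calling convention; the local AvgPool stand-in mirrors paddle::operators::math::AvgPool with the DEVICE qualifier dropped, and PoolWindow is a hypothetical helper, not code from this patch.

    #include <vector>

    // Stand-in for paddle::operators::math::AvgPool<T> after this patch.
    template <class T>
    class AvgPool {
     public:
      inline T initial() { return static_cast<T>(0); }
      // New argument order: input element first, accumulator passed by pointer.
      inline void compute(const T& x, T* y) { *y += x; }
      // Divide the accumulated sum by the pooling window size.
      inline void finalize(const T& pool_field, T* y) { *y /= pool_field; }
    };

    // Hypothetical helper: pools one flattened window the way the refactored
    // Pool2dFunctor/Pool3dFunctor loops now drive their PoolProcess object.
    template <class T>
    T PoolWindow(const std::vector<T>& window) {
      AvgPool<T> pool_process;
      T ele = pool_process.initial();
      for (const T& x : window) {
        pool_process.compute(x, &ele);
      }
      pool_process.finalize(static_cast<T>(window.size()), &ele);
      return ele;  // e.g. PoolWindow<float>({1.f, 2.f, 3.f}) == 2.f
    }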