diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h index 89c1f48edacbe0a4432957fe066481412db7e6e1..88418062927cd0f7714e992cc2495109da45d32f 100644 --- a/paddle/cuda/include/hl_cnn.h +++ b/paddle/cuda/include/hl_cnn.h @@ -116,6 +116,7 @@ extern void hl_maxpool_backward(const int frameCnt, * @param[in] paddingW padding width. * @param[out] tgtData output data. * @param[in] tgtStride stride between output data samples. + * @param[in] excludeMode whether to consider paddings for size. * */ extern void hl_avgpool_forward(const int frameCnt, @@ -132,7 +133,8 @@ extern void hl_avgpool_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, - const int tgtStride); + const int tgtStride, + bool excludeMode); /** * @brief Maximum pool backward. @@ -154,6 +156,7 @@ extern void hl_avgpool_forward(const int frameCnt, * @param[in] scaleB scale. * @param[out] backGrad output grad. * @param[in] outStride stride between output data samples. + * @param[in] excludeMode whether to consider paddings for size. * */ extern void hl_avgpool_backward(const int frameCnt, @@ -172,7 +175,8 @@ extern void hl_avgpool_backward(const int frameCnt, real scaleA, real scaleB, real* backGrad, - const int outStride); + const int outStride, + bool excludeMode); extern void hl_maxpool3D_forward(const int frameCnt, const real* inputData, diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/cuda/include/stub/hl_cnn_stub.h index 968ed4840ffb0623b57bd6e6d839973e109394de..706cc59a8e394b109d2b290425f4b5f51d987f28 100644 --- a/paddle/cuda/include/stub/hl_cnn_stub.h +++ b/paddle/cuda/include/stub/hl_cnn_stub.h @@ -68,7 +68,8 @@ inline void hl_avgpool_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, - const int tgtStride) {} + const int tgtStride, + const bool excludeMode) {} inline void hl_avgpool_backward(const int frameCnt, const real* outGrad, @@ -86,7 +87,8 @@ inline void hl_avgpool_backward(const int frameCnt, real scaleA, real scaleB, real* backGrad, - const int outStride) {} + const int outStride, + const bool excludeMode) {} inline void hl_maxpool3D_forward(const int frameCnt, const real* inputData, diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/cuda/src/hl_cuda_cnn.cu index 3699b1e8ae9d8f813439eaeaa760c4a9f6e100a0..2d1bc4f6d55fac4b74f4e58d40fe56aa61d19cf9 100644 --- a/paddle/cuda/src/hl_cuda_cnn.cu +++ b/paddle/cuda/src/hl_cuda_cnn.cu @@ -210,7 +210,8 @@ __global__ void KeAvgPoolForward(const int nthreads, const int padH, const int padW, real* tgtData, - const int tgtStride) { + const int tgtStride, + const bool excludeMode) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int pw = index % pooledW; @@ -224,7 +225,8 @@ __global__ void KeAvgPoolForward(const int nthreads, int wend = min(wstart + sizeX, width); hstart = max(hstart, 0); wstart = max(wstart, 0); - int pool_size = (hend - hstart) * (wend - wstart); + int poolSize = + excludeMode ? (hend - hstart) * (wend - wstart) : sizeY * sizeX; real aveval = 0; inputData += (frameNum * channels + c) * height * width; @@ -235,7 +237,7 @@ __global__ void KeAvgPoolForward(const int nthreads, } int tgtIndex = index % (pooledW * pooledH * channels) + frameNum * tgtStride; - tgtData[tgtIndex] = aveval / pool_size; + tgtData[tgtIndex] = aveval / poolSize; } } @@ -253,7 +255,8 @@ void hl_avgpool_forward(const int frameCnt, const int paddingH, const int paddingW, real* tgtData, - const int tgtStride) { + const int tgtStride, + const bool excludeMode) { int num_kernels = pooledH * pooledW * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; KeAvgPoolForward<<>>(num_kernels, @@ -270,7 +273,8 @@ void hl_avgpool_forward(const int frameCnt, paddingH, paddingW, tgtData, - tgtStride); + tgtStride, + excludeMode); CHECK_SYNC("hl_avgpool_forward failed"); } @@ -290,7 +294,8 @@ __global__ void KeAvgPoolBackward(const int nthreads, real scaleA, real scaleB, real* tgtGrad, - const int outStride) { + const int outStride, + const bool excludeMode) { int index = blockIdx.x * blockDim.x + threadIdx.x; if (index < nthreads) { int offsetW = index % width + padW; @@ -314,8 +319,9 @@ __global__ void KeAvgPoolBackward(const int nthreads, int wstart = pw * strideW - padW; int wend = min(wstart + sizeX, width); wstart = max(wstart, 0); - int poolsize = (hend - hstart) * (wend - wstart); - gradient += outGrad[ph * pooledW + pw] / poolsize; + int poolSize = + excludeMode ? (hend - hstart) * (wend - wstart) : sizeY * sizeX; + gradient += outGrad[ph * pooledW + pw] / poolSize; } } tgtGrad[index] = scaleB * tgtGrad[index] + scaleA * gradient; @@ -338,7 +344,8 @@ void hl_avgpool_backward(const int frameCnt, real scaleA, real scaleB, real* backGrad, - const int outStride) { + const int outStride, + const bool excludeMode) { int num_kernels = height * width * channels * frameCnt; int blocks = (num_kernels + 1024 - 1) / 1024; @@ -358,7 +365,8 @@ void hl_avgpool_backward(const int frameCnt, scaleA, scaleB, backGrad, - outStride); + outStride, + excludeMode); CHECK_SYNC("hl_avgpool_backward failed"); } diff --git a/paddle/gserver/layers/PoolLayer.cpp b/paddle/gserver/layers/PoolLayer.cpp index 87613a96c5b3c2da212f63e9e678bcd22308b08e..fceb389d06d8d2cb0357186bf83edda9957c6c19 100644 --- a/paddle/gserver/layers/PoolLayer.cpp +++ b/paddle/gserver/layers/PoolLayer.cpp @@ -45,6 +45,8 @@ bool PoolLayer::init(const LayerMap& layerMap, strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); + + excludeMode_ = conf.has_exclude_mode() ? conf.exclude_mode() : true; return true; } diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h index d43292ad2d4bbe1229ca59ca21bee92c9ec006a3..9df672a935868e9c61f4dd1fd47a9c309b214f12 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/gserver/layers/PoolLayer.h @@ -38,6 +38,8 @@ protected: std::string poolType_; + bool excludeMode_; + public: explicit PoolLayer(const LayerConfig& config) : Layer(config) {} diff --git a/paddle/gserver/layers/PoolProjection.cpp b/paddle/gserver/layers/PoolProjection.cpp index d90b438448eb72e72e22e9a91a3cbcd84ac7e6cb..6a9de394cee3769784a38f5512b15f52b1ed6fa1 100644 --- a/paddle/gserver/layers/PoolProjection.cpp +++ b/paddle/gserver/layers/PoolProjection.cpp @@ -36,6 +36,8 @@ PoolProjection::PoolProjection(const ProjectionConfig& config, strideY_ = conf.has_stride_y() ? conf.stride_y() : conf.stride(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); + + excludeMode_ = conf.has_exclude_mode() ? conf.exclude_mode() : true; } size_t PoolProjection::getSize() { @@ -141,7 +143,8 @@ void AvgPoolProjection::forward() { outputY_, outputX_, confPaddingY_, - confPadding_); + confPadding_, + excludeMode_); } void AvgPoolProjection::backward(const UpdateCallback& callback) { @@ -166,6 +169,7 @@ void AvgPoolProjection::backward(const UpdateCallback& callback) { 1, 1, confPaddingY_, - confPadding_); + confPadding_, + excludeMode_); } } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjection.h b/paddle/gserver/layers/PoolProjection.h index 9a75f465f6fbb2f2a928b0e36fcfbe0e510d7b3a..a0412714bca7a273e999e4d6bd552e833d20d69c 100644 --- a/paddle/gserver/layers/PoolProjection.h +++ b/paddle/gserver/layers/PoolProjection.h @@ -28,6 +28,7 @@ protected: int confPaddingY_, confPadding_; size_t channels_; std::string poolType_; + bool excludeMode_; public: PoolProjection(const ProjectionConfig& config, diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index c5359f272b4bed4d4d2483bf19d7ae482b0d33dd..2b6ba7747060c00cc72338c4ab9b148163fdb7b1 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1211,7 +1211,10 @@ void setPoolConfig(TestConfig* config, pool->set_output_y(oh); } -void testPoolLayer(const string& poolType, bool trans, bool useGpu) { +void testPoolLayer(const string& poolType, + bool trans, + bool useGpu, + bool excludeMode = true) { TestConfig config; config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); LayerInputConfig* input = config.layerConfig.add_inputs(); @@ -1219,6 +1222,7 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) { pool->set_img_size(14); pool->set_img_size_y(14); + pool->set_exclude_mode(excludeMode); setPoolConfig(&config, pool, poolType); config.layerConfig.set_size(pool->output_x() * pool->output_y() * pool->channels()); @@ -1250,16 +1254,26 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { TEST(Layer, PoolLayer) { testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); + testPoolLayer("avg-projection", + /* trans= */ false, + /* useGpu= */ false, + /* excludeMode= */ false); testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ false); #ifdef PADDLE_WITH_CUDA testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("avg-projection", + /* trans= */ false, + /* useGpu= */ true, + /* excludeMode= */ false); testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer2( + "cudnn-avg-incl-pad-pool", /* trans= */ false, /* useGpu= */ true); testPoolLayer("max-pool-with-mask", /* trans= */ false, /* useGpu= */ true); #endif } diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 88e9180690606c92cf46c5b295d80f14e5d64567..ebbbdfab1d368e83889431e77b4f4c77b71621cc 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1130,7 +1130,8 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW) { + size_t paddingW, + bool excludeMode) { CHECK(inputMat.useGpu_ == true) << "Matrix type are not equal"; real* inputData = inputMat.getData(); @@ -1153,7 +1154,8 @@ void GpuMatrix::avgPoolForward(Matrix& inputMat, paddingH, paddingW, data_, - getStride()); + getStride(), + excludeMode); } void GpuMatrix::avgPoolBackward(Matrix& outGrad, @@ -1168,7 +1170,8 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, real scaleTargets, real scaleOutput, size_t paddingH, - size_t paddingW) { + size_t paddingW, + bool excludeMode) { CHECK(outGrad.useGpu_ == true) << "Matrix type are not equal"; real* outDiff = outGrad.getData(); @@ -1194,7 +1197,8 @@ void GpuMatrix::avgPoolBackward(Matrix& outGrad, scaleTargets, scaleOutput, data_, - outGrad.getStride()); + outGrad.getStride(), + excludeMode); } void GpuMatrix::maxPool3DForward(Matrix& inputMat, @@ -2136,7 +2140,8 @@ void CpuMatrix::avgPoolForward(Matrix& input, size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW) { + size_t paddingW, + bool excludeMode) { // The main loop size_t num = input.getHeight(); size_t inLength = imgSizeH * imgSizeW; @@ -2165,7 +2170,8 @@ void CpuMatrix::avgPoolForward(Matrix& input, tgtData[ph * outputW + pw] += inData[h * imgSizeW + w]; } } - int poolSize = (hend - hstart) * (wend - wstart); + int poolSize = + excludeMode ? (hend - hstart) * (wend - wstart) : sizeY * sizeX; CHECK(poolSize); tgtData[ph * outputW + pw] /= poolSize; } @@ -2189,7 +2195,8 @@ void CpuMatrix::avgPoolBackward(Matrix& input, real scaleTargets, real scaleOutput, size_t paddingH, - size_t paddingW) { + size_t paddingW, + bool excludeMode) { size_t num = input.getHeight(); size_t channels = input.getWidth() / outputH / outputW; size_t inLength = imgSizeH * imgSizeW; @@ -2211,7 +2218,8 @@ void CpuMatrix::avgPoolBackward(Matrix& input, int wstart = pw * strideW - paddingW; int wend = std::min(wstart + sizeX, imgSizeW); wstart = std::max(wstart, 0); - int poolSize = (hend - hstart) * (wend - wstart); + int poolSize = + excludeMode ? (hend - hstart) * (wend - wstart) : sizeY * sizeX; CHECK(poolSize); for (int h = hstart; h < hend; ++h) { diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index e273f1123690e31984c97185c5a8bc5e7b92c38c..c8e690e6421668bdade4e50a61882c915b2ddc7c 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -911,7 +911,8 @@ public: size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW) { + size_t paddingW, + bool excludeMode = true) { LOG(FATAL) << "Not implemeted"; } @@ -927,9 +928,11 @@ public: real scaleTargets, real scaleOutput, size_t paddingH, - size_t paddingW) { + size_t paddingW, + bool excludeMode = true) { LOG(FATAL) << "Not implemeted"; } + /** * Pooling 3D forward operation, pick out the largest element * in the sizeX of value @@ -1458,7 +1461,8 @@ public: size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW); + size_t paddingW, + bool excludeMode = true); void avgPoolBackward(Matrix& input, size_t imgSizeH, @@ -1472,7 +1476,8 @@ public: real scaleTargets, real scaleOutput, size_t paddingH, - size_t paddingW); + size_t paddingW, + bool excludeMode = true); void maxPool3DForward(Matrix& inputMat, Matrix& maxPoolIdx, @@ -1730,7 +1735,8 @@ public: size_t outputH, size_t outputW, size_t paddingH, - size_t paddingW); + size_t paddingW, + bool excludeMode = true); void avgPoolBackward(Matrix& input, size_t imgSizeH, @@ -1744,7 +1750,8 @@ public: real scaleTargets, real scaleOutput, size_t paddingH, - size_t paddingW); + size_t paddingW, + bool excludeMode = true); void maxPool3DForward(Matrix& inputMat, Matrix& maxPoolIdx, diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 2fcdbbc8bd671f8ae911cf82c7a91091f252a82f..1fbdd5bbd82a0ae15f620fa11a14ac3126e52838 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -139,6 +139,8 @@ message PoolConfig { optional uint32 output_z = 16 [ default = 1 ]; optional uint32 img_size_z = 17 [ default = 1 ]; optional uint32 padding_z = 18 [ default = 1 ]; + + optional bool exclude_mode = 19; } message SppConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5b173694dd0e4a52c0179f12f5edd74e2c41cb8c..3fe844b883054fe6335e5a438426bebb8636c908 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1233,7 +1233,7 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf): bilinear_conf.out_size_y = bilinear.out_size_y -def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): +def parse_pool(pool, input_layer_name, pool_conf, ceil_mode, exclude_mode): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in [ 'max-projection', 'avg-projection', 'max-pool-with-mask', 'cudnn-max-pool', 'cudnn-avg-pool' @@ -1262,6 +1262,8 @@ def parse_pool(pool, input_layer_name, pool_conf, ceil_mode): pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y, pool_conf.padding_y, pool_conf.stride_y, not ceil_mode) + if exclude_mode != None: + pool_conf.exclude_mode = exclude_mode def parse_pool3d(pool, input_layer_name, pool_conf, ceil_mode): @@ -2303,7 +2305,8 @@ class NormLayer(LayerBase): class PoolLayer(LayerBase): layer_type = 'pool' - def __init__(self, name, inputs, ceil_mode=True, **xargs): + def __init__(self, name, inputs, ceil_mode=True, exclude_mode=None, + **xargs): use_mkldnn = int(g_command_config_args.get("use_mkldnn", 0)) if self.layer_type == "mkldnn_pool": config_assert(use_mkldnn, "mkldnn_pool only support MKLDNN") @@ -2314,7 +2317,7 @@ class PoolLayer(LayerBase): input_layer = self.get_input_layer(input_index) pool_conf = self.config.inputs[input_index].pool_conf parse_pool(self.inputs[input_index].pool, input_layer.name, - pool_conf, ceil_mode) + pool_conf, ceil_mode, exclude_mode) self.set_cnn_layer(name, pool_conf.output_y, pool_conf.output_x, pool_conf.channels) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index f6dc58b9c0ed0b14ad9db098892af14274aed0c1..8c5cc25d6c7b835699366e02ea8fb130ac3257dd 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -21,7 +21,7 @@ from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation from .evaluators import * from .poolings import MaxPooling, AvgPooling, MaxWithMaskPooling, BasePoolingType, \ - CudnnAvgPooling, CudnnMaxPooling + CudnnAvgPooling, CudnnAvgInclPadPooling, CudnnMaxPooling from .attrs import * from .default_decorators import * @@ -2709,7 +2709,8 @@ def img_pool_layer(input, pool_size_y=None, stride_y=None, padding_y=None, - ceil_mode=True): + ceil_mode=True, + exclude_mode=None): """ Image pooling Layer. @@ -2773,10 +2774,15 @@ def img_pool_layer(input, :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Wether to use the ceil function to calculate output height and width. + :param ceil_mode: Whether to use the ceil function to calculate output height and width. True is the default. If it is set to False, the floor function will be used. :type ceil_mode: bool + :param exclude_mode: Whether to exclude the padding cells when calculating, but only + work when pool_type is AvgPooling. If None, also exclude the padding + cells. If use cudnn, use CudnnAvgPooling or CudnnAvgInclPadPooling + as pool_type to identify the mode. + :type exclude_mode: bool :return: LayerOutput object. :rtype: LayerOutput """ @@ -2790,7 +2796,7 @@ def img_pool_layer(input, pool_type.name = 'avg' assert type(pool_type) in [AvgPooling, MaxPooling, MaxWithMaskPooling, CudnnAvgPooling, - CudnnMaxPooling], \ + CudnnMaxPooling, CudnnAvgInclPadPooling], \ "only (Cudnn)AvgPooling, (Cudnn)MaxPooling, MaxWithMaskPooling are supported" type_name = pool_type.name + '-projection' \ @@ -2819,6 +2825,7 @@ def img_pool_layer(input, padding_y=padding_y)) ], ceil_mode=ceil_mode, + exclude_mode=exclude_mode, **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput( name, diff --git a/python/paddle/trainer_config_helpers/poolings.py b/python/paddle/trainer_config_helpers/poolings.py index f45616551bcd4822c668234c3afaf6aa35cd2953..e0aeb311b3ae842aee337dbbf869e2f947d22bd9 100644 --- a/python/paddle/trainer_config_helpers/poolings.py +++ b/python/paddle/trainer_config_helpers/poolings.py @@ -16,7 +16,8 @@ __all__ = [ "BasePoolingType", "MaxPooling", "AvgPooling", "MaxWithMaskPooling", - "CudnnMaxPooling", "CudnnAvgPooling", "SumPooling", "SquareRootNPooling" + "CudnnMaxPooling", "CudnnAvgPooling", "CudnnAvgInclPadPooling", + "SumPooling", "SquareRootNPooling" ] @@ -88,6 +89,16 @@ class CudnnAvgPooling(BasePoolingType): BasePoolingType.__init__(self, "cudnn-avg-pool") +class CudnnAvgInclPadPooling(BasePoolingType): + """ + Cudnn average pooling only support GPU. Return the average value in the + pooling window taking into account the padding cells. + """ + + def __init__(self): + BasePoolingType.__init__(self, "cudnn-avg-incl-pad-pool") + + class AvgPooling(BasePoolingType): """ Average pooling.