From e2c071358914fdead0a86348fd91c8227e78f280 Mon Sep 17 00:00:00 2001
From: qijun
Date: Tue, 8 Nov 2016 10:12:04 +0000
Subject: [PATCH] follow comments

---
 paddle/cuda/include/hl_cnn.h                  |   8 +-
 paddle/gserver/layers/PoolProjection.cpp      |  22 ++--
 paddle/gserver/layers/PoolProjection.h        |   1 +
 .../layers/SpatialPyramidPoolLayer.cpp        |  14 ++-
 .../paddle/trainer_config_helpers/layers.py   | 111 +++++++++---------
 .../tests/configs/generate_protostr.sh        |   2 -
 6 files changed, 80 insertions(+), 78 deletions(-)

diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/cuda/include/hl_cnn.h
index 4bd9d5e7c9e..de6c2fb8f28 100644
--- a/paddle/cuda/include/hl_cnn.h
+++ b/paddle/cuda/include/hl_cnn.h
@@ -91,7 +91,7 @@ extern void hl_expand_feature2col(
 * @param[in]   paddingH    padding height.
 * @param[in]   paddingW    padding width.
 * @param[out]  tgtData     output data.
- * @param[in]   tgtStride   output data stride.
+ * @param[in]   tgtStride   stride between output data samples.
 *
 */
 extern void hl_maxpool_forward(
@@ -125,7 +125,7 @@
 * @param[in]   paddingH    padding height.
 * @param[in]   paddingW    padding width.
 * @param[out]  targetGrad  output grad.
- * @param[in]   outStride   output grad data stride.
+ * @param[in]   outStride   stride between output data samples.
 *
 */
 extern void hl_maxpool_backward(
@@ -157,7 +157,7 @@
 * @param[in]   paddingH    padding height.
 * @param[in]   paddingW    padding width.
 * @param[out]  tgtData     output data.
- * @param[in]   tgtStride   output data stride.
+ * @param[in]   tgtStride   stride between output data samples.
 *
 */
 extern void hl_avgpool_forward(
@@ -189,7 +189,7 @@
 * @param[in]   scaleA      scale.
 * @param[in]   scaleB      scale.
 * @param[out]  backGrad    output grad.
- * @param[in]   outStride   output grad data stride.
+ * @param[in]   outStride   stride between output data samples.
 *
 */
 extern void hl_avgpool_backward(
diff --git a/paddle/gserver/layers/PoolProjection.cpp b/paddle/gserver/layers/PoolProjection.cpp
index e10788e9264..8c7d027c072 100644
--- a/paddle/gserver/layers/PoolProjection.cpp
+++ b/paddle/gserver/layers/PoolProjection.cpp
@@ -34,9 +34,9 @@ PoolProjection* PoolProjection::create(const ProjectionConfig& config,
 void MaxPoolProjection::forward() {
   MatrixPtr inputV = in_->value;
   MatrixPtr outV = out_->value;
-  outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
-                       sizeX_, sizeY_, strideY_, stride_,
-                       outputY_, outputX_, confPaddingY_, confPadding_);
+  outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
+                       strideY_, stride_, outputY_, outputX_, confPaddingY_,
+                       confPadding_);
 }
 
 void MaxPoolProjection::backward(const UpdateCallback& callback) {
@@ -50,17 +50,16 @@ void MaxPoolProjection::backward(const UpdateCallback& callback) {
     return;
   }
   inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV,
-                             sizeX_, sizeY_,
-                             strideY_, stride_, outputY_, outputX_, 1, 1,
-                             confPaddingY_, confPadding_);
+                             sizeX_, sizeY_, strideY_, stride_, outputY_,
+                             outputX_, 1, 1, confPaddingY_, confPadding_);
 }
 
 void AvgPoolProjection::forward() {
   MatrixPtr inputV = in_->value;
   MatrixPtr outV = out_->value;
-  outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
-                       sizeX_, sizeY_, strideY_, stride_,
-                       outputY_, outputX_, confPaddingY_, confPadding_);
+  outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
+                       strideY_, stride_, outputY_, outputX_, confPaddingY_,
+                       confPadding_);
 }
 
 void AvgPoolProjection::backward(const UpdateCallback& callback) {
@@ -73,9 +72,8 @@
     return;
   }
 
-  inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_,
-                             sizeX_, sizeY_, strideY_, stride_,
-                             outputY_, outputX_, 1, 1,
+  inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_, sizeX_, sizeY_,
+                             strideY_, stride_, outputY_, outputX_, 1, 1,
                              confPaddingY_, confPadding_);
 }
 }  // namespace paddle
diff --git a/paddle/gserver/layers/PoolProjection.h b/paddle/gserver/layers/PoolProjection.h
index 73d8a41aefa..9fa16c1ea64 100644
--- a/paddle/gserver/layers/PoolProjection.h
+++ b/paddle/gserver/layers/PoolProjection.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include "Projection.h"
+#include "paddle/math/MathUtils.h"
 
 namespace paddle {
 
diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
index 7ec76136404..846e2e06660 100644
--- a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
+++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
@@ -56,8 +56,15 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
 size_t SpatialPyramidPoolLayer::getSize() {
   CHECK_EQ(inputLayers_.size(), 1UL);
   size_t layerSize = 0;
+  const SppConfig& sppConf = config_.inputs(0).spp_conf();
   imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
   imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
+  if (imgSizeH_ == 0) {
+    imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
+  }
+  if (imgSizeW_ == 0) {
+    imgSizeW_ = sppConf.img_size();
+  }
   size_t outputH = 1;
   size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1);
 
@@ -66,10 +73,10 @@ size_t SpatialPyramidPoolLayer::getSize() {
 
   getOutput().setFrameHeight(outputH);
   getOutput().setFrameWidth(outputW);
+
   return layerSize;
 }
 
-
 bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
                                    const ParameterMap& parameterMap) {
   Layer::init(layerMap, parameterMap);
@@ -90,8 +97,8 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
   size_t endCol = 0;
   for (size_t i = 0; i < pyramidHeight_; i++) {
     poolProjections_.emplace_back(PoolProjection::create(
-        getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_),
-        nullptr, useGpu_));
+        getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_), nullptr,
+        useGpu_));
     endCol += poolProjections_[i]->getOutputSize();
     projCol_.push_back(std::make_pair(startCol, endCol));
     startCol = endCol;
@@ -125,4 +132,3 @@ void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) {
   }
 }
 }  // namespace paddle
-
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index e6338e80453..1459c9a84a5 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -56,7 +56,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            'rank_cost', 'lambda_cost', 'huber_cost',
            'block_expand_layer',
            'maxout_layer', 'out_prod_layer', 'print_layer',
-           # 'spp_layer',
+           'spp_layer',
            ]
 
 
@@ -112,7 +112,7 @@ class LayerType(object):
     LINEAR_COMBINATION_LAYER = "convex_comb"
     BLOCK_EXPAND = "blockexpand"
     MAXOUT = "maxout"
-    # SPP_LAYER = "spp"
+    SPP_LAYER = "spp"
 
     PRINT_LAYER = "print"
 
@@ -1711,60 +1711,59 @@ def img_pool_layer(input, pool_size, name=None,
                    num_filters=num_channels)
 
 
-# @wrap_name_default("spp")
-# @layer_support()
-# def spp_layer(input, name=None, num_channels=None, pool_type=None,
-#               pyramid_height=None, img_width=None, layer_attr=None):
-#     pass
-#     """
-#     Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
-#     The details please refer to
-#     `Kaiming He's paper `_.
-
-#     :param name: layer name.
-#     :type name: basestring
-#     :param input: layer's input.
-#     :type input: LayerOutput
-#     :param num_channels: number of input channel.
-#     :type num_channels: int
-#     :param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling.
-#     :type scale: BasePoolingType
-#     :param pyramid_height: pyramid height.
-#     :type pyramid_height: int
-#     :param img_width: the width of input feature map. If it is None, the input feature
-#                       map should be square.
-#     :type img_width: int|None
-#     :param layer_attr: Extra Layer Attribute.
-#     :type layer_attr: ExtraLayerAttribute
-#     :return: LayerOutput object.
-#     :rtype: LayerOutput
-#     """
-#     if num_channels is None:
-#         assert input.num_filters is not None
-#         num_channels = input.num_filters
-
-#     if pool_type is None:
-#         pool_type = MaxPooling()
-#     elif isinstance(pool_type, AvgPooling):
-#         pool_type.name = 'avg'
-
-#     type_name = pool_type.name
-#     if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)):
-#         type_name += '-projection'
-
-#     Layer(
-#         name=name,
-#         type=LayerType.SPP_LAYER,
-#         inputs=Input(input.name,
-#                      spp=SpatialPyramidPool(pool_type=type_name,
-#                                             channels=num_channels,
-#                                             pyramid_height=pyramid_height,
-#                                             img_width=img_width)
-#                      ),
-#         **ExtraLayerAttribute.to_kwargs(layer_attr)
-#     )
-#     return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
-#                        num_filters=num_channels)
+@wrap_name_default("spp")
+@layer_support()
+def spp_layer(input, name=None, num_channels=None, pool_type=None,
+              pyramid_height=None, img_width=None, layer_attr=None):
+    """
+    Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
+    For details, please refer to
+    `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
+
+    :param name: layer name.
+    :type name: basestring
+    :param input: layer's input.
+    :type input: LayerOutput
+    :param num_channels: number of input channels.
+    :type num_channels: int
+    :param pool_type: Pooling type. MaxPooling or AvgPooling. Default is MaxPooling.
+    :type pool_type: BasePoolingType
+    :param pyramid_height: pyramid height.
+    :type pyramid_height: int
+    :param img_width: the width of the input feature map. If it is None, the input
+                      feature map should be square.
+    :type img_width: int|None
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    if num_channels is None:
+        assert input.num_filters is not None
+        num_channels = input.num_filters
+
+    if pool_type is None:
+        pool_type = MaxPooling()
+    elif isinstance(pool_type, AvgPooling):
+        pool_type.name = 'avg'
+
+    type_name = pool_type.name
+    if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)):
+        type_name += '-projection'
+
+    Layer(
+        name=name,
+        type=LayerType.SPP_LAYER,
+        inputs=Input(input.name,
+                     spp=SpatialPyramidPool(pool_type=type_name,
+                                            channels=num_channels,
+                                            pyramid_height=pyramid_height,
+                                            img_width=img_width)
+                     ),
+        **ExtraLayerAttribute.to_kwargs(layer_attr)
+    )
+    return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
+                       num_filters=num_channels)
 
 
 def __img_norm_layer__(name, input, size, norm_type, scale, power,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
index 7440181970a..e1686742b77 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
@@ -11,11 +11,9 @@
 test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq
 test_expand_layer test_ntm_layers test_hsigmoid img_layers util_layers
 simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc
 shared_lstm test_cost_layers_with_weight
-# test_maxout test_bi_grumemory math_ops test_spp_layer)
 test_maxout test_bi_grumemory math_ops test_spp_layer)
 
-
 for conf in ${configs[*]}
 do
    echo "Generating " $conf
-- 
GitLab
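
A note on the getSize() change above: each pyramid level i contributes 4^i pooling bins, so outputW = 4^0 + 4^1 + ... + 4^(pyramidHeight-1) = (4^H - 1) / 3, and the pooled output depends only on pyramid_height and the channel count, never on the input image size. A minimal sketch of that arithmetic in plain Python; the final layer-size line is an assumption, since the statement it mirrors falls between the two hunks and is not shown in this patch:

def spp_output_size(pyramid_height, channels):
    # Mirrors SpatialPyramidPoolLayer::getSize(): output_h is fixed at 1 and
    # output_w sums the bins of all pyramid levels, (4**H - 1) / (4 - 1).
    output_h = 1
    output_w = (4 ** pyramid_height - 1) // (4 - 1)
    # Assumed layer size: one value per bin per channel (not shown in hunk).
    return output_h * output_w * channels

assert spp_output_size(3, 64) == (1 + 4 + 16) * 64  # 1344, for any image size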
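
For reference, a hedged usage sketch of the re-enabled spp_layer in a trainer config; the layer names, image size, and filter count below are illustrative, not taken from the patch:

from paddle.trainer_config_helpers import *

img = data_layer(name='image', size=224 * 224 * 3)
conv = img_conv_layer(input=img, filter_size=3, num_filters=64,
                      num_channels=3, stride=1, padding=1)
# Pools the 64 feature maps into a fixed-length vector of
# 64 * (1 + 4 + 16) = 1344 dimensions, whatever the input size.
spp = spp_layer(input=conv, num_channels=64, pool_type=MaxPooling(),
                pyramid_height=3)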