From 5ece5c96ada7a14099408f072abefd213b08bbce Mon Sep 17 00:00:00 2001
From: qijun
Date: Tue, 8 Nov 2016 03:36:02 +0000
Subject: [PATCH] add python wrap for sppLayer

---
 doc/ui/api/trainer_config_helpers/layers.rst  | 12 ++++
 paddle/gserver/layers/PoolProjection.cpp      |  2 +-
 .../layers/SpatialPyramidPoolLayer.cpp        |  6 +-
 .../gserver/layers/SpatialPyramidPoolLayer.h  |  1 -
 paddle/gserver/tests/test_LayerGrad.cpp       |  2 +
 paddle/math/Matrix.cpp                        | 14 +++--
 proto/ModelConfig.proto.m4                    |  6 +-
 python/paddle/trainer/config_parser.py        | 46 +++++++++++++-
 .../paddle/trainer_config_helpers/layers.py   | 61 ++++++++++++++++++-
 .../tests/configs/check.md5                   |  1 +
 .../tests/configs/generate_protostr.sh        |  2 +-
 .../tests/configs/test_spp_layer.py           | 17 ++++++
 12 files changed, 155 insertions(+), 15 deletions(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py

diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst
index 5bb88b0615..a7cf57d017 100644
--- a/doc/ui/api/trainer_config_helpers/layers.rst
+++ b/doc/ui/api/trainer_config_helpers/layers.rst
@@ -46,6 +46,12 @@ conv_operator
     :members: conv_operator
     :noindex:
 
+conv_projection
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: conv_projection
+    :noindex:
+
 conv_shift_layer
 ------------------
 .. automodule:: paddle.trainer_config_helpers.layers
@@ -71,6 +77,12 @@ img_pool_layer
 --------------
 .. automodule:: paddle.trainer_config_helpers.layers
     :members: img_pool_layer
+    :noindex:
+
+spp_layer
+--------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: spp_layer
     :noindex:
 
 maxout_layer
diff --git a/paddle/gserver/layers/PoolProjection.cpp b/paddle/gserver/layers/PoolProjection.cpp
index 468ca6f1b7..e10788e926 100644
--- a/paddle/gserver/layers/PoolProjection.cpp
+++ b/paddle/gserver/layers/PoolProjection.cpp
@@ -16,7 +16,7 @@ limitations under the License. */
 
 namespace paddle {
 
-REGISTER_PROJECTION_CREATE_FUNC(pool2, &PoolProjection::create);
+REGISTER_PROJECTION_CREATE_FUNC(pool, &PoolProjection::create);
 
 PoolProjection* PoolProjection::create(const ProjectionConfig& config,
                                        ParameterPtr parameter, bool useGpu) {
diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
index bcdba5c151..83334a5988 100644
--- a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
+++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
@@ -24,7 +24,7 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
                                                     size_t pyramidLevel,
                                                     std::string& poolType) {
   ProjectionConfig config;
-  config.set_type("pool2");
+  config.set_type("pool");
   PoolConfig* conf = config.mutable_pool_conf();
   conf->set_channels(channels);
   conf->set_img_size(imgSizeW);
@@ -93,7 +93,7 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
     startCol = endCol;
     projInput_.emplace_back(Argument());
   }
-  outputSize_ = endCol;
+  CHECK_EQ(endCol, getSize());
 
   return true;
 }
@@ -101,7 +101,7 @@ void SpatialPyramidPoolLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   int batchSize = getInput(0).getBatchSize();
-  resetOutput(batchSize, outputSize_);
+  resetOutput(batchSize, getSize());
   for (size_t i = 0; i < pyramidHeight_; i++) {
     size_t startCol = projCol_[i].first;
     size_t endCol = projCol_[i].second;
diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/gserver/layers/SpatialPyramidPoolLayer.h
index de1fd4da07..156581530a 100644
--- a/paddle/gserver/layers/SpatialPyramidPoolLayer.h
+++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.h
@@ -27,7 +27,6 @@ protected:
   size_t imgSizeW_;
   size_t imgSizeH_;
   size_t pyramidHeight_;
-  size_t outputSize_;
   std::string poolType_;
 
   std::vector<std::unique_ptr<PoolProjection>> poolProjections_;
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index c3597f5607..595e20354a 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -931,6 +931,8 @@ void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
   sppConfig->set_channels(16);
   sppConfig->set_img_size(10);
   sppConfig->set_img_size_y(20);
+  int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1);
+  config.layerConfig.set_size(outputSize * sppConfig->channels());
   testLayerGrad(config, "spp", 100, trans, useGpu);
 }
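
Note on the size computed above: pyramid level i pools the whole feature map
into 4^i bins, so a pyramid of height H produces 1 + 4 + ... + 4^(H-1)
= (4^H - 1) / (4 - 1) bins per channel, a geometric series. A minimal Python
sketch of the same arithmetic (illustrative only, not part of the patch):

    def spp_bins_per_channel(pyramid_height):
        # level i contributes 2**i x 2**i == 4**i pooled bins
        return sum(4 ** i for i in range(pyramid_height))

    # matches the test above: pyramid_height=2 with 16 channels
    # gives (1 + 4) * 16 = 80 output values
    assert spp_bins_per_channel(2) * 16 == 80
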
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 721c3de59f..607334aaa9 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -1510,18 +1510,19 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
   CHECK(inHeight * inWidth == inputMat.getWidth() / channels);
   CHECK_EQ(num, this->getHeight());
   CHECK_EQ(channels * outputH * outputW, this->getWidth());
+  size_t outStride = getStride();
 
   /* initialize the data_ */
   for (size_t i = 0; i < height_; i++) {
     for (size_t j = 0; j < width_; j++) {
-      outData[i * getStride() + j] = -(real)FLT_MAX;
+      outData[i * outStride + j] = -(real)FLT_MAX;
     }
   }
 
   /* pool max one by one */
   for (size_t n = 0; n < num; ++n) {  // frame by frame
     if (!isContiguous()) {
-      outData = data_ + n * getStride();
+      outData = data_ + n * outStride;
     }
     for (size_t c = 0; c < channels; ++c) {  // channel by channel
       for (size_t ph = 0; ph < outputH; ++ph) {
@@ -1564,10 +1565,15 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
   real* inData = image.getData();
   real* otData = outV.getData();
   real* otGrad = outGrad.getData();
+
+  size_t outStride = outV.getStride();
+  real* origOutData = otData;
+  real* origOutGrad = otGrad;
+
   for (size_t n = 0; n < num; ++n) {
     if (!outV.isContiguous()) {
-      otData = outV.getData() + n * outV.getStride();
-      otGrad = outGrad.getData() + n * outGrad.getStride();
+      otData = origOutData + n * outStride;
+      otGrad = origOutGrad + n * outStride;
     }
     for (size_t c = 0; c < channels; ++c) {
       for (size_t ph = 0; ph < outputH; ++ph) {
diff --git a/proto/ModelConfig.proto.m4 b/proto/ModelConfig.proto.m4
index 4ef1550105..a247f6f3e7 100644
--- a/proto/ModelConfig.proto.m4
+++ b/proto/ModelConfig.proto.m4
@@ -202,11 +202,11 @@ message ProjectionConfig {
   optional ConvConfig conv_conf = 8;
   optional int32 num_filters = 9;
 
-  // For pool
-  optional PoolConfig pool_conf = 10;
-
   // For IdentityOffsetProjection
   optional uint64 offset = 11 [default = 0];
+
+  // For pool
+  optional PoolConfig pool_conf = 12;
 }
 
 message OperatorConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index e909894316..7ad2b7fd5c 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -470,6 +470,7 @@ class Input(Cfg):
         image=None,
         block_expand=None,
         maxout=None,
+        spp=None,
         format=None,
         nnz=None,
         is_static=None,
@@ -669,7 +670,6 @@ class ConvProjection(Projection):
     def calc_parameter_dims(self, input_size, output_size):
         return None
 
-
 # Define a operator for mixed layer
 @config_class
 class Operator(Cfg):
@@ -783,6 +783,16 @@ class Pool(Cfg):
         padding_y = None):
         self.add_keys(locals())
 
+@config_class
+class SpatialPyramidPool(Cfg):
+    def __init__(
+        self,
+        pool_type,
+        pyramid_height,
+        channels,
+        img_width = None):
+        self.add_keys(locals())
+
 # please refer to the comments in proto/ModelConfig.proto
 @config_class
 class Norm(Cfg):
@@ -1043,6 +1052,22 @@ def parse_pool(pool, input_layer_name, pool_conf):
             2*pool_conf.padding_y - pool_conf.size_y) / \
         float(pool_conf.stride_y))) + 1
 
+def parse_spp(spp, input_layer_name, spp_conf):
+    spp_conf.pool_type = spp.pool_type
+    config_assert(spp.pool_type in ['max-projection', 'avg-projection'],
+                  "pool-type %s is not in "
+                  "['max-projection', 'avg-projection']" % spp.pool_type)
+    spp_conf.pyramid_height = spp.pyramid_height
+    spp_conf.channels = spp.channels
+
+    img_pixels = g_layer_map[input_layer_name].size / spp_conf.channels
+
+    spp_conf.img_size = default(spp.img_width, int(img_pixels ** 0.5))
+    spp_conf.img_size_y = img_pixels / spp_conf.img_size
+    config_assert(spp_conf.img_size * spp_conf.img_size_y == img_pixels,
+                  "Incorrect input image size %d for input image pixels %d"
+                  % (spp_conf.img_size, img_pixels))
+
 def parse_image(image, input_layer_name, image_conf):
     image_conf.channels = image.channels
     image_pixels = g_layer_map[input_layer_name].size / image_conf.channels
@@ -1649,6 +1674,25 @@ class PoolLayer(LayerBase):
                 name, pool_conf.output_y, pool_conf.output_x))
             self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
 
+@config_layer('spp')
+class SpatialPyramidPoolLayer(LayerBase):
+    def __init__(
+        self,
+        name,
+        inputs,
+        device=None):
+        super(SpatialPyramidPoolLayer, self).__init__(name, 'spp', 0, inputs=inputs, device=device)
+        for input_index in xrange(len(self.inputs)):
+            input_layer = self.get_input_layer(input_index)
+            parse_spp(
+                self.inputs[input_index].spp,
+                input_layer.name,
+                self.config.inputs[input_index].spp_conf)
+            spp_conf = self.config.inputs[input_index].spp_conf
+            output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1)
+            print("output size for %s is %d " % (name, output_size))
+            self.set_layer_size(output_size * spp_conf.channels)
+
 @config_layer('batch_norm')
 class BatchNormLayer(LayerBase):
     layer_type = 'batch_norm'
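
parse_spp above infers the input feature-map geometry from the preceding
layer's size: pixels per channel = size / channels, the width defaults to
sqrt(pixels) when img_width is None, and the height is whatever remains. A
standalone sketch of that inference (the function name is illustrative, not
part of the patch):

    def infer_spp_geometry(layer_size, channels, img_width=None):
        img_pixels = layer_size // channels
        img_size = img_width if img_width is not None else int(img_pixels ** 0.5)
        img_size_y = img_pixels // img_size
        assert img_size * img_size_y == img_pixels, "incorrect input image size"
        return img_size, img_size_y

    # matches the new test config below: size=3200 with 16 channels and
    # img_width=10 describes a 10 x 20 feature map
    assert infer_spp_geometry(3200, 16, img_width=10) == (10, 20)
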
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 9a23c02431..03243c03b0 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -55,7 +55,8 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            'multi_binary_label_cross_entropy',
            'rank_cost', 'lambda_cost', 'huber_cost',
            'block_expand_layer',
-           'maxout_layer', 'out_prod_layer', 'print_layer'
+           'maxout_layer', 'out_prod_layer', 'print_layer',
+           'spp_layer',
            ]
@@ -111,6 +112,7 @@ class LayerType(object):
     LINEAR_COMBINATION_LAYER = "convex_comb"
     BLOCK_EXPAND = "blockexpand"
     MAXOUT = "maxout"
+    SPP_LAYER = "spp"
 
     PRINT_LAYER = "print"
@@ -868,6 +870,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
                        size=input.size)
 
+
 @wrap_bias_attr_default()
 @wrap_param_attr_default()
 @wrap_act_default(param_names=['gate_act'],
@@ -1708,6 +1711,61 @@ def img_pool_layer(input, pool_size, name=None,
                        num_filters=num_channels)
 
+@wrap_name_default("spp")
+@layer_support()
+def spp_layer(input, name=None, num_channels=None, pool_type=None,
+              pyramid_height=None, img_width=None, layer_attr=None):
+    """
+    Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
+    Recognition. For details, please refer to
+    `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
+
+    :param name: layer name.
+    :type name: basestring
+    :param input: layer's input.
+    :type input: LayerOutput
+    :param num_channels: number of input channels.
+    :type num_channels: int
+    :param pool_type: pooling type. MaxPooling or AvgPooling. Default is
+                      MaxPooling.
+    :type pool_type: BasePoolingType
+    :param pyramid_height: pyramid height.
+    :type pyramid_height: int
+    :param img_width: the width of the input feature map. If it is None,
+                      the input feature map should be square.
+    :type img_width: int|None
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    if num_channels is None:
+        assert input.num_filters is not None
+        num_channels = input.num_filters
+
+    if pool_type is None:
+        pool_type = MaxPooling()
+    elif isinstance(pool_type, AvgPooling):
+        pool_type.name = 'avg'
+
+    type_name = pool_type.name
+    if isinstance(pool_type, (AvgPooling, MaxPooling)):
+        type_name += '-projection'
+
+    Layer(
+        name=name,
+        type=LayerType.SPP_LAYER,
+        inputs=Input(input.name,
+                     spp=SpatialPyramidPool(pool_type=type_name,
+                                            channels=num_channels,
+                                            pyramid_height=pyramid_height,
+                                            img_width=img_width)),
+        **ExtraLayerAttribute.to_kwargs(layer_attr)
+    )
+    return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
+                       num_filters=num_channels)
+
+
 def __img_norm_layer__(name, input, size, norm_type, scale, power,
                        num_channels, blocked, layer_attr):
     if num_channels is None:
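
A quick usage sketch for the new helper (a hypothetical config, not part of
the patch; the MaxPooling path is exercised by test_spp_layer.py below):

    from paddle.trainer_config_helpers import *

    # 8 channels of a square 16 x 16 feature map: 8 * 16 * 16 = 2048,
    # so img_width may be omitted
    data = data_layer(name='data', size=2048)
    spp = spp_layer(input=data,
                    num_channels=8,
                    pyramid_height=3,
                    pool_type=AvgPooling())  # parsed as 'avg-projection'
    # resulting layer size: (4**3 - 1) / 3 * 8 == 21 * 8 == 168
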
diff --git a/python/paddle/trainer_config_helpers/tests/configs/check.md5 b/python/paddle/trainer_config_helpers/tests/configs/check.md5
index 72dfdad7bd..bf0512420e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/check.md5
+++ b/python/paddle/trainer_config_helpers/tests/configs/check.md5
@@ -20,3 +20,4 @@ fded24727338fb8ce44d9951ed8aea08  test_rnn_group.protostr
 67d6fde3afb54f389d0ce4ff14726fe1  test_sequence_pooling.protostr
 f586a548ef4350ba1ed47a81859a64cb  unused_layers.protostr
 f937a5a6e7e8864b4d8cf56b0f7c7f44  util_layers.protostr
+60c9a71e19bd4b2a1253712799d0ae70  test_spp_layer.protostr
diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
index 6a31ceabdf..6102c614de 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
@@ -9,7 +9,7 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer
 last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
 test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
-test_maxout test_bi_grumemory)
+test_maxout test_bi_grumemory test_spp_layer)
 
 for conf in ${configs[*]}
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
new file mode 100644
index 0000000000..6786c27639
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
@@ -0,0 +1,17 @@
+from paddle.trainer_config_helpers import *
+
+settings(
+    batch_size=100,
+    learning_rate=1e-5
+)
+
+data = data_layer(name='data', size=3200)
+
+
+spp = spp_layer(input=data,
+                pyramid_height=2,
+                num_channels=16,
+                pool_type=MaxPooling(),
+                img_width=10)
+
+outputs(spp)
-- 
GitLab