From 0eac39928090c44fc3b8b4edc18604ff7b662f91 Mon Sep 17 00:00:00 2001
From: yuan
Date: Tue, 13 Dec 2016 20:57:59 +0800
Subject: [PATCH] priorbox layer for ssd

---
 paddle/gserver/layers/PriorBox.cpp            | 164 ++++++++++++++++++
 proto/ModelConfig.proto                       |  10 ++
 python/paddle/trainer/config_parser.py        |  14 ++
 .../paddle/trainer_config_helpers/layers.py   |  37 +++++
 4 files changed, 225 insertions(+)
 create mode 100644 paddle/gserver/layers/PriorBox.cpp

diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp
new file mode 100644
index 00000000000..b0d59cd145c
--- /dev/null
+++ b/paddle/gserver/layers/PriorBox.cpp
@@ -0,0 +1,164 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <cmath>
+#include <vector>
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/BaseMatrix.h"
+
+namespace paddle {
+
+/**
+ * Generates the SSD prior boxes for every cell of a feature map.
+ *
+ * Input 0 is the feature map; only its frame width and height are read.
+ * Input 1 holds the network input image size: element (0, 0) is read as the
+ * width and element (0, 1) as the height.
+ *
+ * The output is one row of 2 * dim values, where
+ * dim = frame_height * frame_width * numPriors_ * 4. The first dim values are
+ * the boxes as (xmin, ymin, xmax, ymax), divided by the image size and clipped
+ * to [0, 1]; the remaining dim values repeat the first four configured
+ * variances once per box.
+ */
+class PriorBoxLayer : public Layer {
+public:
+  explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {}
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+  void forward(PassType passType);
+  // Intentionally empty: no gradient is propagated to the inputs.
+  void backward(const UpdateCallback& callback) {}
+  // Number of prior boxes generated per feature-map cell.
+  int numPriors_;
+  std::vector<int> minSize_;
+  std::vector<int> maxSize_;
+  // Configured ratios, then their reciprocals, then a trailing 1.
+  std::vector<float> aspectRatio_;
+  std::vector<float> variance_;
+  // CPU staging buffer that forward() fills before copying to the output.
+  MatrixPtr buffer_;
+};
+
+bool PriorBoxLayer::init(const LayerMap& layerMap,
+                         const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  const auto& conf = config_.inputs(0).priorbox_conf();
+  minSize_.assign(conf.min_size().begin(), conf.min_size().end());
+  maxSize_.assign(conf.max_size().begin(), conf.max_size().end());
+  aspectRatio_.assign(conf.aspect_ratio().begin(), conf.aspect_ratio().end());
+  variance_.assign(conf.variance().begin(), conf.variance().end());
+
+  // forward() indexes these vectors without further checks, so reject
+  // configurations that would read or write out of bounds.
+  CHECK(!minSize_.empty()) << "priorbox needs at least one min_size";
+  if (!maxSize_.empty()) {
+    // Each max_size is paired with the min_size at the same position.
+    CHECK_EQ(minSize_.size(), maxSize_.size());
+  }
+  CHECK_GE(variance_.size(), 4UL) << "priorbox needs four variances";
+
+  // flip: append the reciprocal of every configured ratio, then the
+  // implicit ratio 1 that stands for the square min_size box.
+  const size_t input_ratio_length = aspectRatio_.size();
+  for (size_t index = 0; index < input_ratio_length; index++) {
+    const float ratio = aspectRatio_[index];
+    CHECK_GT(ratio, 0.f) << "aspect_ratio must be positive";
+    // forward() skips ratios equal to 1, which would desynchronize the box
+    // count from numPriors_.
+    CHECK_GE(std::fabs(ratio - 1.), 1e-6) << "aspect_ratio 1 is implicit";
+    aspectRatio_.push_back(1 / ratio);
+  }
+  aspectRatio_.push_back(1.);
+  // Per min_size: one box per aspectRatio_ entry (the trailing 1 being the
+  // square min_size box), plus one extra square box when max_size is given.
+  numPriors_ = aspectRatio_.size() * minSize_.size() + maxSize_.size();
+  buffer_ = Matrix::create(1, 1, false, false);
+  return true;
+}
+
+void PriorBoxLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  auto input = getInput(0);
+  int layer_width = input.getFrameWidth();
+  int layer_height = input.getFrameHeight();
+
+  MatrixPtr inV1 = getInputValue(1);
+  int image_width = inV1->getElement(0, 0);
+  int image_height = inV1->getElement(0, 1);
+  float step_w = static_cast<float>(image_width) / layer_width;
+  float step_h = static_cast<float>(image_height) / layer_height;
+  int dim = layer_height * layer_width * numPriors_ * 4;
+  reserveOutput(1, dim * 2);
+  // use a cpu buffer to compute
+  Matrix::resizeOrCreate(buffer_, 1, dim * 2, false, false);
+  auto* tmp_ptr = buffer_->getData();
+
+  int idx = 0;
+  // Appends the box centred on (cx, cy) as xmin, ymin, xmax, ymax, each
+  // divided by the image size.
+  auto add_box = [&](float cx, float cy, float box_w, float box_h) {
+    tmp_ptr[idx++] = (cx - box_w / 2.) / image_width;
+    tmp_ptr[idx++] = (cy - box_h / 2.) / image_height;
+    tmp_ptr[idx++] = (cx + box_w / 2.) / image_width;
+    tmp_ptr[idx++] = (cy + box_h / 2.) / image_height;
+  };
+  for (int h = 0; h < layer_height; ++h) {
+    for (int w = 0; w < layer_width; ++w) {
+      float center_x = (w + 0.5) * step_w;
+      float center_y = (h + 0.5) * step_h;
+      for (size_t s = 0; s < minSize_.size(); s++) {
+        // Box sizes are kept in floating point: the max-size box side is a
+        // square root and must not be rounded to an integer.
+        const float min_size = minSize_[s];
+        // first prior.
+        add_box(center_x, center_y, min_size, min_size);
+        if (!maxSize_.empty()) {
+          // second prior.
+          const float side = std::sqrt(min_size * maxSize_[s]);
+          add_box(center_x, center_y, side, side);
+        }
+        // rest of priors.
+        for (size_t r = 0; r < aspectRatio_.size(); r++) {
+          const float ar = aspectRatio_[r];
+          // Ratio 1 is the first prior, already emitted above.
+          if (std::fabs(ar - 1.) < 1e-6) continue;
+          const float root = std::sqrt(ar);
+          add_box(center_x, center_y, min_size * root, min_size / root);
+        }
+      }
+    }
+  }
+  // Every box slot must be filled before the variances are appended.
+  CHECK_EQ(idx, dim);
+  // clip the prior's coordinate such that it is within [0, 1]
+  for (int d = 0; d < dim; ++d) {
+    if (tmp_ptr[d] < 0) {
+      tmp_ptr[d] = 0;
+    } else if (tmp_ptr[d] > 1) {
+      tmp_ptr[d] = 1;
+    }
+  }
+  // set the variance.
+  for (int box = 0; box < dim / 4; ++box) {
+    for (int j = 0; j < 4; j++) {
+      tmp_ptr[idx++] = variance_[j];
+    }
+  }
+  MatrixPtr outV = getOutputValue();
+  outV->copyFrom(buffer_->data_, dim * 2);
+}
+REGISTER_LAYER(priorbox, PriorBoxLayer);
+
+}  // namespace paddle
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index b34e1ebdeda..460a39275fb 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -248,6 +248,15 @@ message ImageConfig {
   required uint32 img_size_y = 9;
 }
 
+message PriorBoxConfig {
+  repeated uint32 min_size = 1;
+  repeated uint32 max_size = 2;
+  repeated float aspect_ratio = 3;
+  repeated float variance = 4;
+  optional bool flip = 5 [default = true];
+  optional bool clip = 6 [default = true];
+}
+
 message LayerInputConfig {
   required string input_layer_name = 1;
   optional string input_parameter_name = 2;
@@ -263,6 +272,7 @@ message LayerInputConfig {
   optional BilinearInterpConfig bilinear_interp_conf = 10;
   optional MaxOutConfig maxout_conf = 11;
   optional SppConfig spp_conf = 12;
+  optional PriorBoxConfig priorbox_conf = 13;
 }
 
 message LayerConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 5b7f4d85e2c..5de524e507b 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1577,6 +1577,20 @@ class PrintLayer(LayerBase):
     def __init__(self, name, inputs):
         super(PrintLayer, self).__init__(name, 'print', 0, inputs)
 
+@config_layer('priorbox')
+class PriorBoxLayer(LayerBase):
+    # Stores the prior box settings in the priorbox_conf of input 0.
+    def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio, variance):
+        super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs)
+        config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 input')
+        self.config.inputs[0].priorbox_conf.min_size.extend(min_size)
+        self.config.inputs[0].priorbox_conf.max_size.extend(max_size)
+        self.config.inputs[0].priorbox_conf.aspect_ratio.extend(aspect_ratio)
+        self.config.inputs[0].priorbox_conf.variance.extend(variance)
+        self.config.size = size
+        input_layer0 = self.get_input_layer(0)
+        input_layer1 = self.get_input_layer(1)
+
 
 @config_layer('data')
 class DataLayer(LayerBase):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 8dd6b7b7d28..f04b5646aab 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -106,6 +106,7 @@ __all__ = [
     'maxout_layer',
     'out_prod_layer',
     'print_layer',
+    'priorbox_layer',
     'spp_layer',
 ]
 
@@ -171,6 +172,7 @@ class LayerType(object):
     SPP_LAYER = "spp"
 
     PRINT_LAYER = "print"
+    PRIORBOX_LAYER = "priorbox"
 
     CTC_LAYER = "ctc"
     WARP_CTC_LAYER = "warp_ctc"
@@ -933,6 +935,41 @@ def print_layer(input, name=None):
         inputs=[l.name for l in input], )
     # this layer don't return anything, can not be input of other layer.
 
+@wrap_name_default("priorbox")
+def priorbox_layer(input, img_shape, aspect_ratio, variance, min_size, max_size=[], name=None):
+    """
+    Compute the priorbox and set the variance. This layer is necessary for ssd.
+
+    :param name: The Layer Name.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param img_shape: The width and height of the network input image.
+    :type img_shape: LayerOutput
+    :param aspect_ratio: The aspect ratio.
+    :type aspect_ratio: list
+    :param variance: The bounding box variance.
+    :type variance: list
+    :param min_size: The min size of the priorbox width/height.
+    :type min_size: list
+    :param max_size: The max size of the priorbox width/height. May be empty.
+    :type max_size: list
+    :return: LayerOutput
+    """
+    # Each min_size yields one box per flipped ratio plus one for ratio 1.
+    num_filters = ((len(aspect_ratio) * 2 + 1) * len(min_size) + len(max_size)) * 4
+    size=(input.size / input.num_filters) * num_filters * 2
+    Layer(
+        name=name,
+        type=LayerType.PRIORBOX_LAYER,
+        inputs=[input.name, img_shape.name],
+        size=size,
+        min_size=min_size,
+        max_size=max_size,
+        aspect_ratio=aspect_ratio,
+        variance=variance)
+    return LayerOutput(
+        name, LayerType.PRIORBOX_LAYER, parents=[input, img_shape], num_filters=num_filters, size=size)
 
 @wrap_name_default("seq_pooling")
 @wrap_bias_attr_default(has_bias=False)
-- 
GitLab