diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..36ace7597cd66cc2d83353ec999a75c79dd1e33e
--- /dev/null
+++ b/paddle/gserver/layers/PriorBox.cpp
@@ -0,0 +1,160 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "Layer.h"
+#include "paddle/math/BaseMatrix.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+/**
+ * @brief A layer for generating priorbox locations and variances.
+ * - Input: Two and only two input layers are accepted. Input 0 is the
+ *          feature map whose frame size defines the grid of prior centers;
+ *          input 1 is the image data layer whose frame size is used to
+ *          normalize the box coordinates.
+ * - Output: One row holding, for every feature-map cell and every prior,
+ *           (xmin, ymin, xmax, ymax) clipped to [0, 1] followed by the four
+ *           variances.
+ * Reference:
+ *    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+ *    Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
+ */
+
+class PriorBoxLayer : public Layer {
+public:
+  explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {}
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  void forward(PassType passType);
+  // The output is computed from the input frame sizes only, so nothing is
+  // propagated backward.
+  void backward(const UpdateCallback& callback) {}
+
+protected:
+  // Number of priors emitted for each feature-map cell.
+  int numPriors_;
+  std::vector<int> minSize_;
+  std::vector<int> maxSize_;
+  // Configured ratios followed by their reciprocals. Ratios equal to 1 are
+  // not stored because the min-size square prior already covers them.
+  std::vector<real> aspectRatio_;
+  std::vector<real> variance_;
+  // CPU scratch row filled by forward() and then copied into the output.
+  MatrixPtr buffer_;
+};
+
+bool PriorBoxLayer::init(const LayerMap& layerMap,
+                         const ParameterMap& parameterMap) {
+  if (!Layer::init(layerMap, parameterMap)) return false;
+  const auto& pbConf = config_.inputs(0).priorbox_conf();
+  minSize_.assign(pbConf.min_size().begin(), pbConf.min_size().end());
+  maxSize_.assign(pbConf.max_size().begin(), pbConf.max_size().end());
+  variance_.assign(pbConf.variance().begin(), pbConf.variance().end());
+  // forward() reads maxSize_[s] for every minSize_[s] and four variances, so
+  // reject configurations that would index out of range.
+  CHECK(!minSize_.empty()) << "priorbox needs at least one min_size";
+  CHECK(maxSize_.empty() || maxSize_.size() == minSize_.size())
+      << "max_size must be empty or have one entry per min_size";
+  CHECK_EQ(variance_.size(), 4UL) << "priorbox needs exactly 4 variances";
+
+  auto isUnitRatio = [](real ratio) { return std::fabs(ratio - 1.) < 1e-6; };
+  aspectRatio_.clear();
+  for (int i = 0; i < pbConf.aspect_ratio_size(); ++i) {
+    real ratio = pbConf.aspect_ratio(i);
+    CHECK_GT(ratio, 0.) << "aspect_ratio must be positive";
+    if (!isUnitRatio(ratio)) aspectRatio_.push_back(ratio);
+  }
+  // flip
+  size_t inputRatioLength = aspectRatio_.size();
+  for (size_t index = 0; index < inputRatioLength; index++) {
+    real flipped = 1 / aspectRatio_[index];
+    if (!isUnitRatio(flipped)) aspectRatio_.push_back(flipped);
+  }
+  // Each min size yields one square prior plus one prior per stored ratio;
+  // each max size adds one more square prior.
+  numPriors_ = minSize_.size() * (1 + aspectRatio_.size()) + maxSize_.size();
+  return true;
+}
+
+void PriorBoxLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  auto input = getInput(0);
+  int layerWidth = input.getFrameWidth();
+  int layerHeight = input.getFrameHeight();
+
+  auto image = getInput(1);
+  int imageWidth = image.getFrameWidth();
+  int imageHeight = image.getFrameHeight();
+
+  real stepW = static_cast<real>(imageWidth) / layerWidth;
+  real stepH = static_cast<real>(imageHeight) / layerHeight;
+  int dim = layerHeight * layerWidth * numPriors_ * 4;
+  reserveOutput(1, dim * 2);
+  // use a cpu buffer to compute
+  Matrix::resizeOrCreate(buffer_, 1, dim * 2, false, false);
+  real* tmpPtr = buffer_->getData();
+
+  int idx = 0;
+  // Append one prior: the box corners normalized by the image size and
+  // clipped to [0, 1], followed by the four variances.
+  auto appendPrior = [&](real centerX, real centerY, real boxW, real boxH) {
+    const real corners[4] = {
+        static_cast<real>((centerX - boxW / 2.) / imageWidth),
+        static_cast<real>((centerY - boxH / 2.) / imageHeight),
+        static_cast<real>((centerX + boxW / 2.) / imageWidth),
+        static_cast<real>((centerY + boxH / 2.) / imageHeight)};
+    for (int c = 0; c < 4; ++c) {
+      tmpPtr[idx++] = std::min(std::max(corners[c], static_cast<real>(0.)),
+                               static_cast<real>(1.));
+    }
+    for (int t = 0; t < 4; ++t) tmpPtr[idx++] = variance_[t];
+  };
+
+  for (int h = 0; h < layerHeight; ++h) {
+    for (int w = 0; w < layerWidth; ++w) {
+      real centerX = (w + 0.5) * stepW;
+      real centerY = (h + 0.5) * stepH;
+      for (size_t s = 0; s < minSize_.size(); s++) {
+        int minSize = minSize_[s];
+        // first prior.
+        appendPrior(centerX, centerY, minSize, minSize);
+        if (!maxSize_.empty()) {
+          // second prior. The product is formed in double so that it can
+          // neither overflow int nor lose the fractional part of the root.
+          real size = std::sqrt(static_cast<double>(minSize) * maxSize_[s]);
+          appendPrior(centerX, centerY, size, size);
+        }
+        // rest of priors.
+        for (size_t r = 0; r < aspectRatio_.size(); r++) {
+          double ratioRoot = std::sqrt(static_cast<double>(aspectRatio_[r]));
+          appendPrior(centerX,
+                      centerY,
+                      minSize * ratioRoot,
+                      minSize / ratioRoot);
+        }
+      }
+    }
+  }
+  // Every slot of the buffer must have been written exactly once.
+  CHECK_EQ(idx, dim * 2);
+  MatrixPtr outV = getOutputValue();
+  outV->copyFrom(tmpPtr, dim * 2);
+}
+REGISTER_LAYER(priorbox, PriorBoxLayer);
+
+}  // namespace paddle
diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 34dc375f21a54688c459236551fb1bc4d41f2eb1..c26a2a7f06bc16c113f1812868b5d2b8a5060635 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -34,6 +34,14 @@ add_unittest_without_exec(test_ConvTrans
 
 add_test(NAME test_ConvTrans
          COMMAND test_ConvTrans)
+################# test_PriorBox #######################
+add_unittest_without_exec(test_PriorBox
+    test_PriorBox.cpp
+    LayerGradUtil.cpp
+    TestUtil.cpp)
+
+add_test(NAME test_PriorBox
+         COMMAND test_PriorBox)
 ################# test_ConvUnify #######################
 add_unittest_without_exec(test_ConvUnify
     test_ConvUnify.cpp
diff --git a/paddle/gserver/tests/test_PriorBox.cpp b/paddle/gserver/tests/test_PriorBox.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6d6a242696633e66a05bf9fc9eee81a468ed056
--- /dev/null
+++ b/paddle/gserver/tests/test_PriorBox.cpp
@@ -0,0 +1,212 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+
+#include "LayerGradUtil.h"
+#include "TestUtil.h"
+
+using namespace paddle;  // NOLINT
+using namespace std;     // NOLINT
+
+// Do one forward pass of priorBox layer and check to see if its output
+// matches the given result.
+// The layer is fed by two data layers: input 0 stands for the feature map
+// and input 1 for the image. Only their frame sizes are set, because the
+// expected output is derived from those sizes and the priorbox config.
+// The callers create `result` with the same use_gpu flag they pass here.
+void doOnePriorBoxTest(size_t feature_map_width,
+                       size_t feature_map_height,
+                       size_t image_width,
+                       size_t image_height,
+                       const vector<int>& min_size,
+                       const vector<int>& max_size,
+                       const vector<real>& aspect_ratio,
+                       const vector<real>& variance,
+                       bool use_gpu,
+                       MatrixPtr& result) {
+  // Setting up the priorbox layer
+  TestConfig configt;
+  configt.layerConfig.set_type("priorbox");
+
+  configt.inputDefs.push_back({INPUT_DATA, "featureMap", 1, 0});
+  // The priorbox config is attached to the first input.
+  LayerInputConfig* input = configt.layerConfig.add_inputs();
+  configt.inputDefs.push_back({INPUT_DATA, "image", 1, 0});
+  configt.layerConfig.add_inputs();
+  PriorBoxConfig* pb = input->mutable_priorbox_conf();
+  for (size_t i = 0; i < min_size.size(); i++) pb->add_min_size(min_size[i]);
+  for (size_t i = 0; i < max_size.size(); i++) pb->add_max_size(max_size[i]);
+  for (size_t i = 0; i < variance.size(); i++) pb->add_variance(variance[i]);
+  for (size_t i = 0; i < aspect_ratio.size(); i++)
+    pb->add_aspect_ratio(aspect_ratio[i]);
+
+  // data layer initialize
+  std::vector<DataLayerPtr> dataLayers;
+  LayerMap layerMap;
+  vector<Argument> datas;
+  initDataLayer(
+      configt, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu);
+  dataLayers[0]->getOutput().setFrameHeight(feature_map_height);
+  dataLayers[0]->getOutput().setFrameWidth(feature_map_width);
+  dataLayers[1]->getOutput().setFrameHeight(image_height);
+  dataLayers[1]->getOutput().setFrameWidth(image_width);
+
+  // test layer initialize
+  std::vector<ParameterPtr> parameters;
+  LayerPtr priorboxLayer;
+  initTestLayer(configt, &layerMap, &parameters, &priorboxLayer);
+  priorboxLayer->forward(PASS_GC);
+  checkMatrixEqual(priorboxLayer->getOutputValue(), result);
+}
+
+TEST(Layer, priorBoxLayerFwd) {
+  vector<int> minSize;
+  vector<int> maxSize;
+  vector<real> aspectRatio;
+  vector<real> variance;
+  bool useGpu = false;
+
+  minSize.push_back(276);
+  maxSize.push_back(330);
+  variance.push_back(0.1);
+  variance.push_back(0.1);
+  variance.push_back(0.2);
+  variance.push_back(0.2);
+
+  // CPU case 1: one cell centered at (150, 150) in a 300x300 image. The
+  // 276 square spans [12, 288] / 300 = [0.04, 0.96]; the sqrt(276 * 330)
+  // square is larger than the image and is clipped to [0, 1].
+  MatrixPtr result;
+  // Layout per prior: xmin, ymin, xmax, ymax, then the four variances.
+  real resultData[] = {0.04, 0.04, 0.96, 0.96, 0.1, 0.1, 0.2, 0.2,
+                       0,    0,    1,    1,    0.1, 0.1, 0.2, 0.2};
+  result = Matrix::create(1, 2 * 8, false, useGpu);
+  result->setData(resultData);
+  doOnePriorBoxTest(/* feature_map_width */ 1,
+                    /* feature_map_height */ 1,
+                    /* image_width */ 300,
+                    /* image_height */ 300,
+                    minSize,
+                    maxSize,
+                    aspectRatio,
+                    variance,
+                    useGpu,
+                    result);
+  // CPU case 2: 2x2 cells centered at 100 and 300 in a 400x400 image, no
+  // max size. Each 276 square is clipped on the side nearest the border:
+  // (100 + 138) / 400 = 0.595 and (300 - 138) / 400 = 0.405.
+  variance[1] = 0.2;
+  variance[3] = 0.1;
+  maxSize.pop_back();
+  real resultData2[] = {0,     0,     0.595, 0.595, 0.1, 0.2, 0.2, 0.1,
+                        0.405, 0,     1,     0.595, 0.1, 0.2, 0.2, 0.1,
+                        0,     0.405, 0.595, 1,     0.1, 0.2, 0.2, 0.1,
+                        0.405, 0.405, 1,     1,     0.1, 0.2, 0.2, 0.1};
+  Matrix::resizeOrCreate(result, 1, 4 * 8, false, useGpu);
+  result->setData(resultData2);
+  doOnePriorBoxTest(/* feature_map_width */ 2,
+                    /* feature_map_height */ 2,
+                    /* image_width */ 400,
+                    /* image_height */ 400,
+                    minSize,
+                    maxSize,
+                    aspectRatio,
+                    variance,
+                    useGpu,
+                    result);
+  // CPU case 3: aspect ratio 2 adds a wide (2:1) and a tall (1:2) box after
+  // the square. Their long side exceeds the image (clipped); their short
+  // side is 276 / sqrt(2) ~= 195.16, i.e. 0.5 -/+ 0.32527 of the image.
+  aspectRatio.push_back(2);
+  real resultData3[] = {0.04,     0.04, 0.96, 0.96,       0.1,        0.2,
+                        0.2,      0.1,  0,    0.17473088, 1,          0.825269,
+                        0.1,      0.2,  0.2,  0.1,        0.17473088, 0,
+                        0.825269, 1,    0.1,  0.2,        0.2,        0.1};
+  Matrix::resizeOrCreate(result, 1, 3 * 8, false, useGpu);
+  result->setData(resultData3);
+  doOnePriorBoxTest(/* feature_map_width */ 1,
+                    /* feature_map_height */ 1,
+                    /* image_width */ 300,
+                    /* image_height */ 300,
+                    minSize,
+                    maxSize,
+                    aspectRatio,
+                    variance,
+                    useGpu,
+                    result);
+
+#ifndef PADDLE_ONLY_CPU
+  // reset the input parameters to those of case 1, then replay the same
+  // three cases with the expected values copied into a GPU matrix
+  variance[1] = 0.1;
+  variance[3] = 0.2;
+  maxSize.push_back(330);
+  aspectRatio.pop_back();
+  MatrixPtr resultGpu;
+  useGpu = true;
+  // GPU case 1.
+  resultGpu = Matrix::create(1, 2 * 8, false, useGpu);
+  resultGpu->copyFrom(resultData, 2 * 8);
+  doOnePriorBoxTest(/* feature_map_width */ 1,
+                    /* feature_map_height */ 1,
+                    /* image_width */ 300,
+                    /* image_height */ 300,
+                    minSize,
+                    maxSize,
+                    aspectRatio,
+                    variance,
+                    useGpu,
+                    resultGpu);
+  // GPU case 2.
+  variance[1] = 0.2;
+  variance[3] = 0.1;
+  maxSize.pop_back();
+  Matrix::resizeOrCreate(resultGpu, 1, 4 * 8, false, useGpu);
+  resultGpu->copyFrom(resultData2, 4 * 8);
+  doOnePriorBoxTest(/* feature_map_width */ 2,
+                    /* feature_map_height */ 2,
+                    /* image_width */ 400,
+                    /* image_height */ 400,
+                    minSize,
+                    maxSize,
+                    aspectRatio,
+                    variance,
+                    useGpu,
+                    resultGpu);
+  // GPU case 3.
+  aspectRatio.push_back(2);
+  Matrix::resizeOrCreate(resultGpu, 1, 3 * 8, false, useGpu);
+  resultGpu->copyFrom(resultData3, 3 * 8);
+  doOnePriorBoxTest(/* feature_map_width */ 1,
+                    /* feature_map_height */ 1,
+                    /* image_width */ 300,
+                    /* image_height */ 300,
+                    minSize,
+                    maxSize,
+                    aspectRatio,
+                    variance,
+                    useGpu,
+                    resultGpu);
+#endif
+}
+
+// Test entry point: initializes gtest and Paddle, then runs every test.
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index be4d0041f91cf7d0306d14338b43bb25e052fd58..3a9d339976fff91d79e7459ad5984cf78ea8990a 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -248,6 +248,13 @@ message ImageConfig {
   optional uint32 img_size_y = 9;
 }
 
+message PriorBoxConfig {
+  repeated uint32 min_size = 1;     // side of the min-size square prior
+  repeated uint32 max_size = 2;     // optional; adds a sqrt(min * max) square
+  repeated float aspect_ratio = 3;  // width / height; the layer adds 1 / ratio
+  repeated float variance = 4;      // 4 values written after each prior box
+}
+
 message LayerInputConfig {
   required string input_layer_name = 1;
   optional string input_parameter_name = 2;
@@ -263,6 +270,7 @@ message LayerInputConfig {
   optional BilinearInterpConfig bilinear_interp_conf = 10;
   optional MaxOutConfig maxout_conf = 11;
   optional SppConfig spp_conf = 12;
+  optional PriorBoxConfig priorbox_conf = 13;
 }
 
 message LayerConfig {
diff --git
a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 29704391f2be6c366bc6408f32ad0306f5ff7fc1..2eb7b17a0b40eb42d98b6df02ae26559ee2d8a7e 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1589,6 +1589,40 @@ class PrintLayer(LayerBase):
         super(PrintLayer, self).__init__(name, 'print', 0, inputs)
 
 
+@config_layer('priorbox')
+class PriorBoxLayer(LayerBase):
+    """
+    Config-time definition of the 'priorbox' layer.
+
+    The priorbox settings are stored on the first input; the second input
+    must be a data layer with width and height set.
+    """
+
+    def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio,
+                 variance):
+        super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs)
+        config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 inputs')
+        input_layer = self.get_input_layer(1)
+        config_assert(
+            input_layer.type == 'data',
+            'Expecting the second input layer of an priorbox layer to be '
+            'a data layer')
+        config_assert(input_layer.width > 0, 'The data layer must set width')
+        config_assert(input_layer.height > 0, 'The data layer must set height')
+        config_assert(len(variance) == 4, 'The variance must have 4 inputs')
+        # The C++ layer CHECKs that a non-empty max_size has as many entries
+        # as min_size; report a mismatch at config time instead.
+        config_assert(
+            len(max_size) in (0, len(min_size)),
+            'max_size must be empty or have one entry per min_size')
+        priorbox_conf = self.config.inputs[0].priorbox_conf
+        priorbox_conf.min_size.extend(min_size)
+        priorbox_conf.max_size.extend(max_size)
+        priorbox_conf.aspect_ratio.extend(aspect_ratio)
+        priorbox_conf.variance.extend(variance)
+        self.config.size = size
+
+
 @config_layer('data')
 class DataLayer(LayerBase):
     def __init__(self, name, size, height=None, width=None, device=None):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index da951390c955866f8d9cc83770333ecd3156c49e..9b6e5774bc82dc05e14a2565fa9cce98764adf04 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -106,6 +106,7 @@ __all__ = [
     'maxout_layer',
     'out_prod_layer',
     'print_layer',
+    'priorbox_layer',
     'spp_layer',
 ]
 
@@ -171,6 +172,7 @@ class LayerType(object):
     SPP_LAYER = "spp"
 
     PRINT_LAYER = "print"
+    PRIORBOX_LAYER = "priorbox"
 
     CTC_LAYER = "ctc"
     WARP_CTC_LAYER = "warp_ctc"
@@ -934,6 +936,62 @@ def print_layer(input, name=None):
     # this layer don't return anything, can not be input of other layer.
 
 
+@wrap_name_default("priorbox")
+def priorbox_layer(input,
+                   image,
+                   aspect_ratio,
+                   variance,
+                   min_size,
+                   max_size=None,
+                   name=None):
+    """
+    Compute the priorbox and set the variance. This layer is necessary for ssd.
+
+    :param name: The Layer Name.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param image: The network input image.
+    :type image: LayerOutput
+    :param aspect_ratio: The aspect ratio. The reciprocal of every ratio is
+                         used as well.
+    :type aspect_ratio: list
+    :param variance: The bounding box variance (4 values).
+    :type variance: list
+    :param min_size: The min size of the priorbox width/height.
+    :type min_size: list
+    :param max_size: The max size of the priorbox width/height. May be None
+                     or empty.
+    :type max_size: list
+    :return: LayerOutput
+    """
+    # Avoid a mutable default argument, which would be shared between calls.
+    if max_size is None:
+        max_size = []
+    # Each min size gives one square prior (ratio 1) plus two priors per
+    # aspect ratio (the ratio and its reciprocal); each max size adds one.
+    num_priors = len(min_size) * (len(aspect_ratio) * 2 + 1) + len(max_size)
+    # Four coordinates per prior.
+    num_filters = num_priors * 4
+    # input.size // input.num_filters is presumably the number of feature-map
+    # cells; the factor 2 makes room for the variances.
+    size = (input.size // input.num_filters) * num_filters * 2
+    Layer(
+        name=name,
+        type=LayerType.PRIORBOX_LAYER,
+        inputs=[input.name, image.name],
+        size=size,
+        min_size=min_size,
+        max_size=max_size,
+        aspect_ratio=aspect_ratio,
+        variance=variance)
+    return LayerOutput(
+        name,
+        LayerType.PRIORBOX_LAYER,
+        parents=[input, image],
+        num_filters=num_filters,
+        size=size)
+
+
 @wrap_name_default("seq_pooling")
 @wrap_bias_attr_default(has_bias=False)
 @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())