From 0eac39928090c44fc3b8b4edc18604ff7b662f91 Mon Sep 17 00:00:00 2001
From: yuan <yuan.gao@noplz.name>
Date: Tue, 13 Dec 2016 20:57:59 +0800
Subject: [PATCH] priorbox layer for ssd

---
 paddle/gserver/layers/PriorBox.cpp            | 137 ++++++++++++++++++
 proto/ModelConfig.proto                       |  10 ++
 python/paddle/trainer/config_parser.py        |  13 ++
 .../paddle/trainer_config_helpers/layers.py   |  36 +++++
 4 files changed, 196 insertions(+)
 create mode 100644 paddle/gserver/layers/PriorBox.cpp
diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp
new file mode 100644
index 0000000000..b0d59cd145
--- /dev/null
+++ b/paddle/gserver/layers/PriorBox.cpp
@@ -0,0 +1,137 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+#include "paddle/math/BaseMatrix.h"
+
+namespace paddle {
+
+class PriorBoxLayer : public Layer {  // prior boxes + variances for SSD
+public:
+  explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {}
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+  void forward(PassType passType);
+  void backward(const UpdateCallback& callback) {}  // no-op
+  int numPriors_;                   // boxes budgeted per feature-map cell
+  std::vector<int> minSize_;        // from priorbox_conf.min_size
+  std::vector<int> maxSize_;        // from priorbox_conf.max_size
+  std::vector<float> aspectRatio_;  // configured ratios, reciprocals, then 1
+  std::vector<float> variance_;     // from priorbox_conf.variance
+  MatrixPtr buffer_;                // scratch row that forward() computes in
+};
+
+// Loads the prior box settings stored in input 0's priorbox_conf and
+// derives numPriors_, the number of boxes budgeted per feature-map cell.
+bool PriorBoxLayer::init(const LayerMap& layerMap,
+                         const ParameterMap& parameterMap) {
+  if (!Layer::init(layerMap, parameterMap)) return false;
+  const auto& conf = config_.inputs(0).priorbox_conf();
+  minSize_.assign(conf.min_size().begin(), conf.min_size().end());
+  maxSize_.assign(conf.max_size().begin(), conf.max_size().end());
+  aspectRatio_.assign(conf.aspect_ratio().begin(), conf.aspect_ratio().end());
+  variance_.assign(conf.variance().begin(), conf.variance().end());
+  // forward() reads variance_[0..3] for every box.
+  CHECK(variance_.size() >= 4) << "priorbox needs 4 variance values";
+  // flip: every configured ratio also contributes its reciprocal, so a
+  // ratio of zero (division by zero) or below must be rejected.
+  const size_t input_ratio_length = aspectRatio_.size();
+  for (size_t index = 0; index < input_ratio_length; index++) {
+    CHECK(aspectRatio_[index] > 0) << "aspect ratio must be positive";
+    aspectRatio_.push_back(1 / aspectRatio_[index]);
+  }
+  // Ratio 1 stands for the min_size square.
+  aspectRatio_.push_back(1.);
+  // One more box (the sqrt(min_size * max_size) square) with a max_size.
+  numPriors_ = aspectRatio_.size() + (maxSize_.empty() ? 0 : 1);
+  buffer_ = Matrix::create(1, 1, false, false);
+  return true;
+}
+
+// Writes one output row of 2 * dim values: the [xmin, ymin, xmax, ymax] of
+// every prior box, normalized by the image size and clipped to [0, 1],
+// followed by four variances per box.
+void PriorBoxLayer::forward(PassType passType) {
+  Layer::forward(passType);
+  auto input = getInput(0);
+  int layer_width = input.getFrameWidth();
+  int layer_height = input.getFrameHeight();
+
+  // Row 0 of input 1 holds the image size as (width, height).
+  MatrixPtr inV1 = getInputValue(1);
+  int image_width = inV1->getElement(0, 0);
+  int image_height = inV1->getElement(0, 1);
+  CHECK(layer_width > 0 && layer_height > 0) << "input frame size not set";
+  CHECK(image_width > 0 && image_height > 0) << "image size must be positive";
+  CHECK(variance_.size() >= 4) << "priorbox needs 4 variance values";
+  CHECK(maxSize_.empty() || maxSize_.size() == minSize_.size())
+      << "max_size must be empty or match min_size in length";
+  float step_w = static_cast<float>(image_width) / layer_width;
+  float step_h = static_cast<float>(image_height) / layer_height;
+  int dim = layer_height * layer_width * numPriors_ * 4;
+  reserveOutput(1, dim * 2);
+  // use a cpu buffer to compute
+  Matrix::resizeOrCreate(buffer_, 1, dim * 2, false, false);
+  auto* tmp_ptr = buffer_->getData();
+  int idx = 0;
+  float center_x = 0;
+  float center_y = 0;
+  // Appends one box centred on the current cell as xmin, ymin, xmax, ymax.
+  // The bound check turns a box count above numPriors_ into a clean failure
+  // instead of a write past the end of the buffer.
+  auto add_box = [&](float box_width, float box_height) {
+    CHECK_LE(idx + 4, dim) << "more prior boxes per cell than numPriors_";
+    tmp_ptr[idx++] = (center_x - box_width / 2.) / image_width;
+    tmp_ptr[idx++] = (center_y - box_height / 2.) / image_height;
+    tmp_ptr[idx++] = (center_x + box_width / 2.) / image_width;
+    tmp_ptr[idx++] = (center_y + box_height / 2.) / image_height;
+  };
+  for (int h = 0; h < layer_height; ++h) {
+    for (int w = 0; w < layer_width; ++w) {
+      center_x = (w + 0.5) * step_w;
+      center_y = (h + 0.5) * step_h;
+      for (size_t s = 0; s < minSize_.size(); s++) {
+        // first prior: the min_size square.
+        int min_size = minSize_[s];
+        add_box(min_size, min_size);
+        if (maxSize_.size() > 0) {
+          // second prior: the sqrt(min_size * max_size) square, pairing
+          // each min_size with the max_size at the same index. The side
+          // stays in floating point so it is not truncated to an integer.
+          float side = sqrt(static_cast<float>(min_size) * maxSize_[s]);
+          add_box(side, side);
+        }
+        // rest of priors: one box per aspect ratio other than 1, which
+        // is already covered by the min_size square.
+        for (size_t r = 0; r < aspectRatio_.size(); r++) {
+          float ar = aspectRatio_[r];
+          if (fabs(ar - 1.) < 1e-6) continue;
+          add_box(min_size * sqrt(ar), min_size / sqrt(ar));
+        }
+      }
+    }
+  }
+  // All budgeted slots must be filled so the variances start at offset dim.
+  CHECK_EQ(idx, dim) << "prior box count does not match numPriors_";
+  // clip the prior's coordinate such that it is within [0, 1]
+  for (int d = 0; d < dim; ++d)
+    tmp_ptr[d] = std::min(std::max(tmp_ptr[d], 0.f), 1.f);
+  // set the variance: the same four values for every box.
+  for (int d = 0; d < dim; ++d) tmp_ptr[idx++] = variance_[d % 4];
+  MatrixPtr outV = getOutputValue();
+  outV->copyFrom(buffer_->data_, dim * 2);
+}
+REGISTER_LAYER(priorbox, PriorBoxLayer);
+
+}  // namespace paddle
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index b34e1ebded..460a39275f 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -248,6 +248,15 @@ message ImageConfig {
   required uint32 img_size_y = 9;
 }
 
+message PriorBoxConfig {  // settings read by the priorbox layer
+  repeated uint32 min_size = 1;     // side of the first (square) prior
+  repeated uint32 max_size = 2;     // if set, adds a sqrt(min * max) square
+  repeated float aspect_ratio = 3;  // layer also adds each reciprocal and 1
+  repeated float variance = 4;      // layer copies the first four per box
+  optional bool flip = 5 [default = true];  // NOTE: not read by the layer
+  optional bool clip = 6 [default = true];  // NOTE: not read by the layer
+}
+
 message LayerInputConfig {
   required string input_layer_name = 1;
   optional string input_parameter_name = 2;
@@ -263,6 +272,7 @@ message LayerInputConfig {
   optional BilinearInterpConfig bilinear_interp_conf = 10;
   optional MaxOutConfig maxout_conf = 11;
   optional SppConfig spp_conf = 12;
+  optional PriorBoxConfig priorbox_conf = 13;
 }
 
 message LayerConfig {
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 5b7f4d85e2..5de524e507 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1577,6 +1577,19 @@ class PrintLayer(LayerBase):
     def __init__(self, name, inputs):
         super(PrintLayer, self).__init__(name, 'print', 0, inputs)
 
+@config_layer('priorbox')
+class PriorBoxLayer(LayerBase):
+    def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio, variance):
+        super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs)
+        config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 input')
+        conf = self.config.inputs[0].priorbox_conf  # settings live on input 0
+        conf.min_size.extend(min_size)
+        conf.max_size.extend(max_size)
+        conf.aspect_ratio.extend(aspect_ratio)
+        conf.variance.extend(variance)
+        self.config.size = size
+        feature_layer, image_layer = map(self.get_input_layer, (0, 1))
+
 
 @config_layer('data')
 class DataLayer(LayerBase):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 8dd6b7b7d2..f04b5646aa 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -106,6 +106,7 @@ __all__ = [
     'maxout_layer',
     'out_prod_layer',
     'print_layer',
+    'priorbox_layer',
     'spp_layer',
 ]
 
@@ -171,6 +172,7 @@ class LayerType(object):
     SPP_LAYER = "spp"
 
     PRINT_LAYER = "print"
+    PRIORBOX_LAYER = "priorbox"
 
     CTC_LAYER = "ctc"
     WARP_CTC_LAYER = "warp_ctc"
@@ -933,6 +935,40 @@ def print_layer(input, name=None):
         inputs=[l.name for l in input], )
     # this layer don't return anything, can not be input of other layer.
 
+@wrap_name_default("priorbox")
+def priorbox_layer(input, img_shape, aspect_ratio, variance, min_size,
+                   max_size=None, name=None):
+    """
+    Compute the priorbox and set the variance. This layer is necessary for ssd.
+
+    :param name: The Layer Name.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param img_shape: The width and height of the network input image.
+    :type img_shape: LayerOutput
+    :param aspect_ratio: The aspect ratio.
+    :type aspect_ratio: list
+    :param variance: The bounding box variance.
+    :type variance: list
+    :param min_size: The min size of the priorbox width/height.
+    :type min_size: list
+    :param max_size: The max size of the priorbox width/height. Could be None.
+    :type max_size: list
+    :return: LayerOutput
+    """
+    max_size = [] if max_size is None else max_size
+    # Boxes per cell (ratios, reciprocals, ratio 1, max sizes) x 4 coords.
+    num_filters = (len(aspect_ratio) * 2 + 1 + len(max_size)) * 4
+    # Twice the coordinate count: the variances follow the coordinates.
+    size = (input.size // input.num_filters) * num_filters * 2
+    Layer(
+        name=name, type=LayerType.PRIORBOX_LAYER, size=size,
+        inputs=[input.name, img_shape.name], min_size=min_size,
+        max_size=max_size, aspect_ratio=aspect_ratio, variance=variance)
+    return LayerOutput(
+        name, LayerType.PRIORBOX_LAYER, parents=[input, img_shape],
+        num_filters=num_filters, size=size)
 
 @wrap_name_default("seq_pooling")
 @wrap_bias_attr_default(has_bias=False)
-- 
GitLab