提交 0eac3992 编写于 作者: Y yuan

priorbox layer for ssd

上级 bf473971
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/BaseMatrix.h"
namespace paddle {
// Layer that writes prior-box coordinates followed by their variances into
// its output (see forward()). The commit message describes it as the
// "priorbox layer for ssd".
class PriorBoxLayer : public Layer {
public:
explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {}
// Reads the priorbox_conf of input 0 and derives numPriors_.
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
// Fills the output with box coordinates and variances.
void forward(PassType passType);
// Intentionally empty: this layer does nothing in the backward pass.
void backward(const UpdateCallback& callback) {}
// Box count per feature-map cell as computed by init(); forward() sizes the
// output as height * width * numPriors_ * 4 * 2 values.
int numPriors_;
// Copied from priorbox_conf.min_size.
std::vector<int> minSize_;
// Copied from priorbox_conf.max_size; may be empty.
std::vector<int> maxSize_;
// Configured aspect ratios, extended by init() with the reciprocal of each
// entry and a trailing 1.
std::vector<float> aspectRatio_;
// Copied from priorbox_conf.variance; forward() reads elements 0..3.
std::vector<float> variance_;
// Scratch matrix the boxes are computed into before being copied to the
// output (described in forward() as a CPU buffer).
MatrixPtr buffer_;
};
bool PriorBoxLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
std::copy(config_.inputs(0).priorbox_conf().min_size().begin(),
config_.inputs(0).priorbox_conf().min_size().end(),
std::back_inserter(minSize_));
std::copy(config_.inputs(0).priorbox_conf().max_size().begin(),
config_.inputs(0).priorbox_conf().max_size().end(),
std::back_inserter(maxSize_));
std::copy(config_.inputs(0).priorbox_conf().aspect_ratio().begin(),
config_.inputs(0).priorbox_conf().aspect_ratio().end(),
std::back_inserter(aspectRatio_));
std::copy(config_.inputs(0).priorbox_conf().variance().begin(),
config_.inputs(0).priorbox_conf().variance().end(),
std::back_inserter(variance_));
// flip
int input_ratio_length = aspectRatio_.size();
for (int index = 0; index < input_ratio_length; index++)
aspectRatio_.push_back(1 / aspectRatio_[index]);
aspectRatio_.push_back(1.);
numPriors_ = aspectRatio_.size();
if (maxSize_.size() > 0)
numPriors_++;
buffer_ = Matrix::create(1, 1, false, false);
return true;
}
/**
 * Computes the prior boxes for every cell of the input feature map and
 * appends the box variances.
 *
 * Input 0 supplies the feature-map frame width and height. Elements (0, 0)
 * and (0, 1) of the value of input 1 are read as the image width and height.
 *
 * Output: one row of 2 * dim values, dim = height * width * numPriors_ * 4.
 *  - [0, dim):       xmin, ymin, xmax, ymax of each box, divided by the image
 *                    size and clipped to [0, 1];
 *  - [dim, 2 * dim): variance_[0..3] repeated once per box.
 */
void PriorBoxLayer::forward(PassType passType) {
  Layer::forward(passType);
  auto input = getInput(0);
  const int layer_width = input.getFrameWidth();
  const int layer_height = input.getFrameHeight();

  MatrixPtr inV1 = getInputValue(1);
  const int image_width = inV1->getElement(0, 0);
  const int image_height = inV1->getElement(0, 1);
  const float step_w = static_cast<float>(image_width) / layer_width;
  const float step_h = static_cast<float>(image_height) / layer_height;
  const int dim = layer_height * layer_width * numPriors_ * 4;

  if (maxSize_.size() > 0) {
    CHECK_EQ(minSize_.size(), maxSize_.size());
  }
  // The loops below emit, per cell, one box for each min size, one more for
  // each (min size, max size) combination, and one for each aspect ratio that
  // is not 1. The buffer is sized from numPriors_, so verify both counts
  // agree before writing; otherwise the writes would run past the buffer or
  // leave the variance block misaligned.
  size_t non_unit_ratios = 0;
  for (size_t r = 0; r < aspectRatio_.size(); r++) {
    if (!(fabs(aspectRatio_[r] - 1.) < 1e-6)) {
      ++non_unit_ratios;
    }
  }
  const size_t boxes_per_cell =
      minSize_.size() * (1 + maxSize_.size()) + non_unit_ratios;
  CHECK_EQ(boxes_per_cell, static_cast<size_t>(numPriors_))
      << "priorbox configuration yields a different number of boxes per cell "
         "than the output was sized for";

  reserveOutput(1, dim * 2);
  // use a cpu buffer to compute
  Matrix::resizeOrCreate(buffer_, 1, dim * 2, false, false);
  auto* tmp_ptr = buffer_->getData();
  int idx = 0;

  // Appends one box as xmin, ymin, xmax, ymax, normalised by the image size.
  auto emit_box = [&](float center_x, float center_y, float box_width,
                      float box_height) {
    tmp_ptr[idx++] = (center_x - box_width / 2.) / image_width;
    tmp_ptr[idx++] = (center_y - box_height / 2.) / image_height;
    tmp_ptr[idx++] = (center_x + box_width / 2.) / image_width;
    tmp_ptr[idx++] = (center_y + box_height / 2.) / image_height;
  };

  for (int h = 0; h < layer_height; ++h) {
    for (int w = 0; w < layer_width; ++w) {
      const float center_x = (w + 0.5) * step_w;
      const float center_y = (h + 0.5) * step_h;
      // Holds the last min size after the loop; the aspect-ratio boxes below
      // are derived from it.
      int min_size = 0;
      for (size_t s = 0; s < minSize_.size(); s++) {
        // first prior: a square with side min_size.
        min_size = minSize_[s];
        emit_box(center_x, center_y, min_size, min_size);
        // second prior: a square with side sqrt(min_size * max_size). The
        // side is kept as a float so the square root is not truncated to an
        // integer.
        for (size_t m = 0; m < maxSize_.size(); m++) {
          const int max_size = maxSize_[m];
          const float side = sqrt(static_cast<float>(min_size) * max_size);
          emit_box(center_x, center_y, side, side);
        }
      }
      // rest of priors: one box per aspect ratio other than 1.
      for (size_t r = 0; r < aspectRatio_.size(); r++) {
        const float ar = aspectRatio_[r];
        if (fabs(ar - 1.) < 1e-6) {
          continue;
        }
        const float box_width = min_size * sqrt(ar);
        const float box_height = min_size / sqrt(ar);
        emit_box(center_x, center_y, box_width, box_height);
      }
    }
  }

  // clip the prior's coordinate such that it is within [0, 1]
  for (int d = 0; d < dim; ++d) {
    if (tmp_ptr[d] < 0) {
      tmp_ptr[d] = 0;
    } else if (tmp_ptr[d] > 1) {
      tmp_ptr[d] = 1;
    }
  }

  // set the variance: idx equals dim here, so this fills [dim, 2 * dim).
  const int num_boxes = layer_height * layer_width * numPriors_;
  for (int b = 0; b < num_boxes; b++) {
    for (int j = 0; j < 4; j++) {
      tmp_ptr[idx++] = variance_[j];
    }
  }

  MatrixPtr outV = getOutputValue();
  outV->copyFrom(buffer_->data_, dim * 2);
}
// Associates the name `priorbox` with PriorBoxLayer; presumably this is what
// lets a layer config of type 'priorbox' instantiate the class (macro
// definition not visible here, confirm).
REGISTER_LAYER(priorbox, PriorBoxLayer);
} // namespace paddle
...@@ -248,6 +248,15 @@ message ImageConfig { ...@@ -248,6 +248,15 @@ message ImageConfig {
required uint32 img_size_y = 9; required uint32 img_size_y = 9;
} }
// Settings of the prior-box layer, attached to a layer input through
// LayerInputConfig.priorbox_conf.
message PriorBoxConfig {
// Sizes used for the square prior boxes and as the base of the
// aspect-ratio boxes.
repeated uint32 min_size = 1;
// Optional sizes; each is combined with a min size as sqrt(min * max).
repeated uint32 max_size = 2;
// Width/height ratios of the additional boxes.
repeated float aspect_ratio = 3;
// Variance values written after the box coordinates; the C++ layer reads
// four of them.
repeated float variance = 4;
// NOTE(review): the PriorBoxLayer code shown alongside this message does not
// read `flip` or `clip`; it always adds reciprocal ratios and always clips.
optional bool flip = 5 [default = true];
optional bool clip = 6 [default = true];
}
message LayerInputConfig { message LayerInputConfig {
required string input_layer_name = 1; required string input_layer_name = 1;
optional string input_parameter_name = 2; optional string input_parameter_name = 2;
...@@ -263,6 +272,7 @@ message LayerInputConfig { ...@@ -263,6 +272,7 @@ message LayerInputConfig {
optional BilinearInterpConfig bilinear_interp_conf = 10; optional BilinearInterpConfig bilinear_interp_conf = 10;
optional MaxOutConfig maxout_conf = 11; optional MaxOutConfig maxout_conf = 11;
optional SppConfig spp_conf = 12; optional SppConfig spp_conf = 12;
optional PriorBoxConfig priorbox_conf = 13;
} }
message LayerConfig { message LayerConfig {
......
...@@ -1577,6 +1577,19 @@ class PrintLayer(LayerBase): ...@@ -1577,6 +1577,19 @@ class PrintLayer(LayerBase):
def __init__(self, name, inputs): def __init__(self, name, inputs):
super(PrintLayer, self).__init__(name, 'print', 0, inputs) super(PrintLayer, self).__init__(name, 'print', 0, inputs)
@config_layer('priorbox')
class PriorBoxLayer(LayerBase):
    """Config-side description of the 'priorbox' layer.

    Takes exactly two inputs and stores the box settings in the
    ``priorbox_conf`` of the first one.
    """

    def __init__(self, name, inputs, size, min_size, max_size, aspect_ratio,
                 variance):
        super(PriorBoxLayer, self).__init__(name, 'priorbox', 0, inputs)
        config_assert(len(inputs) == 2, 'PriorBoxLayer must have 2 input')

        # All four settings live on the first input's priorbox_conf.
        box_conf = self.config.inputs[0].priorbox_conf
        box_conf.min_size.extend(min_size)
        box_conf.max_size.extend(max_size)
        box_conf.aspect_ratio.extend(aspect_ratio)
        box_conf.variance.extend(variance)

        self.config.size = size

        # Both input layers are looked up; the results are not used further.
        self.get_input_layer(0)
        self.get_input_layer(1)
@config_layer('data') @config_layer('data')
class DataLayer(LayerBase): class DataLayer(LayerBase):
......
...@@ -106,6 +106,7 @@ __all__ = [ ...@@ -106,6 +106,7 @@ __all__ = [
'maxout_layer', 'maxout_layer',
'out_prod_layer', 'out_prod_layer',
'print_layer', 'print_layer',
'priorbox_layer',
'spp_layer', 'spp_layer',
] ]
...@@ -171,6 +172,7 @@ class LayerType(object): ...@@ -171,6 +172,7 @@ class LayerType(object):
SPP_LAYER = "spp" SPP_LAYER = "spp"
PRINT_LAYER = "print" PRINT_LAYER = "print"
PRIORBOX_LAYER = "priorbox"
CTC_LAYER = "ctc" CTC_LAYER = "ctc"
WARP_CTC_LAYER = "warp_ctc" WARP_CTC_LAYER = "warp_ctc"
...@@ -933,6 +935,40 @@ def print_layer(input, name=None): ...@@ -933,6 +935,40 @@ def print_layer(input, name=None):
inputs=[l.name for l in input], ) inputs=[l.name for l in input], )
# this layer don't return anything, can not be input of other layer. # this layer don't return anything, can not be input of other layer.
@wrap_name_default("priorbox")
def priorbox_layer(input,
                   img_shape,
                   aspect_ratio,
                   variance,
                   min_size,
                   max_size=None,
                   name=None):
    """
    Compute the priorbox and set the variance. This layer is necessary for ssd.

    :param name: The Layer Name.
    :type name: basestring
    :param input: The input layer.
    :type input: LayerOutput
    :param img_shape: The width and height of the network input image.
    :type img_shape: LayerOutput
    :param aspect_ratio: The aspect ratio.
    :type aspect_ratio: list
    :param variance: The bounding box variance.
    :type variance: list
    :param min_size: The min size of the priorbox width/height.
    :type min_size: list
    :param max_size: The max size of the priorbox width/height. May be
                     omitted, in which case no max size is used.
    :type max_size: list
    :return: LayerOutput
    """
    # Avoid a shared mutable default; an omitted max_size means "none given".
    if max_size is None:
        max_size = []

    # Each aspect ratio contributes itself and its reciprocal; plus one for
    # ratio 1, plus one per max size. Four coordinates per box.
    num_filters = (len(aspect_ratio) * 2 + 1 + len(max_size)) * 4
    # Floor division keeps the size an integer under true division as well.
    # The factor 2 covers the coordinates and their variances.
    size = (input.size // input.num_filters) * num_filters * 2

    Layer(
        name=name,
        type=LayerType.PRIORBOX_LAYER,
        inputs=[input.name, img_shape.name],
        size=size,
        min_size=min_size,
        max_size=max_size,
        aspect_ratio=aspect_ratio,
        variance=variance)
    return LayerOutput(
        name,
        LayerType.PRIORBOX_LAYER,
        parents=[input, img_shape],
        num_filters=num_filters,
        size=size)
@wrap_name_default("seq_pooling") @wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册