提交 5961b52b 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #1653 from Noplz/normalize-layer

CrossChannelNorm Layer for SSD
...@@ -109,6 +109,12 @@ sum_to_one_norm ...@@ -109,6 +109,12 @@ sum_to_one_norm
:members: sum_to_one_norm :members: sum_to_one_norm
:noindex: :noindex:
cross_channel_norm
------------------
.. automodule:: paddle.v2.layer
:members: cross_channel_norm
:noindex:
Recurrent Layers Recurrent Layers
================ ================
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "NormLayer.h"
#include "paddle/math/BaseMatrix.h"
#include "paddle/math/Matrix.h"
namespace paddle {
/**
 * Builds a (channels_ x spatialDim) matrix over the storage of one sample.
 *
 * The matrix starts at element offset iter * channels_ * spatialDim inside
 * data's buffer. forward() and backward() write through the returned matrix
 * and expect the batch matrix to change, so it is presumably a non-owning
 * view rather than a copy.
 *
 * @param data        batch matrix whose rows are samples.
 * @param iter        index of the sample inside the batch.
 * @param spatialDim  number of elements per channel.
 * @return matrix with channels_ rows and spatialDim columns.
 */
MatrixPtr CrossChannelNormLayer::createSampleMatrix(MatrixPtr data,
                                                    size_t iter,
                                                    size_t spatialDim) {
  const size_t sampleOffset = iter * channels_ * spatialDim;
  auto* sampleBegin = data->getData() + sampleOffset;
  return Matrix::create(sampleBegin, channels_, spatialDim, false, useGpu_);
}
/**
 * Builds a (1 x spatialDim) row matrix over the iter-th row of data.
 *
 * The matrix starts at element offset iter * spatialDim inside data's
 * buffer; callers use it to read and write one sample's per-position norms.
 *
 * @param data        matrix with spatialDim columns (one row per sample).
 * @param iter        index of the row to expose.
 * @param spatialDim  number of columns in that row.
 * @return matrix with a single row of spatialDim columns.
 */
MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
                                                     size_t iter,
                                                     size_t spatialDim) {
  const size_t rowOffset = iter * spatialDim;
  auto* rowBegin = data->getData() + rowOffset;
  return Matrix::create(rowBegin, 1, spatialDim, false, useGpu_);
}
/**
 * Forward pass of the cross-channel normalization.
 *
 * Every sample is treated as a (channels_ x spatialDim) matrix. For each
 * spatial position the values of all channels are divided by
 * sqrt(sum of squares over channels + eps), and every channel is then
 * multiplied by its trainable scale factor. The per-position norms are kept
 * in normBuffer_ (one row per sample) so that backward() can reuse them.
 */
void CrossChannelNormLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr inV = getInputValue(0);

  size_t batchSize = inV->getHeight();
  size_t dataDim = inV->getWidth();
  CHECK_EQ(getSize(), dataDim);

  reserveOutput(batchSize, dataDim);
  MatrixPtr outV = getOutputValue();
  size_t spatialDim = dataDim / channels_;

  Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
  Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
  // Every row of normBuffer_ is overwritten in the loop below, so it needs no
  // initialisation here.
  Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);

  // dataBuffer_ receives the element-wise squares of the input; they are
  // summed per spatial position below.
  inV->square2(*dataBuffer_);
  for (size_t i = 0; i < batchSize; i++) {
    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
    const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
    MatrixPtr outVTmp = createSampleMatrix(outV, i, spatialDim);
    MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);

    // compute norm: sum of squares over the channels of each position.
    spatialBuffer_->sumCols(*dataTmp, 1, 0);
    // add eps so that a position whose channels are all zero does not lead to
    // a division by zero. The eps has to be applied to the value that is
    // actually used as divisor; adding it to normBuffer_ up front has no
    // effect because normBuffer_ is overwritten by the copy below.
    spatialBuffer_->addScalar(*spatialBuffer_, 1e-6);
    spatialBuffer_->sqrt2(*spatialBuffer_);
    // remember the norm of this sample for the backward pass.
    normTmp->copyFrom(*spatialBuffer_);

    outVTmp->copyFrom(*inVTmp);
    outVTmp->divRowVector(*spatialBuffer_);
    // scale the layer: one factor per channel (row).
    outVTmp->mulColVector(*scale_->getW());
  }
}
void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
MatrixPtr inG = getInputGrad(0);
MatrixPtr inV = getInputValue(0);
MatrixPtr outG = getOutputGrad();
MatrixPtr outV = getOutputValue();
size_t batchSize = inG->getHeight();
size_t dataDim = inG->getWidth();
size_t spatialDim = dataDim / channels_;
dataBuffer_->dotMul(*outG, *outV);
Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
scaleDiff_->zeroMem();
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGTmp = createSampleMatrix(outG, i, spatialDim);
const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
const MatrixPtr inGTmp = createSampleMatrix(inG, i, spatialDim);
const MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);
channelBuffer_->sumRows(*dataTmp, 1, 0);
channelBuffer_->dotDiv(*channelBuffer_, *(scale_->getW()));
// store a / scale[i] in scaleDiff_ temporary
scaleDiff_->add(*channelBuffer_, 1.);
sampleBuffer_->dotMul(*inVTmp, *outGTmp);
spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
// scale the grad
inGTmp->copyFrom(*inVTmp);
inGTmp->mulRowVector(*spatialBuffer_);
// divide by square of norm
spatialBuffer_->dotMul(*normTmp, *normTmp);
inGTmp->divRowVector(*spatialBuffer_);
// subtract
inGTmp->add(*outGTmp, -1, 1);
// divide by norm
inGTmp->divRowVector(*normTmp);
// scale the diff
inGTmp->mulColVector(*scale_->getW());
}
// updata scale
if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
scale_->getParameterPtr()->incUpdate(callback);
}
} // namespace paddle
...@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) { ...@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) {
return new ResponseNormLayer(config); return new ResponseNormLayer(config);
} else if (norm == "cmrnorm-projection") { } else if (norm == "cmrnorm-projection") {
return new CMRProjectionNormLayer(config); return new CMRProjectionNormLayer(config);
} else if (norm == "cross-channel-norm") {
return new CrossChannelNormLayer(config);
} else { } else {
LOG(FATAL) << "Unknown norm type: " << norm; LOG(FATAL) << "Unknown norm type: " << norm;
return nullptr; return nullptr;
...@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap, ...@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
return true; return true;
} }
bool CrossChannelNormLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
CHECK(parameters_[0]);
const NormConfig& conf = config_.inputs(0).norm_conf();
channels_ = conf.channels();
scale_.reset(new Weight(channels_, 1, parameters_[0]));
return true;
}
} // namespace paddle } // namespace paddle
...@@ -65,4 +65,35 @@ public: ...@@ -65,4 +65,35 @@ public:
} }
}; };
/**
* This layer applys normalization across the channels of each sample to a
* conv layer's output, and scales the output by a group of trainable factors
* whose dimensions equal to the number of channels.
* - Input: One and only one input layer are accepted.
* - Output: The normalized data of the input data.
* Reference:
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
* Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
*/
class CrossChannelNormLayer : public NormLayer {
public:
explicit CrossChannelNormLayer(const LayerConfig& config)
: NormLayer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback);
MatrixPtr createSampleMatrix(MatrixPtr data, size_t iter, size_t spatialDim);
MatrixPtr createSpatialMatrix(MatrixPtr data, size_t iter, size_t spatialDim);
protected:
size_t channels_;
std::unique_ptr<Weight> scale_;
MatrixPtr scaleDiff_;
MatrixPtr normBuffer_;
MatrixPtr dataBuffer_;
MatrixPtr channelBuffer_;
MatrixPtr spatialBuffer_;
MatrixPtr sampleBuffer_;
};
} // namespace paddle } // namespace paddle
...@@ -20,7 +20,7 @@ namespace paddle { ...@@ -20,7 +20,7 @@ namespace paddle {
/** /**
* @brief A layer for generating priorbox locations and variances. * @brief A layer for generating priorbox locations and variances.
* - Input: Two and only two input layer are accepted. The input layer must be * - Input: Two and only two input layer are accepted. The input layer must be
* be a data output layer and a convolution output layer. * be a data output layer and a convolution output layer.
* - Output: The priorbox locations and variances of the input data. * - Output: The priorbox locations and variances of the input data.
* Reference: * Reference:
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
...@@ -45,27 +45,32 @@ protected: ...@@ -45,27 +45,32 @@ protected:
MatrixPtr buffer_; MatrixPtr buffer_;
}; };
REGISTER_LAYER(priorbox, PriorBoxLayer);
bool PriorBoxLayer::init(const LayerMap& layerMap, bool PriorBoxLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) { const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap); Layer::init(layerMap, parameterMap);
auto pbConf = config_.inputs(0).priorbox_conf(); auto pbConf = config_.inputs(0).priorbox_conf();
std::vector<real> tmp;
aspectRatio_.push_back(1.);
std::copy(pbConf.min_size().begin(), std::copy(pbConf.min_size().begin(),
pbConf.min_size().end(), pbConf.min_size().end(),
std::back_inserter(minSize_)); std::back_inserter(minSize_));
std::copy(pbConf.max_size().begin(), std::copy(pbConf.max_size().begin(),
pbConf.max_size().end(), pbConf.max_size().end(),
std::back_inserter(maxSize_)); std::back_inserter(maxSize_));
std::copy(pbConf.aspect_ratio().begin(),
pbConf.aspect_ratio().end(),
std::back_inserter(aspectRatio_));
std::copy(pbConf.variance().begin(), std::copy(pbConf.variance().begin(),
pbConf.variance().end(), pbConf.variance().end(),
std::back_inserter(variance_)); std::back_inserter(variance_));
std::copy(pbConf.aspect_ratio().begin(),
pbConf.aspect_ratio().end(),
std::back_inserter(tmp));
// flip // flip
int inputRatioLength = aspectRatio_.size(); int inputRatioLength = tmp.size();
for (int index = 0; index < inputRatioLength; index++) for (int index = 0; index < inputRatioLength; index++) {
aspectRatio_.push_back(1 / aspectRatio_[index]); aspectRatio_.push_back(tmp[index]);
aspectRatio_.push_back(1.); aspectRatio_.push_back(1 / tmp[index]);
}
numPriors_ = aspectRatio_.size(); numPriors_ = aspectRatio_.size();
if (maxSize_.size() > 0) numPriors_++; if (maxSize_.size() > 0) numPriors_++;
return true; return true;
...@@ -94,12 +99,12 @@ void PriorBoxLayer::forward(PassType passType) { ...@@ -94,12 +99,12 @@ void PriorBoxLayer::forward(PassType passType) {
for (int w = 0; w < layerWidth; ++w) { for (int w = 0; w < layerWidth; ++w) {
real centerX = (w + 0.5) * stepW; real centerX = (w + 0.5) * stepW;
real centerY = (h + 0.5) * stepH; real centerY = (h + 0.5) * stepH;
int minSize = 0; real minSize = 0;
for (size_t s = 0; s < minSize_.size(); s++) { for (size_t s = 0; s < minSize_.size(); s++) {
// first prior. // first prior.
minSize = minSize_[s]; minSize = minSize_[s];
int boxWidth = minSize; real boxWidth = minSize;
int boxHeight = minSize; real boxHeight = minSize;
// xmin, ymin, xmax, ymax. // xmin, ymin, xmax, ymax.
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
...@@ -112,7 +117,7 @@ void PriorBoxLayer::forward(PassType passType) { ...@@ -112,7 +117,7 @@ void PriorBoxLayer::forward(PassType passType) {
CHECK_EQ(minSize_.size(), maxSize_.size()); CHECK_EQ(minSize_.size(), maxSize_.size());
// second prior. // second prior.
for (size_t s = 0; s < maxSize_.size(); s++) { for (size_t s = 0; s < maxSize_.size(); s++) {
int maxSize = maxSize_[s]; real maxSize = maxSize_[s];
boxWidth = boxHeight = sqrt(minSize * maxSize); boxWidth = boxHeight = sqrt(minSize * maxSize);
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth; tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight; tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
...@@ -145,6 +150,5 @@ void PriorBoxLayer::forward(PassType passType) { ...@@ -145,6 +150,5 @@ void PriorBoxLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->copyFrom(buffer_->data_, dim * 2); outV->copyFrom(buffer_->data_, dim * 2);
} }
REGISTER_LAYER(priorbox, PriorBoxLayer);
} // namespace paddle } // namespace paddle
...@@ -1642,6 +1642,25 @@ TEST(Layer, PadLayer) { ...@@ -1642,6 +1642,25 @@ TEST(Layer, PadLayer) {
} }
} }
// Gradient check for the "cross-channel-norm" flavour of the norm layer:
// 10 channels and a layer size of 100, run once with useGpu = false and once
// with useGpu = true.
TEST(Layer, CrossChannelNormLayer) {
  TestConfig config;
  config.layerConfig.set_type("norm");
  config.layerConfig.set_size(100);

  LayerInputConfig* inputConf = config.layerConfig.add_inputs();
  NormConfig* normConf = inputConf->mutable_norm_conf();
  normConf->set_norm_type("cross-channel-norm");
  normConf->set_channels(10);
  normConf->set_size(100);
  // scale, pow and blocked are all set to 0 for this norm type.
  normConf->set_scale(0);
  normConf->set_pow(0);
  normConf->set_blocked(0);

  config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});

  for (bool onGpu : {false, true}) {
    testLayerGrad(config, "cross-channel-norm", 10, false, onGpu, false, 5);
  }
}
TEST(Layer, smooth_l1) { TEST(Layer, smooth_l1) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("smooth_l1"); config.layerConfig.set_type("smooth_l1");
......
...@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) { ...@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
true_type() /* bAsRowVector */, false_type()); true_type() /* bAsRowVector */, false_type());
} }
/**
 * Element-wise multiplication of this matrix by b, where b is applied as a
 * column vector (the bAsColVector flag is set, bAsRowVector is not). The
 * operation covers all height_ x width_ elements starting at offset zero.
 */
template<class T>
void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
  const int rowCount = static_cast<int>(height_);
  const int colCount = static_cast<int>(width_);
  MatrixOffset origin(0, 0, 0, 0);
  applyBinary(binary::DotMul<T>(), b, rowCount, colCount, origin,
              false_type(), true_type() /* bAsColVector */);
}
/**
 * Element-wise division of this matrix by b, where b is applied as a column
 * vector (the bAsColVector flag is set, bAsRowVector is not). The operation
 * covers all height_ x width_ elements starting at offset zero.
 */
template<class T>
void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
  const int rowCount = static_cast<int>(height_);
  const int colCount = static_cast<int>(width_);
  MatrixOffset origin(0, 0, 0, 0);
  applyBinary(binary::DotDiv<T>(), b, rowCount, colCount, origin,
              false_type(), true_type() /* bAsColVector */);
}
template<> template<>
template <class Agg> template <class Agg>
int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) { int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
......
...@@ -545,6 +545,9 @@ public: ...@@ -545,6 +545,9 @@ public:
void mulRowVector(BaseMatrixT& b); void mulRowVector(BaseMatrixT& b);
void divRowVector(BaseMatrixT& b); void divRowVector(BaseMatrixT& b);
void mulColVector(BaseMatrixT& b);
void divColVector(BaseMatrixT& b);
void addP2P(BaseMatrixT& b); void addP2P(BaseMatrixT& b);
/** /**
......
...@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) { ...@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) {
compare(&BaseMatrix::addRowVector); compare(&BaseMatrix::addRowVector);
compare(&BaseMatrix::mulRowVector); compare(&BaseMatrix::mulRowVector);
compare(&BaseMatrix::divRowVector); compare(&BaseMatrix::divRowVector);
compare(&BaseMatrix::mulColVector);
compare(&BaseMatrix::divColVector);
compare(&BaseMatrix::addP2P); compare(&BaseMatrix::addP2P);
compare(&BaseMatrix::invSqrt); compare(&BaseMatrix::invSqrt);
} }
......
...@@ -1220,9 +1220,11 @@ def parse_image(image, input_layer_name, image_conf): ...@@ -1220,9 +1220,11 @@ def parse_image(image, input_layer_name, image_conf):
def parse_norm(norm, input_layer_name, norm_conf): def parse_norm(norm, input_layer_name, norm_conf):
norm_conf.norm_type = norm.norm_type norm_conf.norm_type = norm.norm_type
config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'], config_assert(
"norm-type %s is not in [rnorm, 'cmrnorm-projection']" % norm.norm_type in
norm.norm_type) ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'],
"norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]"
% norm.norm_type)
norm_conf.channels = norm.channels norm_conf.channels = norm.channels
norm_conf.size = norm.size norm_conf.size = norm.size
norm_conf.scale = norm.scale norm_conf.scale = norm.scale
...@@ -1898,6 +1900,9 @@ class NormLayer(LayerBase): ...@@ -1898,6 +1900,9 @@ class NormLayer(LayerBase):
norm_conf) norm_conf)
self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x, self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x,
norm_conf.channels, False) norm_conf.channels, False)
if norm_conf.norm_type == "cross-channel-norm":
self.create_input_parameter(0, norm_conf.channels,
[norm_conf.channels, 1])
@config_layer('pool') @config_layer('pool')
......
...@@ -112,6 +112,7 @@ __all__ = [ ...@@ -112,6 +112,7 @@ __all__ = [
'out_prod_layer', 'out_prod_layer',
'print_layer', 'print_layer',
'priorbox_layer', 'priorbox_layer',
'cross_channel_norm_layer',
'spp_layer', 'spp_layer',
'pad_layer', 'pad_layer',
'eos_layer', 'eos_layer',
...@@ -1008,6 +1009,46 @@ def priorbox_layer(input, ...@@ -1008,6 +1009,46 @@ def priorbox_layer(input,
size=size) size=size)
@wrap_name_default("cross_channel_norm")
def cross_channel_norm_layer(input, name=None, param_attr=None):
    """
    Normalize a layer's output. This layer is necessary for SSD.
    It applies normalization across the channels of each sample of a
    conv layer's output and scales the output by a group of trainable
    factors whose number equals the number of channels.

    :param name: The layer name.
    :type name: basestring
    :param input: The input layer. Its num_filters must be set.
    :type input: LayerOutput
    :param param_attr: The parameter attribute of the scale factors. When it
                       is None, no extra parameter settings are passed to the
                       input.
    :type param_attr: ParameterAttribute
    :return: LayerOutput
    """
    assert input.num_filters is not None
    # param_attr defaults to None; dereferencing it unconditionally would raise
    # AttributeError, so fall back to "no extra parameter settings".
    param_kwargs = {} if param_attr is None else param_attr.attr
    Layer(
        name=name,
        type=LayerType.NORM_LAYER,
        inputs=[
            Input(
                input.name,
                norm=Norm(
                    norm_type="cross-channel-norm",
                    channels=input.num_filters,
                    size=input.size,
                    scale=0,
                    pow=0,
                    blocked=0),
                **param_kwargs)
        ])
    return LayerOutput(
        name,
        LayerType.NORM_LAYER,
        parents=input,
        num_filters=input.num_filters,
        size=input.size)
@wrap_name_default("seq_pooling") @wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
@wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling()) @wrap_param_default(['pooling_type'], default_factory=lambda _: MaxPooling())
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册