Commit eb43d93a, authored by gaoyuan

Change Normalize layer to CrossChannelNorm layer

Parent eea0097d
@@ -13,53 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "Layer.h"
+#include "NormLayer.h"
 #include "paddle/math/BaseMatrix.h"
 #include "paddle/math/Matrix.h"

 namespace paddle {

-/**
- * This layer applys normalize across the channels of each sample to a
- * conv layer's output and scale the output by a group of trainable factors
- * which dimensions equal to the channel's number.
- * - Input: One and only one input layer are accepted. The input layer must be
- *          be a data output layer.
- * - Output: The normalized data of the input data.
- * Reference:
- *   Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
- *   Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
- */
-class NormalizeLayer : public Layer {
-public:
-  explicit NormalizeLayer(const LayerConfig& config) : Layer(config) {}
-  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
-  void forward(PassType passType);
-  void backward(const UpdateCallback& callback);
-
-protected:
-  size_t channels_;
-  std::unique_ptr<Weight> scale_;
-  MatrixPtr scaleDiff_;
-  MatrixPtr normBuffer_;
-  MatrixPtr dataBuffer_;
-  MatrixPtr channelBuffer_;
-  MatrixPtr spatialBuffer_;
-  MatrixPtr sampleBuffer_;
-};
-
-REGISTER_LAYER(normalize, NormalizeLayer);
-
-bool NormalizeLayer::init(const LayerMap& layerMap,
-                          const ParameterMap& parameterMap) {
-  Layer::init(layerMap, parameterMap);
-  CHECK(parameters_[0]);
-  channels_ = config_.num_filters();
-  scale_.reset(new Weight(channels_, 1, parameters_[0]));
-  return true;
-}
-
-void NormalizeLayer::forward(PassType passType) {
+void CrossChannelNormLayer::forward(PassType passType) {
   Layer::forward(passType);
   auto in = getInput(0);
   MatrixPtr inV = getInputValue(0);
@@ -74,16 +34,12 @@ void NormalizeLayer::forward(PassType passType) {
   Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
   Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
-  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
-  Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
   Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
   normBuffer_->zeroMem();
   spatialBuffer_->zeroMem();
-  sampleBuffer_->zeroMem();
   dataBuffer_->zeroMem();
   // add eps to avoid overflow
   normBuffer_->addScalar(*normBuffer_, 1e-6);
-  channelBuffer_->resetOne();
   inV->square2(*dataBuffer_);
   for (size_t i = 0; i < batchSize; i++) {
     spatialBuffer_->zeroMem();
@@ -102,18 +58,14 @@ void NormalizeLayer::forward(PassType passType) {
     spatialBuffer_->sumCols(*dataTmp, 1, 1);
     spatialBuffer_->sqrt2(*spatialBuffer_);
     normTmp->copyFrom(*spatialBuffer_);
-    sampleBuffer_->mul(*channelBuffer_, *spatialBuffer_, 1., 0.);
-    sampleBuffer_->dotDiv(*inTmp, *sampleBuffer_);
-    outTmp->copyFrom(*sampleBuffer_);
+    outTmp->copyFrom(*inTmp);
+    outTmp->divRowVector(*spatialBuffer_);
     // scale the layer.
-    spatialBuffer_->resetOne();
-    sampleBuffer_->mul(*scale_->getW(), *spatialBuffer_, 1., 0.);
-    outTmp->dotMul(*outTmp, *sampleBuffer_);
+    outTmp->mulColVector(*scale_->getW());
   }
 }

-void NormalizeLayer::backward(const UpdateCallback& callback) {
+void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inG = getInputGrad(0);
   MatrixPtr inV = getInputValue(0);
   MatrixPtr outG = getOutputGrad();
@@ -124,9 +76,10 @@ void NormalizeLayer::backward(const UpdateCallback& callback) {
   size_t dataDim = inG->getWidth();
   size_t spatialDim = dataDim / channels_;

-  bool syncFlag = hl_get_sync_flag();
   dataBuffer_->dotMul(*outG, *outV);
   Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
+  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
+  Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
   scaleDiff_->zeroMem();
   for (size_t i = 0; i < batchSize; i++) {
     spatialBuffer_->zeroMem();
@@ -154,28 +107,20 @@ void NormalizeLayer::backward(const UpdateCallback& callback) {
     sampleBuffer_->dotMul(*inValueTmp, *outGradTmp);
     spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
     // scale the grad
-    channelBuffer_->resetOne();
-    sampleBuffer_->mul(*channelBuffer_, *spatialBuffer_, 1., 0.);
-    inGradTmp->dotMul(*inValueTmp, *sampleBuffer_);
+    inGradTmp->copyFrom(*inValueTmp);
+    inGradTmp->mulRowVector(*spatialBuffer_);
     // divide by square of norm
     spatialBuffer_->dotMul(*normTmp, *normTmp);
-    sampleBuffer_->mul(*channelBuffer_, *spatialBuffer_, 1., 0.);
-    inGradTmp->dotDiv(*inGradTmp, *sampleBuffer_);
+    inGradTmp->divRowVector(*spatialBuffer_);
     // subtract
     inGradTmp->add(*outGradTmp, -1, 1);
     // divide by norm
-    sampleBuffer_->mul(*channelBuffer_, *normTmp, 1., 0.);
-    inGradTmp->dotDiv(*inGradTmp, *sampleBuffer_);
+    inGradTmp->divRowVector(*normTmp);
     // scale the diff
-    spatialBuffer_->resetOne();
-    sampleBuffer_->mul(*scale_->getW(), *spatialBuffer_, 1., 0.);
-    inGradTmp->dotMul(*inGradTmp, *sampleBuffer_);
+    inGradTmp->mulColVector(*scale_->getW());
   }
   // updata scale
   if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
-  hl_set_sync_flag(false);
-  hl_set_sync_flag(syncFlag);
   scale_->getParameterPtr()->incUpdate(callback);
 }
...
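For readers tracing the buffer arithmetic above: per sample, with x the input reshaped to channels x spatialDim, gamma the trainable per-channel scale, and eps = 1e-6 added in forward() to avoid division by zero, the rewritten forward and backward passes compute the following (a restatement of the code, not part of the commit):

$$y_{c,s} = \gamma_c \, \frac{x_{c,s}}{n_s}, \qquad n_s = \sqrt{\sum_{c'} x_{c',s}^2 + \epsilon}$$

$$\frac{\partial L}{\partial x_{c,s}} = \frac{\gamma_c}{n_s}\left(\frac{\partial L}{\partial y_{c,s}} - \frac{x_{c,s}}{n_s^{2}}\sum_{c'} x_{c',s}\,\frac{\partial L}{\partial y_{c',s}}\right)$$

The new divRowVector/mulRowVector calls broadcast the per-position quantities n_s and the channel sums across channels, and mulColVector broadcasts gamma across spatial positions, replacing the old channelBuffer_/sampleBuffer_ outer products.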
@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) {
     return new ResponseNormLayer(config);
   } else if (norm == "cmrnorm-projection") {
     return new CMRProjectionNormLayer(config);
+  } else if (norm == "cross-channel-norm") {
+    return new CrossChannelNormLayer(config);
   } else {
     LOG(FATAL) << "Unknown norm type: " << norm;
     return nullptr;
@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
   return true;
 }

+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+                                 const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK(parameters_[0]);
+  const NormConfig& conf = config_.inputs(0).norm_conf();
+  channels_ = conf.channels();
+  scale_.reset(new Weight(channels_, 1, parameters_[0]));
+  return true;
+}
+
 }  // namespace paddle
@@ -65,4 +65,35 @@ public:
   }
 };

+/**
+ * This layer applies normalization across the channels of each sample to a
+ * conv layer's output, and scales the output by a group of trainable factors
+ * whose dimension equals the number of channels.
+ * - Input: One and only one input layer is accepted. The input layer must be
+ *          a data output layer.
+ * - Output: The normalized data of the input data.
+ * Reference:
+ *   Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+ *   Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
+ */
+class CrossChannelNormLayer : public NormLayer {
+public:
+  explicit CrossChannelNormLayer(const LayerConfig& config)
+      : NormLayer(config) {}
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+  void forward(PassType passType);
+  void backward(const UpdateCallback& callback);
+
+protected:
+  size_t channels_;
+  std::unique_ptr<Weight> scale_;
+  MatrixPtr scaleDiff_;
+  MatrixPtr normBuffer_;
+  MatrixPtr dataBuffer_;
+  MatrixPtr channelBuffer_;
+  MatrixPtr spatialBuffer_;
+  MatrixPtr sampleBuffer_;
+};
+
 }  // namespace paddle
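As a companion to the header comment above, here is a minimal NumPy sketch of what CrossChannelNormLayer computes for one sample; the function and variable names (x, gamma, grad_y) are illustrative and are not part of the Paddle API:

```python
import numpy as np

def cross_channel_norm_forward(x, gamma, eps=1e-6):
    """x: (channels, spatial) slice of one sample; gamma: (channels, 1) trainable scale."""
    # Per-position L2 norm over the channel axis, with eps to avoid division by zero.
    norm = np.sqrt((x * x).sum(axis=0, keepdims=True) + eps)
    return gamma * x / norm, norm

def cross_channel_norm_backward(x, gamma, grad_y, norm):
    """Input gradient, mirroring the divRowVector/mulColVector steps in backward()."""
    s = (x * grad_y).sum(axis=0, keepdims=True)  # sum over channels of x * dL/dy
    return gamma * (grad_y - x * s / (norm * norm)) / norm
```

A quick finite-difference check of cross_channel_norm_backward against cross_channel_norm_forward is an easy way to convince yourself that the gradient coded in C++ above is consistent.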
@@ -1623,17 +1623,22 @@ TEST(Layer, PadLayer) {
   }
 }

-TEST(Layer, NormalizeLayer) {
+TEST(Layer, CrossChannelNormLayer) {
   TestConfig config;
-  config.layerConfig.set_type("normalize");
+  config.layerConfig.set_type("norm");
   config.layerConfig.set_size(100);
-  config.layerConfig.set_num_filters(10);
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  NormConfig* norm = input->mutable_norm_conf();
+  norm->set_norm_type("cross-channel-norm");
+  norm->set_channels(10);
+  norm->set_size(100);
+  norm->set_scale(0);
+  norm->set_pow(0);
+  norm->set_blocked(0);
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
-  config.layerConfig.add_inputs();

   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "normalize", 10, false, useGpu, false, 5);
+    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
   }
 }
...
@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
               true_type() /* bAsRowVector */, false_type());
 }

+template<class T>
+void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
+  MatrixOffset offset(0, 0, 0, 0);
+  int numRows = height_;
+  int numCols = width_;
+  applyBinary(binary::DotMul<T>(), b, numRows, numCols, offset,
+              false_type(), true_type() /* bAsColVector */);
+}
+
+template<class T>
+void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
+  MatrixOffset offset(0, 0, 0, 0);
+  int numRows = height_;
+  int numCols = width_;
+  applyBinary(binary::DotDiv<T>(), b, numRows, numCols, offset,
+              false_type(), true_type() /* bAsColVector */);
+}
+
 template<>
 template <class Agg>
 int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
...
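The two new helpers mirror the existing mulRowVector/divRowVector, but broadcast a (height x 1) column vector across every column instead of a (1 x width) row vector across every row. A hypothetical NumPy analogue (not Paddle code) of the in-place semantics:

```python
import numpy as np

a = np.arange(6.0).reshape(2, 3)  # the matrix being updated in place
b = np.array([[10.0], [20.0]])    # column vector: one value per row (e.g. per channel)

a_mul = a * b  # mulColVector: row i of a is scaled by b[i, 0]
a_div = a / b  # divColVector: row i of a is divided by b[i, 0]
```

In CrossChannelNormLayer these column-vector helpers are what apply the per-channel scale to a (channels x spatialDim) sample matrix.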
@@ -545,6 +545,9 @@ public:
   void mulRowVector(BaseMatrixT& b);
   void divRowVector(BaseMatrixT& b);

+  void mulColVector(BaseMatrixT& b);
+  void divColVector(BaseMatrixT& b);
+
   void addP2P(BaseMatrixT& b);

   /**
...
@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) {
   compare(&BaseMatrix::addRowVector);
   compare(&BaseMatrix::mulRowVector);
   compare(&BaseMatrix::divRowVector);
+  compare(&BaseMatrix::mulColVector);
+  compare(&BaseMatrix::divColVector);
   compare(&BaseMatrix::addP2P);
   compare(&BaseMatrix::invSqrt);
 }
...
@@ -1156,9 +1156,11 @@ def parse_image(image, input_layer_name, image_conf):

 def parse_norm(norm, input_layer_name, norm_conf):
     norm_conf.norm_type = norm.norm_type
-    config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'],
-                  "norm-type %s is not in [rnorm, 'cmrnorm-projection']" %
-                  norm.norm_type)
+    config_assert(
+        norm.norm_type in
+        ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'],
+        "norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]"
+        % norm.norm_type)
     norm_conf.channels = norm.channels
     norm_conf.size = norm.size
     norm_conf.scale = norm.scale
@@ -1619,16 +1621,6 @@ class PriorBoxLayer(LayerBase):
         self.config.size = size


-@config_layer('normalize')
-class NormalizeLayer(LayerBase):
-    def __init__(self, name, inputs, size, num_filters, **xargs):
-        super(NormalizeLayer, self).__init__(name, 'normalize', 0, inputs,
-                                             **xargs)
-        self.config.size = size
-        self.config.num_filters = num_filters
-        self.create_input_parameter(0, num_filters, [num_filters, 1])
-
-
 @config_layer('data')
 class DataLayer(LayerBase):
     def __init__(self, name, size, height=None, width=None, device=None):
@@ -1831,6 +1823,9 @@ class NormLayer(LayerBase):
                    norm_conf)
         self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x,
                            norm_conf.channels, False)
+        if norm_conf.norm_type == "cross-channel-norm":
+            self.create_input_parameter(0, norm_conf.channels,
+                                        [norm_conf.channels, 1])


 @config_layer('pool')
...
@@ -111,7 +111,7 @@ __all__ = [
     'out_prod_layer',
     'print_layer',
     'priorbox_layer',
-    'normalize_layer',
+    'cross_channel_norm_layer',
     'spp_layer',
     'pad_layer',
     'eos_layer',
@@ -185,7 +185,6 @@ class LayerType(object):
     PRINT_LAYER = "print"
     PRIORBOX_LAYER = "priorbox"
-    NORMALIZE_LAYER = "normalize"

     CTC_LAYER = "ctc"
     WARP_CTC_LAYER = "warp_ctc"
@@ -1000,8 +999,8 @@ def priorbox_layer(input,
         size=size)


-@wrap_name_default("normalize")
-def normalize_layer(input, name=None, param_attr=None):
+@wrap_name_default("cross_channel_norm")
+def cross_channel_norm_layer(input, name=None, param_attr=None):
     """
     Normalize a layer's output. This layer is necessary for ssd.
     This layer applys normalize across the channels of each sample to
@@ -1017,13 +1016,22 @@ def normalize_layer(input, name=None, param_attr=None):
     """
     Layer(
         name=name,
-        type=LayerType.NORMALIZE_LAYER,
-        inputs=[Input(input.name, **param_attr.attr)],
-        size=input.size,
-        num_filters=input.num_filters)
+        type=LayerType.NORM_LAYER,
+        inputs=[
+            Input(
+                input.name,
+                norm=Norm(
+                    norm_type="cross-channel-norm",
+                    channels=input.num_filters,
+                    size=input.size,
+                    scale=0,
+                    pow=0,
+                    blocked=0),
+                **param_attr.attr)
+        ])
     return LayerOutput(
         name,
-        LayerType.NORMALIZE_LAYER,
+        LayerType.NORM_LAYER,
         parents=input,
         num_filters=input.num_filters,
         size=input.size)
...
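For completeness, a hypothetical usage sketch of the renamed Python API. The data and convolution settings are illustrative only, and the parameter initialization is just one reasonable choice, not something mandated by this commit:

```python
from paddle.trainer_config_helpers import *

# A toy 3-channel 32x32 input fed through a small convolution.
data = data_layer(name="image", size=3 * 32 * 32)
conv = img_conv_layer(
    input=data,
    num_channels=3,
    filter_size=3,
    num_filters=10,
    stride=1,
    padding=1,
    act=ReluActivation())

# Normalizes conv's output across its 10 channels and scales each channel by a
# trainable factor; on the C++ side this is handled by CrossChannelNormLayer.
norm = cross_channel_norm_layer(
    input=conv,
    param_attr=ParamAttr(initial_mean=1.0, initial_std=0.0))
```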