diff --git a/paddle/gserver/layers/NormalizeLayer.cpp b/paddle/gserver/layers/CrossChannelNormLayer.cpp
similarity index 64%
rename from paddle/gserver/layers/NormalizeLayer.cpp
rename to paddle/gserver/layers/CrossChannelNormLayer.cpp
index 22df8adb4eecf912eeba87ff89d27c0fbc61ae14..ced719999154bc3957d9e171220e7b3f703a8336 100644
--- a/paddle/gserver/layers/NormalizeLayer.cpp
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@@ -13,53 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "Layer.h"
+#include "NormLayer.h"
 #include "paddle/math/BaseMatrix.h"
 #include "paddle/math/Matrix.h"
 
 namespace paddle {
 
-/**
- * This layer applys normalize across the channels of each sample to a
- * conv layer's output and scale the output by a group of trainable factors
- * which dimensions equal to the channel's number.
- * - Input: One and only one input layer are accepted. The input layer must be
- *          be a data output layer.
- * - Output: The normalized data of the input data.
- * Reference:
- *    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
- *    Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
- */
-class NormalizeLayer : public Layer {
-public:
-  explicit NormalizeLayer(const LayerConfig& config) : Layer(config) {}
-  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
-
-  void forward(PassType passType);
-  void backward(const UpdateCallback& callback);
-
-protected:
-  size_t channels_;
-  std::unique_ptr<Weight> scale_;
-  MatrixPtr scaleDiff_;
-  MatrixPtr normBuffer_;
-  MatrixPtr dataBuffer_;
-  MatrixPtr channelBuffer_;
-  MatrixPtr spatialBuffer_;
-  MatrixPtr sampleBuffer_;
-};
-
-REGISTER_LAYER(normalize, NormalizeLayer);
-
-bool NormalizeLayer::init(const LayerMap& layerMap,
-                          const ParameterMap& parameterMap) {
-  Layer::init(layerMap, parameterMap);
-  CHECK(parameters_[0]);
-  channels_ = config_.num_filters();
-  scale_.reset(new Weight(channels_, 1, parameters_[0]));
-  return true;
-}
-
-void NormalizeLayer::forward(PassType passType) {
+void CrossChannelNormLayer::forward(PassType passType) {
   Layer::forward(passType);
   auto in = getInput(0);
   MatrixPtr inV = getInputValue(0);
@@ -74,16 +34,12 @@ void NormalizeLayer::forward(PassType passType) {
 
   Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
   Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
-  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
-  Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
   Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
   normBuffer_->zeroMem();
   spatialBuffer_->zeroMem();
-  sampleBuffer_->zeroMem();
   dataBuffer_->zeroMem();
   // add eps to avoid overflow
   normBuffer_->addScalar(*normBuffer_, 1e-6);
-  channelBuffer_->resetOne();
 
   inV->square2(*dataBuffer_);
   for (size_t i = 0; i < batchSize; i++) {
     spatialBuffer_->zeroMem();
@@ -102,18 +58,14 @@ void NormalizeLayer::forward(PassType passType) {
     spatialBuffer_->sumCols(*dataTmp, 1, 1);
     spatialBuffer_->sqrt2(*spatialBuffer_);
     normTmp->copyFrom(*spatialBuffer_);
-    sampleBuffer_->mul(*channelBuffer_, *spatialBuffer_, 1., 0.);
-    sampleBuffer_->dotDiv(*inTmp, *sampleBuffer_);
-    outTmp->copyFrom(*sampleBuffer_);
-
+    outTmp->copyFrom(*inTmp);
+    outTmp->divRowVector(*spatialBuffer_);
     // scale the layer.
-    spatialBuffer_->resetOne();
-    sampleBuffer_->mul(*scale_->getW(), *spatialBuffer_, 1., 0.);
-    outTmp->dotMul(*outTmp, *sampleBuffer_);
+    outTmp->mulColVector(*scale_->getW());
   }
 }
 
-void NormalizeLayer::backward(const UpdateCallback& callback) {
+void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
   MatrixPtr inG = getInputGrad(0);
   MatrixPtr inV = getInputValue(0);
   MatrixPtr outG = getOutputGrad();
@@ -124,9 +76,10 @@ void NormalizeLayer::backward(const UpdateCallback& callback) {
   size_t dataDim = inG->getWidth();
   size_t spatialDim = dataDim / channels_;
 
-  bool syncFlag = hl_get_sync_flag();
   dataBuffer_->dotMul(*outG, *outV);
   Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
+  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
+  Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
   scaleDiff_->zeroMem();
   for (size_t i = 0; i < batchSize; i++) {
     spatialBuffer_->zeroMem();
@@ -154,28 +107,20 @@ void NormalizeLayer::backward(const UpdateCallback& callback) {
     sampleBuffer_->dotMul(*inValueTmp, *outGradTmp);
     spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
     // scale the grad
-    channelBuffer_->resetOne();
-    sampleBuffer_->mul(*channelBuffer_, *spatialBuffer_, 1., 0.);
-
-    inGradTmp->dotMul(*inValueTmp, *sampleBuffer_);
+    inGradTmp->copyFrom(*inValueTmp);
+    inGradTmp->mulRowVector(*spatialBuffer_);
     // divide by square of norm
     spatialBuffer_->dotMul(*normTmp, *normTmp);
-    sampleBuffer_->mul(*channelBuffer_, *spatialBuffer_, 1., 0.);
-    inGradTmp->dotDiv(*inGradTmp, *sampleBuffer_);
+    inGradTmp->divRowVector(*spatialBuffer_);
     // subtract
     inGradTmp->add(*outGradTmp, -1, 1);
     // divide by norm
-    sampleBuffer_->mul(*channelBuffer_, *normTmp, 1., 0.);
-    inGradTmp->dotDiv(*inGradTmp, *sampleBuffer_);
+    inGradTmp->divRowVector(*normTmp);
     // scale the diff
-    spatialBuffer_->resetOne();
-    sampleBuffer_->mul(*scale_->getW(), *spatialBuffer_, 1., 0.);
-    inGradTmp->dotMul(*inGradTmp, *sampleBuffer_);
+    inGradTmp->mulColVector(*scale_->getW());
   }
   // updata scale
   if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
-  hl_set_sync_flag(false);
-  hl_set_sync_flag(syncFlag);
   scale_->getParameterPtr()->incUpdate(callback);
 }
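Reviewer note, not part of the patch: as I read the rewritten forward/backward code, with x one sample reshaped to channels x spatialDim, gamma the per-channel scale, g the gradient w.r.t. the output, c a channel index, and j a spatial position, the layer computes

$$y_{c,j} = \gamma_c \, \frac{x_{c,j}}{n_j}, \qquad n_j = \sqrt{\sum_{c'} x_{c',j}^2},$$

and the input gradient as implemented is

$$\frac{\partial L}{\partial x_{c,j}} = \frac{\gamma_c}{n_j} \Big( g_{c,j} \;-\; \frac{x_{c,j}}{n_j^2} \sum_{c'} x_{c',j} \, g_{c',j} \Big),$$

which mirrors the copyFrom / mulRowVector / divRowVector / add / divRowVector / mulColVector sequence above. The 1e-6 added to normBuffer_ is intended as an epsilon guard against division by zero.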
diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/gserver/layers/NormLayer.cpp
index 3db0af2515ee9f64aa6c0b0a441e88562d9e398e..e094078bfe86e30c06e1b80ebc04c8213fe9abcf 100644
--- a/paddle/gserver/layers/NormLayer.cpp
+++ b/paddle/gserver/layers/NormLayer.cpp
@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) {
     return new ResponseNormLayer(config);
   } else if (norm == "cmrnorm-projection") {
     return new CMRProjectionNormLayer(config);
+  } else if (norm == "cross-channel-norm") {
+    return new CrossChannelNormLayer(config);
   } else {
     LOG(FATAL) << "Unknown norm type: " << norm;
     return nullptr;
@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
   return true;
 }
 
+bool CrossChannelNormLayer::init(const LayerMap& layerMap,
+                                 const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK(parameters_[0]);
+  const NormConfig& conf = config_.inputs(0).norm_conf();
+  channels_ = conf.channels();
+  scale_.reset(new Weight(channels_, 1, parameters_[0]));
+  return true;
+}
+
 }  // namespace paddle
diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h
index e77faaa322570933b3ea2de877b7859857306432..59ba226dfe5f5f96ce0d5a97c05851c60cace287 100644
--- a/paddle/gserver/layers/NormLayer.h
+++ b/paddle/gserver/layers/NormLayer.h
@@ -65,4 +65,35 @@ public:
   }
 };
 
+/**
+ * This layer applies normalization across the channels of each sample to a
+ * conv layer's output, and scales the output by a group of trainable factors
+ * whose dimension equals the number of channels.
+ * - Input: One and only one input layer is accepted. The input layer must
+ *          be a data output layer.
+ * - Output: The normalized data of the input data.
+ * Reference:
+ *    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+ *    Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
+ */
+class CrossChannelNormLayer : public NormLayer {
+public:
+  explicit CrossChannelNormLayer(const LayerConfig& config)
+      : NormLayer(config) {}
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  void forward(PassType passType);
+  void backward(const UpdateCallback& callback);
+
+protected:
+  size_t channels_;
+  std::unique_ptr<Weight> scale_;
+  MatrixPtr scaleDiff_;
+  MatrixPtr normBuffer_;
+  MatrixPtr dataBuffer_;
+  MatrixPtr channelBuffer_;
+  MatrixPtr spatialBuffer_;
+  MatrixPtr sampleBuffer_;
+};
+
 }  // namespace paddle
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index a7d3eaeaf98cb017a4ca9e81e1f58bfd17335eb0..7afaf87189256f27b72f41f10c3f6efe742eb9e4 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1623,17 +1623,22 @@ TEST(Layer, PadLayer) {
   }
 }
 
-TEST(Layer, NormalizeLayer) {
+TEST(Layer, CrossChannelNormLayer) {
   TestConfig config;
-  config.layerConfig.set_type("normalize");
+  config.layerConfig.set_type("norm");
   config.layerConfig.set_size(100);
-  config.layerConfig.set_num_filters(10);
-
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  NormConfig* norm = input->mutable_norm_conf();
+  norm->set_norm_type("cross-channel-norm");
+  norm->set_channels(10);
+  norm->set_size(100);
+  norm->set_scale(0);
+  norm->set_pow(0);
+  norm->set_blocked(0);
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});
-  config.layerConfig.add_inputs();
 
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "normalize", 10, false, useGpu, false, 5);
+    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
   }
 }
 
diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu
index 0a0d92d1ae65f5b6020eb71fe2a6db5a3c625d9c..de48b6fac9c7d8125a552022c52353ef6bcef995 100644
--- a/paddle/math/BaseMatrix.cu
+++ b/paddle/math/BaseMatrix.cu
@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
               true_type() /* bAsRowVector */, false_type());
 }
 
+template<class T>
+void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
+  MatrixOffset offset(0, 0, 0, 0);
+  int numRows = height_;
+  int numCols = width_;
+  applyBinary(binary::DotMul<T>(), b, numRows, numCols, offset,
+              false_type(), true_type() /* bAsColVector */);
+}
+
+template<class T>
+void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
+  MatrixOffset offset(0, 0, 0, 0);
+  int numRows = height_;
+  int numCols = width_;
+  applyBinary(binary::DotDiv<T>(), b, numRows, numCols, offset,
+              false_type(), true_type() /* bAsColVector */);
+}
+
 template<>
 template <class Agg>
 int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {
diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h
index 8691c87ac3b88499a9676d59af533e0f4713dfc3..6ed48c8d88ee698689de6f7a7f470b97a094ea5b 100644
--- a/paddle/math/BaseMatrix.h
+++ b/paddle/math/BaseMatrix.h
@@ -545,6 +545,9 @@ public:
   void mulRowVector(BaseMatrixT& b);
   void divRowVector(BaseMatrixT& b);
 
+  void mulColVector(BaseMatrixT& b);
+  void divColVector(BaseMatrixT& b);
+
   void addP2P(BaseMatrixT& b);
 
   /**
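Side note for reviewers, not part of the patch: the semantics I expect from the new column-vector ops, sketched with numpy broadcasting (the shapes mirror the channels_ x spatialDim buffers the layer uses):

```python
import numpy as np

# a: the matrix updated in place; b: a column vector with one entry per row.
a = np.arange(6, dtype=np.float32).reshape(3, 2)       # channels x spatialDim
b = np.array([[1.0], [2.0], [4.0]], dtype=np.float32)  # channels x 1

# mulColVector(b): row i of a is multiplied by b[i, 0]
# divColVector(b): row i of a is divided by b[i, 0]
print(a * b)
print(a / b)
# mulRowVector/divRowVector are the existing transposed counterparts:
# they broadcast a 1 x width row vector down every row instead.
```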
diff --git a/paddle/math/tests/test_BaseMatrix.cpp b/paddle/math/tests/test_BaseMatrix.cpp
index 21918b86e1ad98766ceaf09dea3020d6e8592191..22ce39701fca7b650fc03794cb0701e0987d2dae 100644
--- a/paddle/math/tests/test_BaseMatrix.cpp
+++ b/paddle/math/tests/test_BaseMatrix.cpp
@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) {
   compare(&BaseMatrix::addRowVector);
   compare(&BaseMatrix::mulRowVector);
   compare(&BaseMatrix::divRowVector);
+  compare(&BaseMatrix::mulColVector);
+  compare(&BaseMatrix::divColVector);
   compare(&BaseMatrix::addP2P);
   compare(&BaseMatrix::invSqrt);
 }
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index c52039219575936414fb17a67f84bd1422035b98..3e6a73dcf868c92426e211f6cae415e2afcf1b8e 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1156,9 +1156,11 @@ def parse_image(image, input_layer_name, image_conf):
 
 def parse_norm(norm, input_layer_name, norm_conf):
     norm_conf.norm_type = norm.norm_type
-    config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'],
-                  "norm-type %s is not in [rnorm, 'cmrnorm-projection']" %
-                  norm.norm_type)
+    config_assert(
+        norm.norm_type in
+        ['rnorm', 'cmrnorm-projection', 'cross-channel-norm'],
+        "norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]"
+        % norm.norm_type)
     norm_conf.channels = norm.channels
     norm_conf.size = norm.size
     norm_conf.scale = norm.scale
@@ -1619,16 +1621,6 @@ class PriorBoxLayer(LayerBase):
         self.config.size = size
 
 
-@config_layer('normalize')
-class NormalizeLayer(LayerBase):
-    def __init__(self, name, inputs, size, num_filters, **xargs):
-        super(NormalizeLayer, self).__init__(name, 'normalize', 0, inputs,
-                                             **xargs)
-        self.config.size = size
-        self.config.num_filters = num_filters
-        self.create_input_parameter(0, num_filters, [num_filters, 1])
-
-
 @config_layer('data')
 class DataLayer(LayerBase):
     def __init__(self, name, size, height=None, width=None, device=None):
@@ -1831,6 +1823,9 @@ class NormLayer(LayerBase):
                    norm_conf)
         self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x,
                            norm_conf.channels, False)
+        if norm_conf.norm_type == "cross-channel-norm":
+            self.create_input_parameter(0, norm_conf.channels,
+                                        [norm_conf.channels, 1])
 
 
 @config_layer('pool')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1541b532d950a22a5d2e9928626d1b7a047c1fe1..b6a94264765b7f41d1d033ead5bc891569a93974 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -111,7 +111,7 @@ __all__ = [
     'out_prod_layer',
     'print_layer',
     'priorbox_layer',
-    'normalize_layer',
+    'cross_channel_norm_layer',
     'spp_layer',
     'pad_layer',
     'eos_layer',
@@ -185,7 +185,6 @@ class LayerType(object):
 
     PRINT_LAYER = "print"
     PRIORBOX_LAYER = "priorbox"
-    NORMALIZE_LAYER = "normalize"
 
     CTC_LAYER = "ctc"
     WARP_CTC_LAYER = "warp_ctc"
@@ -1000,8 +999,8 @@ def priorbox_layer(input,
         size=size)
 
 
-@wrap_name_default("normalize")
-def normalize_layer(input, name=None, param_attr=None):
+@wrap_name_default("cross_channel_norm")
+def cross_channel_norm_layer(input, name=None, param_attr=None):
     """
     Normalize a layer's output. This layer is necessary for ssd.
     This layer applys normalize across the channels of each sample to
@@ -1017,13 +1016,22 @@ def normalize_layer(input, name=None, param_attr=None):
     """
     Layer(
         name=name,
-        type=LayerType.NORMALIZE_LAYER,
-        inputs=[Input(input.name, **param_attr.attr)],
-        size=input.size,
-        num_filters=input.num_filters)
+        type=LayerType.NORM_LAYER,
+        inputs=[
+            Input(
+                input.name,
+                norm=Norm(
+                    norm_type="cross-channel-norm",
+                    channels=input.num_filters,
+                    size=input.size,
+                    scale=0,
+                    pow=0,
+                    blocked=0),
+                **param_attr.attr)
+        ])
     return LayerOutput(
         name,
-        LayerType.NORMALIZE_LAYER,
+        LayerType.NORM_LAYER,
         parents=input,
         num_filters=input.num_filters,
         size=input.size)
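Reviewer sketch, not part of the patch: a minimal config showing how the renamed helper would be wired after this change. The layer names, image size, and the SSD-style scale init of 20 are illustrative assumptions, not taken from the patch:

```python
from paddle.trainer_config_helpers import *

# A small conv stack whose output has 10 filters, matching the
# 10-channel case exercised in test_LayerGrad above.
data = data_layer(name='image', size=3 * 32 * 32)
conv = img_conv_layer(
    input=data,
    num_channels=3,
    num_filters=10,
    filter_size=3,
    act=ReluActivation())

# L2-normalize conv's output across channels at each spatial position,
# then rescale each channel by a trainable factor (one weight per channel).
# initial_mean=20 follows the SSD paper's suggested scale init (assumption).
norm = cross_channel_norm_layer(
    input=conv,
    param_attr=ParamAttr(initial_mean=20.0, initial_std=0.0))
```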