From 84660653f847e313da0226f376e95d8dc3523d50 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 27 Jul 2017 17:21:40 +0800 Subject: [PATCH] add RowL2NormLayer --- paddle/gserver/layers/RowL2NormLayer.cpp | 99 +++++++++++++++++++ paddle/gserver/tests/test_LayerGrad.cpp | 13 +++ python/paddle/trainer/config_parser.py | 10 ++ .../paddle/trainer_config_helpers/layers.py | 38 +++++++ 4 files changed, 160 insertions(+) create mode 100644 paddle/gserver/layers/RowL2NormLayer.cpp diff --git a/paddle/gserver/layers/RowL2NormLayer.cpp b/paddle/gserver/layers/RowL2NormLayer.cpp new file mode 100644 index 00000000000..1362c6ef121 --- /dev/null +++ b/paddle/gserver/layers/RowL2NormLayer.cpp @@ -0,0 +1,99 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Layer.h" +#include "paddle/math/Matrix.h" + +namespace paddle { + +/** + * A layer for L2 normalization in each row, + * \f[ + * out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}} + * \f] + * where the size of \f$in\f$ is (batchSize x dataDim), + * and the size of \f$out\f$ is (batchSize x dataDim). + */ + +class RowL2NormLayer : public Layer { +protected: + MatrixPtr inSquare_; + MatrixPtr reciSqrtRowSquareSum_; + MatrixPtr dotSum_; + +public: + explicit RowL2NormLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; + +REGISTER_LAYER(row_l2_norm, RowL2NormLayer); + +bool RowL2NormLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + CHECK_EQ(inputLayers_.size(), 1U); + + return true; +} + +void RowL2NormLayer::forward(PassType passType) { + Layer::forward(passType); + + MatrixPtr inV = getInputValue(0); + + /* malloc memory for the output_ if necessary */ + size_t batchSize = inV->getHeight(); + size_t dataDim = getSize(); + CHECK_EQ(dataDim, inV->getWidth()); + resetOutput(batchSize, dataDim); + MatrixPtr outV = getOutputValue(); + + Matrix::resizeOrCreate(inSquare_, batchSize, dataDim, false, useGpu_); + inV->square2(*inSquare_); + Matrix::resizeOrCreate(reciSqrtRowSquareSum_, batchSize, 1, false, useGpu_); + inSquare_->rowSum(*reciSqrtRowSquareSum_); + reciSqrtRowSquareSum_->sqrt2(*reciSqrtRowSquareSum_); + reciSqrtRowSquareSum_->scalarDiv(*reciSqrtRowSquareSum_, 1.0); + outV->rowScale(0, *inV, *reciSqrtRowSquareSum_); +} + +void RowL2NormLayer::backward(const UpdateCallback& callback) { + MatrixPtr inV = getInputValue(0); + MatrixPtr inG = getInputGrad(0); + MatrixPtr outV = getOutputValue(); + MatrixPtr outG = getOutputGrad(); + size_t batchSize = inV->getHeight(); + + // inG[ij] += outG[ij] / reciSqrtRowSquareSum + // inG[ij] += -inV[ij] * reciSqrtRowSquareSum * reciSqrtRowSquareSum * + // DotMul(outG[i], inV[i]) + if (inG) { + Matrix::resizeOrCreate(dotSum_, batchSize, 1, false, useGpu_); + dotSum_->zeroMem(); + 
dotSum_->rowDotMul(0, *outG, *outV); + dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_); + dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_); + inSquare_->rowScale(0, *inV, *dotSum_); + inG->sub(*inSquare_); + inG->addRowScale(0, *outG, *reciSqrtRowSquareSum_); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0975c3bc957..0d8789e0a2e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1879,6 +1879,19 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, RowL2NormLayer) { + const size_t batchSize = 128; + const size_t size = 512; + TestConfig config; + config.layerConfig.set_type("row_l2_norm"); + config.layerConfig.set_size(size); + config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5477158ecb8..c5e56e59de4 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2725,6 +2725,16 @@ class SumToOneNormLayer(LayerBase): self.set_layer_size(input_layer0.size) +@config_layer('row_l2_norm') +class RowL2NormLayer(LayerBase): + def __init__(self, name, inputs, device=None): + super(RowL2NormLayer, self).__init__( + name, 'row_l2_norm', 0, inputs=inputs, device=device) + config_assert(len(self.inputs) == 1, 'RowL2NormLayer must have 1 input') + input_layer0 = self.get_input_layer(0) + self.set_layer_size(input_layer0.size) + + @config_layer('cos_vm') class CosSimVecMatLayer(LayerBase): def __init__(self, name, size, inputs, cos_scale=1.0, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 14f072fc551..9985a290a54 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -76,6 +76,7 @@ __all__ = [ 'trans_layer', 'rotate_layer', 'sum_to_one_norm_layer', + 'row_l2_norm_layer', 'get_output_layer', 'LayerType', 'context_projection', @@ -159,6 +160,7 @@ class LayerType(object): BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' + ROW_L2_NORM_LAYER = 'row_l2_norm' ADDTO_LAYER = 'addto' CONCAT_LAYER = 'concat' @@ -2849,6 +2851,42 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size) +@wrap_name_default() +@layer_support() +def row_l2_norm_layer(input, name=None, layer_attr=None): + """ + A layer for L2-normalization in each row. + + .. math:: + out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}} + + where the size of :math:`in` is (batchSize x dataDim) , + and the size of :math:`out` is a (batchSize x dataDim) . + + The example usage is: + + .. code-block:: python + + row_l2_norm_layer = row_l2_norm_layer(input=layer) + + :param input: Input layer. + :type input: LayerOutput + :param name: Layer name. + :type name: basestring + :param layer_attr: extra layer attributes. + :type layer_attr: ExtraLayerAttribute. + :return: LayerOutput object. 
+ :rtype: LayerOutput + """ + Layer( + name=name, + type=LayerType.ROW_L2_NORM_LAYER, + inputs=[input.name], + **ExtraAttr.to_kwargs(layer_attr)) + return LayerOutput( + name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size) + + @wrap_name_default("addto") @wrap_act_default(act=LinearActivation()) @wrap_bias_attr_default(has_bias=False) -- GitLab
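
For reference, the computation the new layer implements can be restated outside the paddle::Matrix API. The sketch below is not part of the patch; it is a minimal NumPy reproduction of the forward and backward passes (function names are illustrative), plus a central-difference check in the spirit of testLayerGrad. It also spells out the role of dotSum_: the backward code accumulates rowDotMul(outG, outV) * r * r per row, where r is reciSqrtRowSquareSum_; since outV = inV * r, this equals dot(outG, inV) * r^3.

.. code-block:: python

    import numpy as np

    def row_l2_norm_forward(x):
        # r plays the role of reciSqrtRowSquareSum_: 1 / ||x_i||_2 per row,
        # shape (batchSize, 1).
        r = 1.0 / np.sqrt(np.sum(np.square(x), axis=1, keepdims=True))
        return x * r, r

    def row_l2_norm_backward(x, out_grad):
        # For out = x * r with r = 1 / ||x||_2 per row:
        #   in_grad = out_grad * r - x * r^3 * <out_grad, x>
        # The C++ backward folds one factor of r into outV:
        #   dotSum_ = <outG, outV> * r^2  (== <outG, inV> * r^3)
        out, r = row_l2_norm_forward(x)
        dot_sum = np.sum(out_grad * out, axis=1, keepdims=True) * r * r
        return out_grad * r - x * dot_sum

    # Central-difference check, analogous to what testLayerGrad verifies.
    rng = np.random.RandomState(0)
    x = rng.randn(4, 8)
    g = rng.randn(4, 8)
    analytic = row_l2_norm_backward(x, g)
    numeric = np.zeros_like(x)
    eps = 1e-6
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            xp, xm = x.copy(), x.copy()
            xp[i, j] += eps
            xm[i, j] -= eps
            yp, _ = row_l2_norm_forward(xp)
            ym, _ = row_l2_norm_forward(xm)
            numeric[i, j] = np.sum(g * (yp - ym)) / (2.0 * eps)
    assert np.allclose(analytic, numeric, atol=1e-5)

Under this reading, inG->sub(*inSquare_) subtracts inV * dotSum_ and inG->addRowScale(0, *outG, *reciSqrtRowSquareSum_) adds outG * r, which together give the gradient above.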
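
As a usage note, the helper slots into a trainer config like any other layer; a common pattern is to normalize a feature row to unit L2 length before a cosine-similarity comparison. The config below is a hypothetical sketch (layer names and sizes are illustrative, not from the patch):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    # Hypothetical config sketch; names and sizes are illustrative.
    feature = data_layer(name='feature', size=512)
    # Project, then normalize each sample's row to unit L2 norm.
    proj = fc_layer(input=feature, size=256, act=LinearActivation())
    normed = row_l2_norm_layer(input=proj, name='feature_l2norm')
    outputs(normed)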