add RowL2NormLayer

84660653 · guosheng · 2200ff5e · 84660653 · 84660653 · 84660653
4 changed files
--- a/paddle/gserver/layers/RowL2NormLayer.cpp
+++ b/paddle/gserver/layers/RowL2NormLayer.cpp
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+#include "paddle/math/Matrix.h"
+
+namespace paddle {
+
+/**
+ * A layer for L2 normalization in each row,
+ * \f[
+ *   out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}}
+ * \f]
+ * where the size of \f$in\f$ is (batchSize x dataDim),
+ * and the size of \f$out\f$ is (batchSize x dataDim).
+ *
+ * The layer accepts exactly one input (checked in init()) and is registered
+ * under the type name "row_l2_norm".
+ *
+ * Work buffers, all (re)sized on demand for the current batch:
+ *  - inSquare_: (batchSize x dataDim). Target of inV->square2() in
+ *    forward(); backward() overwrites it with a rowScale() result, so its
+ *    content is only meaningful within a single pass.
+ *  - reciSqrtRowSquareSum_: (batchSize x 1). Built in forward() from the row
+ *    sums of inSquare_ via sqrt2() and scalarDiv(., 1.0); judging by the name
+ *    and the call chain this is 1 / ||in[row]||_2 -- confirm against the
+ *    Matrix API. backward() reads the value left there by forward().
+ *  - dotSum_: (batchSize x 1). Used only in backward().
+ *
+ * NOTE(review): forward() adds no epsilon before the division, so an
+ * all-zero input row presumably produces inf/NaN -- confirm whether such rows
+ * can reach this layer.
+ */
+
+class RowL2NormLayer : public Layer {
+protected:
+  MatrixPtr inSquare_;              // (batchSize x dataDim) scratch buffer
+  MatrixPtr reciSqrtRowSquareSum_;  // (batchSize x 1), filled by forward()
+  MatrixPtr dotSum_;                // (batchSize x 1), backward() only
+
+public:
+  explicit RowL2NormLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+};
+
+REGISTER_LAYER(row_l2_norm, RowL2NormLayer);
+
+/**
+ * Initializes the layer through Layer::init() and validates its wiring.
+ *
+ * @param layerMap      forwarded unchanged to Layer::init().
+ * @param parameterMap  forwarded unchanged to Layer::init().
+ * @return false if the base-class initialization reports failure,
+ *         true otherwise. A wrong number of inputs aborts via CHECK_EQ.
+ */
+bool RowL2NormLayer::init(const LayerMap& layerMap,
+                          const ParameterMap& parameterMap) {
+  // Propagate a base-class failure instead of silently discarding it.
+  if (!Layer::init(layerMap, parameterMap)) {
+    return false;
+  }
+
+  // Row-wise normalization is defined for exactly one input.
+  CHECK_EQ(inputLayers_.size(), 1U);
+
+  return true;
+}
+
+/**
+ * Forward pass: scales every input row by a per-row factor derived from that
+ * row's sum of squares and writes the result to the layer output.
+ *
+ * Leaves the per-row factor in reciSqrtRowSquareSum_, which backward() reads.
+ *
+ * @param passType forwarded unchanged to Layer::forward().
+ */
+void RowL2NormLayer::forward(PassType passType) {
+  Layer::forward(passType);
+
+  const MatrixPtr input = getInputValue(0);
+  const size_t numRows = input->getHeight();
+  const size_t rowWidth = getSize();
+  // The configured layer size must match the width of the incoming data.
+  CHECK_EQ(rowWidth, input->getWidth());
+
+  // (Re)size output_ for this batch before fetching its value matrix.
+  resetOutput(numRows, rowWidth);
+  const MatrixPtr output = getOutputValue();
+
+  // Make sure both work buffers fit the current batch.
+  Matrix::resizeOrCreate(inSquare_, numRows, rowWidth, false, useGpu_);
+  Matrix::resizeOrCreate(reciSqrtRowSquareSum_, numRows, 1, false, useGpu_);
+
+  // Presumably: element-wise squares -> per-row sum -> sqrt -> 1.0 / value,
+  // i.e. the reciprocal of each row's L2 norm (confirm against Matrix API).
+  input->square2(*inSquare_);
+  inSquare_->rowSum(*reciSqrtRowSquareSum_);
+  reciSqrtRowSquareSum_->sqrt2(*reciSqrtRowSquareSum_);
+  reciSqrtRowSquareSum_->scalarDiv(*reciSqrtRowSquareSum_, 1.0);
+
+  // Scale each input row by its factor to produce the output row.
+  output->rowScale(0, *input, *reciSqrtRowSquareSum_);
+}
+
+/**
+ * Backward pass: accumulates the gradient w.r.t. the input into the input
+ * gradient matrix, if one exists.
+ *
+ * With r = 1 / ||in[i]||_2 and out[i] = r * in[i], the chain rule gives
+ *   inG[i] += r * outG[i] - r^3 * dot(outG[i], in[i]) * in[i].
+ * Since out[i] = r * in[i], the factor r^3 * dot(outG[i], in[i]) equals
+ * r^2 * dot(outG[i], out[i]), which is the form evaluated below.
+ *
+ * Assumes reciSqrtRowSquareSum_ still holds r from the preceding forward()
+ * call, and that rowDotMul() accumulates per-row dot products into its
+ * target (hence the zeroMem()) -- confirm against the Matrix API.
+ *
+ * @param callback not used by this layer.
+ */
+void RowL2NormLayer::backward(const UpdateCallback& callback) {
+  const MatrixPtr input = getInputValue(0);
+  const MatrixPtr inputGrad = getInputGrad(0);
+  const MatrixPtr output = getOutputValue();
+  const MatrixPtr outputGrad = getOutputGrad();
+  const size_t numRows = input->getHeight();
+
+  // Nothing to do when the input has no gradient matrix.
+  if (!inputGrad) {
+    return;
+  }
+
+  // dotSum_[i] = r^2 * dot(outG[i], out[i])
+  Matrix::resizeOrCreate(dotSum_, numRows, 1, false, useGpu_);
+  dotSum_->zeroMem();
+  dotSum_->rowDotMul(0, *outputGrad, *output);
+  dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_);
+  dotSum_->dotMul(*dotSum_, *reciSqrtRowSquareSum_);
+
+  // inSquare_ is reused as scratch here: inSquare_[i] = dotSum_[i] * in[i].
+  inSquare_->rowScale(0, *input, *dotSum_);
+
+  // inG[i] += r * outG[i] - inSquare_[i]
+  inputGrad->sub(*inSquare_);
+  inputGrad->addRowScale(0, *outputGrad, *reciSqrtRowSquareSum_);
+}
+
+}  // namespace paddle
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1879,6 +1879,19 @@ TEST(Layer, CropLayer) {
  }
 }

+// Runs testLayerGrad on a single-input row_l2_norm layer, once with
+// useGpu == false and once with useGpu == true.
+TEST(Layer, RowL2NormLayer) {
+  const size_t kBatchSize = 128;
+  const size_t kDataDim = 512;
+
+  TestConfig config;
+  config.layerConfig.set_type("row_l2_norm");
+  config.layerConfig.set_size(kDataDim);
+  config.layerConfig.add_inputs();
+  // One data input whose width equals the layer size.
+  config.inputDefs.push_back({INPUT_DATA, "input", kDataDim, 0});
+
+  for (const bool useGpu : {false, true}) {
+    testLayerGrad(config, "row_l2_norm", kBatchSize, false, useGpu, false);
+  }
+}
+
+
 int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2725,6 +2725,16 @@ class SumToOneNormLayer(LayerBase):
        self.set_layer_size(input_layer0.size)


+@config_layer('row_l2_norm')
+class RowL2NormLayer(LayerBase):
+    def __init__(self, name, inputs, device=None):
+        super(RowL2NormLayer, self).__init__(
+            name, 'row_l2_norm', 0, inputs=inputs, device=device)
+        config_assert(len(self.inputs) == 1, 'RowL2NormLayer must have 1 input')
+        input_layer0 = self.get_input_layer(0)
+        self.set_layer_size(input_layer0.size)
+
+
 @config_layer('cos_vm')
 class CosSimVecMatLayer(LayerBase):
    def __init__(self, name, size, inputs, cos_scale=1.0, device=None):

--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -76,6 +76,7 @@ __all__ = [
    'trans_layer',
    'rotate_layer',
    'sum_to_one_norm_layer',
+    'row_l2_norm_layer',
    'get_output_layer',
    'LayerType',
    'context_projection',
@@ -159,6 +160,7 @@ class LayerType(object):
    BATCH_NORM_LAYER = 'batch_norm'
    NORM_LAYER = 'norm'
    SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
+    ROW_L2_NORM_LAYER = 'row_l2_norm'
    ADDTO_LAYER = 'addto'

    CONCAT_LAYER = 'concat'
@@ -2849,6 +2851,42 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
        name, LayerType.SUM_TO_ONE_NORM_LAYER, parents=[input], size=input.size)


+@wrap_name_default()
+@layer_support()
+def row_l2_norm_layer(input, name=None, layer_attr=None):
+    """
+    A layer for L2-normalization in each row.
+
+    .. math::
+       out[i] = \frac{in[i]}{\sqrt{\sum_{k=1}^N in[k]^{2}}}
+
+    where the size of :math:`in` is (batchSize x dataDim) ,
+    and the size of :math:`out` is a (batchSize x dataDim) .
+
+    The example usage is:
+
+    .. code-block:: python
+
+       row_l2_norm_layer = row_l2_norm_layer(input=layer)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param name: Layer name.
+    :type name: basestring
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute.
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    Layer(
+        name=name,
+        type=LayerType.ROW_L2_NORM_LAYER,
+        inputs=[input.name],
+        **ExtraAttr.to_kwargs(layer_attr))
+    return LayerOutput(
+        name, LayerType.ROW_L2_NORM_LAYER, parents=[input], size=input.size)
+
+
 @wrap_name_default("addto")
 @wrap_act_default(act=LinearActivation())
 @wrap_bias_attr_default(has_bias=False)