Commit 6c3a678c authored by emailweixu, committed by luotao1

Add elementwise math operations (#343)

* Add elementwise math operations
This allows us to use expressions like y = log(1 + exp(x)); see the sketch below.
Also added unittests for ActivationFunction
* Enforce keyword arguments for non-positional arguments
* Add LogActivation to doc
Parent 568d9cff
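As a rough sketch of the expression style this commit enables (modeled on the math_ops.py test config added below; the data layer name and size are placeholders, not part of this commit):

    from paddle.trainer_config_helpers import *
    from paddle.trainer_config_helpers import math

    x = data_layer(name='data', size=100)
    y = math.log(1 + math.exp(x))   # softplus, built from the new unary ops and the overloaded '+'
    outputs(y)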
......@@ -32,6 +32,13 @@ LinearActivation
.. automodule:: paddle.trainer_config_helpers.activations
    :members: LinearActivation
    :noindex:

LogActivation
==================

.. automodule:: paddle.trainer_config_helpers.activations
    :members: LogActivation
    :noindex:

SquareActivation
================
......
......@@ -295,6 +295,7 @@ void forward(Argument& act) {
void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
END_DEFINE_ACTIVATION(square)
/**
* @brief Exponential Activation.
* \f[
......@@ -307,8 +308,36 @@ void forward(Argument& act) { act.value->exp(*act.value); }
void backward(Argument& act) { act.grad->expDerivative(*act.value); }
END_DEFINE_ACTIVATION(exponential)
/**
* @brief Logarithm Activation.
* \f[
* f(z) = log(z)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(log)
void forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
                         /* trans */ false, useGpu(act.deviceId));
  // Save a copy of the input z so backward can divide the gradient by it.
  act.in->copyFrom(*act.value);
  act.value->log(*act.value);
}

void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
END_DEFINE_ACTIVATION(log)
ActivationFunction* ActivationFunction::create(const std::string& type) {
return gActivationRegistrar.createByType(type);
}
std::vector<std::string> ActivationFunction::getAllRegisteredTypes() {
std::vector<std::string> types;
gActivationRegistrar.forEachType([&](const std::string& type) {
types.push_back(type);
});
return types;
}
} // namespace paddle
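A quick check of the new log activation's backward pass above: forward stashes the pre-activation input in act.in, and dotDiv then applies the chain rule

    f(z) = \log(z), \qquad f'(z) = \frac{1}{z}, \qquad \frac{\partial L}{\partial z} = \frac{\partial L}{\partial f(z)} \cdot \frac{1}{z}

so the incoming gradient is divided elementwise by the saved input rather than by the output.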
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
namespace paddle {
......@@ -32,6 +33,7 @@ struct Argument;
class ActivationFunction {
public:
static ActivationFunction* create(const std::string& type);
static std::vector<std::string> getAllRegisteredTypes();
ActivationFunction() {}
......
......@@ -20,6 +20,13 @@ add_unittest_without_exec(test_LayerGrad
add_test(NAME test_LayerGrad
COMMAND test_LayerGrad)
add_unittest_without_exec(test_ActivationGrad
test_ActivationGrad.cpp
LayerGradUtil.cpp
TestUtil.cpp)
add_test(NAME test_ActivationGrad
COMMAND test_ActivationGrad)
################## test_Evaluator #######################
add_unittest(test_Evaluator
test_Evaluator.cpp
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <vector>
#include <string>
#include "paddle/gserver/layers/DataLayer.h"
#include "ModelConfig.pb.h"
#include "paddle/trainer/Trainer.h"
#include "TestUtil.h"
#include "LayerGradUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
P_DECLARE_bool(use_gpu);
P_DECLARE_bool(thread_local_rand_use_global_seed);
void testActivation(const string& act) {
LOG(INFO) << "test activation: " << act;
size_t size = 10;
TestConfig config;
config.biasSize = 0;
config.layerConfig.set_type("addto");
config.layerConfig.set_size(size);
config.layerConfig.set_active_type(act);
config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config,
act + "_activation",
100,
/* trans= */false,
useGpu,
/* useWeight */true);
}
}
TEST(Activation, activation) {
auto types = ActivationFunction::getAllRegisteredTypes();
std::set<string> excluded{"sequence_softmax"};
for (auto type : types) {
if (excluded.count(type)) continue;
testActivation(type);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
return RUN_ALL_TESTS();
}
......@@ -2573,8 +2573,9 @@ class MixedLayer(LayerBase):
        for input in self.inputs:
            psize += input.calc_bias_size()
-        self.config.bias_size = psize
-        self.create_bias_parameter(bias, psize)
+        if bias:
+            self.config.bias_size = psize
+            self.create_bias_parameter(bias, psize)
        if error_clipping_threshold is not None:
            self.config.error_clipping_threshold = error_clipping_threshold
......@@ -2659,8 +2660,9 @@ class ConcatenateLayer2(LayerBase):
        for input in self.inputs:
            psize += input.calc_bias_size()
-        self.config.bias_size = psize
-        self.create_bias_parameter(bias, psize)
+        if bias:
+            self.config.bias_size = psize
+            self.create_bias_parameter(bias, psize)
@config_layer('recurrent')
class RecurrentLayer(LayerBase):
......
......@@ -199,3 +199,12 @@ class ExpActivation(BaseActivation):
       f(z) = e^z.
    """
    def __init__(self): BaseActivation.__init__(self, 'exponential', False)


class LogActivation(BaseActivation):
    """
    Logarithm Activation.

    .. math::

       f(z) = log(z)
    """
    def __init__(self): BaseActivation.__init__(self, 'log', False)
......@@ -13,6 +13,7 @@
# limitations under the License.
import functools
import inspect
from .attrs import ParamAttr
from .activations import TanhActivation
from paddle.trainer.config_parser import *
......@@ -37,8 +38,12 @@ def wrap_param_default(param_names=None, default_factory=None,
        @functools.wraps(func)
        def __wrapper__(*args, **kwargs):
            if len(args) != 0:
-                logger.warning("please use keyword arguments in paddle config.")
+                argspec = inspect.getargspec(func)
+                num_positional = len(argspec.args)
+                if argspec.defaults:
+                    num_positional -= len(argspec.defaults)
+                if not argspec.varargs and len(args) > num_positional:
+                    logger.fatal("Must use keyword arguments for non-positional args")
            for name in param_names:
                if not_set_callback(kwargs, name):  # Not set
                    kwargs[name] = default_factory(func)
......
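For intuition, a standalone sketch of the same keyword-argument check outside the decorator machinery (illustrative only: require_keyword_args and fc_layer are made-up names here, and the real wrapper calls logger.fatal rather than raising):

    import functools
    import inspect

    def require_keyword_args(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            argspec = inspect.getargspec(func)
            num_positional = len(argspec.args)
            if argspec.defaults:
                # arguments that have defaults must be passed by keyword
                num_positional -= len(argspec.defaults)
            if not argspec.varargs and len(args) > num_positional:
                raise TypeError("Must use keyword arguments for non-positional args")
            return func(*args, **kwargs)
        return wrapper

    @require_keyword_args
    def fc_layer(input, size=32, act=None):
        return (input, size, act)

    fc_layer('data')        # ok: 'input' has no default, so it may stay positional
    # fc_layer('data', 64)  # rejected: 'size' has a default and must be a keyword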
......@@ -564,7 +564,7 @@ class MixedLayerType(LayerOutput):
        self.inputs = []
        self.finalized = False

-    def __add__(self, other):
+    def __iadd__(self, other):
        """
        + += operator
        :param other: Other projection.
......
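The rename matters because a mixed layer is normally assembled in place with +=; a hedged usage sketch (the mixed_layer context-manager form and full_matrix_projection are assumed here, they are not part of this diff):

    with mixed_layer(size=256) as m:
        m += full_matrix_projection(input=layer1)
        m += full_matrix_projection(input=layer2)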
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .layers import LayerOutput, mixed_layer, identity_projection, \
slope_intercept_layer
from .attrs import is_compatible_with
from .default_decorators import *
import activations as act
__all__ = []
def register_unary_math_op(op_name, act):
    def op(input, name=None):
        return mixed_layer(input=[identity_projection(input=input)],
                           name=name,
                           act=act)
    op = wrap_name_default(op_name)(op)
    op.__doc__ = type(act).__doc__
    globals()[op_name] = op
    __all__.append(op_name)
register_unary_math_op('exp', act.ExpActivation())
register_unary_math_op('log', act.LogActivation())
register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation())
def add(layeroutput, other):
    if is_compatible_with(other, float):
        return slope_intercept_layer(input=layeroutput, intercept=other)
    assert isinstance(other, LayerOutput)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=other)])

LayerOutput.__radd__ = add
LayerOutput.__add__ = add
def sub(layeroutput, other):
    if is_compatible_with(other, float):
        # subtracting a scalar: negate it before folding it into the intercept
        return slope_intercept_layer(input=layeroutput, intercept=-other)
    assert isinstance(other, LayerOutput)
    neg = slope_intercept_layer(input=other, slope=-1.0)
    return mixed_layer(input=[identity_projection(input=layeroutput),
                              identity_projection(input=neg)])

LayerOutput.__sub__ = sub
def rsub(layeroutput, other):
    neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
    return add(neg, other)

LayerOutput.__rsub__ = rsub
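To make the scalar branches above concrete, a rough trace of how the test expression 2 - y (from math_ops.py below) resolves through these overloads:

    # 2 - y  ->  y.__rsub__(2)  ->  rsub(y, 2)
    neg = slope_intercept_layer(input=y, slope=-1.0)       # -y
    out = slope_intercept_layer(input=neg, intercept=2)    # (-y) + 2, via the scalar branch of add()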
86c0815275a9d5eb902e23c6a592f58a img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
-34e04043cbb12931c47fa44ec50eeffc projections.protostr
+5913f87b39cee3b2701fa158270aca26 projections.protostr
7334ba0a4544f0623231330fc51d390d shared_fc.protostr
-bb8e233b05b8e07f9ed386b7aee4f2c6 shared_lstm.protostr
+8b8b6bb128a7dfcc937be86145f53e2f shared_lstm.protostr
6b39e34beea8dfb782bee9bd3dea9eb5 simple_rnn_layers.protostr
-f98e79e1630d5eb827c300e64836d269 test_bi_grumemory.protostr
+4e78f0ded79f6fefb58ca0c104b57c79 test_bi_grumemory.protostr
0fc1409600f1a3301da994ab9d28b0bf test_cost_layers.protostr
6cd5f28a3416344f20120698470e0a4c test_cost_layers_with_weight.protostr
144bc6d3a509de74115fa623741797ed test_expand_layer.protostr
......@@ -16,7 +16,8 @@ d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr
5433ed33d4e7414eaf658f2a55946186 test_maxout.protostr
251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr
e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr
-fded24727338fb8ce44d9951ed8aea08 test_rnn_group.protostr
+2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr
67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr
f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr
-f937a5a6e7e8864b4d8cf56b0f7c7f44 util_layers.protostr
+8122477f4f65244580cec09edc590041 util_layers.protostr
+dcd76bebb5f9c755f481c26192917818 math_ops.protostr
......@@ -9,7 +9,7 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
-test_maxout test_bi_grumemory)
+test_maxout test_bi_grumemory math_ops)
for conf in ${configs[*]}
......
from paddle.trainer_config_helpers import *
from paddle.trainer_config_helpers import math
settings(
batch_size=1000,
learning_rate=1e-5
)
x = data_layer(name='data', size=100)
x = math.exp(x)
x = math.log(x)
x = math.abs(x)
x = math.sigmoid(x)
x = math.square(x)
x = math.square(x)
y = 1 + x
y = y + 1
y = x + y
y = y - x
y = y - 2
y = 2 - y
outputs(y)