From 6c3a678c9a537a6b37c189490c646790860757c0 Mon Sep 17 00:00:00 2001
From: emailweixu
Date: Fri, 4 Nov 2016 10:37:22 -0700
Subject: [PATCH] Add elementwise math operations (#343)

* Add elementwise math operations

This allows us to use expressions like: y=log(1+exp(x))
Also added unittests for ActivationFunction

* Enforce keyword arguments for non-positional arguments

* Add LogActivation to doc
---
 .../trainer_config_helpers/activations.rst    |  7 ++
 .../activations/ActivationFunction.cpp        | 29 ++++++++
 .../gserver/activations/ActivationFunction.h  |  2 +
 paddle/gserver/tests/CMakeLists.txt           |  7 ++
 paddle/gserver/tests/test_ActivationGrad.cpp  | 66 +++++++++++++++++++
 python/paddle/trainer/config_parser.py        | 10 +--
 .../trainer_config_helpers/activations.py     |  9 +++
 .../default_decorators.py                     |  9 ++-
 .../paddle/trainer_config_helpers/layers.py   |  2 +-
 python/paddle/trainer_config_helpers/math.py  | 64 ++++++++++++++++++
 .../tests/configs/check.md5                   | 11 ++--
 .../tests/configs/generate_protostr.sh        |  2 +-
 .../tests/configs/math_ops.py                 | 24 +++++++
 13 files changed, 229 insertions(+), 13 deletions(-)
 create mode 100644 paddle/gserver/tests/test_ActivationGrad.cpp
 create mode 100644 python/paddle/trainer_config_helpers/math.py
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/math_ops.py

diff --git a/doc/ui/api/trainer_config_helpers/activations.rst b/doc/ui/api/trainer_config_helpers/activations.rst
index c4e14ed779e..070ed03ab6c 100644
--- a/doc/ui/api/trainer_config_helpers/activations.rst
+++ b/doc/ui/api/trainer_config_helpers/activations.rst
@@ -32,6 +32,13 @@ LinearActivation
 .. automodule:: paddle.trainer_config_helpers.activations
     :members: LinearActivation
     :noindex:
+
+LogActivation
+==================
+
+.. automodule:: paddle.trainer_config_helpers.activations
+    :members: LogActivation
+    :noindex:
 
 SquareActivation
 ================
diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index 9918d20d908..27eed75d4d7 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -295,6 +295,7 @@ void forward(Argument& act) {
 
 void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
 END_DEFINE_ACTIVATION(square)
+
 /**
  * @brief Exponential Activation.
  * \f[
@@ -307,8 +308,36 @@ void forward(Argument& act) { act.value->exp(*act.value); }
 void backward(Argument& act) { act.grad->expDerivative(*act.value); }
 END_DEFINE_ACTIVATION(exponential)
 
+/**
+ * @brief Logarithm Activation.
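+ *        (backward() computes grad / z, i.e. it applies d/dz log(z) = 1/z;
+ *         this is why forward() below saves a copy of the input in act.in)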
+ * \f[
+ *    f(z) = log(z)
+ * \f]
+ */
+BEGIN_DEFINE_ACTIVATION(log)
+void forward(Argument& act) {
+  SetDevice device(act.deviceId);
+  Matrix::resizeOrCreate(act.in, act.value->getHeight(), act.value->getWidth(),
+                         /* trans */ false, useGpu(act.deviceId));
+
+  act.in->copyFrom(*act.value);
+  act.value->log(*act.value);
+}
+
+void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
+END_DEFINE_ACTIVATION(log)
+
 ActivationFunction* ActivationFunction::create(const std::string& type) {
   return gActivationRegistrar.createByType(type);
 }
 
+std::vector<std::string> ActivationFunction::getAllRegisteredTypes() {
+  std::vector<std::string> types;
+  gActivationRegistrar.forEachType([&](const std::string& type) {
+    types.push_back(type);
+  });
+  return types;
+}
+
+
 }  // namespace paddle
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index 29860b4a736..c483372256c 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include <string>
+#include <vector>
 
 namespace paddle {
 
@@ -32,6 +33,7 @@ struct Argument;
 class ActivationFunction {
 public:
   static ActivationFunction* create(const std::string& type);
+  static std::vector<std::string> getAllRegisteredTypes();
 
   ActivationFunction() {}
 
diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index ff2abf76973..26ee2b3aae6 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -20,6 +20,13 @@ add_unittest_without_exec(test_LayerGrad
 add_test(NAME test_LayerGrad
     COMMAND test_LayerGrad)
 
+add_unittest_without_exec(test_ActivationGrad
+    test_ActivationGrad.cpp
+    LayerGradUtil.cpp
+    TestUtil.cpp)
+add_test(NAME test_ActivationGrad
+    COMMAND test_ActivationGrad)
+
 ################## test_Evaluator #######################
 add_unittest(test_Evaluator
     test_Evaluator.cpp
diff --git a/paddle/gserver/tests/test_ActivationGrad.cpp b/paddle/gserver/tests/test_ActivationGrad.cpp
new file mode 100644
index 00000000000..2c5d17090df
--- /dev/null
+++ b/paddle/gserver/tests/test_ActivationGrad.cpp
@@ -0,0 +1,66 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <string>
+#include <vector>
+#include "paddle/gserver/layers/DataLayer.h"
+#include "ModelConfig.pb.h"
+#include "paddle/trainer/Trainer.h"
+
+#include "TestUtil.h"
+#include "LayerGradUtil.h"
+
+using namespace paddle;  // NOLINT
+using namespace std;     // NOLINT
+
+P_DECLARE_bool(use_gpu);
+P_DECLARE_bool(thread_local_rand_use_global_seed);
+
+void testActivation(const string& act) {
+  LOG(INFO) << "test activation: " << act;
+  size_t size = 10;
+  TestConfig config;
+  config.biasSize = 0;
+  config.layerConfig.set_type("addto");
+  config.layerConfig.set_size(size);
+  config.layerConfig.set_active_type(act);
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0});
+  config.layerConfig.add_inputs();
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config,
+                  act + "_activation",
+                  100,
+                  /* trans= */false,
+                  useGpu,
+                  /* useWeight */true);
+  }
+}
+
+TEST(Activation, activation) {
+  auto types = ActivationFunction::getAllRegisteredTypes();
+  std::set<string> excluded{"sequence_softmax"};
+  for (auto type : types) {
+    if (excluded.count(type)) continue;
+    testActivation(type);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  initMain(argc, argv);
+  FLAGS_thread_local_rand_use_global_seed = true;
+  srand(1);
+  return RUN_ALL_TESTS();
+}
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index e9098943165..e9038fea8a2 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2573,8 +2573,9 @@ class MixedLayer(LayerBase):
         for input in self.inputs:
             psize += input.calc_bias_size()
 
-        self.config.bias_size = psize
-        self.create_bias_parameter(bias, psize)
+        if bias:
+            self.config.bias_size = psize
+            self.create_bias_parameter(bias, psize)
 
         if error_clipping_threshold is not None:
             self.config.error_clipping_threshold = error_clipping_threshold
@@ -2659,8 +2660,9 @@ class ConcatenateLayer2(LayerBase):
         for input in self.inputs:
             psize += input.calc_bias_size()
 
-        self.config.bias_size = psize
-        self.create_bias_parameter(bias, psize)
+        if bias:
+            self.config.bias_size = psize
+            self.create_bias_parameter(bias, psize)
 
 @config_layer('recurrent')
 class RecurrentLayer(LayerBase):
diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py
index 29201451937..ad5cdc0a0eb 100644
--- a/python/paddle/trainer_config_helpers/activations.py
+++ b/python/paddle/trainer_config_helpers/activations.py
@@ -199,3 +199,12 @@ class ExpActivation(BaseActivation):
        f(z) = e^z.
     """
     def __init__(self): BaseActivation.__init__(self, 'exponential', False)
+
+class LogActivation(BaseActivation):
+    """
+    Logarithm Activation.
+
+    .. math::
+       f(z) = log(z)
+    """
+    def __init__(self): BaseActivation.__init__(self, 'log', False)
diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py
index b20aebc685f..be00f48b457 100644
--- a/python/paddle/trainer_config_helpers/default_decorators.py
+++ b/python/paddle/trainer_config_helpers/default_decorators.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import functools
+import inspect
 from .attrs import ParamAttr
 from .activations import TanhActivation
 from paddle.trainer.config_parser import *
@@ -37,8 +38,12 @@ def wrap_param_default(param_names=None, default_factory=None,
         @functools.wraps(func)
         def __wrapper__(*args, **kwargs):
             if len(args) != 0:
-                logger.warning("please use keyword arguments in paddle config.")
-
+                argspec = inspect.getargspec(func)
+                num_positional = len(argspec.args)
+                if argspec.defaults:
+                    num_positional -= len(argspec.defaults)
+                if not argspec.varargs and len(args) > num_positional:
+                    logger.fatal("Must use keyword arguments for non-positional args")
             for name in param_names:
                 if not_set_callback(kwargs, name):  # Not set
                     kwargs[name] = default_factory(func)
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 9a23c02431d..49f0ff3289d 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -564,7 +564,7 @@ class MixedLayerType(LayerOutput):
         self.inputs = []
         self.finalized = False
 
-    def __add__(self, other):
+    def __iadd__(self, other):
         """
         + += operator
         :param other: Other projection.
diff --git a/python/paddle/trainer_config_helpers/math.py b/python/paddle/trainer_config_helpers/math.py
new file mode 100644
index 00000000000..e35849b77ac
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/math.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .layers import LayerOutput, mixed_layer, identity_projection, \
+    slope_intercept_layer
+from .attrs import is_compatible_with
+from .default_decorators import *
+import activations as act
+
+__all__ = []
+
+def register_unary_math_op(op_name, act):
+    def op(input, name=None):
+        return mixed_layer(input=[identity_projection(input=input)],
+                           name=name,
+                           act=act)
+    op = wrap_name_default(op_name)(op)
+    op.__doc__ = type(act).__doc__
+    globals()[op_name] = op
+    __all__.append(op_name)
+
+register_unary_math_op('exp', act.ExpActivation())
+register_unary_math_op('log', act.LogActivation())
+register_unary_math_op('abs', act.AbsActivation())
+register_unary_math_op('sigmoid', act.SigmoidActivation())
+register_unary_math_op('tanh', act.TanhActivation())
+register_unary_math_op('square', act.SquareActivation())
+
+def add(layeroutput, other):
+    if is_compatible_with(other, float):
+        return slope_intercept_layer(input=layeroutput, intercept=other)
+    assert isinstance(other, LayerOutput)
+    return mixed_layer(input=[identity_projection(input=layeroutput),
+                              identity_projection(input=other)])
+
+LayerOutput.__radd__ = add
+LayerOutput.__add__ = add
+
+def sub(layeroutput, other):
+    if is_compatible_with(other, float):
+        return slope_intercept_layer(input=layeroutput, intercept=other)
+    assert isinstance(other, LayerOutput)
+    neg = slope_intercept_layer(input=other, slope=-1.0)
+    return mixed_layer(input=[identity_projection(input=layeroutput),
+                              identity_projection(input=neg)])
+
+LayerOutput.__sub__ = sub
+
+def rsub(layeroutput, other):
+    neg = slope_intercept_layer(input=layeroutput, slope=-1.0)
+    return add(neg, other)
+
+LayerOutput.__rsub__ = rsub
diff --git a/python/paddle/trainer_config_helpers/tests/configs/check.md5 b/python/paddle/trainer_config_helpers/tests/configs/check.md5
index 72dfdad7bdd..93d129b765e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/check.md5
+++ b/python/paddle/trainer_config_helpers/tests/configs/check.md5
@@ -1,11 +1,11 @@
 86c0815275a9d5eb902e23c6a592f58a img_layers.protostr
 a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
 9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
-34e04043cbb12931c47fa44ec50eeffc projections.protostr
+5913f87b39cee3b2701fa158270aca26 projections.protostr
 7334ba0a4544f0623231330fc51d390d shared_fc.protostr
-bb8e233b05b8e07f9ed386b7aee4f2c6 shared_lstm.protostr
+8b8b6bb128a7dfcc937be86145f53e2f shared_lstm.protostr
 6b39e34beea8dfb782bee9bd3dea9eb5 simple_rnn_layers.protostr
-f98e79e1630d5eb827c300e64836d269 test_bi_grumemory.protostr
+4e78f0ded79f6fefb58ca0c104b57c79 test_bi_grumemory.protostr
 0fc1409600f1a3301da994ab9d28b0bf test_cost_layers.protostr
 6cd5f28a3416344f20120698470e0a4c test_cost_layers_with_weight.protostr
 144bc6d3a509de74115fa623741797ed test_expand_layer.protostr
@@ -16,7 +16,8 @@ d350bd91a0dc13e854b1364c3d9339c6 test_lstmemory_layer.protostr
 5433ed33d4e7414eaf658f2a55946186 test_maxout.protostr
 251a948ba41c1071afcd3d9cf9c233f7 test_ntm_layers.protostr
 e6ff04e70aea27c7b06d808cc49c9497 test_print_layer.protostr
-fded24727338fb8ce44d9951ed8aea08 test_rnn_group.protostr
+2a75dd33b640c49a8821c2da6e574577 test_rnn_group.protostr
 67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr
 f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr
-f937a5a6e7e8864b4d8cf56b0f7c7f44 util_layers.protostr
+8122477f4f65244580cec09edc590041 util_layers.protostr
+dcd76bebb5f9c755f481c26192917818 math_ops.protostr
diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
index 6a31ceabdf3..9e23bd1fe2b 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh
@@ -9,7 +9,7 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer
 last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers util_layers simple_rnn_layers unused_layers
 test_cost_layers test_rnn_group shared_fc shared_lstm
 test_cost_layers_with_weight
-test_maxout test_bi_grumemory)
+test_maxout test_bi_grumemory math_ops)
 
 for conf in ${configs[*]}
diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
new file mode 100644
index 00000000000..fe515b70293
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
@@ -0,0 +1,24 @@
+from paddle.trainer_config_helpers import *
+from paddle.trainer_config_helpers import math
+
+settings(
+    batch_size=1000,
+    learning_rate=1e-5
+)
+
+x = data_layer(name='data', size=100)
+x = math.exp(x)
+x = math.log(x)
+x = math.abs(x)
+x = math.sigmoid(x)
+x = math.square(x)
+x = math.square(x)
+y = 1 + x
+y = y + 1
+y = x + y
+y = y - x
+y = y - 2
+y = 2 - y
+
+outputs(y)
--
GitLab
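
Usage sketch (not part of the patch itself): the commit message's motivating expression,
y = log(1 + exp(x)), can be written directly in a trainer config once this patch is in
place. The snippet below is a minimal, hypothetical config; the layer name, size, and
settings() values are illustrative only and simply mirror the math_ops.py test config above.

    from paddle.trainer_config_helpers import *
    from paddle.trainer_config_helpers import math

    settings(batch_size=128, learning_rate=1e-4)

    x = data_layer(name='data', size=100)
    # softplus assembled from the new elementwise helpers: log(1 + exp(x)).
    # The constant 1 is handled by LayerOutput.__radd__, which lowers it to a
    # slope_intercept_layer, exactly as the y = 1 + x line in math_ops.py does.
    y = math.log(1 + math.exp(x))

    outputs(y)

Note also that, with the default_decorators.py change above, passing more positional
arguments than a wrapped layer function has non-default parameters now reports a fatal
error via logger.fatal("Must use keyword arguments for non-positional args") instead of
merely logging a warning.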