From ebad8e525d711230b63cd0112a55cac3f6cc751a Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Fri, 4 Nov 2016 13:04:28 -0700
Subject: [PATCH] Add SumCost

This allows users to implement any type of cost by summing over the
output of non-cost layers.

Change-Id: Ic55aaabbf0c1299e70b8e48a0effcc91f8f5bd29
---
 paddle/gserver/layers/CostLayer.cpp                  | 29 +++++++++++++++
 paddle/gserver/tests/test_LayerGrad.cpp              | 13 +++++++
 python/paddle/trainer/config_parser.py               |  1 +
 .../paddle/trainer_config_helpers/layers.py          | 37 ++++++++++++++++---
 .../tests/configs/test_cost_layers.py                |  6 ++-
 5 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 14ff8510f7b..0bb8359a904 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -562,4 +562,33 @@ void HuberTwoClass::backwardImpIn(
   }
 }
 
+class SumCostLayer : public Layer {
+public:
+  explicit SumCostLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
+    bool ret = Layer::init(layerMap, parameterMap);
+    if (!ret) return ret;
+    CHECK_EQ(inputLayers_.size(), 1UL);
+    return true;
+  }
+
+  virtual void forward(PassType passType) {
+    Layer::forward(passType);
+    const MatrixPtr& input = getInputValue(0);
+
+    /* malloc memory for the output_ if necessary */
+    int batchSize = input->getHeight();
+    int size = 1;
+    resizeOutput(batchSize, size);
+    output_.value->sumRows(*input);
+  }
+
+  virtual void backward(const UpdateCallback& callback = nullptr) {
+    getInputGrad(0)->add((real)1);
+  }
+};
+
+REGISTER_LAYER(sum_cost, SumCostLayer);
+
 }  // namespace paddle

diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 5397b952bce..61b89f5ec3c 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -935,6 +935,19 @@ TEST(Layer, rankCostLayer) {
   }
 }
 
+TEST(Layer, sumCostLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("sum_cost");
+  config.biasSize = 0;
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "sum_cost", 100, false, useGpu);
+  }
+}
+
 TEST(Layer, weightedRankCostLayer) {
   TestConfig config;
   config.layerConfig.set_type("rank-cost");

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 73631602a92..5a0d5018f0d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1795,6 +1795,7 @@ define_cost('SumOfSquaresCostLayer', 'square_error')
 define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
 define_cost('HuberTwoClass', 'huber')
+define_cost('SumCost', 'sum_cost')
 
 @config_layer('hsigmoid')
 class HierarchicalSigmoidLayer(LayerBase):

diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 49f0ff3289d..c768a419c0b 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -52,7 +52,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
            'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer',
            'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy',
-           'multi_binary_label_cross_entropy',
+           'multi_binary_label_cross_entropy', 'sum_cost',
            'rank_cost', 'lambda_cost', 'huber_cost',
            'block_expand_layer',
            'maxout_layer', 'out_prod_layer', 'print_layer'
@@ -126,6 +126,7 @@ class LayerType(object):
     CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
     SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
     MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
+    SUM_COST = "sum_cost"
 
     @staticmethod
     def is_layer_type(type_name):
@@ -3924,8 +3925,6 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None):
     :type input: LayerOutput.
     :param label: The input label.
     :type input: LayerOutput.
-    :param type: The type of cost.
-    :type type: basestring.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
     :param coeff: The coefficient affects the gradient in the backward.
@@ -3961,8 +3960,6 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
     :type input: LayerOutput.
     :param label: The input label.
     :type input: LayerOutput.
-    :param type: The type of cost.
-    :type type: basestring.
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring.
     :param coeff: The coefficient affects the gradient in the backward.
@@ -3987,6 +3984,36 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0,
                        parents=[input, label])
 
 
+@wrap_name_default()
+@layer_support()
+def sum_cost(input, name=None, layer_attr=None):
+    """
+    A loss layer which calculates the sum of the input as loss.
+
+    .. code-block:: python
+
+       cost = sum_cost(input)
+
+    :param input: The first input layer.
+    :type input: LayerOutput.
+    :param name: The name of this layers. It is not necessary.
+    :type name: None|basestring.
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput.
+    """
+    Layer(name=name,
+          type=LayerType.SUM_COST,
+          inputs=[input.name],
+          **ExtraLayerAttribute.to_kwargs(layer_attr)
+          )
+
+    return LayerOutput(name,
+                       LayerType.SUM_COST,
+                       parents=[input])
+
+
 @wrap_name_default()
 @layer_support()
 def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):

diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
index 64b45f4ded1..f1b3365f84e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
@@ -11,8 +11,9 @@ labels = data_layer(name='labels', size=5000)
 probs = data_layer(name='probs', size=10)
 xe_label = data_layer(name='xe-label', size=10)
 
+hidden = fc_layer(input=seq_in, size=4)
 outputs(ctc_layer(input=seq_in, label=labels),
-        crf_layer(input=fc_layer(input=seq_in, size=4),
+        crf_layer(input=hidden,
                   label=data_layer(name='crf_label', size=4)),
         rank_cost(left=data_layer(name='left', size=1),
                   right=data_layer(name='right', size=1),
@@ -23,4 +24,5 @@ outputs(ctc_layer(input=seq_in, label=labels),
         cross_entropy_with_selfnorm(input=probs, label=xe_label),
         huber_cost(input=data_layer(name='huber_probs', size=1),
                    label=data_layer(name='huber_label', size=1)),
-        multi_binary_label_cross_entropy(input=probs, label=xe_label))
+        multi_binary_label_cross_entropy(input=probs, label=xe_label),
+        sum_cost(hidden))
-- 
GitLab
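
Usage note (not part of the patch): a minimal sketch of how a trainer config might compose a custom cost with the new layer, using only helpers that already appear in this patch (data_layer, fc_layer, outputs). The layer names and sizes below are made up for illustration, and settings()/data-provider wiring is omitted.

from paddle.trainer_config_helpers import *

# Hypothetical input layer; the size of 100 is arbitrary.
features = data_layer(name='features', size=100)

# Any stack of non-cost layers can compute the per-sample penalty.
# A single-output fc_layer stands in here for an arbitrary cost expression.
per_sample_cost = fc_layer(input=features, size=1)

# sum_cost sums each sample's input values into one cost value
# (sumRows in SumCostLayer::forward), so the stack above effectively
# acts as a user-defined cost layer.
outputs(sum_cost(per_sample_cost))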