diff --git a/doc/source/gserver/layers/layer.rst b/doc/source/gserver/layers/layer.rst index 807b22ca140ee71208a96e2877b9c5636620b165..4b8e149505f0695ad2fa4be967a50d1a0ac48b43 100644 --- a/doc/source/gserver/layers/layer.rst +++ b/doc/source/gserver/layers/layer.rst @@ -465,6 +465,11 @@ SumOfSquaresCostLayer .. doxygenclass:: paddle::SumOfSquaresCostLayer :members: +SumCostLayer +````````````````````` +.. doxygenclass:: paddle::SumCostLayer + :members: + CosSimLayer ----------- .. doxygenclass:: paddle::CosSimLayer diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst index a98e8f2f55c585d161acdad41072b1248f4598c8..5d674fb8946a396f7dcd82763ec4e4a5812aee6b 100644 --- a/doc/ui/api/trainer_config_helpers/layers.rst +++ b/doc/ui/api/trainer_config_helpers/layers.rst @@ -407,6 +407,12 @@ hsigmoid :members: hsigmoid :noindex: +sum_cost +--------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: sum_cost + :noindex: + Check Layer ============ diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 14ff8510f7b19dc24b7b1ba603485488ddd4979d..949788be497874a5bb34e49e11bdc8ba3205ba61 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -562,4 +562,39 @@ void HuberTwoClass::backwardImpIn( } } +/** + * This cost layer computes the sum of its input as loss. 
+ * \f[ + * o(i) = \sum_{j=1}^D y_{ij} + * \f] + */ +class SumCostLayer : public Layer { +public: + explicit SumCostLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool ret = Layer::init(layerMap, parameterMap); + if (!ret) return ret; + CHECK_EQ(inputLayers_.size(), 1UL); + return true; + } + + virtual void forward(PassType passType) { + Layer::forward(passType); + const MatrixPtr& input = getInputValue(0); + + /* malloc memory for the output_ if necessary */ + int batchSize = input->getHeight(); + int size = 1; + resizeOutput(batchSize, size); + output_.value->sumRows(*input); + } + + virtual void backward(const UpdateCallback& callback = nullptr) { + getInputGrad(0)->add((real)1); + } +}; + +REGISTER_LAYER(sum_cost, SumCostLayer); + } // namespace paddle diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index b464e16737ae561dce6e7d4f16a4dd61f73204e0..f263c688213ae6a83d5db4a1025aa252344dfab8 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -129,7 +129,7 @@ protected: * This cost layer compute Euclidean (L2) loss for real-valued regression * tasks. 
* \f[ - * L = \frac{1}{2N} \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2} + * L = \sum_{i=1}^N {|| \hat{y}_i - y_i||_2^2} * \f] */ class SumOfSquaresCostLayer : public CostLayer { diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9e9ee9eeceb928229f2e3be29c229f7a2ab14d8a..0ba3756b49dc159f55bcf1c15783ab8c547f6a19 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -998,6 +998,19 @@ TEST(Layer, rankCostLayer) { } } +TEST(Layer, sumCostLayer) { + TestConfig config; + config.layerConfig.set_type("sum_cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "sum_cost", 100, false, useGpu); + } +} + TEST(Layer, weightedRankCostLayer) { TestConfig config; config.layerConfig.set_type("rank-cost"); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 06ef35544590a882da7c7cf7213acba86898c2ea..d11ee1feca62ef53079291d93de7333991b65df7 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1903,6 +1903,7 @@ define_cost('SumOfSquaresCostLayer', 'square_error') define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy') define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy') define_cost('HuberTwoClass', 'huber') +define_cost('SumCost', 'sum_cost') @config_layer('hsigmoid') class HierarchicalSigmoidLayer(LayerBase): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index bd8e9f07b6aeb7f857a3760efcd60f2d6ca4ad8d..95fbcc3693c89463b8eedf236908daac51e93b0f 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -53,7 +53,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", 
'convex_comb_layer', 'ctc_layer', 'crf_layer', 'crf_decoding_layer', 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', - 'multi_binary_label_cross_entropy', + 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', 'print_layer' @@ -130,6 +130,7 @@ class LayerType(object): CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm" SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy" MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy" + SUM_COST = "sum_cost" @staticmethod def is_layer_type(type_name): @@ -4053,8 +4054,6 @@ def cross_entropy(input, label, name=None, coeff=1.0, layer_attr=None): :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param type: The type of cost. - :type type: basestring. :param name: The name of this layers. It is not necessary. :type name: None|basestring. :param coeff: The coefficient affects the gradient in the backward. @@ -4091,8 +4090,6 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0, :type input: LayerOutput. :param label: The input label. :type input: LayerOutput. - :param type: The type of cost. - :type type: basestring. :param name: The name of this layers. It is not necessary. :type name: None|basestring. :param coeff: The coefficient affects the gradient in the backward. @@ -4117,6 +4114,36 @@ def cross_entropy_with_selfnorm(input, label, name=None, coeff=1.0, parents=[input, label], size=1) +@wrap_name_default() +@layer_support() +def sum_cost(input, name=None, layer_attr=None): + """ + A loss layer which calculates the sum of the input as loss. + + .. code-block:: python + + cost = sum_cost(input) + + :param input: The input layer. + :type input: LayerOutput. + :param name: The name of this layer. It is not necessary. + :type name: None|basestring. + :param layer_attr: Extra Layer Attribute. 
+ :type layer_attr: ExtraLayerAttribute + :return: LayerOutput object. + :rtype: LayerOutput. + """ + Layer(name=name, + type=LayerType.SUM_COST, + inputs=[input.name], + **ExtraLayerAttribute.to_kwargs(layer_attr) + ) + + return LayerOutput(name, + LayerType.SUM_COST, + parents=[input]) + + @wrap_name_default() @layer_support() def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None): diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index 5261cf0c44943689a957bb99c21075bb7341cd49..f6045fe1f68255daf0d9b5ab05034eec633e4503 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -23,6 +23,17 @@ layers { size: 10 active_type: "" } +layers { + name: "__fc_layer_0__" + type: "fc" + size: 4 + active_type: "tanh" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} layers { name: "__ctc_layer_0__" type: "ctc" @@ -36,17 +47,6 @@ layers { } norm_by_times: false } -layers { - name: "__fc_layer_0__" - type: "fc" - size: 4 - active_type: "tanh" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" -} layers { name: "crf_label" type: "data" @@ -191,6 +191,16 @@ layers { } coeff: 1.0 } +layers { + name: "__sum_cost_0__" + type: "sum_cost" + size: 1 + active_type: "" + inputs { + input_layer_name: "__fc_layer_0__" + } + coeff: 1.0 +} parameters { name: "___fc_layer_0__.w0" size: 800 @@ -241,14 +251,15 @@ output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__huber_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" +output_layer_names: "__sum_cost_0__" 
sub_models { name: "root" layer_names: "input" layer_names: "labels" layer_names: "probs" layer_names: "xe-label" - layer_names: "__ctc_layer_0__" layer_names: "__fc_layer_0__" + layer_names: "__ctc_layer_0__" layer_names: "crf_label" layer_names: "__crf_layer_0__" layer_names: "left" @@ -264,6 +275,7 @@ sub_models { layer_names: "huber_label" layer_names: "__huber_cost_0__" layer_names: "__multi_binary_label_cross_entropy_0__" + layer_names: "__sum_cost_0__" input_layer_names: "input" input_layer_names: "labels" input_layer_names: "crf_label" @@ -284,6 +296,7 @@ sub_models { output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__huber_cost_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__" + output_layer_names: "__sum_cost_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py index 64b45f4ded10b09ec4a7e77499e2d7b21215f430..f1b3365f84e3e990afc110984a1bb3bd378007e7 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py @@ -11,8 +11,9 @@ labels = data_layer(name='labels', size=5000) probs = data_layer(name='probs', size=10) xe_label = data_layer(name='xe-label', size=10) +hidden = fc_layer(input=seq_in, size=4) outputs(ctc_layer(input=seq_in, label=labels), - crf_layer(input=fc_layer(input=seq_in, size=4), + crf_layer(input=hidden, label=data_layer(name='crf_label', size=4)), rank_cost(left=data_layer(name='left', size=1), right=data_layer(name='right', size=1), @@ -23,4 +24,5 @@ outputs(ctc_layer(input=seq_in, label=labels), cross_entropy_with_selfnorm(input=probs, label=xe_label), huber_cost(input=data_layer(name='huber_probs', size=1), label=data_layer(name='huber_label', size=1)), - multi_binary_label_cross_entropy(input=probs, label=xe_label)) + 
multi_binary_label_cross_entropy(input=probs, label=xe_label), + sum_cost(hidden))