From e6db484d154c041c1cf6650743bcf27dd2549b77 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Mon, 14 Aug 2017 15:51:00 +0800
Subject: [PATCH] make clear that the current huber_cost is for two-class classification

---
 paddle/gserver/layers/CostLayer.cpp          | 29 ++++++++++---------
 paddle/gserver/layers/CostLayer.h            | 18 +++++-------
 paddle/gserver/tests/test_LayerGrad.cpp      |  2 +-
 python/paddle/trainer/config_parser.py       |  2 +-
 .../paddle/trainer_config_helpers/layers.py  | 27 ++++++++++++-----
 .../protostr/test_cost_layers.protostr       | 10 +++----
 .../tests/configs/test_cost_layers.py        |  2 +-
 7 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 6bfdea3c6e3..138c86a6d6d 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -575,10 +575,10 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
 //
 // Huber loss for robust 2-classes classification
 //
-REGISTER_LAYER(huber, HuberTwoClass);
+REGISTER_LAYER(huber_classification, HuberTwoClassification);
 
-bool HuberTwoClass::init(const LayerMap& layerMap,
-                         const ParameterMap& parameterMap) {
+bool HuberTwoClassification::init(const LayerMap& layerMap,
+                                  const ParameterMap& parameterMap) {
   CostLayer::init(layerMap, parameterMap);
   if (useGpu_) {
     tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,7 +589,9 @@ bool HuberTwoClass::init(const LayerMap& layerMap,
   return true;
 }
 
-void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
+void HuberTwoClassification::forwardImp(Matrix& output,
+                                        Argument& label,
+                                        Matrix& cost) {
   if (useGpu_) {
     for (size_t i = 0; i < inputLayers_.size(); i++) {
       tmpCpuInput_[i].resizeAndCopyFrom(
@@ -600,10 +602,11 @@ void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
   forwardImpIn(output, label, cost);
 }
 
-void HuberTwoClass::forwardImpIn(Matrix& output,
-                                 Argument& label,
-                                 Matrix& target) {
+void HuberTwoClassification::forwardImpIn(Matrix& output,
+                                          Argument& label,
+                                          Matrix& target) {
   size_t numSamples = target.getHeight();
+  CHECK(label.ids);
   CHECK_EQ((*label.ids).getSize(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(output.getWidth(), (size_t)1);
@@ -624,9 +627,9 @@ void HuberTwoClass::forwardImpIn(Matrix& output,
   target.copyFrom(cost.data(), numSamples);
 }
 
-void HuberTwoClass::backwardImp(Matrix& outputValue,
-                                Argument& label,
-                                Matrix& outputGrad) {
+void HuberTwoClassification::backwardImp(Matrix& outputValue,
+                                         Argument& label,
+                                         Matrix& outputGrad) {
   if (useGpu_) {
     backwardImpIn(
         *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
@@ -636,9 +639,9 @@ void HuberTwoClass::backwardImp(Matrix& outputValue,
   }
 }
 
-void HuberTwoClass::backwardImpIn(Matrix& output,
-                                  Argument& label,
-                                  Matrix& outputG) {
+void HuberTwoClassification::backwardImpIn(Matrix& output,
+                                           Argument& label,
+                                           Matrix& outputG) {
   size_t numSamples = output.getHeight();
   real* out = output.getData();
   real* grad = outputG.getData();
diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h
index 14c0b33ec1a..77427b7a08e 100644
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -307,21 +307,17 @@ public:
 /**
  * Huber loss for robust 2-classes classification.
  *
- * For label={0, 1}, let y=2*label-1. Given output f, the loss is:
- * \f[
- * Loss =
- * \left\{\begin{matrix}
- * 4 * y * f & \textit{if} \ \ y* f < -1 \\
- * (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\
- * 0 & \textit{otherwise}
- * \end{matrix}\right.
- * \f]
+ * For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
+ * Loss = -4 * y * f,     if y * f < -1
+ * Loss = (1 - y * f)^2,  if -1 <= y * f < 1
+ * Loss = 0,              otherwise
 */
-class HuberTwoClass : public CostLayer {
+class HuberTwoClassification : public CostLayer {
   std::vector<Argument> tmpCpuInput_;
 
 public:
-  explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}
+  explicit HuberTwoClassification(const LayerConfig& config)
+      : CostLayer(config) {}
 
   bool init(const LayerMap& layerMap,
             const ParameterMap& parameterMap) override;
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 0f312b6ca50..6d60250f6d8 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -830,7 +830,7 @@ TEST(Layer, square_error_weighted) {
 
 TEST(Layer, huber_two_class) {
   TestConfig config;
-  config.layerConfig.set_type("huber");
+  config.layerConfig.set_type("huber_classification");
   config.biasSize = 0;
 
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index da99e5bd534..248da9417f2 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2255,7 +2255,7 @@ define_cost('PnpairValidation', 'pnpair-validation')
 define_cost('SumOfSquaresCostLayer', 'square_error')
 define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
-define_cost('HuberTwoClass', 'huber')
+define_cost('HuberTwoClassification', 'huber_classification')
 define_cost('SumCost', 'sum_cost')
 define_cost('SmoothL1Cost', 'smooth_l1')
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1bc55c86960..20d96efe152 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -108,7 +108,7 @@ __all__ = [
     'sum_cost',
     'rank_cost',
     'lambda_cost',
-    'huber_cost',
+    'huber_classification_cost',
     'block_expand_layer',
     'maxout_layer',
     'out_prod_layer',
@@ -216,7 +216,7 @@ class LayerType(object):
 
     RANK_COST = 'rank-cost'
     LAMBDA_COST = 'lambda_cost'
-    HUBER = 'huber'
+    HUBER_CLASSIFICATION = 'huber_classification'
     CROSS_ENTROPY = 'multi-class-cross-entropy'
     CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
     SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
@@ -5605,16 +5605,26 @@ def sum_cost(input, name=None, layer_attr=None):
 
 @wrap_name_default()
 @layer_support()
-def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
+def huber_classification_cost(input,
+                              label,
+                              name=None,
+                              coeff=1.0,
+                              layer_attr=None):
     """
-    A loss layer for huber loss.
+    For classification purposes, a variant of the Huber loss called modified Huber
+    is sometimes used. Given a prediction f(x) (a real-valued classifier score) and
+    a true binary class label :math:`y\in \left \{-1, 1 \right \}`, the modified Huber
+    loss is defined as:
+
+    .. math::
+        loss = \max \left ( 0, 1-yf(x) \right )^2, \quad \text{if} \  yf(x)\geq -1
+        loss = -4yf(x), \quad \text{otherwise}
 
     The example usage is:
 
     .. code-block:: python
 
-        cost = huber_cost(input=input_layer,
-                          label=label_layer)
+        cost = huber_classification_cost(input=input_layer, label=label_layer)
 
     :param input: The first input layer.
     :type input: LayerOutput.
@@ -5634,11 +5644,12 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
     assert input.size == 1
     Layer(
         name=name,
-        type=LayerType.HUBER,
+        type=LayerType.HUBER_CLASSIFICATION,
         inputs=[input.name, label.name],
         coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1)
+    return LayerOutput(
+        name, LayerType.HUBER_CLASSIFICATION, parents=[input, label], size=1)
 
 
 @wrap_name_default()
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr
index 05847344be6..a64e5ea0ddb 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr
@@ -180,8 +180,8 @@ layers {
   active_type: ""
 }
 layers {
-  name: "__huber_cost_0__"
-  type: "huber"
+  name: "__huber_classification_cost_0__"
+  type: "huber_classification"
   size: 1
   active_type: ""
   inputs {
@@ -300,7 +300,7 @@ output_layer_names: "__rank_cost_0__"
 output_layer_names: "__lambda_cost_0__"
 output_layer_names: "__cross_entropy_0__"
 output_layer_names: "__cross_entropy_with_selfnorm_0__"
-output_layer_names: "__huber_cost_0__"
+output_layer_names: "__huber_classification_cost_0__"
 output_layer_names: "__multi_binary_label_cross_entropy_0__"
 output_layer_names: "__sum_cost_0__"
 output_layer_names: "__nce_layer_0__"
@@ -326,7 +326,7 @@ sub_models {
   layer_names: "__cross_entropy_with_selfnorm_0__"
   layer_names: "huber_probs"
   layer_names: "huber_label"
-  layer_names: "__huber_cost_0__"
+  layer_names: "__huber_classification_cost_0__"
   layer_names: "__multi_binary_label_cross_entropy_0__"
   layer_names: "__sum_cost_0__"
   layer_names: "__nce_layer_0__"
@@ -349,7 +349,7 @@ sub_models {
   output_layer_names: "__lambda_cost_0__"
   output_layer_names: "__cross_entropy_0__"
   output_layer_names: "__cross_entropy_with_selfnorm_0__"
-  output_layer_names: "__huber_cost_0__"
+  output_layer_names: "__huber_classification_cost_0__"
   output_layer_names: "__multi_binary_label_cross_entropy_0__"
   output_layer_names: "__sum_cost_0__"
   output_layer_names: "__nce_layer_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
index d2a3b702a1d..98bf026d606 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
@@ -33,7 +33,7 @@ outputs(
         input=probs, label=xe_label),
     cross_entropy_with_selfnorm(
         input=probs, label=xe_label),
-    huber_cost(
+    huber_classification_cost(
         input=data_layer(
             name='huber_probs', size=1),
         label=data_layer(
--
GitLab
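
As a sanity check of the loss documented in this patch, below is a minimal NumPy sketch of the modified Huber loss that HuberTwoClassification computes. It is an illustration only, not part of the patch: the helper name modified_huber_loss is hypothetical and NumPy is an assumption, whereas the real layer operates on Paddle Matrix/Argument objects.

.. code-block:: python

    # Illustrative sketch, not part of the patch: modified Huber loss
    # for binary classification, assuming NumPy. The real layer works
    # on Paddle Matrix/Argument objects.
    import numpy as np

    def modified_huber_loss(f, label):  # hypothetical helper name
        y = 2 * np.asarray(label) - 1        # map labels {0, 1} -> {-1, +1}
        a = y * np.asarray(f, dtype=float)   # margin y * f(x)
        # linear branch for a < -1, squared hinge max(0, 1 - a)^2 elsewhere
        return np.where(a < -1, -4.0 * a, np.maximum(0.0, 1.0 - a) ** 2)

    # a = -2 -> -4a = 8;  a = 0 -> (1 - 0)^2 = 1;  a = 2 -> 0
    print(modified_huber_loss(f=[-2.0, 0.0, 2.0], label=[1, 1, 1]))  # [8. 1. 0.]

At the joint y * f(x) = -1 both branches evaluate to 4 and both derivatives equal -4, so the loss is continuous and differentiable there, which is why the quadratic region extends down to -1 rather than stopping at 0.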