提交 3065cb26 编写于 作者: L Luo Tao

add huber_regression_cost

上级 27a99bfb
...@@ -409,6 +409,11 @@ multi_binary_label_cross_entropy_cost ...@@ -409,6 +409,11 @@ multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex: :noindex:
huber_regression_cost
-------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost
:noindex:
huber_classification_cost huber_classification_cost
------------------------- -------------------------
.. autoclass:: paddle.v2.layer.huber_classification_cost .. autoclass:: paddle.v2.layer.huber_classification_cost
......
...@@ -594,6 +594,61 @@ void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) { ...@@ -594,6 +594,61 @@ void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
} }
} }
//
// Huber loss for robust regression.
//
REGISTER_LAYER(huber_regression, HuberRegressionLoss);
bool HuberRegressionLoss::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
HuberCost::init(layerMap, parameterMap);
delta_ = config_.delta();
return true;
}
void HuberRegressionLoss::forwardImp(Matrix& output,
Argument& label,
Matrix& target) {
HuberCost::forwardImp(output, label, target);
size_t numSamples = target.getHeight();
CHECK(label.value);
CHECK_EQ((*label.value).getHeight(), numSamples);
CHECK_EQ(output.getHeight(), numSamples);
CHECK_EQ(output.getWidth(), (*label.value).getWidth());
CHECK_EQ(target.getWidth(), (size_t)1);
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
real* lbl =
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
std::vector<real> cost(numSamples);
for (size_t i = 0; i < numSamples; ++i) {
real a = std::abs(lbl[i] - out[i]);
if (a <= delta_)
cost[i] = a * a / 2;
else
cost[i] = delta_ * (a - delta_ / 2);
}
target.copyFrom(cost.data(), numSamples);
}
void HuberRegressionLoss::backwardImp(Matrix& output,
Argument& label,
Matrix& outputG) {
size_t numSamples = output.getHeight();
real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
real* lbl =
useGpu_ ? tmpCpuInput_[1].value->getData() : (*label.value).getData();
real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
for (size_t i = 0; i < numSamples; ++i) {
real a = lbl[i] - out[i];
if (std::abs(a) <= delta_)
grad[i] += -a;
else
grad[i] += a > 0 ? delta_ : -delta_;
}
if (useGpu_) outputG.copyFrom(grad, numSamples);
}
// //
// Huber loss for robust 2-classes classification // Huber loss for robust 2-classes classification
// //
......
...@@ -321,6 +321,30 @@ public: ...@@ -321,6 +321,30 @@ public:
void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
}; };
/**
* Huber loss for robust regression.
*
* Given output f(x), label y and delta, the loss is:
* Loss = 0.5 * (1 - y * f)^2, if abs(y - f) <= delta \\
* Loss = delta * abs(y - f) - 0.5 * delta^2, otherwise
*/
class HuberRegressionLoss : public HuberCost {
public:
explicit HuberRegressionLoss(const LayerConfig& config) : HuberCost(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
void backwardImp(Matrix& outputValue,
Argument& label,
Matrix& outputGrad) override;
protected:
real delta_;
};
/** /**
* Huber loss for robust 2-classes classification. * Huber loss for robust 2-classes classification.
* *
......
...@@ -828,6 +828,24 @@ TEST(Layer, square_error_weighted) { ...@@ -828,6 +828,24 @@ TEST(Layer, square_error_weighted) {
} }
} }
TEST(Layer, huber_regression_loss) {
TestConfig config;
config.layerConfig.set_type("huber_regression");
config.biasSize = 0;
config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
for (auto delta : {1, 3, 5}) {
config.layerConfig.set_delta(delta);
testLayerGrad(config, "huber_regression", 100, /* trans */ false, useGpu);
}
}
}
TEST(Layer, huber_two_class) { TEST(Layer, huber_two_class) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("huber_classification"); config.layerConfig.set_type("huber_classification");
...@@ -839,7 +857,7 @@ TEST(Layer, huber_two_class) { ...@@ -839,7 +857,7 @@ TEST(Layer, huber_two_class) {
config.layerConfig.add_inputs(); config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) { for (auto useGpu : {false, true}) {
testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); testLayerGrad(config, "huber_two_class", 100, /* trans */ false, useGpu);
} }
} }
......
...@@ -496,6 +496,9 @@ message LayerConfig { ...@@ -496,6 +496,9 @@ message LayerConfig {
optional int32 axis = 54 [ default = 2 ]; optional int32 axis = 54 [ default = 2 ];
repeated uint32 offset = 55; repeated uint32 offset = 55;
repeated uint32 shape = 56; repeated uint32 shape = 56;
// for HuberRegressionLoss
optional double delta = 57 [ default = 1.0 ];
} }
message EvaluatorConfig { message EvaluatorConfig {
......
...@@ -2317,6 +2317,17 @@ class LambdaCost(LayerBase): ...@@ -2317,6 +2317,17 @@ class LambdaCost(LayerBase):
self.config.max_sort_size = max_sort_size self.config.max_sort_size = max_sort_size
@config_layer('huber_regression')
class HuberRegressionLoss(LayerBase):
def __init__(self, name, inputs, delta=1., coeff=1., device=None):
super(HuberRegressionLoss, self).__init__(
name, 'huber_regression', 1, inputs=inputs, device=device)
config_assert(
len(self.inputs) == 2, 'HuberRegression must have 2 inputs')
self.config.delta = delta
self.config.coeff = coeff
@config_layer('nce') @config_layer('nce')
class NCELayer(LayerBase): class NCELayer(LayerBase):
def __init__(self, def __init__(self,
......
...@@ -108,6 +108,7 @@ __all__ = [ ...@@ -108,6 +108,7 @@ __all__ = [
'sum_cost', 'sum_cost',
'rank_cost', 'rank_cost',
'lambda_cost', 'lambda_cost',
'huber_regression_cost',
'huber_classification_cost', 'huber_classification_cost',
'block_expand_layer', 'block_expand_layer',
'maxout_layer', 'maxout_layer',
...@@ -216,6 +217,7 @@ class LayerType(object): ...@@ -216,6 +217,7 @@ class LayerType(object):
RANK_COST = 'rank-cost' RANK_COST = 'rank-cost'
LAMBDA_COST = 'lambda_cost' LAMBDA_COST = 'lambda_cost'
HUBER_REGRESSION = 'huber_regression'
HUBER_CLASSIFICATION = 'huber_classification' HUBER_CLASSIFICATION = 'huber_classification'
CROSS_ENTROPY = 'multi-class-cross-entropy' CROSS_ENTROPY = 'multi-class-cross-entropy'
CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
...@@ -5603,6 +5605,57 @@ def sum_cost(input, name=None, layer_attr=None): ...@@ -5603,6 +5605,57 @@ def sum_cost(input, name=None, layer_attr=None):
return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1) return LayerOutput(name, LayerType.SUM_COST, parents=[input], size=1)
@wrap_name_default()
@layer_support()
def huber_regression_cost(input,
label,
name=None,
delta=1.0,
coeff=1.0,
layer_attr=None):
"""
In statistics, the Huber loss is a loss function used in robust regression,
that is less sensitive to outliers in data than the squared error loss.
Given a prediction f(x), a label y and :math:`\delta`, the loss function
is defined as:
.. math:
loss = 0.5*\left ( y-f(x) \right )^2, \left | y-f(x) \right |\leq \delta
loss = \delta \left | y-f(x) \right |-0.5\delta ^2, otherwise
The example usage is:
.. code-block:: python
cost = huber_regression_cost(input=input_layer, label=label_layer)
:param input: The first input layer.
:type input: LayerOutput.
:param label: The input label.
:type input: LayerOutput.
:param name: The name of this layers. It is not necessary.
:type name: None|basestring.
:param delta: The difference between the observed and predicted values.
:type delta: float.
:param coeff: The coefficient affects the gradient in the backward.
:type coeff: float.
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:return: LayerOutput object.
:rtype: LayerOutput.
"""
assert isinstance(input, LayerOutput)
Layer(
name=name,
type=LayerType.HUBER_REGRESSION,
inputs=[input.name, label.name],
delta=delta,
coeff=coeff,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.HUBER_REGRESSION, parents=[input, label], size=1)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def huber_classification_cost(input, def huber_classification_cost(input,
......
...@@ -167,6 +167,20 @@ layers { ...@@ -167,6 +167,20 @@ layers {
softmax_selfnorm_alpha: 0.1 softmax_selfnorm_alpha: 0.1
coeff: 1.0 coeff: 1.0
} }
layers {
name: "__huber_regression_cost_0__"
type: "huber_regression"
size: 1
active_type: ""
inputs {
input_layer_name: "input"
}
inputs {
input_layer_name: "labels"
}
coeff: 1.0
delta: 1.0
}
layers { layers {
name: "huber_probs" name: "huber_probs"
type: "data" type: "data"
...@@ -300,6 +314,7 @@ output_layer_names: "__rank_cost_0__" ...@@ -300,6 +314,7 @@ output_layer_names: "__rank_cost_0__"
output_layer_names: "__lambda_cost_0__" output_layer_names: "__lambda_cost_0__"
output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_0__"
output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_regression_cost_0__"
output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__huber_classification_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__" output_layer_names: "__sum_cost_0__"
...@@ -324,6 +339,7 @@ sub_models { ...@@ -324,6 +339,7 @@ sub_models {
layer_names: "__lambda_cost_0__" layer_names: "__lambda_cost_0__"
layer_names: "__cross_entropy_0__" layer_names: "__cross_entropy_0__"
layer_names: "__cross_entropy_with_selfnorm_0__" layer_names: "__cross_entropy_with_selfnorm_0__"
layer_names: "__huber_regression_cost_0__"
layer_names: "huber_probs" layer_names: "huber_probs"
layer_names: "huber_label" layer_names: "huber_label"
layer_names: "__huber_classification_cost_0__" layer_names: "__huber_classification_cost_0__"
...@@ -349,6 +365,7 @@ sub_models { ...@@ -349,6 +365,7 @@ sub_models {
output_layer_names: "__lambda_cost_0__" output_layer_names: "__lambda_cost_0__"
output_layer_names: "__cross_entropy_0__" output_layer_names: "__cross_entropy_0__"
output_layer_names: "__cross_entropy_with_selfnorm_0__" output_layer_names: "__cross_entropy_with_selfnorm_0__"
output_layer_names: "__huber_regression_cost_0__"
output_layer_names: "__huber_classification_cost_0__" output_layer_names: "__huber_classification_cost_0__"
output_layer_names: "__multi_binary_label_cross_entropy_0__" output_layer_names: "__multi_binary_label_cross_entropy_0__"
output_layer_names: "__sum_cost_0__" output_layer_names: "__sum_cost_0__"
......
...@@ -33,6 +33,8 @@ outputs( ...@@ -33,6 +33,8 @@ outputs(
input=probs, label=xe_label), input=probs, label=xe_label),
cross_entropy_with_selfnorm( cross_entropy_with_selfnorm(
input=probs, label=xe_label), input=probs, label=xe_label),
huber_regression_cost(
input=seq_in, label=labels),
huber_classification_cost( huber_classification_cost(
input=data_layer( input=data_layer(
name='huber_probs', size=1), name='huber_probs', size=1),
......
...@@ -141,12 +141,13 @@ class CostLayerTest(unittest.TestCase): ...@@ -141,12 +141,13 @@ class CostLayerTest(unittest.TestCase):
cost8 = layer.rank_cost(left=score, right=score, label=score) cost8 = layer.rank_cost(left=score, right=score, label=score)
cost9 = layer.lambda_cost(input=inference, score=score) cost9 = layer.lambda_cost(input=inference, score=score)
cost10 = layer.sum_cost(input=inference) cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_classification_cost(input=score, label=label) cost11 = layer.huber_regression_cost(input=score, label=label)
cost12 = layer.huber_classification_cost(input=score, label=label)
print layer.parse_network([cost1, cost2]) print layer.parse_network([cost1, cost2])
print layer.parse_network([cost3, cost4]) print layer.parse_network([cost3, cost4])
print layer.parse_network([cost5, cost6]) print layer.parse_network([cost5, cost6])
print layer.parse_network([cost7, cost8, cost9, cost10, cost11]) print layer.parse_network([cost7, cost8, cost9, cost10, cost11, cost12])
crf = layer.crf(input=inference, label=label) crf = layer.crf(input=inference, label=label)
crf_decoding = layer.crf_decoding(input=inference, size=3) crf_decoding = layer.crf_decoding(input=inference, size=3)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册