Commit 1fd00758 authored by dangqingqing

SmoothL1 loss

Parent 68b958c3
@@ -498,6 +498,12 @@ hsigmoid
     :members: hsigmoid
     :noindex:
 
+smooth_l1
+---------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: smooth_l1
+    :noindex:
+
 Check Layer
 ============
......
@@ -217,7 +217,7 @@ void SmoothL1CostLayer::forwardImp(Matrix& output,
     targetCpu->copyFrom(target);
     outputCpu->copyFrom(output);
     labelCpu->copyFrom(*label.value);
-    targetCpu->smoothL1(*outputCpu, *(labelCpu));
+    targetCpu->smoothL1(*outputCpu, *labelCpu);
     target.copyFrom(*targetCpu);
   } else {
     target.smoothL1(output, *label.value);
......
@@ -91,8 +91,8 @@ public:
  *
  * [1] Jacob Devlin, Rabih Zbib, Zhongqiang Huang, Thomas Lamar,
  * Richard Schwartz, and John Makhoul. Fast and robust neural
- * network joint models for statistical machine translation.
- * In Proceedings of the ACL 2014 Conference.
+ * network joint models for statistical machine translation. In
+ * Proceedings of the ACL 2014 Conference.
  */
 class MultiClassCrossEntropyWithSelfNorm : public CostLayer {
 public:
@@ -164,9 +164,11 @@ public:
  * tasks.
  * \f[
  * L =
- *    (output - label)^2 * 0.5  / -1 < (output - label) < 1 /
- *    (output - label) - 0.5    / otherwise /
+ *    0.5 * x^2    if  / -1 < |x| < 1 /
+ *    |x| - 0.5    / otherwise /
  * \f]
+ *
+ * x = output - label
  */
 class SmoothL1CostLayer : public CostLayer {
 public:
......
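A minimal standalone sketch (plain Python, not part of this commit) of the piecewise function described in the updated comment, with x = output - label:

def smooth_l1(x):
    # 0.5 * x^2 when |x| < 1, |x| - 0.5 otherwise; the two pieces meet at |x| == 1
    ax = abs(x)
    return 0.5 * ax * ax if ax < 1.0 else ax - 0.5

# e.g. smooth_l1(0.5) == 0.125 and smooth_l1(2.0) == 1.5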
@@ -1685,7 +1685,7 @@ TEST(Layer, smooth_l1) {
   config.layerConfig.add_inputs();
   for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
+    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false);
   }
 }
......
@@ -3616,17 +3616,18 @@ void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(label.getWidth(), dim);
   CHECK_EQ(getWidth(), (size_t)1);
-  real* out = output.getData();
   real* cost = getData();
+  real* out = output.getData();
   real* lbl = label.getData();
 
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
-      cost[j] = std::fabs(out[j] - lbl[j]);
-      if (cost[j] < 1.0)
-        cost[j] = 0.5 * cost[j] * cost[j];
+      real absVal = std::fabs(out[j] - lbl[j]);
+      if (absVal < 1.0)
+        cost[i] += 0.5 * absVal * absVal;
       else
-        cost[j] = cost[j] - 0.5;
+        cost[i] += absVal - 0.5;
     }
   }
 }
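For reference, the per-sample accumulation that the rewritten loop performs can be expressed as a numpy sketch (an illustration only, not Paddle code; output and label are assumed to be (numSamples, dim) arrays and the cost column (numSamples, 1)):

import numpy as np

def smooth_l1_cost(output, label):
    # elementwise smooth L1, then one scalar cost per sample (getWidth() == 1 above)
    diff = np.abs(output - label)
    per_elem = np.where(diff < 1.0, 0.5 * diff * diff, diff - 0.5)
    return per_elem.sum(axis=1, keepdims=True)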
@@ -3640,17 +3641,20 @@ void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
   CHECK_EQ(label.getHeight(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(label.getWidth(), dim);
-  CHECK_EQ(getWidth(), (size_t)1);
+  CHECK_EQ(getWidth(), dim);
   real* out = output.getData();
-  real* cost = getData();
   real* lbl = label.getData();
+  real* grad = getData();
 
-  // f'(x) = x         if |x| < 1
-  //       = sign(x)   otherwise
-  for (size_t i = 0; i < numSamples; ++i, out += dim, cost += dim, lbl += dim) {
+  for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
     for (size_t j = 0; j < dim; ++j) {
-      cost[j] = out[j] - lbl[j];
-      if (std::fabs(cost[j]) >= 1) cost[j] = (0 < cost[j]) - (cost[j] < 0);
+      real val = out[j] - lbl[j];
+      if (std::fabs(val) < 1) {
+        grad[j] += val;
+      } else {
+        grad[j] += (real(0) < val) - (val < real(0));
+      }
     }
   }
 }
......
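The backward kernel now accumulates the elementwise gradient of the smooth L1 term into a (numSamples, dim) buffer. A numpy sketch of that gradient (illustration only, same assumed shapes as above; the kernel adds this to the existing buffer rather than returning it):

import numpy as np

def smooth_l1_grad(output, label):
    # d/d(output) of smooth_l1(output - label): x inside (-1, 1), sign(x) outside
    val = output - label
    return np.where(np.abs(val) < 1.0, val, np.sign(val))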
@@ -2119,6 +2119,7 @@ define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
 define_cost('HuberTwoClass', 'huber')
 define_cost('SumCost', 'sum_cost')
+define_cost('SmoothL1Cost', 'smooth_l1')
 
 @config_layer('hsigmoid')
......
...@@ -116,6 +116,7 @@ __all__ = [ ...@@ -116,6 +116,7 @@ __all__ = [
'spp_layer', 'spp_layer',
'pad_layer', 'pad_layer',
'eos_layer', 'eos_layer',
'smooth_l1_cost',
'layer_support', 'layer_support',
] ]
@@ -201,6 +202,7 @@ class LayerType(object):
     SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
     MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
     SUM_COST = "sum_cost"
+    SMOOTH_L1 = "smooth_l1"
 
     @staticmethod
     def is_layer_type(type_name):
@@ -5249,8 +5251,6 @@ def multi_binary_label_cross_entropy(input,
     :type input: LayerOutput
     :param label: The input label.
     :type input: LayerOutput
-    :param type: The type of cost.
-    :type type: basestring
     :param name: The name of this layers. It is not necessary.
     :type name: None|basestring
     :param coeff: The coefficient affects the gradient in the backward.
@@ -5279,3 +5279,56 @@ def multi_binary_label_cross_entropy(input,
         LayerType.MULTI_BIN_LABEL_CROSS_ENTROPY,
         parents=[input, label],
         size=1)
+
+
+@wrap_name_default()
+@layer_support()
+def smooth_l1_cost(input, label, name=None, layer_attr=None):
+    r"""
+    This cost is a smoothed L1 loss. It requires that the sizes of the
+    input and the label are equal. More details can be found by referring
+    to `Fast R-CNN <https://arxiv.org/pdf/1504.08083v2.pdf>`_.
+
+    .. math::
+
+        L = \sum_{i} smooth_{L1}(input_i - label_i)
+
+    in which
+
+    .. math::
+
+        smooth_{L1}(x) =
+        \begin{cases}
+        0.5 x^2 & \text{if } |x| < 1 \\
+        |x| - 0.5 & \text{otherwise}
+        \end{cases}
+
+    .. code-block:: python
+
+       cost = smooth_l1_cost(input=input_layer,
+                             label=label_layer)
+
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param label: The input label.
+    :type label: LayerOutput
+    :param name: The name of this layer. It is optional.
+    :type name: None|basestring
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    assert isinstance(label, LayerOutput)
+    assert input.size == label.size
+
+    Layer(
+        name=name,
+        type=LayerType.SMOOTH_L1,
+        inputs=[input.name, label.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name, LayerType.SMOOTH_L1, parents=[input, label], size=1)
@@ -5,6 +5,6 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
 test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape)
+test_seq_concat_reshape test_smooth_l1)
 
 export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "label"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__smooth_l1_cost_0__"
type: "smooth_l1"
size: 1
active_type: ""
inputs {
input_layer_name: "input"
}
inputs {
input_layer_name: "label"
}
coeff: 1.0
}
input_layer_names: "input"
input_layer_names: "label"
output_layer_names: "__smooth_l1_cost_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "label"
layer_names: "__smooth_l1_cost_0__"
input_layer_names: "input"
input_layer_names: "label"
output_layer_names: "__smooth_l1_cost_0__"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
lbl = data_layer(name='label', size=300)
smooth_l1 = smooth_l1_cost(input=data, label=lbl)
outputs(smooth_l1)