Commit e6db484d authored by Luo Tao

make clear that current huber_cost is for two-classification

Parent 493e1c04
@@ -575,10 +575,10 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
 //
 // Huber loss for robust 2-classes classification
 //
-REGISTER_LAYER(huber, HuberTwoClass);
+REGISTER_LAYER(huber_classification, HuberTwoClassification);
 
-bool HuberTwoClass::init(const LayerMap& layerMap,
-                         const ParameterMap& parameterMap) {
+bool HuberTwoClassification::init(const LayerMap& layerMap,
+                                  const ParameterMap& parameterMap) {
   CostLayer::init(layerMap, parameterMap);
   if (useGpu_) {
     tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,7 +589,9 @@ bool HuberTwoClass::init(const LayerMap& layerMap,
   return true;
 }
 
-void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
+void HuberTwoClassification::forwardImp(Matrix& output,
+                                        Argument& label,
+                                        Matrix& cost) {
   if (useGpu_) {
     for (size_t i = 0; i < inputLayers_.size(); i++) {
       tmpCpuInput_[i].resizeAndCopyFrom(
@@ -600,10 +602,11 @@ void HuberTwoClass::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
   forwardImpIn(output, label, cost);
 }
 
-void HuberTwoClass::forwardImpIn(Matrix& output,
-                                 Argument& label,
-                                 Matrix& target) {
+void HuberTwoClassification::forwardImpIn(Matrix& output,
+                                          Argument& label,
+                                          Matrix& target) {
   size_t numSamples = target.getHeight();
+  CHECK(label.ids);
   CHECK_EQ((*label.ids).getSize(), numSamples);
   CHECK_EQ(output.getHeight(), numSamples);
   CHECK_EQ(output.getWidth(), (size_t)1);
@@ -624,9 +627,9 @@ void HuberTwoClass::forwardImpIn(Matrix& output,
   target.copyFrom(cost.data(), numSamples);
 }
 
-void HuberTwoClass::backwardImp(Matrix& outputValue,
-                                Argument& label,
-                                Matrix& outputGrad) {
+void HuberTwoClassification::backwardImp(Matrix& outputValue,
+                                         Argument& label,
+                                         Matrix& outputGrad) {
   if (useGpu_) {
     backwardImpIn(
         *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
@@ -636,9 +639,9 @@ void HuberTwoClass::backwardImp(Matrix& outputValue,
   }
 }
 
-void HuberTwoClass::backwardImpIn(Matrix& output,
-                                  Argument& label,
-                                  Matrix& outputG) {
+void HuberTwoClassification::backwardImpIn(Matrix& output,
+                                           Argument& label,
+                                           Matrix& outputG) {
   size_t numSamples = output.getHeight();
   real* out = output.getData();
   real* grad = outputG.getData();
......
@@ -307,21 +307,17 @@ public:
 /**
  * Huber loss for robust 2-classes classification.
  *
- * For label={0, 1}, let y=2*label-1. Given output f, the loss is:
- * \f[
- * Loss =
- * \left\{\begin{matrix}
- * 4 * y * f & \textit{if} \ \ y* f < -1 \\
- * (1 - y * f)^2 & \textit{if} \ \ -1 < y * f < 1 \\
- * 0 & \textit{otherwise}
- * \end{matrix}\right.
- * \f]
+ * For label={0, 1}, let y=2*label-1. Given output f(x), the loss is:
+ * Loss = -4 * y * f, if y * f < -1 \\
+ * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\
+ * Loss = 0, otherwise
  */
-class HuberTwoClass : public CostLayer {
+class HuberTwoClassification : public CostLayer {
   std::vector<Argument> tmpCpuInput_;
 
 public:
-  explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {}
+  explicit HuberTwoClassification(const LayerConfig& config)
+      : CostLayer(config) {}
 
   bool init(const LayerMap& layerMap,
             const ParameterMap& parameterMap) override;
......
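For intuition, the piecewise loss documented in the header above can be restated in a few lines. The following is a minimal NumPy sketch of that formula (with the linear branch written as -4 * y * f so the loss stays non-negative, consistent with the modified-Huber docstring added below); the helper name and shapes are illustrative, not Paddle API:

```python
# Illustrative NumPy sketch of the two-class ("modified") Huber loss from the
# header comment above; hypothetical helper, not Paddle's CPU kernel.
import numpy as np

def huber_two_class_loss(f, label):
    """f: real-valued scores, shape (n,); label: {0, 1} ints, shape (n,)."""
    y = 2.0 * label - 1.0                  # map {0, 1} labels to {-1, +1}
    a = y * f                              # margin y * f(x)
    return np.where(a < -1.0, -4.0 * a,                # linear tail for outliers
                    np.where(a < 1.0, (1.0 - a) ** 2,  # quadratic zone
                             0.0))                     # confident and correct

# Confidently correct -> 0; small margin -> quadratic; confidently wrong -> linear.
print(huber_two_class_loss(np.array([2.0, 0.5, -2.0]), np.array([1, 1, 1])))
# [0.   0.25 8.  ]
```

The linear tail is what makes this loss "robust": a badly misclassified outlier contributes a gradient of bounded magnitude instead of a quadratically growing one.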
@@ -830,7 +830,7 @@ TEST(Layer, square_error_weighted) {
 
 TEST(Layer, huber_two_class) {
   TestConfig config;
-  config.layerConfig.set_type("huber");
+  config.layerConfig.set_type("huber_classification");
   config.biasSize = 0;
   config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
......
@@ -2255,7 +2255,7 @@ define_cost('PnpairValidation', 'pnpair-validation')
 define_cost('SumOfSquaresCostLayer', 'square_error')
 define_cost('MultiBinaryLabelCrossEntropy', 'multi_binary_label_cross_entropy')
 define_cost('SoftBinaryClassCrossEntropy', 'soft_binary_class_cross_entropy')
-define_cost('HuberTwoClass', 'huber')
+define_cost('HuberTwoClassification', 'huber_classification')
 define_cost('SumCost', 'sum_cost')
 define_cost('SmoothL1Cost', 'smooth_l1')
......
@@ -108,7 +108,7 @@ __all__ = [
     'sum_cost',
     'rank_cost',
     'lambda_cost',
-    'huber_cost',
+    'huber_classification_cost',
     'block_expand_layer',
     'maxout_layer',
     'out_prod_layer',
@@ -216,7 +216,7 @@ class LayerType(object):
     RANK_COST = 'rank-cost'
     LAMBDA_COST = 'lambda_cost'
-    HUBER = 'huber'
+    HUBER_CLASSIFICATION = 'huber_classification'
     CROSS_ENTROPY = 'multi-class-cross-entropy'
     CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
     SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
@@ -5605,16 +5605,26 @@ def sum_cost(input, name=None, layer_attr=None):
 @wrap_name_default()
 @layer_support()
-def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
+def huber_classification_cost(input,
+                              label,
+                              name=None,
+                              coeff=1.0,
+                              layer_attr=None):
     """
-    A loss layer for huber loss.
+    For classification purposes, a variant of the Huber loss called modified Huber
+    is sometimes used. Given a prediction f(x) (a real-valued classifier score) and
+    a true binary class label :math:`y\in \left \{-1, 1 \right \}`, the modified Huber
+    loss is defined as:
+
+    .. math::
+        loss = \max \left ( 0, 1-yf(x) \right )^2, yf(x)\geq -1
+        loss = -4yf(x), \text{otherwise}
 
     The example usage is:
 
     .. code-block:: python
 
-       cost = huber_cost(input=input_layer,
-                         label=label_layer)
+       cost = huber_classification_cost(input=input_layer, label=label_layer)
 
     :param input: The first input layer.
     :type input: LayerOutput.
@@ -5634,11 +5644,12 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
     assert input.size == 1
     Layer(
         name=name,
-        type=LayerType.HUBER,
+        type=LayerType.HUBER_CLASSIFICATION,
         inputs=[input.name, label.name],
        coeff=coeff,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
-    return LayerOutput(name, LayerType.HUBER, parents=[input, label], size=1)
+    return LayerOutput(
+        name, LayerType.HUBER_CLASSIFICATION, parents=[input, label], size=1)
 
 
 @wrap_name_default()
......
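The body of backwardImpIn is not shown in this diff, but the gradient follows directly from the piecewise definition in the docstring above. Below is a hand-derived NumPy sketch of that derivative, for reference only; it is not the layer's actual code:

```python
# Hand-derived gradient of the modified Huber loss w.r.t. the score f(x);
# illustrative only, not HuberTwoClassification::backwardImpIn.
import numpy as np

def huber_two_class_grad(f, label):
    y = 2.0 * label - 1.0          # {0, 1} labels -> {-1, +1}
    a = y * f                      # margin y * f(x)
    # The branches meet continuously: at a = -1 both give -4y; at a = 1 both give 0.
    return np.where(a < -1.0, -4.0 * y,
                    np.where(a < 1.0, -2.0 * y * (1.0 - a), 0.0))
```

The bounded slope of the linear branch (|grad| = 4) is the robustness property the C++ comment refers to.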
@@ -180,8 +180,8 @@ layers {
   active_type: ""
 }
 layers {
-  name: "__huber_cost_0__"
-  type: "huber"
+  name: "__huber_classification_cost_0__"
+  type: "huber_classification"
   size: 1
   active_type: ""
   inputs {
@@ -300,7 +300,7 @@ output_layer_names: "__rank_cost_0__"
 output_layer_names: "__lambda_cost_0__"
 output_layer_names: "__cross_entropy_0__"
 output_layer_names: "__cross_entropy_with_selfnorm_0__"
-output_layer_names: "__huber_cost_0__"
+output_layer_names: "__huber_classification_cost_0__"
 output_layer_names: "__multi_binary_label_cross_entropy_0__"
 output_layer_names: "__sum_cost_0__"
 output_layer_names: "__nce_layer_0__"
@@ -326,7 +326,7 @@ sub_models {
   layer_names: "__cross_entropy_with_selfnorm_0__"
   layer_names: "huber_probs"
   layer_names: "huber_label"
-  layer_names: "__huber_cost_0__"
+  layer_names: "__huber_classification_cost_0__"
   layer_names: "__multi_binary_label_cross_entropy_0__"
   layer_names: "__sum_cost_0__"
   layer_names: "__nce_layer_0__"
@@ -349,7 +349,7 @@ sub_models {
   output_layer_names: "__lambda_cost_0__"
   output_layer_names: "__cross_entropy_0__"
   output_layer_names: "__cross_entropy_with_selfnorm_0__"
-  output_layer_names: "__huber_cost_0__"
+  output_layer_names: "__huber_classification_cost_0__"
   output_layer_names: "__multi_binary_label_cross_entropy_0__"
   output_layer_names: "__sum_cost_0__"
   output_layer_names: "__nce_layer_0__"
......
@@ -33,7 +33,7 @@ outputs(
         input=probs, label=xe_label),
     cross_entropy_with_selfnorm(
         input=probs, label=xe_label),
-    huber_cost(
+    huber_classification_cost(
         input=data_layer(
             name='huber_probs', size=1),
         label=data_layer(
......
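Putting the rename together, a minimal v1-style trainer config using the new helper would look like the sketch below; layer names and sizes mirror the test config above and are illustrative:

```python
# Minimal config sketch using the renamed cost helper; mirrors the test
# config above, assuming the standard trainer_config_helpers imports.
from paddle.trainer_config_helpers import *

score = data_layer(name='huber_probs', size=1)  # real-valued classifier score
label = data_layer(name='huber_label', size=1)  # binary {0, 1} label

outputs(huber_classification_cost(input=score, label=label))
```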