From 27a99bfb1446171969da0219a6125a79c39eb582 Mon Sep 17 00:00:00 2001
From: Luo Tao
Date: Thu, 17 Aug 2017 18:10:37 +0800
Subject: [PATCH] Add base class for huber_regression_cost and
 huber_classification_cost

Introduce HuberCost, a CostLayer subclass that owns tmpCpuInput_ and the
useGpu_ handling in init() and forwardImp() (staging the layer inputs into
tmpCpuInput_ and synchronizing the default stream).

HuberTwoClassification now derives from HuberCost:
  * init() and forwardImp() delegate to the HuberCost implementations first;
  * forwardImpIn() and backwardImpIn() are folded into forwardImp() and
    backwardImp(); backwardImp() reads from tmpCpuInput_ when useGpu_ is set
    and copies the computed gradient back into outputG afterwards.

The layer is registered as "huber_classification" instead of "huber"; the
API documentation and the Python layer test are switched from huber_cost to
huber_classification_cost accordingly.

---
 doc/api/v2/config/layer.rst          |  6 +--
 paddle/gserver/layers/CostLayer.cpp  | 55 ++++++++++++----------------
 paddle/gserver/layers/CostLayer.h    | 27 ++++++++++----
 python/paddle/v2/tests/test_layer.py |  2 +-
 4 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst
index cb330ea5e..22a6b2ab8 100644
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -409,9 +409,9 @@ multi_binary_label_cross_entropy_cost
 ..  autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
     :noindex:
 
-huber_cost
-----------
-..  autoclass:: paddle.v2.layer.huber_cost
+huber_classification_cost
+-------------------------
+..  autoclass:: paddle.v2.layer.huber_classification_cost
     :noindex:
 
 lambda_cost
diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 138c86a6d..69cf39322 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -572,13 +572,8 @@ void MultiBinaryLabelCrossEntropy::backwardImp(Matrix& output,
   }
 }
 
-//
-// Huber loss for robust 2-classes classification
-//
-REGISTER_LAYER(huber, HuberTwoClassification);
-
-bool HuberTwoClassification::init(const LayerMap& layerMap,
-                                  const ParameterMap& parameterMap) {
+bool HuberCost::init(const LayerMap& layerMap,
+                     const ParameterMap& parameterMap) {
   CostLayer::init(layerMap, parameterMap);
   if (useGpu_) {
     tmpCpuInput_.reserve(inputLayers_.size());
@@ -589,9 +584,7 @@ bool HuberTwoClassification::init(const LayerMap& layerMap,
   return true;
 }
 
-void HuberTwoClassification::forwardImp(Matrix& output,
-                                        Argument& label,
-                                        Matrix& cost) {
+void HuberCost::forwardImp(Matrix& output, Argument& label, Matrix& cost) {
   if (useGpu_) {
     for (size_t i = 0; i < inputLayers_.size(); i++) {
       tmpCpuInput_[i].resizeAndCopyFrom(
@@ -599,12 +592,22 @@ void HuberTwoClassification::forwardImp(Matrix& output,
     }
     hl_stream_synchronize(HPPL_STREAM_DEFAULT);
   }
-  forwardImpIn(output, label, cost);
 }
 
-void HuberTwoClassification::forwardImpIn(Matrix& output,
-                                          Argument& label,
-                                          Matrix& target) {
+//
+// Huber loss for robust 2-classes classification
+//
+REGISTER_LAYER(huber_classification, HuberTwoClassification);
+
+bool HuberTwoClassification::init(const LayerMap& layerMap,
+                                  const ParameterMap& parameterMap) {
+  return HuberCost::init(layerMap, parameterMap);
+}
+
+void HuberTwoClassification::forwardImp(Matrix& output,
+                                        Argument& label,
+                                        Matrix& target) {
+  HuberCost::forwardImp(output, label, target);
   size_t numSamples = target.getHeight();
   CHECK(label.ids);
   CHECK_EQ((*label.ids).getSize(), numSamples);
@@ -627,25 +630,13 @@ void HuberTwoClassification::forwardImpIn(Matrix& output,
   target.copyFrom(cost.data(), numSamples);
 }
 
-void HuberTwoClassification::backwardImp(Matrix& outputValue,
+void HuberTwoClassification::backwardImp(Matrix& output,
                                          Argument& label,
-                                         Matrix& outputGrad) {
-  if (useGpu_) {
-    backwardImpIn(
-        *tmpCpuInput_[0].value, tmpCpuInput_[1], *tmpCpuInput_[0].grad);
-    outputGrad.copyFrom(*tmpCpuInput_[0].grad);
-  } else {
-    backwardImpIn(outputValue, label, outputGrad);
-  }
-}
-
-void HuberTwoClassification::backwardImpIn(Matrix& output,
-                                           Argument& label,
-                                           Matrix& outputG) {
+                                         Matrix& outputG) {
   size_t numSamples = output.getHeight();
-  real* out = output.getData();
-  real* grad = outputG.getData();
-  int* lbl = (*label.ids).getData();
+  real* out = useGpu_ ? tmpCpuInput_[0].value->getData() : output.getData();
+  int* lbl = useGpu_ ? tmpCpuInput_[1].ids->getData() : (*label.ids).getData();
+  real* grad = useGpu_ ? tmpCpuInput_[0].grad->getData() : outputG.getData();
   for (size_t i = 0; i < numSamples; ++i) {
     int y = 2 * lbl[i] - 1;
     if (y * out[i] < -1)
@@ -653,8 +644,8 @@ void HuberTwoClassification::backwardImpIn(Matrix& output,
     else if (y * out[i] < 1)
       grad[i] += -2 * (1 - y * out[i]) * y;
   }
+  if (useGpu_) outputG.copyFrom(grad, numSamples);
 }
-
 /**
  * This cost layer compute the sum of its input as loss.
  * \f[
diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h
index 77427b7a0..c006dc811 100644
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -304,6 +304,23 @@ public:
                    Matrix& outputGrad) override;
 };
 
+/**
+ * A base layer for HuberRegressionLoss and HuberTwoClassification.
+ */
+class HuberCost : public CostLayer {
+public:
+  std::vector<Argument> tmpCpuInput_;
+
+  explicit HuberCost(const LayerConfig& config) : CostLayer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
+
+  void backwardImp(Matrix& output, Argument& label, Matrix& grad) override {}
+};
+
 /**
  * Huber loss for robust 2-classes classification.
  *
@@ -312,25 +329,19 @@ public:
  * Loss = (1 - y * f)^2, if -1 < y * f < 1 \\
  * Loss = 0, otherwise
  */
-class HuberTwoClassification : public CostLayer {
-  std::vector<Argument> tmpCpuInput_;
-
+class HuberTwoClassification : public HuberCost {
 public:
   explicit HuberTwoClassification(const LayerConfig& config)
-      : CostLayer(config) {}
+      : HuberCost(config) {}
 
   bool init(const LayerMap& layerMap,
             const ParameterMap& parameterMap) override;
 
   void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
 
-  void forwardImpIn(Matrix& output, Argument& label, Matrix& cost);
-
   void backwardImp(Matrix& outputValue,
                    Argument& label,
                    Matrix& outputGrad) override;
-
-  void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad);
 };
 
 typedef std::shared_ptr<CostLayer> CostLayerPtr;
diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
index f2097e195..7373a55ce 100644
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@@ -141,7 +141,7 @@ class CostLayerTest(unittest.TestCase):
         cost8 = layer.rank_cost(left=score, right=score, label=score)
         cost9 = layer.lambda_cost(input=inference, score=score)
         cost10 = layer.sum_cost(input=inference)
-        cost11 = layer.huber_cost(input=score, label=label)
+        cost11 = layer.huber_classification_cost(input=score, label=label)
 
         print layer.parse_network([cost1, cost2])
         print layer.parse_network([cost3, cost4])
-- 
GitLab