From 7ff9764fc093466f64e6d6830c547e6d6af1c92c Mon Sep 17 00:00:00 2001
From: gaoyuan
Date: Thu, 9 Mar 2017 17:19:44 +0800
Subject: [PATCH] smooth_l1_loss_layer

---
 paddle/gserver/layers/CostLayer.cpp     | 59 +++++++++++++++++++++++++
 paddle/gserver/layers/CostLayer.h       | 23 ++++++++++
 paddle/gserver/tests/test_LayerGrad.cpp | 14 ++++++
 paddle/math/Matrix.cpp                  | 52 ++++++++++++++++++++
 paddle/math/Matrix.h                    | 11 +++++
 5 files changed, 159 insertions(+)

diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp
index 998b8d7d303..e2a4153bad7 100644
--- a/paddle/gserver/layers/CostLayer.cpp
+++ b/paddle/gserver/layers/CostLayer.cpp
@@ -192,6 +192,65 @@ void SumOfSquaresCostLayer::backwardImp(Matrix& output,
   outputG.sumOfSquaresBp(output, *label.value);
 }
 
+//
+// class SmoothL1CostLayer
+//
+
+REGISTER_LAYER(smooth_l1, SmoothL1CostLayer);
+
+bool SmoothL1CostLayer::init(const LayerMap& layerMap,
+                             const ParameterMap& parameterMap) {
+  return CostLayer::init(layerMap, parameterMap);
+}
+
+void SmoothL1CostLayer::forwardImp(Matrix& output,
+                                   Argument& label,
+                                   Matrix& target) {
+  MatrixPtr targetCpu, labelCpu, outputCpu;
+  if (useGpu_) {
+    Matrix::resizeOrCreate(
+        targetCpu, target.getHeight(), target.getWidth(), false, false);
+    Matrix::resizeOrCreate(
+        outputCpu, output.getHeight(), output.getWidth(), false, false);
+    Matrix::resizeOrCreate(labelCpu,
+                           label.value->getHeight(),
+                           label.value->getWidth(),
+                           false,
+                           false);
+    targetCpu->copyFrom(target);
+    outputCpu->copyFrom(output);
+    labelCpu->copyFrom(*label.value);
+    targetCpu->smoothL1(*outputCpu, *labelCpu);
+    target.copyFrom(*targetCpu);
+  } else {
+    target.smoothL1(output, *label.value);
+  }
+}
+
+void SmoothL1CostLayer::backwardImp(Matrix& output,
+                                    Argument& label,
+                                    Matrix& outputG) {
+  MatrixPtr outputGCpu, labelCpu, outputCpu;
+  if (useGpu_) {
+    Matrix::resizeOrCreate(
+        outputGCpu, outputG.getHeight(), outputG.getWidth(), false, false);
+    Matrix::resizeOrCreate(
+        outputCpu, output.getHeight(), output.getWidth(), false, false);
+    Matrix::resizeOrCreate(labelCpu,
+                           label.value->getHeight(),
+                           label.value->getWidth(),
+                           false,
+                           false);
+    outputGCpu->copyFrom(outputG);
+    outputCpu->copyFrom(output);
+    labelCpu->copyFrom(*label.value);
+    outputGCpu->smoothL1Bp(*outputCpu, *labelCpu);
+    outputG.copyFrom(*outputGCpu);
+  } else {
+    outputG.smoothL1Bp(output, *label.value);
+  }
+}
+
 //
 // class RankingCost
 //

diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h
index b3045e0b313..2f85fd3eca8 100644
--- a/paddle/gserver/layers/CostLayer.h
+++ b/paddle/gserver/layers/CostLayer.h
@@ -159,6 +159,29 @@ public:
                    Matrix& outputGrad) override;
 };
 
+/**
+ * This cost layer computes the smooth L1 loss for real-valued regression
+ * tasks.
+ * \f[
+ * L(x) = \begin{cases} 0.5 x^2 & |x| < 1 \\ |x| - 0.5 & \text{otherwise} \end{cases}
+ * \f]
+ * where \f$x\f$ is the element-wise difference between the layer output
+ * and the label.
+ */
+class SmoothL1CostLayer : public CostLayer {
+public:
+  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;
+
+  void backwardImp(Matrix& outputValue,
+                   Argument& label,
+                   Matrix& outputGrad) override;
+};
+
 /**
  * A cost layer for learning to rank (LTR) task. This layer contains at least
  * three inputs.
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index 14d9db52470..28a06097339 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -1623,6 +1623,20 @@ TEST(Layer, PadLayer) {
   }
 }
 
+TEST(Layer, smooth_l1) {
+  TestConfig config;
+  config.layerConfig.set_type("smooth_l1");
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
+  config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 1, 0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
+  }
+}
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
   initMain(argc, argv);

diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 07450bfb0ef..9eead5b62c6 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -3590,6 +3590,58 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) {
   }
 }
 
+void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
+  CHECK(output.useGpu_ == false && label.useGpu_ == false)
+      << "Matrix types are not equal";
+
+  size_t numSamples = getHeight();
+  size_t dim = output.getWidth();
+  CHECK_EQ(label.getHeight(), numSamples);
+  CHECK_EQ(output.getHeight(), numSamples);
+  CHECK_EQ(label.getWidth(), dim);
+  CHECK_EQ(getWidth(), (size_t)1);  // one cost value per sample
+  real* out = output.getData();
+  real* cost = getData();
+  real* lbl = label.getData();
+
+  // f(x) = 0.5 * x^2   if |x| < 1
+  //      = |x| - 0.5   otherwise
+  for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
+    cost[i] = 0;
+    for (size_t j = 0; j < dim; ++j) {
+      real absVal = std::fabs(out[j] - lbl[j]);
+      if (absVal < 1.0)
+        cost[i] += 0.5 * absVal * absVal;
+      else
+        cost[i] += absVal - 0.5;
+    }
+  }
+}
+
+void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
+  CHECK(output.useGpu_ == false && label.useGpu_ == false)
+      << "Matrix types are not equal";
+
+  size_t numSamples = getHeight();
+  size_t dim = output.getWidth();
+  CHECK_EQ(label.getHeight(), numSamples);
+  CHECK_EQ(output.getHeight(), numSamples);
+  CHECK_EQ(label.getWidth(), dim);
+  CHECK_EQ(getWidth(), dim);  // gradient has the same shape as output
+  real* out = output.getData();
+  real* grad = getData();
+  real* lbl = label.getData();
+
+  // f'(x) = x         if |x| < 1
+  //       = sign(x)   otherwise
+  for (size_t i = 0; i < numSamples; ++i, out += dim, grad += dim, lbl += dim) {
+    for (size_t j = 0; j < dim; ++j) {
+      grad[j] = out[j] - lbl[j];
+      if (std::fabs(grad[j]) >= 1) grad[j] = (0 < grad[j]) - (grad[j] < 0);
+    }
+  }
+}
+
 void CpuMatrix::tanh(Matrix& output) {
   CHECK(isContiguous());
   CHECK(output.isContiguous());

diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h
index d0ba2e93fea..dbdb6296145 100644
--- a/paddle/math/Matrix.h
+++ b/paddle/math/Matrix.h
@@ -783,6 +783,14 @@ public:
     LOG(FATAL) << "Not implemented";
   }
 
+  virtual void smoothL1(Matrix& output, Matrix& label) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  virtual void smoothL1Bp(Matrix& outputV, Matrix& label) {
+    LOG(FATAL) << "Not implemented";
+  }
+
   virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; }
 
   virtual void tanhDerivative(Matrix& output) {
@@ -1720,6 +1728,9 @@ public:
   /// gradient of sumOfSquares.
   void sumOfSquaresBp(Matrix& outputV, Matrix& label);
 
+  void smoothL1(Matrix& output, Matrix& label);
+  void smoothL1Bp(Matrix& output, Matrix& label);
+
   void tanh(Matrix& output);
 
   void tanhDerivative(Matrix& output);
-- 
GitLab
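Editor's note: for readers who want to check the math independently of the patch, below is a minimal standalone sketch of the element-wise function and derivative the new CpuMatrix kernels compute, with a central-difference check in the spirit of testLayerGrad. It is not part of the patch; the helper names smoothL1Ref and smoothL1GradRef are illustrative only.

#include <cmath>
#include <cstdio>

// Reference smooth L1: quadratic near zero, linear elsewhere,
// with the transition at |x| == 1.
static double smoothL1Ref(double x) {
  double a = std::fabs(x);
  return a < 1.0 ? 0.5 * a * a : a - 0.5;
}

// Its derivative: x inside (-1, 1), sign(x) outside.
static double smoothL1GradRef(double x) {
  if (std::fabs(x) < 1.0) return x;
  return (0.0 < x) - (x < 0.0);
}

int main() {
  const double eps = 1e-6;
  for (double x : {-2.0, -0.5, 0.0, 0.5, 2.0}) {
    // The central finite difference should match the analytic gradient.
    double fd = (smoothL1Ref(x + eps) - smoothL1Ref(x - eps)) / (2 * eps);
    std::printf("x=%5.2f  loss=%6.3f  grad=%5.2f  fd=%5.2f\n",
                x, smoothL1Ref(x), smoothL1GradRef(x), fd);
  }
  return 0;
}

Because the quadratic and linear pieces meet with matching value and slope at |x| = 1, the loss is continuously differentiable everywhere, which is what makes the numeric gradient check in the test well behaved.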