Commit 7ff9764f authored by gaoyuan

smooth_l1_loss_layer

Parent 515543ab
......@@ -192,6 +192,65 @@ void SumOfSquaresCostLayer::backwardImp(Matrix& output,
outputG.sumOfSquaresBp(output, *label.value);
}

//
// class SmoothL1CostLayer
//

REGISTER_LAYER(smooth_l1, SmoothL1CostLayer);

bool SmoothL1CostLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  return CostLayer::init(layerMap, parameterMap);
}

void SmoothL1CostLayer::forwardImp(Matrix& output,
                                   Argument& label,
                                   Matrix& target) {
  MatrixPtr targetCpu, labelCpu, outputCpu;
  if (useGpu_) {
    // smoothL1 is implemented only for CpuMatrix: stage the GPU data
    // through host matrices, compute on CPU, then copy the cost back.
    Matrix::resizeOrCreate(
        targetCpu, target.getHeight(), target.getWidth(), false, false);
    Matrix::resizeOrCreate(
        outputCpu, output.getHeight(), output.getWidth(), false, false);
    Matrix::resizeOrCreate(labelCpu,
                           label.value->getHeight(),
                           label.value->getWidth(),
                           false,
                           false);
    targetCpu->copyFrom(target);
    outputCpu->copyFrom(output);
    labelCpu->copyFrom(*label.value);
    targetCpu->smoothL1(*outputCpu, *labelCpu);
    target.copyFrom(*targetCpu);
  } else {
    target.smoothL1(output, *label.value);
  }
}

void SmoothL1CostLayer::backwardImp(Matrix& output,
                                    Argument& label,
                                    Matrix& outputG) {
  MatrixPtr outputGCpu, labelCpu, outputCpu;
  if (useGpu_) {
    // Same CPU fallback as the forward pass: copy in, compute, copy back.
    Matrix::resizeOrCreate(
        outputGCpu, outputG.getHeight(), outputG.getWidth(), false, false);
    Matrix::resizeOrCreate(
        outputCpu, output.getHeight(), output.getWidth(), false, false);
    Matrix::resizeOrCreate(labelCpu,
                           label.value->getHeight(),
                           label.value->getWidth(),
                           false,
                           false);
    outputGCpu->copyFrom(outputG);
    outputCpu->copyFrom(output);
    labelCpu->copyFrom(*label.value);
    outputGCpu->smoothL1Bp(*outputCpu, *labelCpu);
    outputG.copyFrom(*outputGCpu);
  } else {
    outputG.smoothL1Bp(output, *label.value);
  }
}

//
// class RankingCost
//
......
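Both passes above use the same host-fallback pattern: smoothL1 and smoothL1Bp are implemented only on CpuMatrix (the base-class versions LOG(FATAL); see the Matrix.h hunk below), so when useGpu_ is set the layer resizes CPU scratch matrices, copies the GPU data in, runs the CPU kernel, and copies the result back. A minimal standalone sketch of that idea, with hypothetical HostBuf/DeviceBuf types standing in for CpuMatrix/GpuMatrix (not the PaddlePaddle API):

#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical stand-ins for CpuMatrix/GpuMatrix; copyToHost/copyFromHost
// play the role of Matrix::copyFrom() in the layer above.
struct HostBuf { std::vector<float> v; };
struct DeviceBuf {
  std::vector<float> v;  // pretend this memory lives on the GPU
  HostBuf copyToHost() const { return {v}; }
  void copyFromHost(const HostBuf& h) { v = h.v; }
};

// CPU-only kernel: element-wise smooth L1 cost, mirroring CpuMatrix::smoothL1.
HostBuf smoothL1Cpu(const HostBuf& out, const HostBuf& lbl) {
  HostBuf cost{std::vector<float>(out.v.size())};
  for (std::size_t i = 0; i < out.v.size(); ++i) {
    float a = std::fabs(out.v[i] - lbl.v[i]);
    cost.v[i] = a < 1.f ? 0.5f * a * a : a - 0.5f;
  }
  return cost;
}

// Fallback wrapper: stage GPU data through the host, compute, copy back.
void smoothL1Forward(bool useGpu, const DeviceBuf& out, const DeviceBuf& lbl,
                     DeviceBuf& cost) {
  if (useGpu) {
    cost.copyFromHost(smoothL1Cpu(out.copyToHost(), lbl.copyToHost()));
  } else {
    cost.v = smoothL1Cpu(HostBuf{out.v}, HostBuf{lbl.v}).v;  // direct CPU path
  }
}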
......@@ -159,6 +159,29 @@ public:
Matrix& outputGrad) override;
};

/**
 * This cost layer computes the smooth L1 loss for real-valued regression
 * tasks.
 * \f[
 * L(x) =
 *   \begin{cases}
 *     0.5 x^2   & \text{if } |x| < 1 \\
 *     |x| - 0.5 & \text{otherwise}
 *   \end{cases},
 *   \quad x = \mathrm{output} - \mathrm{label}
 * \f]
 */
class SmoothL1CostLayer : public CostLayer {
public:
  explicit SmoothL1CostLayer(const LayerConfig& config) : CostLayer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forwardImp(Matrix& output, Argument& label, Matrix& cost) override;

  void backwardImp(Matrix& outputValue,
                   Argument& label,
                   Matrix& outputGrad) override;
};

/**
 * A cost layer for learning to rank (LTR) tasks. This layer contains at least
 * three inputs.
......
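For reference, the gradient that backwardImp propagates follows directly from the piecewise loss in the SmoothL1CostLayer comment above, and it matches the f'(x) note in the CPU kernel further down. Unlike plain L1, both the value and the slope are continuous at |x| = 1:

\[
\frac{\partial L}{\partial x} =
\begin{cases}
  x & \text{if } |x| < 1 \\
  \operatorname{sign}(x) & \text{otherwise}
\end{cases}
\qquad \text{with } x = \mathrm{output} - \mathrm{label}.
\]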
......@@ -1623,6 +1623,20 @@ TEST(Layer, PadLayer) {
}
}

TEST(Layer, smooth_l1) {
  TestConfig config;
  config.layerConfig.set_type("smooth_l1");

  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
  config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 1, 0});
  config.layerConfig.add_inputs();
  config.layerConfig.add_inputs();

  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "smooth_l1", 100, false, useGpu, false, 2.0);
  }
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
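testLayerGrad checks the layer's analytic gradient against a finite-difference estimate of the cost, for both the CPU and GPU paths. The same check applied to the scalar smooth L1 function, as a small standalone sketch (independent of the gtest / testLayerGrad harness):

#include <cmath>
#include <cstdio>
#include <initializer_list>

// Scalar smooth L1 loss and its analytic derivative, as computed element-wise
// by CpuMatrix::smoothL1 / smoothL1Bp with x = output - label.
double smoothL1(double x) {
  double a = std::fabs(x);
  return a < 1.0 ? 0.5 * a * a : a - 0.5;
}
double smoothL1Grad(double x) {
  return std::fabs(x) < 1.0 ? x : (x > 0 ? 1.0 : -1.0);
}

int main() {
  const double eps = 1e-6;
  for (double x : {-2.0, -0.7, 0.3, 1.5}) {
    // Central finite difference -- the same idea testLayerGrad relies on.
    double numeric = (smoothL1(x + eps) - smoothL1(x - eps)) / (2 * eps);
    std::printf("x=%5.2f  analytic=%8.5f  numeric=%8.5f\n",
                x, smoothL1Grad(x), numeric);
  }
  return 0;
}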
......@@ -3590,6 +3590,55 @@ void CpuMatrix::sumOfSquaresBp(Matrix& output, Matrix& label) {
}
}

void CpuMatrix::smoothL1(Matrix& output, Matrix& label) {
  CHECK(output.useGpu_ == false && label.useGpu_ == false)
      << "Matrix types are not equal";

  size_t numSamples = getHeight();
  size_t dim = output.getWidth();
  CHECK_EQ(label.getHeight(), numSamples);
  CHECK_EQ(output.getHeight(), numSamples);
  CHECK_EQ(label.getWidth(), dim);
  CHECK_EQ(getWidth(), (size_t)1);

  real* out = output.getData();
  real* cost = getData();
  real* lbl = label.getData();

  // This matrix has a single column, so sum the per-dimension smooth L1
  // terms into one cost value per sample.
  for (size_t i = 0; i < numSamples; ++i, out += dim, lbl += dim) {
    cost[i] = 0;
    for (size_t j = 0; j < dim; ++j) {
      real absVal = std::fabs(out[j] - lbl[j]);
      if (absVal < 1.0)
        cost[i] += 0.5 * absVal * absVal;
      else
        cost[i] += absVal - 0.5;
    }
  }
}

void CpuMatrix::smoothL1Bp(Matrix& output, Matrix& label) {
  CHECK(output.useGpu_ == false && label.useGpu_ == false)
      << "Matrix types are not equal";

  size_t numSamples = getHeight();
  size_t dim = output.getWidth();
  CHECK_EQ(label.getHeight(), numSamples);
  CHECK_EQ(output.getHeight(), numSamples);
  CHECK_EQ(label.getWidth(), dim);
  // The gradient matrix holds one entry per output element.
  CHECK_EQ(getWidth(), dim);

  real* out = output.getData();
  real* grad = getData();
  real* lbl = label.getData();

  // f'(x) = x       if |x| < 1
  //       = sign(x) otherwise
  for (size_t i = 0; i < numSamples;
       ++i, out += dim, grad += dim, lbl += dim) {
    for (size_t j = 0; j < dim; ++j) {
      grad[j] = out[j] - lbl[j];
      // (0 < x) - (x < 0) evaluates to sign(x).
      if (std::fabs(grad[j]) >= 1) grad[j] = (0 < grad[j]) - (grad[j] < 0);
    }
  }
}

void CpuMatrix::tanh(Matrix& output) {
CHECK(isContiguous());
CHECK(output.isContiguous());
......
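One detail worth noting in smoothL1Bp: the expression (0 < x) - (x < 0) is a compact way to compute sign(x), because each comparison converts to 0 or 1. A tiny standalone check of the idiom:

#include <cassert>

// (0 < x) and (x < 0) are bools converted to 0/1, so their difference is
// +1 for positive x, -1 for negative x, and 0 at x == 0.
inline int sign(double x) { return (0 < x) - (x < 0); }

int main() {
  assert(sign(3.5) == 1);
  assert(sign(-0.2) == -1);
  assert(sign(0.0) == 0);
  return 0;
}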
......@@ -783,6 +783,14 @@ public:
    LOG(FATAL) << "Not implemented";
  }

  virtual void smoothL1(Matrix& output, Matrix& label) {
    LOG(FATAL) << "Not implemented";
  }

  virtual void smoothL1Bp(Matrix& outputV, Matrix& label) {
    LOG(FATAL) << "Not implemented";
  }

  virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; }

  virtual void tanhDerivative(Matrix& output) {
......@@ -1720,6 +1728,9 @@ public:
  /// gradient of sumOfSquares.
  void sumOfSquaresBp(Matrix& outputV, Matrix& label);

  /// smooth L1 loss.
  void smoothL1(Matrix& output, Matrix& label);
  /// gradient of smoothL1.
  void smoothL1Bp(Matrix& output, Matrix& label);

  void tanh(Matrix& output);
  void tanhDerivative(Matrix& output);
......
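The Matrix base class declares smoothL1 and smoothL1Bp with LOG(FATAL) defaults, and only CpuMatrix overrides them; that is why SmoothL1CostLayer stages GPU data through CPU matrices instead of calling the kernels on a GpuMatrix. A schematic of that dispatch with simplified stand-in classes (not the actual PaddlePaddle hierarchy):

#include <cstdio>
#include <cstdlib>

struct Matrix {
  virtual ~Matrix() = default;
  // Default behavior: abort, mirroring LOG(FATAL) << "Not implemented".
  virtual void smoothL1(const Matrix& output, const Matrix& label) {
    std::fputs("Not implemented\n", stderr);
    std::abort();
  }
};

struct CpuMatrix : Matrix {
  // Only the CPU specialization provides a real kernel.
  void smoothL1(const Matrix& output, const Matrix& label) override {
    (void)output;
    (void)label;  // element-wise loop elided; see CpuMatrix::smoothL1 above
  }
};

struct GpuMatrix : Matrix {};  // inherits the fatal default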