提交 65969dad 编写于 作者: Y yangyaming

Add DetectionOutputLayer and MultiBoxLossLayer.

上级 14c0e71d
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DetectionOutputLayer.h"
namespace paddle {
REGISTER_LAYER(detection_output, DetectionOutputLayer);
/**
 * Initialize the layer: run the base-class initialization and cache the
 * detection-output settings stored on the first input's configuration.
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false if the base-class initialization fails, true otherwise.
 */
bool DetectionOutputLayer::init(const LayerMap& layerMap,
                                const ParameterMap& parameterMap) {
  // Do not continue configuring the layer on top of a failed base init.
  if (!Layer::init(layerMap, parameterMap)) return false;

  const auto& layerConf = config_.inputs(0).detection_output_conf();
  numClasses_ = layerConf.num_classes();
  inputNum_ = layerConf.input_num();
  nmsThreshold_ = layerConf.nms_threshold();
  confidenceThreshold_ = layerConf.confidence_threshold();
  nmsTopK_ = layerConf.nms_top_k();
  keepTopK_ = layerConf.keep_top_k();
  backgroundId_ = layerConf.background_id();
  return true;
}
/**
 * Forward pass of the detection output layer.
 *
 * Steps:
 *  1. Append all location and confidence inputs into two contiguous buffers
 *     using appendWithPermute(..., kNCHWToNHWC).
 *  2. When running on GPU, copy the buffers and the prior-box data to CPU
 *     matrices; everything after that reads the CPU copies.
 *  3. Apply softmax to the confidence buffer in place.
 *  4. Decode one bounding box per (sample, prior) pair.
 *  5. Let getDetectionIndices() select the detections to keep and write them
 *     to the layer output, which has numKept rows and 7 columns.
 *
 * @param passType forwarded unchanged to Layer::forward.
 */
void DetectionOutputLayer::forward(PassType passType) {
  Layer::forward(passType);
  size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();

  // Total number of elements over all location / confidence inputs.
  locSizeSum_ = 0;
  confSizeSum_ = 0;
  for (size_t n = 0; n < inputNum_; ++n) {
    const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
    const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));
    locSizeSum_ += inLoc->getElementCnt();
    confSizeSum_ += inConf->getElementCnt();
  }

  Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
  Matrix::resizeOrCreate(
      confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_);
  locBuffer_ = locTmpBuffer_;
  confBuffer_ = confTmpBuffer_;

  // Append every input to the shared buffers.
  size_t locOffset = 0;
  size_t confOffset = 0;
  auto& layerConf = config_.inputs(0).detection_output_conf();
  for (size_t n = 0; n < inputNum_; ++n) {
    const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
    const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));

    // Fall back to the configured shape when the input carries no frame
    // height / width.
    size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
    if (!height) height = layerConf.height();
    size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
    if (!width) width = layerConf.width();

    locOffset += appendWithPermute(*inLoc,
                                   height,
                                   width,
                                   locSizeSum_,
                                   locOffset,
                                   batchSize,
                                   *locBuffer_,
                                   kNCHWToNHWC);
    confOffset += appendWithPermute(*inConf,
                                    height,
                                    width,
                                    confSizeSum_,
                                    confOffset,
                                    batchSize,
                                    *confBuffer_,
                                    kNCHWToNHWC);
  }
  CHECK_EQ(locOffset, locSizeSum_ / batchSize);
  CHECK_EQ(confOffset, confSizeSum_ / batchSize);

  // The remaining steps read raw data pointers, so work on CPU copies when
  // the layer itself runs on GPU.
  MatrixPtr priorValue;
  if (useGpu_) {
    Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false);
    Matrix::resizeOrCreate(
        confCpuBuffer_, confSizeSum_ / numClasses_, numClasses_, false, false);
    MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer());
    Matrix::resizeOrCreate(
        priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false);
    locCpuBuffer_->copyFrom(*locTmpBuffer_);
    confCpuBuffer_->copyFrom(*confTmpBuffer_);
    priorCpuValue_->copyFrom(*priorTmpValue);
    locBuffer_ = locCpuBuffer_;
    confBuffer_ = confCpuBuffer_;
    priorValue = priorCpuValue_;
  } else {
    priorValue = getInputValue(*getPriorBoxLayer());
  }
  confBuffer_->softmax(*confBuffer_);

  // Each prior occupies kPriorStride values in the prior-box data and has
  // kLocStride location predictions per sample.
  const size_t kPriorStride = 8;
  const size_t kLocStride = 4;
  size_t numPriors = priorValue->getElementCnt() / kPriorStride;

  // Decode straight into the per-sample slots so that no per-sample vector
  // has to be copied afterwards.
  vector<vector<NormalizedBBox>> allDecodedBBoxes(batchSize);
  vector<real> locPredData;  // reused for every prior
  locPredData.reserve(kLocStride);
  for (size_t n = 0; n < batchSize; ++n) {
    vector<NormalizedBBox>& decodedBBoxes = allDecodedBBoxes[n];
    decodedBBoxes.reserve(numPriors);
    for (size_t i = 0; i < numPriors; ++i) {
      size_t priorOffset = i * kPriorStride;
      size_t locPredOffset = n * numPriors * kLocStride + i * kLocStride;

      vector<NormalizedBBox> priorBBoxVec;
      getBBoxFromPriorData(
          priorValue->getData() + priorOffset, 1, priorBBoxVec);
      vector<vector<real>> priorBBoxVar;
      getBBoxVarFromPriorData(
          priorValue->getData() + priorOffset, 1, priorBBoxVar);

      const real* locPred = locBuffer_->getData() + locPredOffset;
      locPredData.assign(locPred, locPred + kLocStride);

      NormalizedBBox bbox =
          decodeBBoxWithVar(priorBBoxVec[0], priorBBoxVar[0], locPredData);
      decodedBBoxes.push_back(bbox);
    }
  }

  // Select the detections to keep and emit them.
  vector<map<size_t, vector<size_t>>> allIndices;
  size_t numKept = getDetectionIndices(confBuffer_->getData(),
                                       numPriors,
                                       numClasses_,
                                       backgroundId_,
                                       batchSize,
                                       confidenceThreshold_,
                                       nmsTopK_,
                                       nmsThreshold_,
                                       keepTopK_,
                                       allDecodedBBoxes,
                                       &allIndices);
  resetOutput(numKept, 7);
  MatrixPtr outV = getOutputValue();
  getDetectionOutput(confBuffer_->getData(),
                     numKept,
                     numPriors,
                     numClasses_,
                     batchSize,
                     allIndices,
                     allDecodedBBoxes,
                     *outV);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <vector>
#include "DetectionUtil.h"
#include "Layer.h"
using std::vector;
using std::map;
using std::pair;
namespace paddle {
/**
* The detection output layer for a SSD detection task. This layer apply the
* Non-maximum suppression to the all predicted bounding box and keep the
* Top-K bounding boxes.
* - Input: This layer need three input layers: This first input layer
* is the priorbox layer. The rest two input layers are convolution
* layers for generating bbox location offset and the classification
* confidence.
* - Output: The predict bounding box location.
*/
class DetectionOutputLayer : public Layer {
public:
explicit DetectionOutputLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr) {}
protected:
inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }
inline LayerPtr getLocInputLayer(size_t index) {
return inputLayers_[1 + index];
}
inline LayerPtr getConfInputLayer(size_t index) {
return inputLayers_[1 + inputNum_ + index];
}
private:
size_t numClasses_; // number of classes
size_t inputNum_; // number of input layers
real nmsThreshold_;
real confidenceThreshold_;
size_t nmsTopK_;
size_t keepTopK_;
size_t backgroundId_;
size_t locSizeSum_;
size_t confSizeSum_;
MatrixPtr locBuffer_;
MatrixPtr confBuffer_;
MatrixPtr locTmpBuffer_;
MatrixPtr confTmpBuffer_;
MatrixPtr priorCpuValue_;
MatrixPtr locCpuBuffer_;
MatrixPtr confCpuBuffer_;
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MultiBoxLossLayer.h"
#include <float.h>
#include <vector>
#include "DataLayer.h"
using std::vector;
using std::map;
using std::pair;
namespace paddle {
REGISTER_LAYER(multibox_loss, MultiBoxLossLayer);
/**
 * Initialize the layer: run Layer::init and cache the multibox-loss settings
 * stored on the first input's configuration.
 *
 * @param layerMap      forwarded unchanged to Layer::init.
 * @param parameterMap  forwarded unchanged to Layer::init.
 * @return false if the base-class initialization fails, true otherwise.
 */
bool MultiBoxLossLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  // Do not continue configuring the layer on top of a failed base init.
  if (!Layer::init(layerMap, parameterMap)) return false;

  // Bind by reference: there is no need to copy the whole config message.
  const auto& layerConf = config_.inputs(0).multibox_loss_conf();
  numClasses_ = layerConf.num_classes();
  inputNum_ = layerConf.input_num();
  overlapThreshold_ = layerConf.overlap_threshold();
  negPosRatio_ = layerConf.neg_pos_ratio();
  negOverlap_ = layerConf.neg_overlap();
  backgroundId_ = layerConf.background_id();
  return true;
}
/**
 * Forward pass of the multibox loss layer.
 *
 * Steps:
 *  1. Append all location and confidence inputs into two contiguous buffers
 *     using appendWithPermute(..., kNCHWToNHWC).
 *  2. When running on GPU, copy the buffers, the prior boxes and the labels
 *     to CPU matrices; everything after that reads the CPU copies.
 *  3. Match prior boxes to ground-truth boxes and pick negative samples via
 *     generateMatchIndices().
 *  4. Location loss: smooth L1 between the predictions of matched priors and
 *     the encoded ground-truth boxes, divided by the number of matches.
 *  5. Confidence loss: softmax + one-hot cross entropy over matched priors
 *     and selected negatives, divided by the number of matches.
 *  6. Write locLoss_ + confLoss_ to every row of the (batchSize x 1) output.
 *
 * State kept for backward(): locDiff_, locGTData_, confProb_, confGTData_,
 * allMatchIndices_, allNegIndices_, numMatches_, numConf_ and numPriors_.
 *
 * @param passType forwarded unchanged to Layer::forward.
 */
void MultiBoxLossLayer::forward(PassType passType) {
  Layer::forward(passType);
  size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();
  resetOutput(batchSize, 1);

  // Total number of elements over all location / confidence inputs.
  locSizeSum_ = 0;
  confSizeSum_ = 0;
  for (size_t n = 0; n < inputNum_; ++n) {
    const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
    const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));
    locSizeSum_ += inLoc->getElementCnt();
    confSizeSum_ += inConf->getElementCnt();
  }

  // locBuffer layout:
  // | xmin1 | ymin1 | xmax1 | ymax1 | xmin2 ......
  Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
  locBuffer_ = locTmpBuffer_;

  // confBuffer layout:
  // | class1 score | class2 score | ... | classN score | class1 score | ......
  Matrix::resizeOrCreate(confTmpBuffer_, 1, confSizeSum_, false, useGpu_);
  confBuffer_ = confTmpBuffer_;

  // Concatenate location data and confidence score data.
  size_t locOffset = 0;
  size_t confOffset = 0;
  auto& layerConf = config_.inputs(0).multibox_loss_conf();
  for (size_t n = 0; n < inputNum_; ++n) {
    const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
    const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));

    // Fall back to the configured shape when the input carries no frame
    // height / width.
    size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
    if (!height) height = layerConf.height();
    size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
    if (!width) width = layerConf.width();

    locOffset += appendWithPermute(*inLoc,
                                   height,
                                   width,
                                   locSizeSum_,
                                   locOffset,
                                   batchSize,
                                   *locBuffer_,
                                   kNCHWToNHWC);
    confOffset += appendWithPermute(*inConf,
                                    height,
                                    width,
                                    confSizeSum_,
                                    confOffset,
                                    batchSize,
                                    *confBuffer_,
                                    kNCHWToNHWC);
  }
  CHECK_EQ(locOffset, locSizeSum_ / batchSize);
  CHECK_EQ(confOffset, confSizeSum_ / batchSize);

  // priorValue layout:
  // | xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var
  // | xmin2 | ......
  MatrixPtr priorValue;

  // labelValue layout:
  // | class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ......
  MatrixPtr labelValue;

  // Copy data from GPU to CPU if the layer runs on GPU.
  if (useGpu_) {
    Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false);
    Matrix::resizeOrCreate(confCpuBuffer_, 1, confSizeSum_, false, false);
    MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer());
    Matrix::resizeOrCreate(
        priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false);
    MatrixPtr labelTmpValue = getInputValue(*getLabelLayer());
    Matrix::resizeOrCreate(labelCpuValue_,
                           labelTmpValue->getHeight(),
                           labelTmpValue->getWidth(),
                           false,
                           false);

    locCpuBuffer_->copyFrom(*locTmpBuffer_);
    confCpuBuffer_->copyFrom(*confTmpBuffer_);
    priorCpuValue_->copyFrom(*priorTmpValue);
    labelCpuValue_->copyFrom(*labelTmpValue);

    locBuffer_ = locCpuBuffer_;
    confBuffer_ = confCpuBuffer_;
    priorValue = priorCpuValue_;
    labelValue = labelCpuValue_;
  } else {
    priorValue = getInputValue(*getPriorBoxLayer());
    labelValue = getInputValue(*getLabelLayer());
  }

  // Get max scores for each prior bbox. Used in negative mining.
  vector<vector<real>> allMaxConfScore;
  numPriors_ = priorValue->getElementCnt() / 8;
  getMaxConfidenceScores(confBuffer_->getData(),
                         batchSize,
                         numPriors_,
                         numClasses_,
                         backgroundId_,
                         &allMaxConfScore);

  // Match prior bbox to groundtruth bbox.
  Argument label = getInput(*getLabelLayer());
  // The sequence start positions tell where each sample's ground-truth boxes
  // begin; fail with a clear message instead of dereferencing a null pointer.
  CHECK(label.sequenceStartPositions)
      << "multibox_loss: the label input has no sequence start positions";
  const int* labelIndex = label.sequenceStartPositions->getData(false);
  size_t seqNum = label.getNumSequences();
  numMatches_ = 0;
  numNegs_ = 0;
  allMatchIndices_.clear();
  allNegIndices_.clear();

  pair<size_t, size_t> retPair = generateMatchIndices(*priorValue,
                                                      numPriors_,
                                                      *labelValue,
                                                      labelIndex,
                                                      seqNum,
                                                      allMaxConfScore,
                                                      batchSize,
                                                      overlapThreshold_,
                                                      negOverlap_,
                                                      negPosRatio_,
                                                      &allMatchIndices_,
                                                      &allNegIndices_);
  numMatches_ = retPair.first;
  numNegs_ = retPair.second;

  // BBox location L1 smooth loss.
  locLoss_ = 0.0;
  if (numMatches_ >= 1) {
    size_t count = 0;
    MatrixPtr locLossOutput;
    Matrix::resizeOrCreate(locLossOutput, numMatches_ * 4, 1, false, false);
    Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false);
    Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false);
    locDiff_->zeroMem();

    vector<real> locGTData;
    locGTData.reserve(numMatches_ * 4);
    const size_t locPerSample = locBuffer_->getElementCnt() / batchSize;
    for (size_t n = 0; n < batchSize; ++n) {
      for (size_t i = 0; i < numPriors_; ++i) {
        const int gtIdx = allMatchIndices_[n][i];
        if (gtIdx == -1) continue;  // match none

        // Collect the four location predictions of this matched prior.
        const real* locPred =
            locBuffer_->getData() + n * locPerSample + i * 4;
        for (size_t k = 0; k < 4; ++k) {
          locDiff_->getData()[count++] = locPred[k];
        }

        // Each prior occupies 8 values: 4 coordinates + 4 variances.
        size_t priorOffset = i * 8;
        vector<NormalizedBBox> priorBBoxVec;
        getBBoxFromPriorData(
            priorValue->getData() + priorOffset, 1, priorBBoxVec);
        vector<vector<real>> priorBBoxVar;
        getBBoxVarFromPriorData(
            priorValue->getData() + priorOffset, 1, priorBBoxVar);

        // Each ground-truth row has 6 values (see labelValue layout above).
        size_t labelOffset = (labelIndex[n] + gtIdx) * 6;
        vector<NormalizedBBox> gtBBoxVec;
        getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec);

        vector<real> gtEncode;
        encodeBBoxWithVar(
            priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode);
        locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end());
      }
    }

    // copyFrom reads numMatches_ * 4 values; make sure they really exist.
    CHECK_GE(locGTData.size(), numMatches_ * 4);
    locGTData_->copyFrom(&locGTData[0], numMatches_ * 4);
    locLossOutput->smoothL1(*locDiff_, *locGTData_, 0.0);
    locLoss_ = locLossOutput->getSum() / numMatches_;
  }

  // BBox confidence softmax loss.
  confLoss_ = 0;
  numConf_ = numMatches_ + numNegs_;
  if (numConf_ >= 1) {
    Matrix::resizeOrCreate(confProb_, numConf_, numClasses_, false, false);
    IVector::resizeOrCreate(confGTData_, numConf_, false);
    confProb_->zeroMem();

    size_t count = 0;
    for (size_t n = 0; n < batchSize; ++n) {
      // Matched (positive) priors: the target is the ground-truth class.
      for (size_t i = 0; i < numPriors_; ++i) {
        if (allMatchIndices_[n][i] == -1) continue;
        size_t labelOffset = (labelIndex[n] + allMatchIndices_[n][i]) * 6;
        const int gtLabel = (labelValue->getData() + labelOffset)[0];
        confGTData_->getData()[count] = gtLabel;

        size_t confPredOffset = n * numPriors_ * numClasses_ + i * numClasses_;
        for (size_t j = 0; j < numClasses_; ++j) {
          confProb_->getData()[count * numClasses_ + j] =
              (confBuffer_->getData() + confPredOffset)[j];
        }
        ++count;
      }

      // Negative mining samples: the target is the background class.
      for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
        confGTData_->getData()[count] = backgroundId_;
        size_t confPredOffset =
            n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_;
        for (size_t j = 0; j < numClasses_; ++j) {
          confProb_->getData()[count * numClasses_ + j] =
              (confBuffer_->getData() + confPredOffset)[j];
        }
        count++;
      }
    }

    confProb_->softmax(*confProb_);
    MatrixPtr confLossOutput;
    Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false);
    confLossOutput->oneHotCrossEntropy(*confProb_, *confGTData_);
    confLoss_ = confLossOutput->getSum() / numMatches_;
  }

  // Every row of the output holds the same total loss.
  real loss = locLoss_ + confLoss_;
  MatrixPtr outV = getOutputValue();
  vector<real> tmp(batchSize, loss);
  outV->copyFrom(&tmp[0], batchSize);
}
void MultiBoxLossLayer::backward(const UpdateCallback& callback) {
size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();
locBuffer_->zeroMem();
confBuffer_->zeroMem();
// Back propagate on location prediction
if (numMatches_ >= 1) {
MatrixPtr locDiffBuffer;
Matrix::resizeOrCreate(locDiffBuffer, numMatches_ * 4, 1, false, false);
locDiffBuffer->smoothL1Bp(*locDiff_, *locGTData_, 0.0);
locDiff_->copyFrom(*locDiffBuffer);
// scale gradient
for (size_t i = 0; i < numMatches_ * 4; ++i)
locDiff_->getData()[i] *= (1. / numMatches_);
// Copy gradient back
size_t count = 0;
for (size_t n = 0; n < batchSize; ++n)
for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue;
real* locDiffData = locBuffer_->getData() + n * numPriors_ * 4 + i * 4;
locDiffData[0] = (locDiff_->getData() + count * 4)[0];
locDiffData[1] = (locDiff_->getData() + count * 4)[1];
locDiffData[2] = (locDiff_->getData() + count * 4)[2];
locDiffData[3] = (locDiff_->getData() + count * 4)[3];
++count;
}
CHECK_EQ(count, numMatches_);
}
if (numConf_ >= 1) {
for (size_t i = 0; i < numConf_; ++i)
confProb_->getData()[i * numClasses_ + confGTData_->getData()[i]] -= 1;
for (size_t i = 0; i < numConf_ * numClasses_; ++i)
confProb_->getData()[i] *= (1. / numMatches_);
size_t count = 0;
for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue;
real* confDiffData = confBuffer_->getData() +
n * numPriors_ * numClasses_ + i * numClasses_;
for (size_t j = 0; j < numClasses_; ++j)
confDiffData[j] = (confProb_->getData() + count * numClasses_)[j];
++count;
}
for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
int idx = allNegIndices_[n][i];
real* confDiffData = confBuffer_->getData() +
n * numPriors_ * numClasses_ + idx * numClasses_;
for (size_t j = 0; j < numClasses_; ++j)
confDiffData[j] = (confProb_->getData() + count * numClasses_)[j];
++count;
}
}
CHECK_EQ(count, numConf_);
}
if (useGpu_) {
locTmpBuffer_->copyFrom(*locCpuBuffer_);
confTmpBuffer_->copyFrom(*confCpuBuffer_);
locBuffer_ = locTmpBuffer_;
confBuffer_ = confTmpBuffer_;
}
// copy back
size_t locOffset = 0;
size_t confOffset = 0;
auto layerConf = config_.inputs(0).multibox_loss_conf();
for (size_t n = 0; n < inputNum_; ++n) {
const MatrixPtr inLocG = getInputGrad(*getLocInputLayer(n));
const MatrixPtr inConfG = getInputGrad(*getConfInputLayer(n));
size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
if (!height) height = layerConf.height();
size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
if (!width) width = layerConf.width();
// NHWC to NCHW
MatrixPtr locGBuffer;
Matrix::resizeOrCreate(
locGBuffer, inLocG->getHeight(), inLocG->getWidth(), false, useGpu_);
MatrixPtr confGBuffer;
Matrix::resizeOrCreate(
confGBuffer, inConfG->getHeight(), inConfG->getWidth(), false, useGpu_);
locOffset += decomposeWithPermute(*locBuffer_,
height,
width,
locSizeSum_,
locOffset,
batchSize,
*locGBuffer,
kNHWCToNCHW);
inLocG->add(*locGBuffer);
confOffset += decomposeWithPermute(*confBuffer_,
height,
width,
confSizeSum_,
confOffset,
batchSize,
*confGBuffer,
kNHWCToNCHW);
inConfG->add(*confGBuffer);
}
CHECK_EQ(locOffset, locSizeSum_ / batchSize);
CHECK_EQ(confOffset, confSizeSum_ / batchSize);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "CostLayer.h"
#include "DataLayer.h"
#include "DetectionUtil.h"
#include "Layer.h"
using std::vector;
using std::pair;
namespace paddle {
/**
* The multibox loss layer for a SSD detection task.
* The loss is composed by the location loss and the confidence loss.
* The location loss is a smooth L1 loss and the confidence loss is
* a softmax loss.
* - Input: This layer need four input layers: This first input layer
* is the priorbox layer and the second layer is a label layer.
* The rest two input layers are convolution layers for generating
* bbox location offset and the classification confidence.
* - Output: The Single Shot Multibox Detection loss value.
* Reference:
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
* Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
*/
class MultiBoxLossLayer : public CostLayer {
public:
explicit MultiBoxLossLayer(const LayerConfig& config) : CostLayer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
void forwardImp(Matrix& output, Argument& label, Matrix& cost) {}
void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
protected:
inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }
inline LayerPtr getLabelLayer() { return inputLayers_[1]; }
inline LayerPtr getLocInputLayer(size_t index) {
return inputLayers_[2 + index];
}
inline LayerPtr getConfInputLayer(size_t index) {
return inputLayers_[2 + inputNum_ + index];
}
protected:
size_t numClasses_;
real overlapThreshold_;
real negPosRatio_;
real negOverlap_;
size_t inputNum_;
size_t backgroundId_;
real locLoss_;
real confLoss_;
size_t numPriors_;
size_t numMatches_;
size_t numNegs_;
size_t numConf_;
size_t locSizeSum_;
size_t confSizeSum_;
vector<vector<int>> allMatchIndices_;
vector<vector<int>> allNegIndices_;
MatrixPtr locGTData_;
IVectorPtr confGTData_;
MatrixPtr locBuffer_;
MatrixPtr confBuffer_;
MatrixPtr locDiff_;
MatrixPtr confProb_;
MatrixPtr labelCpuValue_;
MatrixPtr priorCpuValue_;
MatrixPtr locCpuBuffer_;
MatrixPtr confCpuBuffer_;
MatrixPtr locTmpBuffer_;
MatrixPtr confTmpBuffer_;
};
} // namespace paddle
......@@ -45,6 +45,13 @@ add_unittest_without_exec(test_PriorBox
add_test(NAME test_PriorBox
COMMAND test_PriorBox)
################# test_DetectionOutput #######################
add_unittest_without_exec(test_DetectionOutput
test_DetectionOutput.cpp
LayerGradUtil.cpp)
add_test(NAME test_DetectionOutput
COMMAND test_DetectionOutput)
################# test_ConvUnify #######################
add_unittest_without_exec(test_ConvUnify
test_ConvUnify.cpp
......
......@@ -387,6 +387,31 @@ void initDataLayer(TestConfig testConf,
data.value->sigmoid(*data.value);
data.grad->zeroMem();
break;
case INPUT_SELF_DEFINE_DATA: {
size_t height = testConf.inputDefs[i].selfDefinedData->getHeight();
size_t width = testConf.inputDefs[i].selfDefinedData->getWidth();
CHECK_GT(static_cast<int>(height), 0);
CHECK_GT(static_cast<int>(width), 0);
data.value = Matrix::create(height, width, false, useGpu);
data.grad = Matrix::create(height, width, false, useGpu);
data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData);
data.grad->zeroMem();
const std::vector<int>& labelSeqStartPositions =
testConf.inputDefs[i].labelSeqStartPositions;
if (labelSeqStartPositions.size() != 0) {
CHECK(!sequenceStartPositions);
CHECK_GE(static_cast<int>(labelSeqStartPositions.size()), 2);
sequenceStartPositions =
ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu);
sequenceStartPositions->copyFrom(labelSeqStartPositions.data(),
labelSeqStartPositions.size(),
useGpu);
data.sequenceStartPositions = sequenceStartPositions;
}
break;
}
default:
LOG(FATAL) << " unknown inputType ";
return;
......
......@@ -31,7 +31,8 @@ enum InputType {
INPUT_SEQUENCE_LABEL,
INPUT_SPARSE_NON_VALUE_DATA,
INPUT_SPARSE_FLOAT_VALUE_DATA,
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_SELF_DEFINE_DATA, // support customizing for input value
};
struct ParaSparse {
......@@ -66,6 +67,7 @@ struct InputDef {
bool isStatic;
std::vector<int> labelInitValue;
std::vector<int> labelSeqStartPositions;
MatrixPtr selfDefinedData;
InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) {
inputType = type;
......@@ -76,6 +78,20 @@ struct InputDef {
isStatic = false;
}
// Builds an input definition whose value matrix is supplied by the caller
// instead of being generated from a dimension/size pair.
//   selfDefinedData        - the matrix stored as this input's value source.
//   selfDefinedSeqStartPos - optional sequence start positions, stored in
//                            labelSeqStartPositions; empty by default.
InputDef(InputType type,
         string nameIn,
         MatrixPtr selfDefinedData,
         std::vector<int> selfDefinedSeqStartPos = {})
    : labelSeqStartPositions(selfDefinedSeqStartPos),
      selfDefinedData(selfDefinedData) {
  // Identification of the input.
  name = nameIn;
  inputType = type;

  // No dimension or parameter is attached to a self-defined input.
  dim = 0;
  paraSize = 0;
  sparse = {""};
  isStatic = false;
}
InputDef(InputType type,
string nameIn,
size_t dimIn,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
// Runs one forward pass of the detection_output layer on the given location,
// confidence and prior-box inputs and checks that the layer output equals
// the expected result matrix.
void doOneDetectionOutputTest(MatrixPtr& inputLoc,
                              MatrixPtr& inputConf,
                              MatrixPtr& inputPriorBox,
                              size_t feature_map_width,
                              size_t feature_map_height,
                              real nms_threshold,
                              bool use_gpu,
                              MatrixPtr& result) {
  // Layer under test: three inputs, with the detection settings stored on
  // the first one.
  TestConfig testConf;
  testConf.layerConfig.set_type("detection_output");
  LayerInputConfig* firstInput = testConf.layerConfig.add_inputs();
  testConf.layerConfig.add_inputs();
  testConf.layerConfig.add_inputs();

  DetectionOutputConfig* detConf = firstInput->mutable_detection_output_conf();
  detConf->set_input_num(1);
  detConf->set_num_classes(2);
  detConf->set_background_id(0);
  detConf->set_width(feature_map_width);
  detConf->set_height(feature_map_height);
  detConf->set_nms_threshold(nms_threshold);
  detConf->set_nms_top_k(20);
  detConf->set_keep_top_k(10);
  detConf->set_confidence_threshold(0.01);

  // Input order expected by the layer: prior boxes, locations, confidences.
  testConf.inputDefs.push_back({INPUT_DATA_TARGET, "priorbox", 32, 0});
  testConf.inputDefs.push_back({INPUT_DATA, "input_loc", 16, 0});
  testConf.inputDefs.push_back({INPUT_DATA, "input_conf", 8, 0});

  // Create the data layers, then overwrite their values with the test data.
  std::vector<DataLayerPtr> dataLayers;
  LayerMap layerMap;
  vector<Argument> datas;
  initDataLayer(
      testConf, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu);
  dataLayers[0]->getOutputValue()->copyFrom(*inputPriorBox);
  dataLayers[1]->getOutputValue()->copyFrom(*inputLoc);
  dataLayers[2]->getOutputValue()->copyFrom(*inputConf);

  // Create the layer under test, run it and compare with the expectation.
  std::vector<ParameterPtr> parameters;
  LayerPtr layerUnderTest;
  initTestLayer(testConf, &layerMap, &parameters, &layerUnderTest);
  layerUnderTest->forward(PASS_GC);
  checkMatrixEqual(layerUnderTest->getOutputValue(), result);
}
// Checks the forward output of the detection_output layer for two NMS
// thresholds, on CPU and (unless built with PADDLE_ONLY_CPU) on GPU.
TEST(Layer, detectionOutputLayerFwd) {
  // Raw input values shared by all cases.
  real locValues[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                      0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
  real confValues[] = {0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6};
  real priorBoxValues[] = {0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2,
                           0.2, 0.2, 0.6, 0.6, 0.1, 0.1, 0.2, 0.2,
                           0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2,
                           0.4, 0.4, 0.8, 0.8, 0.1, 0.1, 0.2, 0.2};

  // Expected output for NMS threshold 0.01 (one detection) ...
  real expectedOne[] = {
      0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031};
  // ... and for NMS threshold 0.2 (two detections).
  real expectedTwo[] = {
      0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031,
      0, 1, 0.59868765, 0.29995975,  0.29995975,  0.70804024, 0.70804024};

  // CPU case 1.
  bool useGpu = false;
  MatrixPtr inputLoc = Matrix::create(1, 16, false, useGpu);
  MatrixPtr inputConf = Matrix::create(1, 8, false, useGpu);
  MatrixPtr inputPriorBox = Matrix::create(1, 32, false, useGpu);
  MatrixPtr result = Matrix::create(1, 7, false, useGpu);
  inputLoc->setData(locValues);
  inputConf->setData(confValues);
  inputPriorBox->setData(priorBoxValues);
  result->setData(expectedOne);

  real nmsThreshold = 0.01;
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           result);

  // CPU case 2.
  nmsThreshold = 0.2;
  MatrixPtr result2 = Matrix::create(2, 7, false, useGpu);
  result2->setData(expectedTwo);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           result2);

#ifndef PADDLE_ONLY_CPU
  // GPU case 1.
  useGpu = true;
  inputLoc = Matrix::create(1, 16, false, useGpu);
  inputConf = Matrix::create(1, 8, false, useGpu);
  inputPriorBox = Matrix::create(1, 32, false, useGpu);
  inputLoc->copyFrom(locValues, 16);
  inputConf->copyFrom(confValues, 8);
  inputPriorBox->copyFrom(priorBoxValues, 32);

  nmsThreshold = 0.01;
  MatrixPtr result3 = Matrix::create(1, 7, false, useGpu);
  result3->copyFrom(expectedOne, 7);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           result3);

  // GPU case 2.
  nmsThreshold = 0.2;
  MatrixPtr result4 = Matrix::create(2, 7, false, useGpu);
  result4->copyFrom(expectedTwo, 14);
  doOneDetectionOutputTest(inputLoc,
                           inputConf,
                           inputPriorBox,
                           /* feature_map_width */ 1,
                           /* feature_map_height */ 1,
                           nmsThreshold,
                           useGpu,
                           result4);
#endif
}
// Test entry point: initializes googletest first, then the Paddle runtime
// via initMain, and finally runs every registered test.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
  const int exitCode = RUN_ALL_TESTS();
  return exitCode;
}
......@@ -1689,6 +1689,70 @@ TEST(Layer, smooth_l1) {
}
}
// Gradient check of the multibox_loss layer with one ground-truth box and
// nine prior boxes (72 prior values, 8 per prior), on CPU and GPU.
TEST(Layer, multibox_loss) {
  TestConfig config;
  config.layerConfig.set_type("multibox_loss");
  config.biasSize = 0;

  // The loss settings are stored on the first input of the layer.
  LayerInputConfig* firstInput = config.layerConfig.add_inputs();
  MultiBoxLossConfig* lossConf = firstInput->mutable_multibox_loss_conf();
  lossConf->set_input_num(1);
  lossConf->set_num_classes(21);
  lossConf->set_background_id(0);
  lossConf->set_overlap_threshold(0.5);
  lossConf->set_neg_overlap(0.5);
  lossConf->set_neg_pos_ratio(3);
  lossConf->set_height(3);
  lossConf->set_width(3);

  // Ground truth: random content, then a random non-background class and a
  // fixed box written into the first five columns of every row.
  size_t gtNum = 1;
  MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false);
  labelValue->randomizeUniform();
  labelValue->add(-0.5);
  labelValue->sigmoid(*labelValue);
  real* labelData = labelValue->getData();
  size_t labelWidth = labelValue->getWidth();
  for (size_t row = 0; row < gtNum; ++row) {
    real* gt = labelData + row * labelWidth;
    gt[0] = std::rand() % 20 + 1;
    gt[1] = 0.400259;
    gt[2] = 0.377857;
    gt[3] = 0.525712;
    gt[4] = 0.519368;
  }

  // Sequence start positions 0, 1, ..., gtNum.
  vector<int> seqStartPositions(gtNum + 1, 0);
  for (size_t pos = 1; pos <= gtNum; ++pos) {
    seqStartPositions[pos] = pos;
  }

  // Ensure at least one matched bbox: the first prior is set to fixed values
  // on top of the random content.
  MatrixPtr priorValue = Matrix::create(1, 72, false, false);
  priorValue->randomizeUniform();
  priorValue->add(-0.5);
  priorValue->sigmoid(*priorValue);
  real* priorData = priorValue->getData();
  priorData[0] = 0.424811;
  priorData[1] = 0.397059;
  priorData[2] = 0.538905;
  priorData[3] = 0.447091;
  priorData[4] = 0.425720;
  priorData[5] = 0.515228;
  priorData[6] = 0.519452;
  priorData[7] = 0.591065;

  // Input order expected by the layer: prior boxes, labels, location
  // predictions, confidence predictions.
  config.inputDefs.push_back(
      {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}});
  config.inputDefs.push_back(
      {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions});
  config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0});
  config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0});

  // Three more input slots, for a total of four.
  config.layerConfig.add_inputs();
  config.layerConfig.add_inputs();
  config.layerConfig.add_inputs();

  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "multibox_loss", 1, false, useGpu, false);
  }
}
TEST(Layer, TransLayer) {
TestConfig config;
const int height = 128;
......
......@@ -266,6 +266,29 @@ message PadConfig {
repeated uint32 pad_w = 4;
}
// Settings of the multibox_loss layer. MultiBoxLossLayer reads them from the
// configuration of its first input.
message MultiBoxLossConfig {
// Number of classes; one score per class is read for every prior box.
required uint32 num_classes = 1;
// Overlap threshold handed to the prior / ground-truth matching step.
required float overlap_threshold = 2;
// Ratio and overlap bound handed to the matching step together with the
// per-prior max confidence scores (presumably they control the negative
// mining; confirm against generateMatchIndices).
required float neg_pos_ratio = 3;
required float neg_overlap = 4;
// Class id used as the target label of the negative samples.
required uint32 background_id = 5;
// Number of location inputs; the layer expects the same number of
// confidence inputs.
required uint32 input_num = 6;
// Fallback frame shape, used when an input reports a frame height / width
// of 0.
optional uint32 height = 7 [default = 1];
optional uint32 width = 8 [default = 1];
}
// Settings of the detection_output layer. DetectionOutputLayer reads them
// from the configuration of its first input.
message DetectionOutputConfig {
// Number of classes; the confidence buffer has one column per class.
required uint32 num_classes = 1;
// The fields nms_threshold, nms_top_k, background_id, keep_top_k and
// confidence_threshold are passed unchanged to the step that selects which
// detections are kept.
required float nms_threshold = 2;
required uint32 nms_top_k = 3;
required uint32 background_id = 4;
// Number of location inputs; the layer expects the same number of
// confidence inputs.
required uint32 input_num = 5;
required uint32 keep_top_k = 6;
required float confidence_threshold = 7;
// Fallback frame shape, used when an input reports a frame height / width
// of 0.
optional uint32 height = 8 [default = 1];
optional uint32 width = 9 [default = 1];
}
message LayerInputConfig {
required string input_layer_name = 1;
optional string input_parameter_name = 2;
......@@ -284,6 +307,8 @@ message LayerInputConfig {
optional PriorBoxConfig priorbox_conf = 13;
optional PadConfig pad_conf = 14;
optional RowConvConfig row_conv_conf = 15;
optional MultiBoxLossConfig multibox_loss_conf = 16;
optional DetectionOutputConfig detection_output_conf = 17;
}
message LayerConfig {
......
......@@ -1676,6 +1676,52 @@ class PriorBoxLayer(LayerBase):
self.config.size = size
@config_layer('multibox_loss')
class MultiBoxLossLayer(LayerBase):
    """
    Config-parser class for the 'multibox_loss' layer type.

    Validates the number of inputs and the class/background relation, then
    records the loss hyper-parameters in the ``multibox_loss_conf`` message
    attached to the first input. The layer size is fixed to 1.
    """

    def __init__(self, name, inputs, input_num, num_classes, overlap_threshold,
                 neg_pos_ratio, neg_overlap, background_id):
        super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0,
                                                inputs)

        # Two inputs per prediction branch, plus two extra inputs.
        expected_input_count = input_num * 2 + 2
        config_assert(
            len(inputs) == expected_input_count,
            'MultiBoxLossLayer does not have enough inputs')
        config_assert(num_classes > background_id,
                      'Classes number must greater than background ID')

        # All hyper-parameters are stored on the first input's sub-message.
        loss_conf = self.config.inputs[0].multibox_loss_conf
        loss_conf.num_classes = num_classes
        loss_conf.overlap_threshold = overlap_threshold
        loss_conf.neg_pos_ratio = neg_pos_ratio
        loss_conf.neg_overlap = neg_overlap
        loss_conf.background_id = background_id
        loss_conf.input_num = input_num

        self.config.size = 1
@config_layer('detection_output')
class DetectionOutputLayer(LayerBase):
    """
    Config-parser class for the 'detection_output' layer type.

    Validates the number of inputs and the class/background relation, then
    records the NMS/selection parameters in the ``detection_output_conf``
    message attached to the first input. The layer size is taken from the
    ``size`` argument.
    """

    def __init__(self, name, inputs, size, input_num, num_classes,
                 nms_threshold, nms_top_k, keep_top_k, confidence_threshold,
                 background_id):
        super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0,
                                                   inputs)

        # Two inputs per prediction branch, plus one extra input.
        expected_input_count = input_num * 2 + 1
        config_assert(
            len(inputs) == expected_input_count,
            'DetectionOutputLayer does not have enough inputs')
        config_assert(num_classes > background_id,
                      'Classes number must greater than background ID')

        # All parameters are stored on the first input's sub-message.
        detection_conf = self.config.inputs[0].detection_output_conf
        detection_conf.num_classes = num_classes
        detection_conf.nms_threshold = nms_threshold
        detection_conf.nms_top_k = nms_top_k
        detection_conf.keep_top_k = keep_top_k
        detection_conf.confidence_threshold = confidence_threshold
        detection_conf.background_id = background_id
        detection_conf.input_num = input_num

        self.config.size = size
@config_layer('data')
class DataLayer(LayerBase):
def __init__(self, name, size, height=None, width=None, device=None):
......
......@@ -115,6 +115,8 @@ __all__ = [
'print_layer',
'priorbox_layer',
'cross_channel_norm_layer',
'multibox_loss_layer',
'detection_output_layer',
'spp_layer',
'pad_layer',
'eos_layer',
......@@ -195,6 +197,8 @@ class LayerType(object):
PRINT_LAYER = 'print'
PRIORBOX_LAYER = 'priorbox'
MULTIBOX_LOSS_LAYER = 'multibox_loss'
DETECTION_OUTPUT_LAYER = 'detection_output'
CTC_LAYER = 'ctc'
WARP_CTC_LAYER = 'warp_ctc'
......@@ -1052,6 +1056,163 @@ def priorbox_layer(input,
size=size)
@wrap_name_default("multibox_loss")
def multibox_loss_layer(input_loc,
                        input_conf,
                        priorbox,
                        label,
                        num_classes,
                        overlap_threshold=0.5,
                        neg_pos_ratio=3.0,
                        neg_overlap=0.5,
                        background_id=0,
                        name=None):
    """
    Compute the location loss and the confidence loss for ssd.

    The layer inputs are ordered as: priorbox, label, every location layer,
    then every confidence layer.

    :param name: The Layer Name.
    :type name: basestring
    :param input_loc: The input predict location; a single layer or a
                      list/tuple of layers.
    :type input_loc: LayerOutput | list | tuple
    :param input_conf: The input priorbox confidence; a single layer or a
                       list/tuple of layers, as many as in input_loc.
    :type input_conf: LayerOutput | list | tuple
    :param priorbox: The input priorbox location and the variance.
    :type priorbox: LayerOutput
    :param label: The input label.
    :type label: LayerOutput
    :param num_classes: The number of the classification.
    :type num_classes: int
    :param overlap_threshold: The threshold of the overlap.
    :type overlap_threshold: float
    :param neg_pos_ratio: The ratio of the negative bbox to the positive bbox.
    :type neg_pos_ratio: float
    :param neg_overlap: The negative bbox overlap threshold.
    :type neg_overlap: float
    :param background_id: The background class index.
    :type background_id: int
    :return: LayerOutput of size 1.
    """
    # A bare LayerOutput is shorthand for a one-element list.
    if isinstance(input_loc, LayerOutput):
        input_loc = [input_loc]
    assert isinstance(input_loc, collections.Sequence)  # list or tuple
    assert all(isinstance(loc_layer, LayerOutput) for loc_layer in input_loc)

    if isinstance(input_conf, LayerOutput):
        input_conf = [input_conf]
    assert isinstance(input_conf, collections.Sequence)  # list or tuple
    assert all(
        isinstance(conf_layer, LayerOutput) for conf_layer in input_conf)

    # Check the input layer number.
    branch_num = len(input_loc)
    assert branch_num == len(input_conf)

    # The name list handed to the config parser mirrors the parent order.
    parents = [priorbox, label] + list(input_loc) + list(input_conf)
    input_names = [parent.name for parent in parents]

    Layer(
        name=name,
        type=LayerType.MULTIBOX_LOSS_LAYER,
        inputs=input_names,
        input_num=branch_num,
        num_classes=num_classes,
        overlap_threshold=overlap_threshold,
        neg_pos_ratio=neg_pos_ratio,
        neg_overlap=neg_overlap,
        background_id=background_id)
    return LayerOutput(
        name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1)
@wrap_name_default("detection_output")
def detection_output_layer(input_loc,
                           input_conf,
                           priorbox,
                           num_classes,
                           nms_threshold=0.45,
                           nms_top_k=400,
                           keep_top_k=200,
                           confidence_threshold=0.01,
                           background_id=0,
                           name=None):
    """
    Apply the NMS to the output of network and compute the predict bounding
    box location.

    The layer inputs are ordered as: priorbox, every location layer, then
    every confidence layer. The output size is keep_top_k * 7.

    :param name: The Layer Name.
    :type name: basestring
    :param input_loc: The input predict location; a single layer or a
                      list/tuple of layers.
    :type input_loc: LayerOutput | list | tuple
    :param input_conf: The input priorbox confidence; a single layer or a
                       list/tuple of layers, as many as in input_loc.
    :type input_conf: LayerOutput | list | tuple
    :param priorbox: The input priorbox location and the variance.
    :type priorbox: LayerOutput
    :param num_classes: The number of the classification.
    :type num_classes: int
    :param nms_threshold: The Non-maximum suppression threshold.
    :type nms_threshold: float
    :param nms_top_k: The bbox number kept of the NMS's output
    :type nms_top_k: int
    :param keep_top_k: The bbox number kept of the layer's output
    :type keep_top_k: int
    :param confidence_threshold: The classification confidence threshold
    :type confidence_threshold: float
    :param background_id: The background class index.
    :type background_id: int
    :return: LayerOutput of size keep_top_k * 7.
    """
    # A bare LayerOutput is shorthand for a one-element list.
    if isinstance(input_loc, LayerOutput):
        input_loc = [input_loc]
    assert isinstance(input_loc, collections.Sequence)  # list or tuple
    assert all(isinstance(loc_layer, LayerOutput) for loc_layer in input_loc)

    if isinstance(input_conf, LayerOutput):
        input_conf = [input_conf]
    assert isinstance(input_conf, collections.Sequence)  # list or tuple
    assert all(
        isinstance(conf_layer, LayerOutput) for conf_layer in input_conf)

    # Check the input layer number.
    branch_num = len(input_loc)
    assert branch_num == len(input_conf)

    # The name list handed to the config parser mirrors the parent order.
    parents = [priorbox] + list(input_loc) + list(input_conf)
    input_names = [parent.name for parent in parents]

    # Seven values are reserved per kept box.
    # NOTE(review): the meaning of the 7 fields is not visible here -- confirm
    # against the C++ DetectionOutputLayer.
    size = keep_top_k * 7

    Layer(
        name=name,
        type=LayerType.DETECTION_OUTPUT_LAYER,
        inputs=input_names,
        size=size,
        input_num=branch_num,
        num_classes=num_classes,
        nms_threshold=nms_threshold,
        nms_top_k=nms_top_k,
        keep_top_k=keep_top_k,
        confidence_threshold=confidence_threshold,
        background_id=background_id)
    return LayerOutput(
        name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size)
@wrap_name_default("cross_channel_norm")
def cross_channel_norm_layer(input, name=None, param_attr=None):
"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册