Commit 54bb12ee authored by gongweibao

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fixcmake

......@@ -445,6 +445,11 @@ smooth_l1_cost
.. autoclass:: paddle.v2.layer.smooth_l1_cost
:noindex:
multibox_loss
--------------
.. autoclass:: paddle.v2.layer.multibox_loss
:noindex:
Check Layer
============
......@@ -468,3 +473,11 @@ prelu
--------
.. autoclass:: paddle.v2.layer.prelu
:noindex:
Detection Output Layer
======================
detection_output
----------------
.. autoclass:: paddle.v2.layer.detection_output
:noindex:
......@@ -2,9 +2,13 @@
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_test(tensor_test SRCS tensor_test.cc DEPS ddim)
cc_test(variable_test SRCS variable_test.cc)
cc_test(scope_test SRCS scope_test.cc)
cc_test(enforce_test SRCS enforce_test.cc)
proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax="proto2";
package paddle.framework;
import "attr_type.proto";
// AttrDesc is used to describe Attributes of an Operator. It contains
// the name, type, and value of an Attribute.
//
// e.g., for scale=3.0: name=scale, type=AttrType.FLOAT, value=3.0
message AttrDesc {
required string name = 1;
required AttrType type = 2;
optional int32 i = 3;
optional float f = 4;
optional string s = 5;
repeated int32 ints = 6;
repeated float floats = 7;
repeated string strings = 8;
};
// Protocol Message to describe an Operator.
//
// In PaddlePaddle, Operator is used to do a certain computation such
// as "add", "sub", "cosine", etc.
// (1) Operator needs to know the input and output variable names.
// (2) Some ops may have special attributes such as "scale" in "CosineOp".
//
// A third-party language can build this proto message and call
// AddOp(const OpDesc& op_desc) of Paddle core to create an Operator.
message OpDesc {
// input names of this Operator.
repeated string inputs = 1;
// output names of this Operator.
repeated string outputs = 2;
// type of this Operator, such as "add", "sub", "fc".
required string type = 3;
// Attributes of this Operator. e.g., scale=3.0 in cosine op.
repeated AttrDesc attrs = 4;
};
\ No newline at end of file
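As a sketch of the third-party workflow described in the comment above, the snippet below builds an OpDesc from Python through protobuf-generated bindings. It assumes protoc has generated op_desc_pb2 and attr_type_pb2 modules from these .proto files and that attr_type.proto defines an AttrType enum with a FLOAT member; the module and enum names are illustrative, not part of this commit.
# Hypothetical Python client of op_desc.proto (module names assumed).
import attr_type_pb2
import op_desc_pb2

op_desc = op_desc_pb2.OpDesc()
op_desc.type = "add"                      # required field
op_desc.inputs.extend(["X", "Y"])         # input variable names
op_desc.outputs.append("Z")               # output variable name

attr = op_desc.attrs.add()                # e.g., scale=3.0
attr.name = "scale"
attr.type = attr_type_pb2.FLOAT           # assumes an AttrType enum value FLOAT
attr.f = 3.0

assert op_desc.IsInitialized()            # all required fields are set
serialized = op_desc.SerializeToString()  # hand to AddOp() in Paddle core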
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <paddle/framework/op_desc.pb.h>
TEST(OpDesc, Create) {
paddle::framework::OpDesc op_desc;
op_desc.set_type("add");
op_desc.add_inputs("X");
op_desc.add_inputs("Y");
op_desc.add_outputs("Z");
auto attr = op_desc.mutable_attrs()->Add();
attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(3.14);
// required field name is not set, so IsInitialized should be false.
ASSERT_FALSE(op_desc.IsInitialized());
attr->set_name("add");
// after all required fields are set, IsInitialized should be true now.
ASSERT_TRUE(op_desc.IsInitialized());
}
\ No newline at end of file
......@@ -14,33 +14,39 @@ limitations under the License. */
#pragma once
#include <memory>
#include <type_traits>
#include "paddle/framework/ddim.h"
#include "paddle/framework/enforce.h"
#include "paddle/memory/memory.h"
#include "paddle/platform/place.h"
namespace paddle {
namespace framework {
class Tensor {
using paddle::platform::Place;
using paddle::platform::get_place;
public:
template <typename T>
const T* data() const {
PADDLE_ASSERT(holder_ != nullptr,
"Tensor::data must be called after Tensor::mutable_data");
return static_cast<const T*>(holder->Ptr());
PADDLE_ENFORCE(holder_ != nullptr,
"Tensor::data must be called after Tensor::mutable_data.");
return static_cast<const T*>(holder_->Ptr());
}
template <typename T, // must be POD types
typename = std::enable_if<std::is_pod<T>::value>::type>
T* mutable_data(DDim dims, Place place) {
if (holder_ == nullptr || holder_->Place() != place ||
holder_->Size() < dims.product() * sizeof(T)) {
holder_.reset(new PlaceholderImpl(place, dims.product() * sizeof(T)));
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
T* mutable_data(DDim dims, paddle::platform::Place place) {
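// Lazy (re)allocation: reuse the current holder only when it already
// lives on the requested place and holds at least product(dims) * sizeof(T)
// bytes; otherwise allocate a fresh block below.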
if (holder_ == nullptr ||
!(holder_->Place() ==
place) /* some versions of boost::variant don't have operator!= */
|| holder_->Size() < product(dims) * sizeof(T)) {
holder_.reset(new PlaceholderImpl<T>(place, product(dims) * sizeof(T)));
}
return static_cast<T*>(holder_->Ptr());
}
template <typename T, // must be POD types
typename = std::enable_if<std::is_pod<T>::value>::type>
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
T* mutable_data(DDim dims) {
return mutable_data<T>(dims, paddle::platform::get_place());
}
......@@ -51,27 +57,41 @@ class Tensor {
struct Placeholder {
virtual ~Placeholder() {}
virtual void* Ptr() const = 0;
virtual Place Place() const = 0;
virtual paddle::platform::Place Place() const = 0;
virtual size_t Size() const = 0;
};
template <typename T>
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(Place pl, size_t size)
: ptr_(paddle::memory::Alloc(pl, size), paddle::memory::Deleter(pl)),
place_(pl),
private:
class Deleter {
public:
Deleter(platform::Place place) : place_(place) {}
void operator()(T* ptr) {
paddle::memory::Free(place_, static_cast<void*>(ptr));
}
private:
paddle::platform::Place place_;
};
public:
PlaceholderImpl(paddle::platform::Place place, size_t size)
: ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)),
Deleter(place)),
place_(place),
size_(size) {}
virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
virtual size_t Size() const { return size_; }
virtual Place Place() const { return place_; }
virtual paddle::platform::Place Place() const { return place_; }
std::unique_ptr<T, memory::Deleter> ptr_;
Place place_; // record the place of ptr_.
size_t size_; // size of the memory block.
std::unique_ptr<T, Deleter> ptr_;
paddle::platform::Place place_; // record the place of ptr_.
size_t size_; // size of the memory block.
};
std::unique_ptr<Placeholder> holder_; // holds the memory block if allocated.
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
};
} // namespace framework
......
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/framework/tensor.h"
#include <gtest/gtest.h>
#include <string>
TEST(Tensor, ASSERT) {
paddle::framework::Tensor cpu_tensor;
bool caught = false;
try {
const double* p __attribute__((unused)) = cpu_tensor.data<double>();
} catch (paddle::framework::EnforceNotMet err) {
caught = true;
std::string msg = "Tensor::data must be called after Tensor::mutable_data.";
const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
}
ASSERT_TRUE(caught);
}
/* mutable_data() is not tested at present
because Memory::Alloc() and Memory::Free() are not ready yet.
TEST(Tensor, MutableData) {
using namespace paddle::framework;
using namespace paddle::platform;
{
Tensor cpu_tensor;
float* p1 = nullptr;
float* p2 = nullptr;
// initialization
p1 = cpu_tensor.mutable_data<float>(make_ddim({1, 2, 3}), CPUPlace());
EXPECT_NE(p1, nullptr);
// set cpu_tensor a new dim with a larger size
// memory is supposed to be re-allocated
p2 = cpu_tensor.mutable_data<float>(make_ddim({3, 4}));
EXPECT_NE(p2, nullptr);
EXPECT_NE(p1, p2);
// set cpu_tensor a new dim with the same size
// memory block is supposed to be unchanged
p1 = cpu_tensor.mutable_data<float>(make_ddim({2, 2, 3}));
EXPECT_EQ(p1, p2);
// set cpu_tensor a new dim with a smaller size
// memory block is supposed to be unchanged
p2 = cpu_tensor.mutable_data<float>(make_ddim({2, 2}));
EXPECT_EQ(p1, p2);
}
{
Tensor gpu_tensor;
float* p1 = nullptr;
float* p2 = nullptr;
// initialization
p1 = gpu_tensor.mutable_data<float>(make_ddim({1, 2, 3}), GPUPlace());
EXPECT_NE(p1, nullptr);
// set gpu_tensor a new dim with a larger size
// memory is supposed to be re-allocated
p2 = gpu_tensor.mutable_data<float>(make_ddim({3, 4}));
EXPECT_NE(p2, nullptr);
EXPECT_NE(p1, p2);
// set gpu_tensor a new dim with the same size
// memory block is supposed to be unchanged
p1 = gpu_tensor.mutable_data<float>(make_ddim({2, 2, 3}));
EXPECT_EQ(p1, p2);
// set gpu_tensor a new dim with a smaller size
// memory block is supposed to be unchanged
p2 = gpu_tensor.mutable_data<float>(make_ddim({2, 2}));
EXPECT_EQ(p1, p2);
}
}
*/
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "DetectionOutputLayer.h"
namespace paddle {
REGISTER_LAYER(detection_output, DetectionOutputLayer);
bool DetectionOutputLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
auto& layerConf = config_.inputs(0).detection_output_conf();
numClasses_ = layerConf.num_classes();
inputNum_ = layerConf.input_num();
nmsThreshold_ = layerConf.nms_threshold();
confidenceThreshold_ = layerConf.confidence_threshold();
nmsTopK_ = layerConf.nms_top_k();
keepTopK_ = layerConf.keep_top_k();
backgroundId_ = layerConf.background_id();
return true;
}
void DetectionOutputLayer::forward(PassType passType) {
Layer::forward(passType);
size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();
locSizeSum_ = 0;
confSizeSum_ = 0;
for (size_t n = 0; n < inputNum_; ++n) {
const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));
locSizeSum_ += inLoc->getElementCnt();
confSizeSum_ += inConf->getElementCnt();
}
Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
Matrix::resizeOrCreate(
confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_);
size_t locOffset = 0;
size_t confOffset = 0;
auto& layerConf = config_.inputs(0).detection_output_conf();
for (size_t n = 0; n < inputNum_; ++n) {
const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));
size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
if (!height) height = layerConf.height();
size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
if (!width) width = layerConf.width();
locOffset += appendWithPermute(*inLoc,
height,
width,
locSizeSum_,
locOffset,
batchSize,
*locTmpBuffer_,
kNCHWToNHWC);
confOffset += appendWithPermute(*inConf,
height,
width,
confSizeSum_,
confOffset,
batchSize,
*confTmpBuffer_,
kNCHWToNHWC);
}
CHECK_EQ(locOffset, locSizeSum_ / batchSize);
CHECK_EQ(confOffset, confSizeSum_ / batchSize);
MatrixPtr priorValue;
if (useGpu_) {
Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false);
Matrix::resizeOrCreate(
confCpuBuffer_, confSizeSum_ / numClasses_, numClasses_, false, false);
MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer());
Matrix::resizeOrCreate(
priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false);
locCpuBuffer_->copyFrom(*locTmpBuffer_);
confCpuBuffer_->copyFrom(*confTmpBuffer_);
priorCpuValue_->copyFrom(*priorTmpValue);
locBuffer_ = locCpuBuffer_;
confBuffer_ = confCpuBuffer_;
priorValue = priorCpuValue_;
} else {
priorValue = getInputValue(*getPriorBoxLayer());
locBuffer_ = locTmpBuffer_;
confBuffer_ = confTmpBuffer_;
}
confBuffer_->softmax(*confBuffer_);
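// Each prior box takes 8 values: 4 corner coordinates followed by their
// 4 variances, hence the division by 8.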
size_t numPriors = priorValue->getElementCnt() / 8;
std::vector<std::vector<NormalizedBBox>> allDecodedBBoxes;
for (size_t n = 0; n < batchSize; ++n) {
std::vector<NormalizedBBox> decodedBBoxes;
for (size_t i = 0; i < numPriors; ++i) {
size_t priorOffset = i * 8;
size_t locPredOffset = n * numPriors * 4 + i * 4;
std::vector<NormalizedBBox> priorBBoxVec;
getBBoxFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVec);
std::vector<std::vector<real>> priorBBoxVar;
getBBoxVarFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVar);
std::vector<real> locPredData;
for (size_t j = 0; j < 4; ++j)
locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j));
NormalizedBBox bbox =
decodeBBoxWithVar(priorBBoxVec[0], priorBBoxVar[0], locPredData);
decodedBBoxes.push_back(bbox);
}
allDecodedBBoxes.push_back(decodedBBoxes);
}
std::vector<std::map<size_t, std::vector<size_t>>> allIndices;
size_t numKept = getDetectionIndices(confBuffer_->getData(),
numPriors,
numClasses_,
backgroundId_,
batchSize,
confidenceThreshold_,
nmsTopK_,
nmsThreshold_,
keepTopK_,
allDecodedBBoxes,
&allIndices);
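// Each detection row holds 7 values: image id, class label, confidence
// score, and the xmin/ymin/xmax/ymax coordinates.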
resetOutput(numKept, 7);
MatrixPtr outV = getOutputValue();
getDetectionOutput(confBuffer_->getData(),
numKept,
numPriors,
numClasses_,
batchSize,
allIndices,
allDecodedBBoxes,
*outV);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <vector>
#include "DetectionUtil.h"
#include "Layer.h"
namespace paddle {
/**
* The detection output layer for an SSD detection task. This layer applies
* non-maximum suppression to all predicted bounding boxes and keeps the
* top-K bounding boxes.
* - Input: This layer needs three input layers: the first input layer
* is the priorbox layer. The remaining two input layers are convolution
* layers that generate the bbox location offsets and the classification
* confidences.
* - Output: The predicted bounding box locations.
*/
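// Input order: [priorbox, loc_0 ... loc_{inputNum-1},
// conf_0 ... conf_{inputNum-1}]; see getPriorBoxLayer(),
// getLocInputLayer(), and getConfInputLayer() below.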
class DetectionOutputLayer : public Layer {
public:
explicit DetectionOutputLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr) {}
protected:
inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }
inline LayerPtr getLocInputLayer(size_t index) {
return inputLayers_[1 + index];
}
inline LayerPtr getConfInputLayer(size_t index) {
return inputLayers_[1 + inputNum_ + index];
}
private:
size_t numClasses_; // number of classes
size_t inputNum_; // number of input layers
real nmsThreshold_;
real confidenceThreshold_;
size_t nmsTopK_;
size_t keepTopK_;
size_t backgroundId_;
size_t locSizeSum_;
size_t confSizeSum_;
MatrixPtr locBuffer_;
MatrixPtr confBuffer_;
MatrixPtr locTmpBuffer_;
MatrixPtr confTmpBuffer_;
MatrixPtr priorCpuValue_;
MatrixPtr locCpuBuffer_;
MatrixPtr confCpuBuffer_;
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "MultiBoxLossLayer.h"
#include <float.h>
#include <vector>
#include "DataLayer.h"
namespace paddle {
REGISTER_LAYER(multibox_loss, MultiBoxLossLayer);
bool MultiBoxLossLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
Layer::init(layerMap, parameterMap);
auto layerConf = config_.inputs(0).multibox_loss_conf();
numClasses_ = layerConf.num_classes();
inputNum_ = layerConf.input_num();
overlapThreshold_ = layerConf.overlap_threshold();
negPosRatio_ = layerConf.neg_pos_ratio();
negOverlap_ = layerConf.neg_overlap();
backgroundId_ = layerConf.background_id();
return true;
}
void MultiBoxLossLayer::forward(PassType passType) {
Layer::forward(passType);
size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();
resetOutput(batchSize, 1);
// all location data and confidence score data
locSizeSum_ = 0;
confSizeSum_ = 0;
for (size_t n = 0; n < inputNum_; ++n) {
const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));
locSizeSum_ += inLoc->getElementCnt();
confSizeSum_ += inConf->getElementCnt();
}
// locBuffer layout:
// | xmin1 | ymin1 | xmax1 | ymax1 | xmin2 ......
Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_);
locBuffer_ = locTmpBuffer_;
// confBuffer layout:
// | class1 score | class2 score | ... |classN score | class1 score | ......
Matrix::resizeOrCreate(confTmpBuffer_, 1, confSizeSum_, false, useGpu_);
confBuffer_ = confTmpBuffer_;
// concatenate location data and confidence score data
size_t locOffset = 0;
size_t confOffset = 0;
auto& layerConf = config_.inputs(0).multibox_loss_conf();
for (size_t n = 0; n < inputNum_; ++n) {
const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n));
const MatrixPtr inConf = getInputValue(*getConfInputLayer(n));
size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
if (!height) height = layerConf.height();
size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
if (!width) width = layerConf.width();
locOffset += appendWithPermute(*inLoc,
height,
width,
locSizeSum_,
locOffset,
batchSize,
*locBuffer_,
kNCHWToNHWC);
confOffset += appendWithPermute(*inConf,
height,
width,
confSizeSum_,
confOffset,
batchSize,
*confBuffer_,
kNCHWToNHWC);
}
CHECK_EQ(locOffset, locSizeSum_ / batchSize);
CHECK_EQ(confOffset, confSizeSum_ / batchSize);
// priorValue layout:
// | xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var
// | xmin2 | ......
MatrixPtr priorValue;
// labelValue layout:
// | class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ......
MatrixPtr labelValue;
// Copy data from GPU to CPU when running on GPU
if (useGpu_) {
Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false);
Matrix::resizeOrCreate(confCpuBuffer_, 1, confSizeSum_, false, false);
MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer());
Matrix::resizeOrCreate(
priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false);
MatrixPtr labelTmpValue = getInputValue(*getLabelLayer());
Matrix::resizeOrCreate(labelCpuValue_,
labelTmpValue->getHeight(),
labelTmpValue->getWidth(),
false,
false);
locCpuBuffer_->copyFrom(*locTmpBuffer_);
confCpuBuffer_->copyFrom(*confTmpBuffer_);
priorCpuValue_->copyFrom(*priorTmpValue);
labelCpuValue_->copyFrom(*labelTmpValue);
locBuffer_ = locCpuBuffer_;
confBuffer_ = confCpuBuffer_;
priorValue = priorCpuValue_;
labelValue = labelCpuValue_;
} else {
priorValue = getInputValue(*getPriorBoxLayer());
labelValue = getInputValue(*getLabelLayer());
}
// Get max scores for each prior bbox. Used in negative mining
std::vector<std::vector<real>> allMaxConfScore;
numPriors_ = priorValue->getElementCnt() / 8;
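// 8 values per prior box: 4 coordinates plus 4 variances (see the
// priorValue layout comment above).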
getMaxConfidenceScores(confBuffer_->getData(),
batchSize,
numPriors_,
numClasses_,
backgroundId_,
&allMaxConfScore);
// Match prior bbox to groundtruth bbox
Argument label = getInput(*getLabelLayer());
const int* labelIndex = label.sequenceStartPositions->getData(false);
size_t seqNum = label.getNumSequences();
numMatches_ = 0;
numNegs_ = 0;
allMatchIndices_.clear();
allNegIndices_.clear();
std::pair<size_t, size_t> retPair = generateMatchIndices(*priorValue,
numPriors_,
*labelValue,
labelIndex,
seqNum,
allMaxConfScore,
batchSize,
overlapThreshold_,
negOverlap_,
negPosRatio_,
&allMatchIndices_,
&allNegIndices_);
numMatches_ = retPair.first;
numNegs_ = retPair.second;
// BBox location L1 smooth loss
locLoss_ = 0.0;
if (numMatches_ >= 1) {
size_t count = 0;
MatrixPtr locLossOutput;
Matrix::resizeOrCreate(locLossOutput, numMatches_ * 4, 1, false, false);
Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false);
Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false);
locDiff_->zeroMem();
std::vector<real> locGTData;
real* locDiffData = locDiff_->getData();
const real* locBufferData = locBuffer_->getData();
for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue; // match none
size_t locOffset =
n * (locBuffer_->getElementCnt() / batchSize) + i * 4;
std::copy(locBufferData + locOffset,
locBufferData + locOffset + 4,
locDiffData + count);
count += 4;
const int gtIdx = allMatchIndices_[n][i];
size_t priorOffset = i * 8;
std::vector<NormalizedBBox> priorBBoxVec;
getBBoxFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVec);
std::vector<std::vector<real>> priorBBoxVar;
getBBoxVarFromPriorData(
priorValue->getData() + priorOffset, 1, priorBBoxVar);
size_t labelOffset = (labelIndex[n] + gtIdx) * 6;
std::vector<NormalizedBBox> gtBBoxVec;
getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec);
std::vector<real> gtEncode;
encodeBBoxWithVar(
priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode);
locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end());
}
}
locGTData_->copyFrom(&locGTData[0], numMatches_ * 4);
locLossOutput->smoothL1(*locDiff_, *locGTData_, 0.0);
locLoss_ = locLossOutput->getSum() / numMatches_;
}
// BBox confidence softmax loss
confLoss_ = 0;
numConf_ = numMatches_ + numNegs_;
if (numConf_ >= 1) {
Matrix::resizeOrCreate(confProb_, numConf_, numClasses_, false, false);
IVector::resizeOrCreate(confGTData_, numConf_, false);
confProb_->zeroMem();
size_t count = 0;
std::vector<real> confPredData;
real* confProbData = confProb_->getData();
const real* confBufferData = confBuffer_->getData();
for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue;
size_t labelOffset = (labelIndex[n] + allMatchIndices_[n][i]) * 6;
const int gtLabel = (labelValue->getData() + labelOffset)[0];
confGTData_->getData()[count] = gtLabel;
size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_;
std::copy(confBufferData + confOffset,
confBufferData + confOffset + numClasses_,
confProbData + count * numClasses_);
confPredData.reserve(confPredData.size() + numClasses_);
confPredData.insert(confPredData.end(),
confBufferData + confOffset,
confBufferData + confOffset + numClasses_);
++count;
}
// Negative mining samples
for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
confGTData_->getData()[count] = backgroundId_;
size_t confOffset =
n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_;
std::copy(confBufferData + confOffset,
confBufferData + confOffset + numClasses_,
confProbData + count * numClasses_);
confPredData.reserve(confPredData.size() + numClasses_);
confPredData.insert(confPredData.end(),
confBufferData + confOffset,
confBufferData + confOffset + numClasses_);
++count;
}
}
CHECK_EQ(numConf_, count);
confProb_->softmax(*confProb_);
MatrixPtr confLossOutput;
Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false);
confLossOutput->oneHotCrossEntropy(*confProb_, *confGTData_);
confLoss_ = confLossOutput->getSum() / numMatches_;
}
real loss = locLoss_ + confLoss_;
MatrixPtr outV = getOutputValue();
outV->assign(loss);
}
void MultiBoxLossLayer::backward(const UpdateCallback& callback) {
size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight();
locBuffer_->zeroMem();
confBuffer_->zeroMem();
// Back propagate on location prediction
if (numMatches_ >= 1) {
MatrixPtr locDiffBuffer;
Matrix::resizeOrCreate(locDiffBuffer, numMatches_ * 4, 1, false, false);
locDiffBuffer->smoothL1Bp(*locDiff_, *locGTData_, 0.0);
locDiff_->copyFrom(*locDiffBuffer);
// scale gradient
for (size_t i = 0; i < numMatches_ * 4; ++i)
locDiff_->getData()[i] *= (1. / numMatches_);
// Copy gradient back
size_t count = 0;
const real* locDiffData = locDiff_->getData();
for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue;
real* locBufferData =
locBuffer_->getData() + n * numPriors_ * 4 + i * 4;
std::copy(locDiffData + count * 4,
locDiffData + (count + 1) * 4,
locBufferData);
++count;
}
}
CHECK_EQ(count, numMatches_);
}
if (numConf_ >= 1) {
for (size_t i = 0; i < numConf_; ++i)
confProb_->getData()[i * numClasses_ + confGTData_->getData()[i]] -= 1;
for (size_t i = 0; i < numConf_ * numClasses_; ++i)
confProb_->getData()[i] *= (1. / numMatches_);
size_t count = 0;
const real* confProbData = confProb_->getData();
for (size_t n = 0; n < batchSize; ++n) {
for (size_t i = 0; i < numPriors_; ++i) {
if (allMatchIndices_[n][i] == -1) continue;
real* confDiffData = confBuffer_->getData() +
n * numPriors_ * numClasses_ + i * numClasses_;
std::copy(confProbData + count * numClasses_,
confProbData + (count + 1) * numClasses_,
confDiffData);
++count;
}
for (size_t i = 0; i < allNegIndices_[n].size(); ++i) {
int idx = allNegIndices_[n][i];
real* confDiffData = confBuffer_->getData() +
n * numPriors_ * numClasses_ + idx * numClasses_;
std::copy(confProbData + count * numClasses_,
confProbData + (count + 1) * numClasses_,
confDiffData);
++count;
}
}
CHECK_EQ(count, numConf_);
}
if (useGpu_) {
locTmpBuffer_->copyFrom(*locCpuBuffer_);
confTmpBuffer_->copyFrom(*confCpuBuffer_);
locBuffer_ = locTmpBuffer_;
confBuffer_ = confTmpBuffer_;
}
// copy back
size_t locOffset = 0;
size_t confOffset = 0;
auto layerConf = config_.inputs(0).multibox_loss_conf();
for (size_t n = 0; n < inputNum_; ++n) {
const MatrixPtr inLocG = getInputGrad(*getLocInputLayer(n));
const MatrixPtr inConfG = getInputGrad(*getConfInputLayer(n));
size_t height = getInput(*getLocInputLayer(n)).getFrameHeight();
// Only for unit tests: there is no width or height information
// when constructing the matrix in a unit test, so the shape
// must be set in the configuration.
if (!height) height = layerConf.height();
size_t width = getInput(*getLocInputLayer(n)).getFrameWidth();
if (!width) width = layerConf.width();
// NHWC to NCHW
MatrixPtr locGBuffer;
Matrix::resizeOrCreate(
locGBuffer, inLocG->getHeight(), inLocG->getWidth(), false, useGpu_);
MatrixPtr confGBuffer;
Matrix::resizeOrCreate(
confGBuffer, inConfG->getHeight(), inConfG->getWidth(), false, useGpu_);
locOffset += decomposeWithPermute(*locBuffer_,
height,
width,
locSizeSum_,
locOffset,
batchSize,
*locGBuffer,
kNHWCToNCHW);
inLocG->add(*locGBuffer);
confOffset += decomposeWithPermute(*confBuffer_,
height,
width,
confSizeSum_,
confOffset,
batchSize,
*confGBuffer,
kNHWCToNCHW);
inConfG->add(*confGBuffer);
}
CHECK_EQ(locOffset, locSizeSum_ / batchSize);
CHECK_EQ(confOffset, confSizeSum_ / batchSize);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "CostLayer.h"
#include "DataLayer.h"
#include "DetectionUtil.h"
#include "Layer.h"
using std::vector;
using std::pair;
namespace paddle {
/**
* The multibox loss layer for an SSD detection task.
* The loss is composed of the location loss and the confidence loss.
* The location loss is a smooth L1 loss and the confidence loss is
* a softmax loss.
* - Input: This layer needs four input layers: the first input layer
* is the priorbox layer and the second layer is a label layer.
* The remaining two input layers are convolution layers that generate
* the bbox location offsets and the classification confidences.
* - Output: The Single Shot Multibox Detection loss value.
* Reference:
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
* Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
*/
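// Input order: [priorbox, label, loc_0 ... loc_{inputNum-1},
// conf_0 ... conf_{inputNum-1}]; see the accessors below.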
class MultiBoxLossLayer : public CostLayer {
public:
explicit MultiBoxLossLayer(const LayerConfig& config) : CostLayer(config) {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
void forwardImp(Matrix& output, Argument& label, Matrix& cost) {}
void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {}
protected:
inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; }
inline LayerPtr getLabelLayer() { return inputLayers_[1]; }
inline LayerPtr getLocInputLayer(size_t index) {
return inputLayers_[2 + index];
}
inline LayerPtr getConfInputLayer(size_t index) {
return inputLayers_[2 + inputNum_ + index];
}
protected:
size_t numClasses_;
real overlapThreshold_;
real negPosRatio_;
real negOverlap_;
size_t inputNum_;
size_t backgroundId_;
real locLoss_;
real confLoss_;
size_t numPriors_;
size_t numMatches_;
size_t numNegs_;
size_t numConf_;
size_t locSizeSum_;
size_t confSizeSum_;
vector<vector<int>> allMatchIndices_;
vector<vector<int>> allNegIndices_;
MatrixPtr locGTData_;
IVectorPtr confGTData_;
MatrixPtr locBuffer_;
MatrixPtr confBuffer_;
MatrixPtr locDiff_;
MatrixPtr confProb_;
MatrixPtr labelCpuValue_;
MatrixPtr priorCpuValue_;
MatrixPtr locCpuBuffer_;
MatrixPtr confCpuBuffer_;
MatrixPtr locTmpBuffer_;
MatrixPtr confTmpBuffer_;
};
} // namespace paddle
......@@ -45,6 +45,13 @@ add_unittest_without_exec(test_PriorBox
add_test(NAME test_PriorBox
COMMAND test_PriorBox)
################# test_DetectionOutput #######################
add_unittest_without_exec(test_DetectionOutput
test_DetectionOutput.cpp
LayerGradUtil.cpp)
add_test(NAME test_DetectionOutput
COMMAND test_DetectionOutput)
################# test_ConvUnify #######################
add_unittest_without_exec(test_ConvUnify
test_ConvUnify.cpp
......
......@@ -387,6 +387,31 @@ void initDataLayer(TestConfig testConf,
data.value->sigmoid(*data.value);
data.grad->zeroMem();
break;
case INPUT_SELF_DEFINE_DATA: {
size_t height = testConf.inputDefs[i].selfDefinedData->getHeight();
size_t width = testConf.inputDefs[i].selfDefinedData->getWidth();
CHECK_GT(static_cast<int>(height), 0);
CHECK_GT(static_cast<int>(width), 0);
data.value = Matrix::create(height, width, false, useGpu);
data.grad = Matrix::create(height, width, false, useGpu);
data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData);
data.grad->zeroMem();
const std::vector<int>& labelSeqStartPositions =
testConf.inputDefs[i].labelSeqStartPositions;
if (labelSeqStartPositions.size() != 0) {
CHECK(!sequenceStartPositions);
CHECK_GE(static_cast<int>(labelSeqStartPositions.size()), 2);
sequenceStartPositions =
ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu);
sequenceStartPositions->copyFrom(labelSeqStartPositions.data(),
labelSeqStartPositions.size(),
useGpu);
data.sequenceStartPositions = sequenceStartPositions;
}
break;
}
default:
LOG(FATAL) << " unknown inputType ";
return;
......
......@@ -31,7 +31,8 @@ enum InputType {
INPUT_SEQUENCE_LABEL,
INPUT_SPARSE_NON_VALUE_DATA,
INPUT_SPARSE_FLOAT_VALUE_DATA,
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_DENSE_DIM_DATA, // using sequence length to init dense data
INPUT_SELF_DEFINE_DATA,  // supports user-defined input values
};
struct ParaSparse {
......@@ -66,6 +67,7 @@ struct InputDef {
bool isStatic;
std::vector<int> labelInitValue;
std::vector<int> labelSeqStartPositions;
MatrixPtr selfDefinedData;
InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) {
inputType = type;
......@@ -76,6 +78,20 @@ struct InputDef {
isStatic = false;
}
InputDef(InputType type,
string nameIn,
MatrixPtr selfDefinedData,
std::vector<int> selfDefinedSeqStartPos = {})
: labelSeqStartPositions(selfDefinedSeqStartPos),
selfDefinedData(selfDefinedData) {
inputType = type;
name = nameIn;
dim = 0;
sparse = {""};
paraSize = 0;
isStatic = false;
}
InputDef(InputType type,
string nameIn,
size_t dimIn,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
// Do one forward pass of the detection output layer and check to see if
// its output matches the given result
void doOneDetectionOutputTest(MatrixPtr& inputLoc,
MatrixPtr& inputConf,
MatrixPtr& inputPriorBox,
size_t feature_map_width,
size_t feature_map_height,
real nms_threshold,
bool use_gpu,
MatrixPtr& result) {
// Setting up the detection output layer
TestConfig configt;
configt.layerConfig.set_type("detection_output");
LayerInputConfig* input = configt.layerConfig.add_inputs();
configt.layerConfig.add_inputs();
configt.layerConfig.add_inputs();
DetectionOutputConfig* detOutput = input->mutable_detection_output_conf();
detOutput->set_width(feature_map_width);
detOutput->set_height(feature_map_height);
detOutput->set_nms_threshold(nms_threshold);
detOutput->set_num_classes(2);
detOutput->set_nms_top_k(20);
detOutput->set_keep_top_k(10);
detOutput->set_background_id(0);
detOutput->set_confidence_threshold(0.01);
detOutput->set_input_num(1);
configt.inputDefs.push_back({INPUT_DATA_TARGET, "priorbox", 32, 0});
configt.inputDefs.push_back({INPUT_DATA, "input_loc", 16, 0});
configt.inputDefs.push_back({INPUT_DATA, "input_conf", 8, 0});
// data layer initialize
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(
configt, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu);
dataLayers[0]->getOutputValue()->copyFrom(*inputPriorBox);
dataLayers[1]->getOutputValue()->copyFrom(*inputLoc);
dataLayers[2]->getOutputValue()->copyFrom(*inputConf);
// test layer initialize
bool store_FLAGS_use_gpu = FLAGS_use_gpu;
FLAGS_use_gpu = use_gpu;
std::vector<ParameterPtr> parameters;
LayerPtr detectionOutputLayer;
initTestLayer(configt, &layerMap, &parameters, &detectionOutputLayer);
FLAGS_use_gpu = store_FLAGS_use_gpu;
detectionOutputLayer->forward(PASS_GC);
checkMatrixEqual(detectionOutputLayer->getOutputValue(), result);
}
TEST(Layer, detectionOutputLayerFwd) {
bool useGpu = false;
// CPU case 1.
MatrixPtr inputLoc;
MatrixPtr inputConf;
MatrixPtr inputPriorBox;
MatrixPtr result, result2, result3, result4;
real nmsThreshold = 0.01;
real inputLocData[] = {0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1};
real inputConfData[] = {0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6};
real inputPriorBoxData[] = {0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2,
0.2, 0.2, 0.6, 0.6, 0.1, 0.1, 0.2, 0.2,
0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2,
0.4, 0.4, 0.8, 0.8, 0.1, 0.1, 0.2, 0.2};
real resultData[] = {
0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031};
inputLoc = Matrix::create(1, 16, false, useGpu);
inputConf = Matrix::create(1, 8, false, useGpu);
inputPriorBox = Matrix::create(1, 32, false, useGpu);
result = Matrix::create(1, 7, false, useGpu);
inputLoc->setData(inputLocData);
inputConf->setData(inputConfData);
inputPriorBox->setData(inputPriorBoxData);
result->setData(resultData);
doOneDetectionOutputTest(inputLoc,
inputConf,
inputPriorBox,
/* feature_map_width */ 1,
/* feature_map_height */ 1,
nmsThreshold,
useGpu,
result);
// CPU case 2.
nmsThreshold = 0.2;
result2 = Matrix::create(2, 7, false, useGpu);
real resultData2[] = {0,
1,
0.68997443,
0.099959746,
0.099959746,
0.50804031,
0.50804031,
0,
1,
0.59868765,
0.29995975,
0.29995975,
0.70804024,
0.70804024};
result2->setData(resultData2);
doOneDetectionOutputTest(inputLoc,
inputConf,
inputPriorBox,
/* feature_map_width */ 1,
/* feature_map_height */ 1,
nmsThreshold,
useGpu,
result2);
#ifndef PADDLE_ONLY_CPU
// GPU case 1.
useGpu = true;
inputLoc = Matrix::create(1, 16, false, useGpu);
inputConf = Matrix::create(1, 8, false, useGpu);
inputPriorBox = Matrix::create(1, 32, false, useGpu);
inputLoc->copyFrom(inputLocData, 16);
inputConf->copyFrom(inputConfData, 8);
inputPriorBox->copyFrom(inputPriorBoxData, 32);
nmsThreshold = 0.01;
result3 = Matrix::create(1, 7, false, useGpu);
result3->copyFrom(resultData, 7);
doOneDetectionOutputTest(inputLoc,
inputConf,
inputPriorBox,
/* feature_map_width */ 1,
/* feature_map_height */ 1,
nmsThreshold,
useGpu,
result3);
// GPU case 2.
nmsThreshold = 0.2;
result4 = Matrix::create(2, 7, false, useGpu);
result4->copyFrom(resultData2, 14);
doOneDetectionOutputTest(inputLoc,
inputConf,
inputPriorBox,
/* feature_map_width */ 1,
/* feature_map_height */ 1,
nmsThreshold,
useGpu,
result4);
#endif
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
return RUN_ALL_TESTS();
}
......@@ -1692,6 +1692,70 @@ TEST(Layer, smooth_l1) {
}
}
TEST(Layer, multibox_loss) {
TestConfig config;
config.layerConfig.set_type("multibox_loss");
config.biasSize = 0;
LayerInputConfig* input = config.layerConfig.add_inputs();
MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf();
multiboxLoss->set_num_classes(21);
multiboxLoss->set_input_num(1);
multiboxLoss->set_overlap_threshold(0.5);
multiboxLoss->set_neg_pos_ratio(3);
multiboxLoss->set_neg_overlap(0.5);
multiboxLoss->set_background_id(0);
multiboxLoss->set_height(3);
multiboxLoss->set_width(3);
size_t gtNum = 1;
MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false);
labelValue->randomizeUniform();
labelValue->add(-0.5);
labelValue->sigmoid(*labelValue);
real* labelData = labelValue->getData();
size_t labelWidth = labelValue->getWidth();
for (size_t i = 0; i < gtNum; ++i) {
*(labelData + i * labelWidth) = std::rand() % 20 + 1;
*(labelData + i * labelWidth + 1) = 0.400259;
*(labelData + i * labelWidth + 2) = 0.377857;
*(labelData + i * labelWidth + 3) = 0.525712;
*(labelData + i * labelWidth + 4) = 0.519368;
}
vector<int> seqStartPositions(gtNum + 1, 0);
for (size_t i = 1; i <= gtNum; ++i) {
seqStartPositions[i] = i;
}
// Ensure at least one matched bbox
MatrixPtr priorValue = Matrix::create(1, 72, false, false);
priorValue->randomizeUniform();
priorValue->add(-0.5);
priorValue->sigmoid(*priorValue);
real* priorData = priorValue->getData();
*(priorData) = 0.424811;
*(priorData + 1) = 0.397059;
*(priorData + 2) = 0.538905;
*(priorData + 3) = 0.447091;
*(priorData + 4) = 0.425720;
*(priorData + 5) = 0.515228;
*(priorData + 6) = 0.519452;
*(priorData + 7) = 0.591065;
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}});
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions});
config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0});
config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "multibox_loss", 1, false, useGpu, false);
}
}
TEST(Layer, TransLayer) {
TestConfig config;
const int height = 128;
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once
#include <cublas_v2.h>
#include "paddle/platform/dynamic_loader.h"
#include <dlfcn.h>
#include <mutex>
#include "paddle/platform/dynload/dynamic_loader.h"
namespace paddle {
namespace platform {
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once
#include <cudnn.h>
#include "paddle/platform/dynamic_loader.h"
#include <dlfcn.h>
#include <mutex>
#include "paddle/platform/dynload/dynamic_loader.h"
namespace paddle {
namespace platform {
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#pragma once
#include <curand.h>
#include "paddle/platform/dynamic_loader.h"
#include <dlfcn.h>
#include <mutex>
#include "paddle/platform/dynload/dynamic_loader.h"
namespace paddle {
namespace platform {
......
......@@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "dynamic_loader.h"
#include "paddle/platform/dynload/dynamic_loader.h"
#include <dlfcn.h>
#include <memory>
#include <mutex>
#include <string>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/framework/enforce.h"
DEFINE_string(cudnn_dir, "",
"Specify path for loading libcudnn.so. For instance, "
......@@ -72,13 +73,12 @@ static inline void GetDsoHandleFromDefaultPath(std::string& dso_path,
*dso_handle = dlopen(dso_path.c_str(), dynload_flags);
if (nullptr == *dso_handle) {
if (dso_path == "libcudnn.dylib") {
LOG(FATAL)
<< "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n" // NOLINT
<< "For instance, sudo tar -xzf "
"cudnn-7.5-osx-x64-v5.0-ga.tgz -C " // NOLINT
<< "/usr/local \n sudo chmod a+r "
"/usr/local/cuda/include/cudnn.h " // NOLINT
<< "/usr/local/cuda/lib/libcudnn*";
PADDLE_ENFORCE(false,  // must be false so the error message actually fires
"Note: [Recommend] copy cudnn into /usr/local/cuda/ \n "
"For instance, sudo tar -xzf "
"cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo "
"chmod a+r /usr/local/cuda/include/cudnn.h "
"/usr/local/cuda/lib/libcudnn*");
}
}
}
......@@ -106,22 +106,15 @@ static inline void GetDsoHandleFromSearchPath(const std::string& search_root,
GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags);
}
}
CHECK(nullptr != *dso_handle) << "Failed to find dynamic library: " << dlPath
<< " (" << dlerror() << ") \n"
<< "Please specify its path correctly using "
"following ways: \n"
<< "Method. set environment variable "
"LD_LIBRARY_PATH on Linux or "
<< "DYLD_LIBRARY_PATH on Mac OS. \n"
<< "For instance, issue command: export "
"LD_LIBRARY_PATH=... \n"
<< "Note: After Mac OS 10.11, using the "
"DYLD_LIBRARY_PATH is impossible "
<< "unless System Integrity Protection (SIP) "
"is disabled.";
PADDLE_ENFORCE(nullptr != *dso_handle,
"Failed to find dynamic library: %s ( %s ) \n Please specify "
"its path correctly using following ways: \n Method. set "
"environment variable LD_LIBRARY_PATH on Linux or "
"DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: "
"export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
"using the DYLD_LIBRARY_PATH is impossible unless System "
"Integrity Protection (SIP) is disabled.",
dlPath, dlerror());
}
void GetCublasDsoHandle(void** dso_handle) {
......
......@@ -266,6 +266,29 @@ message PadConfig {
repeated uint32 pad_w = 4;
}
message MultiBoxLossConfig {
required uint32 num_classes = 1;
required float overlap_threshold = 2;
required float neg_pos_ratio = 3;
required float neg_overlap = 4;
required uint32 background_id = 5;
required uint32 input_num = 6;
optional uint32 height = 7 [default = 1];
optional uint32 width = 8 [default = 1];
}
message DetectionOutputConfig {
required uint32 num_classes = 1;
required float nms_threshold = 2;
required uint32 nms_top_k = 3;
required uint32 background_id = 4;
required uint32 input_num = 5;
required uint32 keep_top_k = 6;
required float confidence_threshold = 7;
optional uint32 height = 8 [default = 1];
optional uint32 width = 9 [default = 1];
}
message LayerInputConfig {
required string input_layer_name = 1;
optional string input_parameter_name = 2;
......@@ -284,6 +307,8 @@ message LayerInputConfig {
optional PriorBoxConfig priorbox_conf = 13;
optional PadConfig pad_conf = 14;
optional RowConvConfig row_conv_conf = 15;
optional MultiBoxLossConfig multibox_loss_conf = 16;
optional DetectionOutputConfig detection_output_conf = 17;
}
message LayerConfig {
......
......@@ -1674,6 +1674,52 @@ class PriorBoxLayer(LayerBase):
self.config.size = size
@config_layer('multibox_loss')
class MultiBoxLossLayer(LayerBase):
def __init__(self, name, inputs, input_num, num_classes, overlap_threshold,
neg_pos_ratio, neg_overlap, background_id, **xargs):
super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0,
inputs)
config_assert(
len(inputs) == (input_num * 2 + 2),
'MultiBoxLossLayer does not have enough inputs')
config_assert(num_classes > background_id,
'The number of classes must be greater than the background ID')
self.config.inputs[0].multibox_loss_conf.num_classes = num_classes
self.config.inputs[
0].multibox_loss_conf.overlap_threshold = overlap_threshold
self.config.inputs[0].multibox_loss_conf.neg_pos_ratio = neg_pos_ratio
self.config.inputs[0].multibox_loss_conf.neg_overlap = neg_overlap
self.config.inputs[0].multibox_loss_conf.background_id = background_id
self.config.inputs[0].multibox_loss_conf.input_num = input_num
self.config.size = 1
@config_layer('detection_output')
class DetectionOutputLayer(LayerBase):
def __init__(self, name, inputs, size, input_num, num_classes,
nms_threshold, nms_top_k, keep_top_k, confidence_threshold,
background_id, **xargs):
super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0,
inputs)
config_assert(
len(inputs) == (input_num * 2 + 1),
'DetectionOutputLayer does not have enough inputs')
config_assert(num_classes > background_id,
'The number of classes must be greater than the background ID')
self.config.inputs[0].detection_output_conf.num_classes = num_classes
self.config.inputs[
0].detection_output_conf.nms_threshold = nms_threshold
self.config.inputs[0].detection_output_conf.nms_top_k = nms_top_k
self.config.inputs[0].detection_output_conf.keep_top_k = keep_top_k
self.config.inputs[
0].detection_output_conf.confidence_threshold = confidence_threshold
self.config.inputs[
0].detection_output_conf.background_id = background_id
self.config.inputs[0].detection_output_conf.input_num = input_num
self.config.size = size
@config_layer('data')
class DataLayer(LayerBase):
def __init__(self, name, size, height=None, width=None, device=None):
......
......@@ -115,6 +115,8 @@ __all__ = [
'print_layer',
'priorbox_layer',
'cross_channel_norm_layer',
'multibox_loss_layer',
'detection_output_layer',
'spp_layer',
'pad_layer',
'eos_layer',
......@@ -195,6 +197,8 @@ class LayerType(object):
PRINT_LAYER = 'print'
PRIORBOX_LAYER = 'priorbox'
MULTIBOX_LOSS_LAYER = 'multibox_loss'
DETECTION_OUTPUT_LAYER = 'detection_output'
CTC_LAYER = 'ctc'
WARP_CTC_LAYER = 'warp_ctc'
......@@ -1041,6 +1045,158 @@ def priorbox_layer(input,
size=size)
@wrap_name_default("multibox_loss")
def multibox_loss_layer(input_loc,
input_conf,
priorbox,
label,
num_classes,
overlap_threshold=0.5,
neg_pos_ratio=3.0,
neg_overlap=0.5,
background_id=0,
name=None):
"""
Compute the location loss and the confidence loss for SSD.
:param name: The layer name.
:type name: basestring
:param input_loc: The input predict locations.
:type input_loc: LayerOutput | List of LayerOutput
:param input_conf: The input priorbox confidence.
:type input_conf: LayerOutput | List of LayerOutput
:param priorbox: The input priorbox location and the variance.
:type priorbox: LayerOutput
:param label: The input label.
:type label: LayerOutput
:param num_classes: The number of classes.
:type num_classes: int
:param overlap_threshold: The bbox overlap threshold.
:type overlap_threshold: float
:param neg_pos_ratio: The ratio of the negative bbox to the positive bbox.
:type neg_pos_ratio: float
:param neg_overlap: The negative bbox overlap threshold.
:type neg_overlap: float
:param background_id: The background class index.
:type background_id: int
:return: LayerOutput
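The example usage below is a minimal sketch; the layers loc, conf,
priorbox and label are placeholders for real network outputs:
.. code-block:: python

   loss = multibox_loss_layer(input_loc=loc,
                              input_conf=conf,
                              priorbox=priorbox,
                              label=label,
                              num_classes=21)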
"""
if isinstance(input_loc, LayerOutput):
input_loc = [input_loc]
assert isinstance(input_loc, collections.Sequence) # list or tuple
for each in input_loc:
assert isinstance(each, LayerOutput)
input_loc_num = len(input_loc)
if isinstance(input_conf, LayerOutput):
input_conf = [input_conf]
assert isinstance(input_conf, collections.Sequence) # list or tuple
for each in input_conf:
assert isinstance(each, LayerOutput)
input_conf_num = len(input_conf)
# Check the input layer number.
assert input_loc_num == input_conf_num
inputs = [priorbox.name, label.name]
inputs.extend([l.name for l in input_loc])
inputs.extend([l.name for l in input_conf])
parents = [priorbox, label]
parents.extend(input_loc)
parents.extend(input_conf)
Layer(
name=name,
type=LayerType.MULTIBOX_LOSS_LAYER,
inputs=inputs,
input_num=input_loc_num,
num_classes=num_classes,
overlap_threshold=overlap_threshold,
neg_pos_ratio=neg_pos_ratio,
neg_overlap=neg_overlap,
background_id=background_id)
return LayerOutput(
name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1)
@wrap_name_default("detection_output")
def detection_output_layer(input_loc,
input_conf,
priorbox,
num_classes,
nms_threshold=0.45,
nms_top_k=400,
keep_top_k=200,
confidence_threshold=0.01,
background_id=0,
name=None):
"""
Apply non-maximum suppression (NMS) to the network output and compute the
predicted bounding box locations.
:param name: The layer name.
:type name: basestring
:param input_loc: The input predict locations.
:type input_loc: LayerOutput | List of LayerOutput.
:param input_conf: The input priorbox confidence.
:type input_conf: LayerOutput | List of LayerOutput.
:param priorbox: The input priorbox location and the variance.
:type priorbox: LayerOutput
:param num_classes: The number of classes.
:type num_classes: int
:param nms_threshold: The non-maximum suppression threshold.
:type nms_threshold: float
:param nms_top_k: The number of bboxes kept from the NMS output.
:type nms_top_k: int
:param keep_top_k: The number of bboxes kept in the layer's output.
:type keep_top_k: int
:param confidence_threshold: The classification confidence threshold.
:type confidence_threshold: float
:param background_id: The background class index.
:type background_id: int
:return: LayerOutput
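The example usage below is a minimal sketch; the layers loc, conf and
priorbox are placeholders for real network outputs:
.. code-block:: python

   detout = detection_output_layer(input_loc=loc,
                                   input_conf=conf,
                                   priorbox=priorbox,
                                   num_classes=21)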
"""
if isinstance(input_loc, LayerOutput):
input_loc = [input_loc]
assert isinstance(input_loc, collections.Sequence) # list or tuple
for each in input_loc:
assert isinstance(each, LayerOutput)
input_loc_num = len(input_loc)
if isinstance(input_conf, LayerOutput):
input_conf = [input_conf]
assert isinstance(input_conf, collections.Sequence) # list or tuple
for each in input_conf:
assert isinstance(each, LayerOutput)
input_conf_num = len(input_conf)
# Check the input layer number.
assert input_loc_num == input_conf_num
inputs = [priorbox.name]
inputs.extend([l.name for l in input_loc])
inputs.extend([l.name for l in input_conf])
parents = [priorbox]
parents.extend(input_loc)
parents.extend(input_conf)
size = keep_top_k * 7
Layer(
name=name,
type=LayerType.DETECTION_OUTPUT_LAYER,
inputs=inputs,
size=size,
input_num=input_loc_num,
num_classes=num_classes,
nms_threshold=nms_threshold,
nms_top_k=nms_top_k,
keep_top_k=keep_top_k,
confidence_threshold=confidence_threshold,
background_id=background_id)
return LayerOutput(
name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size)
@wrap_name_default("cross_channel_norm")
def cross_channel_norm_layer(input, name=None, param_attr=None):
"""
......
......@@ -6,6 +6,6 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cos
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv)
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer)
export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input_loc"
type: "data"
size: 16
active_type: ""
height: 16
width: 1
}
layers {
name: "input_conf"
type: "data"
size: 8
active_type: ""
height: 1
width: 8
}
layers {
name: "priorbox"
type: "data"
size: 32
active_type: ""
height: 4
width: 8
}
layers {
name: "test_detection_output"
type: "detection_output"
size: 1400
active_type: ""
inputs {
input_layer_name: "priorbox"
detection_output_conf {
num_classes: 21
nms_threshold: 0.45
nms_top_k: 400
background_id: 0
input_num: 1
keep_top_k: 200
confidence_threshold: 0.01
}
}
inputs {
input_layer_name: "input_loc"
}
inputs {
input_layer_name: "input_conf"
}
}
input_layer_names: "priorbox"
input_layer_names: "input_loc"
input_layer_names: "input_conf"
output_layer_names: "test_detection_output"
sub_models {
name: "root"
layer_names: "input_loc"
layer_names: "input_conf"
layer_names: "priorbox"
layer_names: "test_detection_output"
input_layer_names: "priorbox"
input_layer_names: "input_loc"
input_layer_names: "input_conf"
output_layer_names: "test_detection_output"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "input_loc"
type: "data"
size: 16
active_type: ""
height: 16
width: 1
}
layers {
name: "input_conf"
type: "data"
size: 8
active_type: ""
height: 1
width: 8
}
layers {
name: "priorbox"
type: "data"
size: 32
active_type: ""
height: 4
width: 8
}
layers {
name: "label"
type: "data"
size: 24
active_type: ""
height: 4
width: 6
}
layers {
name: "test_multibox_loss"
type: "multibox_loss"
size: 1
active_type: ""
inputs {
input_layer_name: "priorbox"
multibox_loss_conf {
num_classes: 21
overlap_threshold: 0.5
neg_pos_ratio: 3.0
neg_overlap: 0.5
background_id: 0
input_num: 1
}
}
inputs {
input_layer_name: "label"
}
inputs {
input_layer_name: "input_loc"
}
inputs {
input_layer_name: "input_conf"
}
}
input_layer_names: "priorbox"
input_layer_names: "label"
input_layer_names: "input_loc"
input_layer_names: "input_conf"
output_layer_names: "test_multibox_loss"
sub_models {
name: "root"
layer_names: "input_loc"
layer_names: "input_conf"
layer_names: "priorbox"
layer_names: "label"
layer_names: "test_multibox_loss"
input_layer_names: "priorbox"
input_layer_names: "label"
input_layer_names: "input_loc"
input_layer_names: "input_conf"
output_layer_names: "test_multibox_loss"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
input_loc = data_layer(name='input_loc', size=16, height=16, width=1)
input_conf = data_layer(name='input_conf', size=8, height=1, width=8)
priorbox = data_layer(name='priorbox', size=32, height=4, width=8)
detout = detection_output_layer(
input_loc=input_loc,
input_conf=input_conf,
priorbox=priorbox,
num_classes=21,
nms_threshold=0.45,
nms_top_k=400,
keep_top_k=200,
confidence_threshold=0.01,
background_id=0,
name='test_detection_output')
outputs(detout)
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
input_loc = data_layer(name='input_loc', size=16, height=16, width=1)
input_conf = data_layer(name='input_conf', size=8, height=1, width=8)
priorbox = data_layer(name='priorbox', size=32, height=4, width=8)
label = data_layer(name='label', size=24, height=4, width=6)
multibox_loss = multibox_loss_layer(
input_loc=input_loc,
input_conf=input_conf,
priorbox=priorbox,
label=label,
num_classes=21,
overlap_threshold=0.5,
neg_pos_ratio=3.0,
neg_overlap=0.5,
background_id=0,
name='test_multibox_loss')
outputs(multibox_loss)