diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index c7b017bc07b25bc606fd838a5fb9d3715f4faecb..4f4a9187bcbe8ef902e923622552909808b121d6 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -445,6 +445,11 @@ smooth_l1_cost .. autoclass:: paddle.v2.layer.smooth_l1_cost :noindex: +multibox_loss +-------------- +.. autoclass:: paddle.v2.layer.multibox_loss + :noindex: + Check Layer ============ @@ -468,3 +473,11 @@ prelu -------- .. autoclass:: paddle.v2.layer.prelu :noindex: + +Detection output Layer +====================== + +detection_output +---------------- +.. autoclass:: paddle.v2.layer.detection_output + :noindex: diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index baad38e3c1eb3041923e297b2ebae89e68e185a4..dcd70d285174a600b77523b606fbffc832ea68c3 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,9 +2,13 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) +cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) proto_library(attr_type SRCS attr_type.proto) proto_library(op_proto SRCS op_proto.proto DEPS attr_type) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) + +proto_library(op_desc SRCS op_desc.proto DEPS attr_type) +cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) diff --git a/paddle/framework/op_desc.proto b/paddle/framework/op_desc.proto new file mode 100644 index 0000000000000000000000000000000000000000..89497f3c16bc28aa93b25a83c1f2eccafdf1c5b4 --- /dev/null +++ b/paddle/framework/op_desc.proto @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +syntax="proto2"; +package paddle.framework; + +import "attr_type.proto"; + +// AttrDesc is used to describe Attributes of an Operator. It contain's +// name, type, and value of Attribute. +// +// e.g, for scale=3.0: name=scala, type=AttrType.FLOAT, value=3.0 +message AttrDesc { + required string name = 1; + required AttrType type = 2; + optional int32 i = 3; + optional float f = 4; + optional string s = 5; + repeated int32 ints = 6; + repeated float floats = 7; + repeated string strings = 8; +}; + +// Protocol Message to describe an Operator. +// +// In PaddlePaddle, Operator is used to do a certain computation such +// as "add", "sub", "cosine", etc. +// (1) Operator needs to know the input and output variable names. +// (2) Some ops may have special attributes such as "scale" in "CosineOp". +// +// 3rd-party language can build this proto message and call +// AddOp(const OpDesc& op_desc) of Paddle core to create an Operator. +message OpDesc { + // input names of this Operator. + repeated string inputs = 1; + + // output names of this Operator. + repeated string outputs = 2; + + // type of this Operator, such as "add", "sub", "fc". + required string type = 3; + + // Attributes of this Operator. e.g., scale=3.0 in cosine op. + repeated AttrDesc attrs = 4; +}; \ No newline at end of file diff --git a/paddle/framework/op_desc_test.cc b/paddle/framework/op_desc_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..d0c52523b64725ee11c281b086f9ffed6a09e787 --- /dev/null +++ b/paddle/framework/op_desc_test.cc @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +TEST(OpDesc, Create) { + paddle::framework::OpDesc op_desc; + op_desc.set_type("add"); + op_desc.add_inputs("X"); + op_desc.add_inputs("Y"); + op_desc.add_outputs("Z"); + + auto attr = op_desc.mutable_attrs()->Add(); + attr->set_type(paddle::framework::AttrType::FLOAT); + attr->set_f(3.14); + + // required field name is not set, so IsInitialized should be false. + ASSERT_FALSE(op_desc.IsInitialized()); + + attr->set_name("add"); + // after all required fields are set, IsInitialized should be true now. + ASSERT_TRUE(op_desc.IsInitialized()); +} \ No newline at end of file diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 067f2a85264b462e96b65946b60b046172765a1d..ce5d98b04e6b53fcedc4fc4610d9390e64846b2a 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -14,33 +14,39 @@ limitations under the License. */ #pragma once +#include +#include +#include "paddle/framework/ddim.h" +#include "paddle/framework/enforce.h" +#include "paddle/memory/memory.h" +#include "paddle/platform/place.h" + namespace paddle { namespace framework { class Tensor { - using paddle::platform::Place; - using paddle::platform::get_place; - public: template const T* data() const { - PADDLE_ASSERT(holder_ != nullptr, - "Tensor::data must be called after Tensor::mutable_data"); - return static_cast(holder->Ptr()); + PADDLE_ENFORCE(holder_ != nullptr, + "Tensor::data must be called after Tensor::mutable_data."); + return static_cast(holder_->Ptr()); } template ::value>::type> - T* mutable_data(DDim dims, Place place) { - if (holder_ == nullptr || holder_->Place() != place || - holder_->Size() < dims.product() * sizeof(T)) { - holder_.reset(new PlaceholderImpl(place, dims.product() * sizeof(T))); + typename std::enable_if::value>::type* = nullptr> + T* mutable_data(DDim dims, paddle::platform::Place place) { + if (holder_ == nullptr || + !(holder_->Place() == + place) /* some versions of boost::variant don't have operator!= */ + || holder_->Size() < product(dims) * sizeof(T)) { + holder_.reset(new PlaceholderImpl(place, product(dims) * sizeof(T))); } return static_cast(holder_->Ptr()); } template ::value>::type> + typename std::enable_if::value>::type* = nullptr> T* mutable_data(DDim dims) { return mutable_data(dims, paddle::platform::get_place()); } @@ -51,27 +57,41 @@ class Tensor { struct Placeholder { virtual ~Placeholder() {} virtual void* Ptr() const = 0; - virtual Place Place() const = 0; + virtual paddle::platform::Place Place() const = 0; virtual size_t Size() const = 0; }; template struct PlaceholderImpl : public Placeholder { - PlaceholderImpl(Place pl, size_t size) - : ptr_(paddle::memory::Alloc(pl, size), paddle::memory::Deleter(pl)), - place_(pl), + private: + class Deleter { + public: + Deleter(platform::Place place) : place_(place) {} + void operator()(T* ptr) { + paddle::memory::Free(place_, static_cast(ptr)); + } + + private: + paddle::platform::Place place_; + }; + + public: + PlaceholderImpl(paddle::platform::Place place, size_t size) + : ptr_(static_cast(paddle::memory::Alloc(place, size)), + Deleter(place)), + place_(place), size_(size) {} virtual void* Ptr() const { return static_cast(ptr_.get()); } virtual size_t Size() const { return size_; } - virtual Place Place() const { return place_; } + virtual paddle::platform::Place Place() const { return place_; } - std::unique_ptr ptr_; - Place place_; // record the place of ptr_. - size_t size_; // size of the memory block. + std::unique_ptr ptr_; + paddle::platform::Place place_; // record the place of ptr_. + size_t size_; // size of the memory block. }; - std::unique_ptr holder_; // holds the memory block if allocated. + std::shared_ptr holder_; // holds the memory block if allocated. }; } // namespace framework diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..727d81f8d72e39ec564c42a48bf7ff64204adfff --- /dev/null +++ b/paddle/framework/tensor_test.cc @@ -0,0 +1,85 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/framework/tensor.h" +#include +#include + +TEST(Tensor, ASSERT) { + paddle::framework::Tensor cpu_tensor; + + bool caught = false; + try { + const double* p __attribute__((unused)) = cpu_tensor.data(); + } catch (paddle::framework::EnforceNotMet err) { + caught = true; + std::string msg = "Tensor::data must be called after Tensor::mutable_data."; + const char* what = err.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + ASSERT_TRUE(caught); +} + +/* mutable_data() is not tested at present + because Memory::Alloc() and Memory::Free() have not been ready. + +TEST(Tensor, MutableData) { + using namespace paddle::framework; + using namespace paddle::platform; + { + Tensor cpu_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = cpu_tensor.mutable_data(make_ddim({1, 2, 3}), CPUPlace()); + EXPECT_NE(p1, nullptr); + // set cpu_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = cpu_tensor.mutable_data(make_ddim({3, 4})); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set cpu_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = cpu_tensor.mutable_data(make_ddim({2, 2, 3})); + EXPECT_EQ(p1, p2); + // set cpu_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = cpu_tensor.mutable_data(make_ddim({2, 2})); + EXPECT_EQ(p1, p2); + } + + { + Tensor gpu_tensor; + float* p1 = nullptr; + float* p2 = nullptr; + // initialization + p1 = gpu_tensor.mutable_data(make_ddim({1, 2, 3}), GPUPlace()); + EXPECT_NE(p1, nullptr); + // set gpu_tensor a new dim with large size + // momery is supposed to be re-allocated + p2 = gpu_tensor.mutable_data(make_ddim({3, 4})); + EXPECT_NE(p2, nullptr); + EXPECT_NE(p1, p2); + // set gpu_tensor a new dim with same size + // momery block is supposed to be unchanged + p1 = gpu_tensor.mutable_data(make_ddim({2, 2, 3})); + EXPECT_EQ(p1, p2); + // set gpu_tensor a new dim with smaller size + // momery block is supposed to be unchanged + p2 = gpu_tensor.mutable_data(make_ddim({2, 2})); + EXPECT_EQ(p1, p2); + } +} +*/ diff --git a/paddle/gserver/layers/DetectionOutputLayer.cpp b/paddle/gserver/layers/DetectionOutputLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ab838e191314ab25469631626c0b0564d7fffda --- /dev/null +++ b/paddle/gserver/layers/DetectionOutputLayer.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "DetectionOutputLayer.h" + +namespace paddle { + +REGISTER_LAYER(detection_output, DetectionOutputLayer); + +bool DetectionOutputLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + auto& layerConf = config_.inputs(0).detection_output_conf(); + numClasses_ = layerConf.num_classes(); + inputNum_ = layerConf.input_num(); + nmsThreshold_ = layerConf.nms_threshold(); + confidenceThreshold_ = layerConf.confidence_threshold(); + nmsTopK_ = layerConf.nms_top_k(); + keepTopK_ = layerConf.keep_top_k(); + backgroundId_ = layerConf.background_id(); + return true; +} + +void DetectionOutputLayer::forward(PassType passType) { + Layer::forward(passType); + size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); + + locSizeSum_ = 0; + confSizeSum_ = 0; + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + locSizeSum_ += inLoc->getElementCnt(); + confSizeSum_ += inConf->getElementCnt(); + } + + Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); + Matrix::resizeOrCreate( + confTmpBuffer_, confSizeSum_ / numClasses_, numClasses_, false, useGpu_); + + size_t locOffset = 0; + size_t confOffset = 0; + auto& layerConf = config_.inputs(0).detection_output_conf(); + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + + size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); + if (!height) height = layerConf.height(); + size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); + if (!width) width = layerConf.width(); + locOffset += appendWithPermute(*inLoc, + height, + width, + locSizeSum_, + locOffset, + batchSize, + *locTmpBuffer_, + kNCHWToNHWC); + confOffset += appendWithPermute(*inConf, + height, + width, + confSizeSum_, + confOffset, + batchSize, + *confTmpBuffer_, + kNCHWToNHWC); + } + CHECK_EQ(locOffset, locSizeSum_ / batchSize); + CHECK_EQ(confOffset, confSizeSum_ / batchSize); + + MatrixPtr priorValue; + if (useGpu_) { + Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false); + Matrix::resizeOrCreate( + confCpuBuffer_, confSizeSum_ / numClasses_, numClasses_, false, false); + MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer()); + Matrix::resizeOrCreate( + priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false); + + locCpuBuffer_->copyFrom(*locTmpBuffer_); + confCpuBuffer_->copyFrom(*confTmpBuffer_); + priorCpuValue_->copyFrom(*priorTmpValue); + + locBuffer_ = locCpuBuffer_; + confBuffer_ = confCpuBuffer_; + priorValue = priorCpuValue_; + } else { + priorValue = getInputValue(*getPriorBoxLayer()); + locBuffer_ = locTmpBuffer_; + confBuffer_ = confTmpBuffer_; + } + confBuffer_->softmax(*confBuffer_); + + size_t numPriors = priorValue->getElementCnt() / 8; + std::vector> allDecodedBBoxes; + for (size_t n = 0; n < batchSize; ++n) { + std::vector decodedBBoxes; + for (size_t i = 0; i < numPriors; ++i) { + size_t priorOffset = i * 8; + size_t locPredOffset = n * numPriors * 4 + i * 4; + std::vector priorBBoxVec; + getBBoxFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVec); + std::vector> priorBBoxVar; + getBBoxVarFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVar); + std::vector locPredData; + for (size_t j = 0; j < 4; ++j) + locPredData.push_back(*(locBuffer_->getData() + locPredOffset + j)); + NormalizedBBox bbox = + decodeBBoxWithVar(priorBBoxVec[0], priorBBoxVar[0], locPredData); + decodedBBoxes.push_back(bbox); + } + allDecodedBBoxes.push_back(decodedBBoxes); + } + + std::vector>> allIndices; + size_t numKept = getDetectionIndices(confBuffer_->getData(), + numPriors, + numClasses_, + backgroundId_, + batchSize, + confidenceThreshold_, + nmsTopK_, + nmsThreshold_, + keepTopK_, + allDecodedBBoxes, + &allIndices); + + resetOutput(numKept, 7); + MatrixPtr outV = getOutputValue(); + getDetectionOutput(confBuffer_->getData(), + numKept, + numPriors, + numClasses_, + batchSize, + allIndices, + allDecodedBBoxes, + *outV); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/DetectionOutputLayer.h b/paddle/gserver/layers/DetectionOutputLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..a232af0a69141e238ae38c519a204cce25e9598d --- /dev/null +++ b/paddle/gserver/layers/DetectionOutputLayer.h @@ -0,0 +1,77 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "DetectionUtil.h" +#include "Layer.h" + +namespace paddle { + +/** + * The detection output layer for a SSD detection task. This layer applies the + * Non-maximum suppression to the all predicted bounding box and keeps the + * Top-K bounding boxes. + * - Input: This layer needs three input layers: The first input layer + * is the priorbox layer. The rest two input layers are convolution + * layers for generating bbox location offset and the classification + * confidence. + * - Output: The predict bounding box locations. + */ + +class DetectionOutputLayer : public Layer { +public: + explicit DetectionOutputLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + + void backward(const UpdateCallback& callback = nullptr) {} + +protected: + inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; } + + inline LayerPtr getLocInputLayer(size_t index) { + return inputLayers_[1 + index]; + } + + inline LayerPtr getConfInputLayer(size_t index) { + return inputLayers_[1 + inputNum_ + index]; + } + +private: + size_t numClasses_; // number of classes + size_t inputNum_; // number of input layers + real nmsThreshold_; + real confidenceThreshold_; + size_t nmsTopK_; + size_t keepTopK_; + size_t backgroundId_; + + size_t locSizeSum_; + size_t confSizeSum_; + + MatrixPtr locBuffer_; + MatrixPtr confBuffer_; + MatrixPtr locTmpBuffer_; + MatrixPtr confTmpBuffer_; + MatrixPtr priorCpuValue_; + MatrixPtr locCpuBuffer_; + MatrixPtr confCpuBuffer_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MultiBoxLossLayer.cpp b/paddle/gserver/layers/MultiBoxLossLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bbf1166dceddb1dbd672c42e8af22c113d2e3f54 --- /dev/null +++ b/paddle/gserver/layers/MultiBoxLossLayer.cpp @@ -0,0 +1,376 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MultiBoxLossLayer.h" +#include +#include +#include "DataLayer.h" + +namespace paddle { + +REGISTER_LAYER(multibox_loss, MultiBoxLossLayer); + +bool MultiBoxLossLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + auto layerConf = config_.inputs(0).multibox_loss_conf(); + numClasses_ = layerConf.num_classes(); + inputNum_ = layerConf.input_num(); + overlapThreshold_ = layerConf.overlap_threshold(); + negPosRatio_ = layerConf.neg_pos_ratio(); + negOverlap_ = layerConf.neg_overlap(); + backgroundId_ = layerConf.background_id(); + return true; +} + +void MultiBoxLossLayer::forward(PassType passType) { + Layer::forward(passType); + size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); + resetOutput(batchSize, 1); + + // all location data and confidence score data + locSizeSum_ = 0; + confSizeSum_ = 0; + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + locSizeSum_ += inLoc->getElementCnt(); + confSizeSum_ += inConf->getElementCnt(); + } + + // locBuffer layout: + // | xmin1 | ymin1 | xmax1 | ymax1 | xmin2 ...... + Matrix::resizeOrCreate(locTmpBuffer_, 1, locSizeSum_, false, useGpu_); + locBuffer_ = locTmpBuffer_; + + // confBuffer layout: + // | class1 score | class2 score | ... |classN score | class1 score | ...... + Matrix::resizeOrCreate(confTmpBuffer_, 1, confSizeSum_, false, useGpu_); + confBuffer_ = confTmpBuffer_; + + // concate location data and confidence score data + size_t locOffset = 0; + size_t confOffset = 0; + auto& layerConf = config_.inputs(0).multibox_loss_conf(); + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLoc = getInputValue(*getLocInputLayer(n)); + const MatrixPtr inConf = getInputValue(*getConfInputLayer(n)); + size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); + if (!height) height = layerConf.height(); + size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); + if (!width) width = layerConf.width(); + locOffset += appendWithPermute(*inLoc, + height, + width, + locSizeSum_, + locOffset, + batchSize, + *locBuffer_, + kNCHWToNHWC); + confOffset += appendWithPermute(*inConf, + height, + width, + confSizeSum_, + confOffset, + batchSize, + *confBuffer_, + kNCHWToNHWC); + } + CHECK_EQ(locOffset, locSizeSum_ / batchSize); + CHECK_EQ(confOffset, confSizeSum_ / batchSize); + + // priorValue layout: + // | xmin1 | ymin1 | xmax1 | ymax1 | xmin1Var | ymin1Var | xmax1Var | ymax1Var + // | xmin2 | ...... + MatrixPtr priorValue; + + // labelValue layout: + // | class1_1 | xmin1_1 | ymin1_1 | xmax1_1 | ymax1_1 | difficult1_1 | ...... + MatrixPtr labelValue; + + // Copy data from GPU to CPU if use GPU + if (useGpu_) { + Matrix::resizeOrCreate(locCpuBuffer_, 1, locSizeSum_, false, false); + Matrix::resizeOrCreate(confCpuBuffer_, 1, confSizeSum_, false, false); + MatrixPtr priorTmpValue = getInputValue(*getPriorBoxLayer()); + Matrix::resizeOrCreate( + priorCpuValue_, 1, priorTmpValue->getElementCnt(), false, false); + MatrixPtr labelTmpValue = getInputValue(*getLabelLayer()); + Matrix::resizeOrCreate(labelCpuValue_, + labelTmpValue->getHeight(), + labelTmpValue->getWidth(), + false, + false); + + locCpuBuffer_->copyFrom(*locTmpBuffer_); + confCpuBuffer_->copyFrom(*confTmpBuffer_); + priorCpuValue_->copyFrom(*priorTmpValue); + labelCpuValue_->copyFrom(*labelTmpValue); + + locBuffer_ = locCpuBuffer_; + confBuffer_ = confCpuBuffer_; + priorValue = priorCpuValue_; + labelValue = labelCpuValue_; + } else { + priorValue = getInputValue(*getPriorBoxLayer()); + labelValue = getInputValue(*getLabelLayer()); + } + + // Get max scores for each prior bbox. Used in negative mining + std::vector> allMaxConfScore; + numPriors_ = priorValue->getElementCnt() / 8; + getMaxConfidenceScores(confBuffer_->getData(), + batchSize, + numPriors_, + numClasses_, + backgroundId_, + &allMaxConfScore); + + // Match prior bbox to groundtruth bbox + Argument label = getInput(*getLabelLayer()); + const int* labelIndex = label.sequenceStartPositions->getData(false); + size_t seqNum = label.getNumSequences(); + numMatches_ = 0; + numNegs_ = 0; + allMatchIndices_.clear(); + allNegIndices_.clear(); + + std::pair retPair = generateMatchIndices(*priorValue, + numPriors_, + *labelValue, + labelIndex, + seqNum, + allMaxConfScore, + batchSize, + overlapThreshold_, + negOverlap_, + negPosRatio_, + &allMatchIndices_, + &allNegIndices_); + numMatches_ = retPair.first; + numNegs_ = retPair.second; + + // BBox location L1 smooth loss + locLoss_ = 0.0; + if (numMatches_ >= 1) { + size_t count = 0; + MatrixPtr locLossOutput; + Matrix::resizeOrCreate(locLossOutput, numMatches_ * 4, 1, false, false); + Matrix::resizeOrCreate(locGTData_, numMatches_ * 4, 1, false, false); + Matrix::resizeOrCreate(locDiff_, numMatches_ * 4, 1, false, false); + locDiff_->zeroMem(); + std::vector locGTData; + + real* locDiffData = locDiff_->getData(); + const real* locBufferData = locBuffer_->getData(); + for (size_t n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; // match none + size_t locOffset = + n * (locBuffer_->getElementCnt() / batchSize) + i * 4; + std::copy(locBufferData + locOffset, + locBufferData + locOffset + 4, + locDiffData + count); + count += 4; + const int gtIdx = allMatchIndices_[n][i]; + size_t priorOffset = i * 8; + std::vector priorBBoxVec; + getBBoxFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVec); + std::vector> priorBBoxVar; + getBBoxVarFromPriorData( + priorValue->getData() + priorOffset, 1, priorBBoxVar); + size_t labelOffset = (labelIndex[n] + gtIdx) * 6; + std::vector gtBBoxVec; + getBBoxFromLabelData(labelValue->getData() + labelOffset, 1, gtBBoxVec); + std::vector gtEncode; + encodeBBoxWithVar( + priorBBoxVec[0], priorBBoxVar[0], gtBBoxVec[0], gtEncode); + locGTData.insert(locGTData.end(), gtEncode.begin(), gtEncode.end()); + } + } + locGTData_->copyFrom(&locGTData[0], numMatches_ * 4); + locLossOutput->smoothL1(*locDiff_, *locGTData_, 0.0); + locLoss_ = locLossOutput->getSum() / numMatches_; + } + + // BBox confidence softmax loss + confLoss_ = 0; + numConf_ = numMatches_ + numNegs_; + if (numConf_ >= 1) { + Matrix::resizeOrCreate(confProb_, numConf_, numClasses_, false, false); + IVector::resizeOrCreate(confGTData_, numConf_, false); + confProb_->zeroMem(); + size_t count = 0; + + std::vector confPredData; + real* confProbData = confProb_->getData(); + const real* confBufferData = confBuffer_->getData(); + for (size_t n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; + size_t labelOffset = (labelIndex[n] + allMatchIndices_[n][i]) * 6; + const int gtLabel = (labelValue->getData() + labelOffset)[0]; + confGTData_->getData()[count] = gtLabel; + size_t confOffset = n * numPriors_ * numClasses_ + i * numClasses_; + std::copy(confBufferData + confOffset, + confBufferData + confOffset + numClasses_, + confProbData + count * numClasses_); + confPredData.reserve(confPredData.size() + numClasses_); + confPredData.insert(confPredData.end(), + confBufferData + confOffset, + confBufferData + confOffset + numClasses_); + ++count; + } + // Negative mining samples + for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { + confGTData_->getData()[count] = backgroundId_; + size_t confOffset = + n * numPriors_ * numClasses_ + allNegIndices_[n][i] * numClasses_; + std::copy(confBufferData + confOffset, + confBufferData + confOffset + numClasses_, + confProbData + count * numClasses_); + confPredData.reserve(confPredData.size() + numClasses_); + confPredData.insert(confPredData.end(), + confBufferData + confOffset, + confBufferData + confOffset + numClasses_); + ++count; + } + } + CHECK_EQ(numConf_, count); + confProb_->softmax(*confProb_); + MatrixPtr confLossOutput; + Matrix::resizeOrCreate(confLossOutput, numConf_, 1, false, false); + confLossOutput->oneHotCrossEntropy(*confProb_, *confGTData_); + confLoss_ = confLossOutput->getSum() / numMatches_; + } + real loss = locLoss_ + confLoss_; + MatrixPtr outV = getOutputValue(); + outV->assign(loss); +} + +void MultiBoxLossLayer::backward(const UpdateCallback& callback) { + size_t batchSize = getInputValue(*getLocInputLayer(0))->getHeight(); + locBuffer_->zeroMem(); + confBuffer_->zeroMem(); + + // Back propagate on location prediction + if (numMatches_ >= 1) { + MatrixPtr locDiffBuffer; + Matrix::resizeOrCreate(locDiffBuffer, numMatches_ * 4, 1, false, false); + locDiffBuffer->smoothL1Bp(*locDiff_, *locGTData_, 0.0); + locDiff_->copyFrom(*locDiffBuffer); + // scale gradient + for (size_t i = 0; i < numMatches_ * 4; ++i) + locDiff_->getData()[i] *= (1. / numMatches_); + // Copy gradient back + size_t count = 0; + const real* locDiffData = locDiff_->getData(); + for (size_t n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; + real* locBufferData = + locBuffer_->getData() + n * numPriors_ * 4 + i * 4; + std::copy(locDiffData + count * 4, + locDiffData + (count + 1) * 4, + locBufferData); + ++count; + } + } + CHECK_EQ(count, numMatches_); + } + + if (numConf_ >= 1) { + for (size_t i = 0; i < numConf_; ++i) + confProb_->getData()[i * numClasses_ + confGTData_->getData()[i]] -= 1; + for (size_t i = 0; i < numConf_ * numClasses_; ++i) + confProb_->getData()[i] *= (1. / numMatches_); + size_t count = 0; + const real* confProbData = confProb_->getData(); + for (size_t n = 0; n < batchSize; ++n) { + for (size_t i = 0; i < numPriors_; ++i) { + if (allMatchIndices_[n][i] == -1) continue; + real* confDiffData = confBuffer_->getData() + + n * numPriors_ * numClasses_ + i * numClasses_; + std::copy(confProbData + count * numClasses_, + confProbData + (count + 1) * numClasses_, + confDiffData); + ++count; + } + for (size_t i = 0; i < allNegIndices_[n].size(); ++i) { + int idx = allNegIndices_[n][i]; + real* confDiffData = confBuffer_->getData() + + n * numPriors_ * numClasses_ + idx * numClasses_; + std::copy(confProbData + count * numClasses_, + confProbData + (count + 1) * numClasses_, + confDiffData); + ++count; + } + } + CHECK_EQ(count, numConf_); + } + if (useGpu_) { + locTmpBuffer_->copyFrom(*locCpuBuffer_); + confTmpBuffer_->copyFrom(*confCpuBuffer_); + locBuffer_ = locTmpBuffer_; + confBuffer_ = confTmpBuffer_; + } + // copy back + size_t locOffset = 0; + size_t confOffset = 0; + auto layerConf = config_.inputs(0).multibox_loss_conf(); + for (size_t n = 0; n < inputNum_; ++n) { + const MatrixPtr inLocG = getInputGrad(*getLocInputLayer(n)); + const MatrixPtr inConfG = getInputGrad(*getConfInputLayer(n)); + size_t height = getInput(*getLocInputLayer(n)).getFrameHeight(); + // only for unittest, there are no width and height information + // when constructing matrix in unittest, so we should + // set the shape in configuration + if (!height) height = layerConf.height(); + size_t width = getInput(*getLocInputLayer(n)).getFrameWidth(); + if (!width) width = layerConf.width(); + + // NHWC to NCHW + MatrixPtr locGBuffer; + Matrix::resizeOrCreate( + locGBuffer, inLocG->getHeight(), inLocG->getWidth(), false, useGpu_); + MatrixPtr confGBuffer; + Matrix::resizeOrCreate( + confGBuffer, inConfG->getHeight(), inConfG->getWidth(), false, useGpu_); + + locOffset += decomposeWithPermute(*locBuffer_, + height, + width, + locSizeSum_, + locOffset, + batchSize, + *locGBuffer, + kNHWCToNCHW); + inLocG->add(*locGBuffer); + confOffset += decomposeWithPermute(*confBuffer_, + height, + width, + confSizeSum_, + confOffset, + batchSize, + *confGBuffer, + kNHWCToNCHW); + inConfG->add(*confGBuffer); + } + CHECK_EQ(locOffset, locSizeSum_ / batchSize); + CHECK_EQ(confOffset, confSizeSum_ / batchSize); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MultiBoxLossLayer.h b/paddle/gserver/layers/MultiBoxLossLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..9935da56446c1508549906becfd28548d5deecde --- /dev/null +++ b/paddle/gserver/layers/MultiBoxLossLayer.h @@ -0,0 +1,103 @@ +/* copyright (c) 2016 paddlepaddle authors. all rights reserve. + +licensed under the apache license, version 2.0 (the "license"); +you may not use this file except in compliance with the license. +you may obtain a copy of the license at + + http://www.apache.org/licenses/license-2.0 + +unless required by applicable law or agreed to in writing, software +distributed under the license is distributed on an "as is" basis, +without warranties or conditions of any kind, either express or implied. +see the license for the specific language governing permissions and +limitations under the license. */ + +#pragma once + +#include +#include "CostLayer.h" +#include "DataLayer.h" +#include "DetectionUtil.h" +#include "Layer.h" + +using std::vector; +using std::pair; + +namespace paddle { + +/** + * The multibox loss layer for a SSD detection task. + * The loss is composed by the location loss and the confidence loss. + * The location loss is a smooth L1 loss and the confidence loss is + * a softmax loss. + * - Input: This layer needs four input layers: The first input layer + * is the priorbox layer and the second layer is a label layer. + * The rest two input layers are convolution layers for generating + * bbox location offset and the classification confidence. + * - Output: The Single Shot Multibox Detection loss value. + * Reference: + * Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, + * Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector + */ + +class MultiBoxLossLayer : public CostLayer { +public: + explicit MultiBoxLossLayer(const LayerConfig& config) : CostLayer(config) {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + + void backward(const UpdateCallback& callback = nullptr); + + void forwardImp(Matrix& output, Argument& label, Matrix& cost) {} + + void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad) {} + +protected: + inline LayerPtr getPriorBoxLayer() { return inputLayers_[0]; } + inline LayerPtr getLabelLayer() { return inputLayers_[1]; } + inline LayerPtr getLocInputLayer(size_t index) { + return inputLayers_[2 + index]; + } + inline LayerPtr getConfInputLayer(size_t index) { + return inputLayers_[2 + inputNum_ + index]; + } + +protected: + size_t numClasses_; + real overlapThreshold_; + real negPosRatio_; + real negOverlap_; + size_t inputNum_; + size_t backgroundId_; + + real locLoss_; + real confLoss_; + + size_t numPriors_; + size_t numMatches_; + size_t numNegs_; + size_t numConf_; + size_t locSizeSum_; + size_t confSizeSum_; + + vector> allMatchIndices_; + vector> allNegIndices_; + MatrixPtr locGTData_; + IVectorPtr confGTData_; + + MatrixPtr locBuffer_; + MatrixPtr confBuffer_; + MatrixPtr locDiff_; + MatrixPtr confProb_; + + MatrixPtr labelCpuValue_; + MatrixPtr priorCpuValue_; + MatrixPtr locCpuBuffer_; + MatrixPtr confCpuBuffer_; + MatrixPtr locTmpBuffer_; + MatrixPtr confTmpBuffer_; +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 3c4128b5b8a0ea420bd3027b9a36e5f75087c3cb..92f6cbcfe5a0e23c5939b1689a3e339367450387 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -45,6 +45,13 @@ add_unittest_without_exec(test_PriorBox add_test(NAME test_PriorBox COMMAND test_PriorBox) +################# test_DetectionOutput ####################### +add_unittest_without_exec(test_DetectionOutput + test_DetectionOutput.cpp + LayerGradUtil.cpp) + +add_test(NAME test_DetectionOutput + COMMAND test_DetectionOutput) ################# test_ConvUnify ####################### add_unittest_without_exec(test_ConvUnify test_ConvUnify.cpp diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index a0b1cd471dd02fd20bb2247395bdb74651610bbf..e3591ba4df88f547e48bf07d4339d5f25db95e81 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -387,6 +387,31 @@ void initDataLayer(TestConfig testConf, data.value->sigmoid(*data.value); data.grad->zeroMem(); break; + case INPUT_SELF_DEFINE_DATA: { + size_t height = testConf.inputDefs[i].selfDefinedData->getHeight(); + size_t width = testConf.inputDefs[i].selfDefinedData->getWidth(); + CHECK_GT(static_cast(height), 0); + CHECK_GT(static_cast(width), 0); + data.value = Matrix::create(height, width, false, useGpu); + data.grad = Matrix::create(height, width, false, useGpu); + data.value->copyFrom(*testConf.inputDefs[i].selfDefinedData); + data.grad->zeroMem(); + + const std::vector& labelSeqStartPositions = + testConf.inputDefs[i].labelSeqStartPositions; + if (labelSeqStartPositions.size() != 0) { + CHECK(!sequenceStartPositions); + CHECK_GE(static_cast(labelSeqStartPositions.size()), 2); + + sequenceStartPositions = + ICpuGpuVector::create(labelSeqStartPositions.size(), useGpu); + sequenceStartPositions->copyFrom(labelSeqStartPositions.data(), + labelSeqStartPositions.size(), + useGpu); + data.sequenceStartPositions = sequenceStartPositions; + } + break; + } default: LOG(FATAL) << " unknown inputType "; return; diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index 9f68eb64d0b4ad27306d3b20387d74a7e438d910..18a6525a145fbf7539e8e84bd162a3b4345394dc 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -31,7 +31,8 @@ enum InputType { INPUT_SEQUENCE_LABEL, INPUT_SPARSE_NON_VALUE_DATA, INPUT_SPARSE_FLOAT_VALUE_DATA, - INPUT_DENSE_DIM_DATA, // using sequence length to init dense data + INPUT_DENSE_DIM_DATA, // using sequence length to init dense data + INPUT_SELF_DEFINE_DATA, // support customizing for input value }; struct ParaSparse { @@ -66,6 +67,7 @@ struct InputDef { bool isStatic; std::vector labelInitValue; std::vector labelSeqStartPositions; + MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { inputType = type; @@ -76,6 +78,20 @@ struct InputDef { isStatic = false; } + InputDef(InputType type, + string nameIn, + MatrixPtr selfDefinedData, + std::vector selfDefinedSeqStartPos = {}) + : labelSeqStartPositions(selfDefinedSeqStartPos), + selfDefinedData(selfDefinedData) { + inputType = type; + name = nameIn; + dim = 0; + sparse = {""}; + paraSize = 0; + isStatic = false; + } + InputDef(InputType type, string nameIn, size_t dimIn, diff --git a/paddle/gserver/tests/test_DetectionOutput.cpp b/paddle/gserver/tests/test_DetectionOutput.cpp new file mode 100644 index 0000000000000000000000000000000000000000..af43dc51fad35c834635b543b1a016f6d717de1e --- /dev/null +++ b/paddle/gserver/tests/test_DetectionOutput.cpp @@ -0,0 +1,194 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT +using namespace std; // NOLINT + +// Do one forward pass of priorBox layer and check to see if its output +// matches the given result +void doOneDetectionOutputTest(MatrixPtr& inputLoc, + MatrixPtr& inputConf, + MatrixPtr& inputPriorBox, + size_t feature_map_width, + size_t feature_map_height, + real nms_threshold, + bool use_gpu, + MatrixPtr& result) { + // Setting up the detection output layer + TestConfig configt; + configt.layerConfig.set_type("detection_output"); + LayerInputConfig* input = configt.layerConfig.add_inputs(); + configt.layerConfig.add_inputs(); + configt.layerConfig.add_inputs(); + + DetectionOutputConfig* detOutput = input->mutable_detection_output_conf(); + detOutput->set_width(feature_map_width); + detOutput->set_height(feature_map_height); + detOutput->set_nms_threshold(nms_threshold); + detOutput->set_num_classes(2); + detOutput->set_nms_top_k(20); + detOutput->set_keep_top_k(10); + detOutput->set_background_id(0); + detOutput->set_confidence_threshold(0.01); + detOutput->set_input_num(1); + configt.inputDefs.push_back({INPUT_DATA_TARGET, "priorbox", 32, 0}); + configt.inputDefs.push_back({INPUT_DATA, "input_loc", 16, 0}); + configt.inputDefs.push_back({INPUT_DATA, "input_conf", 8, 0}); + + // data layer initialize + std::vector dataLayers; + LayerMap layerMap; + vector datas; + initDataLayer( + configt, &dataLayers, &datas, &layerMap, "priorbox", 1, false, use_gpu); + + dataLayers[0]->getOutputValue()->copyFrom(*inputPriorBox); + dataLayers[1]->getOutputValue()->copyFrom(*inputLoc); + dataLayers[2]->getOutputValue()->copyFrom(*inputConf); + + // test layer initialize + bool store_FLAGS_use_gpu = FLAGS_use_gpu; + FLAGS_use_gpu = use_gpu; + std::vector parameters; + LayerPtr detectionOutputLayer; + initTestLayer(configt, &layerMap, ¶meters, &detectionOutputLayer); + FLAGS_use_gpu = store_FLAGS_use_gpu; + detectionOutputLayer->forward(PASS_GC); + checkMatrixEqual(detectionOutputLayer->getOutputValue(), result); +} + +TEST(Layer, detectionOutputLayerFwd) { + bool useGpu = false; + // CPU case 1. + MatrixPtr inputLoc; + MatrixPtr inputConf; + MatrixPtr inputPriorBox; + MatrixPtr result, result2, result3, result4; + real nmsTreshold = 0.01; + real inputLocData[] = {0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1}; + real inputConfData[] = {0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.6}; + real inputPriorBoxData[] = {0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, + 0.2, 0.2, 0.6, 0.6, 0.1, 0.1, 0.2, 0.2, + 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, + 0.4, 0.4, 0.8, 0.8, 0.1, 0.1, 0.2, 0.2}; + real resultData[] = { + 0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031}; + inputLoc = Matrix::create(1, 16, false, useGpu); + inputConf = Matrix::create(1, 8, false, useGpu); + inputPriorBox = Matrix::create(1, 32, false, useGpu); + result = Matrix::create(1, 7, false, useGpu); + inputLoc->setData(inputLocData); + inputConf->setData(inputConfData); + inputPriorBox->setData(inputPriorBoxData); + result->setData(resultData); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result); + + // CPU case 2. + nmsTreshold = 0.2; + result2 = Matrix::create(2, 7, false, useGpu); + real resultData2[] = {0, + 1, + 0.68997443, + 0.099959746, + 0.099959746, + 0.50804031, + 0.50804031, + 0, + 1, + 0.59868765, + 0.29995975, + 0.29995975, + 0.70804024, + 0.70804024}; + result2->setData(resultData2); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result2); + +#ifndef PADDLE_ONLY_CPU + // GPU case 1. + useGpu = true; + inputLoc = Matrix::create(1, 16, false, useGpu); + inputConf = Matrix::create(1, 8, false, useGpu); + inputPriorBox = Matrix::create(1, 32, false, useGpu); + inputLoc->copyFrom(inputLocData, 16); + inputConf->copyFrom(inputConfData, 8); + inputPriorBox->copyFrom(inputPriorBoxData, 32); + + nmsTreshold = 0.01; + result3 = Matrix::create(1, 7, false, useGpu); + result3->copyFrom(resultData, 7); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result3); + + // GPU case 2. + nmsTreshold = 0.2; + result4 = Matrix::create(2, 7, false, useGpu); + result4->copyFrom(resultData2, 14); + doOneDetectionOutputTest(inputLoc, + inputConf, + inputPriorBox, + /* feature_map_width */ 1, + /* feature_map_height */ 1, + nmsTreshold, + useGpu, + result4); +#endif +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + initMain(argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 297756025bcad79d49ec321414ed2e91f1c0758a..59d1e9273d42d6a53ec284c6ed684096b3f42321 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1692,6 +1692,70 @@ TEST(Layer, smooth_l1) { } } +TEST(Layer, multibox_loss) { + TestConfig config; + config.layerConfig.set_type("multibox_loss"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); + multiboxLoss->set_num_classes(21); + multiboxLoss->set_input_num(1); + multiboxLoss->set_overlap_threshold(0.5); + multiboxLoss->set_neg_pos_ratio(3); + multiboxLoss->set_neg_overlap(0.5); + multiboxLoss->set_background_id(0); + multiboxLoss->set_height(3); + multiboxLoss->set_width(3); + + size_t gtNum = 1; + MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); + labelValue->randomizeUniform(); + labelValue->add(-0.5); + labelValue->sigmoid(*labelValue); + real* labelData = labelValue->getData(); + size_t labelWidth = labelValue->getWidth(); + for (size_t i = 0; i < gtNum; ++i) { + *(labelData + i * labelWidth) = std::rand() % 20 + 1; + *(labelData + i * labelWidth + 1) = 0.400259; + *(labelData + i * labelWidth + 2) = 0.377857; + *(labelData + i * labelWidth + 3) = 0.525712; + *(labelData + i * labelWidth + 4) = 0.519368; + } + vector seqStartPositions(gtNum + 1, 0); + for (size_t i = 1; i <= gtNum; ++i) { + seqStartPositions[i] = i; + } + + // Ensure at lease one matched bbox + MatrixPtr priorValue = Matrix::create(1, 72, false, false); + priorValue->randomizeUniform(); + priorValue->add(-0.5); + priorValue->sigmoid(*priorValue); + real* priorData = priorValue->getData(); + *(priorData) = 0.424811; + *(priorData + 1) = 0.397059; + *(priorData + 2) = 0.538905; + *(priorData + 3) = 0.447091; + *(priorData + 4) = 0.425720; + *(priorData + 5) = 0.515228; + *(priorData + 6) = 0.519452; + *(priorData + 7) = 0.591065; + + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); + config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); + config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); + } +} + TEST(Layer, TransLayer) { TestConfig config; const int height = 128; diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index c9150ac5738f3292f530f053d7c0bb8a1203f13d..258cc88031a71e9fee65b5445bd5537d6782e226 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/platform/dynamic_loader.h" +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { namespace platform { diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h index c03424b375e5dc80610eb4ca8146069b7300e016..0a9562c573cdfe059ef7caa39ba62efa87225e41 100644 --- a/paddle/platform/dynload/cudnn.h +++ b/paddle/platform/dynload/cudnn.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/platform/dynamic_loader.h" +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { namespace platform { diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h index 1ef7a8c833d7488ec18f7c8df55b16006e94cc72..9dc0a25c0fbdc3f73f1dd82206e8940972b0b7f5 100644 --- a/paddle/platform/dynload/curand.h +++ b/paddle/platform/dynload/curand.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/platform/dynamic_loader.h" +#include +#include +#include "paddle/platform/dynload/dynamic_loader.h" namespace paddle { namespace platform { diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc index 8ef67bad8c63cd1dfff967dbf1627ed84645e0d0..dd914e006d54c423ffea56ffaaafe7dcba416361 100644 --- a/paddle/platform/dynload/dynamic_loader.cc +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "dynamic_loader.h" +#include "paddle/platform/dynload/dynamic_loader.h" #include #include #include #include #include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/framework/enforce.h" DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. For instance, " @@ -72,13 +73,12 @@ static inline void GetDsoHandleFromDefaultPath(std::string& dso_path, *dso_handle = dlopen(dso_path.c_str(), dynload_flags); if (nullptr == *dso_handle) { if (dso_path == "libcudnn.dylib") { - LOG(FATAL) - << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n" // NOLINT - << "For instance, sudo tar -xzf " - "cudnn-7.5-osx-x64-v5.0-ga.tgz -C " // NOLINT - << "/usr/local \n sudo chmod a+r " - "/usr/local/cuda/include/cudnn.h " // NOLINT - << "/usr/local/cuda/lib/libcudnn*"; + PADDLE_ENFORCE(true, + "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n " + "For instance, sudo tar -xzf " + "cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local \n sudo " + "chmod a+r /usr/local/cuda/include/cudnn.h " + "/usr/local/cuda/lib/libcudnn*"); } } } @@ -106,22 +106,15 @@ static inline void GetDsoHandleFromSearchPath(const std::string& search_root, GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); } } - - CHECK(nullptr != *dso_handle) << "Failed to find dynamic library: " << dlPath - << " (" << dlerror() << ") \n" - << "Please specify its path correctly using " - "following ways: \n" - - << "Method. set environment variable " - "LD_LIBRARY_PATH on Linux or " - << "DYLD_LIBRARY_PATH on Mac OS. \n" - << "For instance, issue command: export " - "LD_LIBRARY_PATH=... \n" - - << "Note: After Mac OS 10.11, using the " - "DYLD_LIBRARY_PATH is impossible " - << "unless System Integrity Protection (SIP) " - "is disabled."; + PADDLE_ENFORCE(nullptr != *dso_handle, + "Failed to find dynamic library: %s ( %s ) \n Please specify " + "its path correctly using following ways: \n Method. set " + "environment variable LD_LIBRARY_PATH on Linux or " + "DYLD_LIBRARY_PATH on Mac OS. \n For instance, issue command: " + "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, " + "using the DYLD_LIBRARY_PATH is impossible unless System " + "Integrity Protection (SIP) is disabled.", + dlPath, dlerror()); } void GetCublasDsoHandle(void** dso_handle) { diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index ebe4f5cbb569ff37a46eb44de6362a7df337fe38..37cd16c79890738f6d8966579e15686c653d4df3 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -266,6 +266,29 @@ message PadConfig { repeated uint32 pad_w = 4; } +message MultiBoxLossConfig { + required uint32 num_classes = 1; + required float overlap_threshold = 2; + required float neg_pos_ratio = 3; + required float neg_overlap = 4; + required uint32 background_id = 5; + required uint32 input_num = 6; + optional uint32 height = 7 [default = 1]; + optional uint32 width = 8 [default = 1]; +} + +message DetectionOutputConfig { + required uint32 num_classes = 1; + required float nms_threshold = 2; + required uint32 nms_top_k = 3; + required uint32 background_id = 4; + required uint32 input_num = 5; + required uint32 keep_top_k = 6; + required float confidence_threshold = 7; + optional uint32 height = 8 [default = 1]; + optional uint32 width = 9 [default = 1]; +} + message LayerInputConfig { required string input_layer_name = 1; optional string input_parameter_name = 2; @@ -284,6 +307,8 @@ message LayerInputConfig { optional PriorBoxConfig priorbox_conf = 13; optional PadConfig pad_conf = 14; optional RowConvConfig row_conv_conf = 15; + optional MultiBoxLossConfig multibox_loss_conf = 16; + optional DetectionOutputConfig detection_output_conf = 17; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b7418101d83fde1b91781d3a42b056cc7708cba9..370529ed97b1f1427ebc088a3031437a7f65e0cf 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1674,6 +1674,52 @@ class PriorBoxLayer(LayerBase): self.config.size = size +@config_layer('multibox_loss') +class MultiBoxLossLayer(LayerBase): + def __init__(self, name, inputs, input_num, num_classes, overlap_threshold, + neg_pos_ratio, neg_overlap, background_id, **xargs): + super(MultiBoxLossLayer, self).__init__(name, 'multibox_loss', 0, + inputs) + config_assert( + len(inputs) == (input_num * 2 + 2), + 'MultiBoxLossLayer does not have enough inputs') + config_assert(num_classes > background_id, + 'Classes number must greater than background ID') + self.config.inputs[0].multibox_loss_conf.num_classes = num_classes + self.config.inputs[ + 0].multibox_loss_conf.overlap_threshold = overlap_threshold + self.config.inputs[0].multibox_loss_conf.neg_pos_ratio = neg_pos_ratio + self.config.inputs[0].multibox_loss_conf.neg_overlap = neg_overlap + self.config.inputs[0].multibox_loss_conf.background_id = background_id + self.config.inputs[0].multibox_loss_conf.input_num = input_num + self.config.size = 1 + + +@config_layer('detection_output') +class DetectionOutputLayer(LayerBase): + def __init__(self, name, inputs, size, input_num, num_classes, + nms_threshold, nms_top_k, keep_top_k, confidence_threshold, + background_id, **xargs): + super(DetectionOutputLayer, self).__init__(name, 'detection_output', 0, + inputs) + config_assert( + len(inputs) == (input_num * 2 + 1), + 'DetectionOutputLayer does not have enough inputs') + config_assert(num_classes > background_id, + 'Classes number must greater than background ID') + self.config.inputs[0].detection_output_conf.num_classes = num_classes + self.config.inputs[ + 0].detection_output_conf.nms_threshold = nms_threshold + self.config.inputs[0].detection_output_conf.nms_top_k = nms_top_k + self.config.inputs[0].detection_output_conf.keep_top_k = keep_top_k + self.config.inputs[ + 0].detection_output_conf.confidence_threshold = confidence_threshold + self.config.inputs[ + 0].detection_output_conf.background_id = background_id + self.config.inputs[0].detection_output_conf.input_num = input_num + self.config.size = size + + @config_layer('data') class DataLayer(LayerBase): def __init__(self, name, size, height=None, width=None, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index a601d5c84ad222785e68b9fa81c51b1e120b4f29..206de1f8e1c7d3f9f977b4ca97522065c9ed0cab 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -115,6 +115,8 @@ __all__ = [ 'print_layer', 'priorbox_layer', 'cross_channel_norm_layer', + 'multibox_loss_layer', + 'detection_output_layer', 'spp_layer', 'pad_layer', 'eos_layer', @@ -195,6 +197,8 @@ class LayerType(object): PRINT_LAYER = 'print' PRIORBOX_LAYER = 'priorbox' + MULTIBOX_LOSS_LAYER = 'multibox_loss' + DETECTION_OUTPUT_LAYER = 'detection_output' CTC_LAYER = 'ctc' WARP_CTC_LAYER = 'warp_ctc' @@ -1041,6 +1045,158 @@ def priorbox_layer(input, size=size) +@wrap_name_default("multibox_loss") +def multibox_loss_layer(input_loc, + input_conf, + priorbox, + label, + num_classes, + overlap_threshold=0.5, + neg_pos_ratio=3.0, + neg_overlap=0.5, + background_id=0, + name=None): + """ + Compute the location loss and the confidence loss for ssd. + + :param name: The Layer Name. + :type name: basestring + :param input_loc: The input predict locations. + :type input_loc: LayerOutput | List of LayerOutput + :param input_conf: The input priorbox confidence. + :type input_conf: LayerOutput | List of LayerOutput + :param priorbox: The input priorbox location and the variance. + :type priorbox: LayerOutput + :param label: The input label. + :type label: LayerOutput + :param num_classes: The number of the classification. + :type num_classes: int + :param overlap_threshold: The threshold of the overlap. + :type overlap_threshold: float + :param neg_pos_ratio: The ratio of the negative bbox to the positive bbox. + :type neg_pos_ratio: float + :param neg_overlap: The negative bbox overlap threshold. + :type neg_overlap: float + :param background_id: The background class index. + :type background_id: int + :return: LayerOutput + """ + if isinstance(input_loc, LayerOutput): + input_loc = [input_loc] + assert isinstance(input_loc, collections.Sequence) # list or tuple + for each in input_loc: + assert isinstance(each, LayerOutput) + input_loc_num = len(input_loc) + + if isinstance(input_conf, LayerOutput): + input_conf = [input_conf] + assert isinstance(input_conf, collections.Sequence) # list or tuple + for each in input_conf: + assert isinstance(each, LayerOutput) + input_conf_num = len(input_conf) + # Check the input layer number. + assert input_loc_num == input_conf_num + + inputs = [priorbox.name, label.name] + inputs.extend([l.name for l in input_loc]) + inputs.extend([l.name for l in input_conf]) + parents = [priorbox, label] + parents.extend(input_loc) + parents.extend(input_conf) + + Layer( + name=name, + type=LayerType.MULTIBOX_LOSS_LAYER, + inputs=inputs, + input_num=input_loc_num, + num_classes=num_classes, + overlap_threshold=overlap_threshold, + neg_pos_ratio=neg_pos_ratio, + neg_overlap=neg_overlap, + background_id=background_id) + return LayerOutput( + name, LayerType.MULTIBOX_LOSS_LAYER, parents=parents, size=1) + + +@wrap_name_default("detection_output") +def detection_output_layer(input_loc, + input_conf, + priorbox, + num_classes, + nms_threshold=0.45, + nms_top_k=400, + keep_top_k=200, + confidence_threshold=0.01, + background_id=0, + name=None): + """ + Apply the NMS to the output of network and compute the predict bounding + box location. + + :param name: The Layer Name. + :type name: basestring + :param input_loc: The input predict locations. + :type input_loc: LayerOutput | List of LayerOutput. + :param input_conf: The input priorbox confidence. + :type input_conf: LayerOutput | List of LayerOutput. + :param priorbox: The input priorbox location and the variance. + :type priorbox: LayerOutput + :param num_classes: The number of the classification. + :type num_classes: int + :param nms_threshold: The Non-maximum suppression threshold. + :type nms_threshold: float + :param nms_top_k: The bbox number kept of the NMS's output + :type nms_top_k: int + :param keep_top_k: The bbox number kept of the layer's output + :type keep_top_k: int + :param confidence_threshold: The classification confidence threshold + :type confidence_threshold: float + :param background_id: The background class index. + :type background_id: int + :return: LayerOutput + """ + if isinstance(input_loc, LayerOutput): + input_loc = [input_loc] + assert isinstance(input_loc, collections.Sequence) # list or tuple + for each in input_loc: + assert isinstance(each, LayerOutput) + input_loc_num = len(input_loc) + + if isinstance(input_conf, LayerOutput): + input_conf = [input_conf] + assert isinstance(input_conf, collections.Sequence) # list or tuple + for each in input_conf: + assert isinstance(each, LayerOutput) + input_conf_num = len(input_conf) + + # Check the input layer number. + assert input_loc_num == input_conf_num + + inputs = [priorbox.name] + inputs.extend([l.name for l in input_loc]) + inputs.extend([l.name for l in input_conf]) + parents = [priorbox] + parents.extend(input_loc) + parents.extend(input_conf) + + size = keep_top_k * 7 + + Layer( + name=name, + type=LayerType.DETECTION_OUTPUT_LAYER, + inputs=inputs, + size=size, + input_num=input_loc_num, + num_classes=num_classes, + nms_threshold=nms_threshold, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + confidence_threshold=confidence_threshold, + background_id=background_id) + return LayerOutput( + name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) + + @wrap_name_default("cross_channel_norm") def cross_channel_norm_layer(input, name=None, param_attr=None): """ diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index c0e87d6de372dfdd9c7e694af71df8f3b011d43a..a939c41ad01922e421f7bcd93851df7447a6799f 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -6,6 +6,6 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cos test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer -test_prelu_layer test_row_conv) +test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..6690f9852a31b1909df7df99720db639eb2a564d --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_detection_output_layer.protostr @@ -0,0 +1,66 @@ +type: "nn" +layers { + name: "input_loc" + type: "data" + size: 16 + active_type: "" + height: 16 + width: 1 +} +layers { + name: "input_conf" + type: "data" + size: 8 + active_type: "" + height: 1 + width: 8 +} +layers { + name: "priorbox" + type: "data" + size: 32 + active_type: "" + height: 4 + width: 8 +} +layers { + name: "test_detection_output" + type: "detection_output" + size: 1400 + active_type: "" + inputs { + input_layer_name: "priorbox" + detection_output_conf { + num_classes: 21 + nms_threshold: 0.45 + nms_top_k: 400 + background_id: 0 + input_num: 1 + keep_top_k: 200 + confidence_threshold: 0.01 + } + } + inputs { + input_layer_name: "input_loc" + } + inputs { + input_layer_name: "input_conf" + } +} +input_layer_names: "priorbox" +input_layer_names: "input_loc" +input_layer_names: "input_conf" +output_layer_names: "test_detection_output" +sub_models { + name: "root" + layer_names: "input_loc" + layer_names: "input_conf" + layer_names: "priorbox" + layer_names: "test_detection_output" + input_layer_names: "priorbox" + input_layer_names: "input_loc" + input_layer_names: "input_conf" + output_layer_names: "test_detection_output" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..0ba84dcc6db6b7025a98b2698312f5fc9e0ed634 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_multibox_loss_layer.protostr @@ -0,0 +1,79 @@ +type: "nn" +layers { + name: "input_loc" + type: "data" + size: 16 + active_type: "" + height: 16 + width: 1 +} +layers { + name: "input_conf" + type: "data" + size: 8 + active_type: "" + height: 1 + width: 8 +} +layers { + name: "priorbox" + type: "data" + size: 32 + active_type: "" + height: 4 + width: 8 +} +layers { + name: "label" + type: "data" + size: 24 + active_type: "" + height: 4 + width: 6 +} +layers { + name: "test_multibox_loss" + type: "multibox_loss" + size: 1 + active_type: "" + inputs { + input_layer_name: "priorbox" + multibox_loss_conf { + num_classes: 21 + overlap_threshold: 0.5 + neg_pos_ratio: 3.0 + neg_overlap: 0.5 + background_id: 0 + input_num: 1 + } + } + inputs { + input_layer_name: "label" + } + inputs { + input_layer_name: "input_loc" + } + inputs { + input_layer_name: "input_conf" + } +} +input_layer_names: "priorbox" +input_layer_names: "label" +input_layer_names: "input_loc" +input_layer_names: "input_conf" +output_layer_names: "test_multibox_loss" +sub_models { + name: "root" + layer_names: "input_loc" + layer_names: "input_conf" + layer_names: "priorbox" + layer_names: "label" + layer_names: "test_multibox_loss" + input_layer_names: "priorbox" + input_layer_names: "label" + input_layer_names: "input_loc" + input_layer_names: "input_conf" + output_layer_names: "test_multibox_loss" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..3572a2cb07d95ffaec261bdc63492ade734ea8b9 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py @@ -0,0 +1,23 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +input_loc = data_layer(name='input_loc', size=16, height=16, width=1) + +input_conf = data_layer(name='input_conf', size=8, height=1, width=8) + +priorbox = data_layer(name='priorbox', size=32, height=4, width=8) + +detout = detection_output_layer( + input_loc=input_loc, + input_conf=input_conf, + priorbox=priorbox, + num_classes=21, + nms_threshold=0.45, + nms_top_k=400, + keep_top_k=200, + confidence_threshold=0.01, + background_id=0, + name='test_detection_output') + +outputs(detout) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..c3376c47bded5a3aad15331936a61e12ac883b17 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py @@ -0,0 +1,25 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +input_loc = data_layer(name='input_loc', size=16, height=16, width=1) + +input_conf = data_layer(name='input_conf', size=8, height=1, width=8) + +priorbox = data_layer(name='priorbox', size=32, height=4, width=8) + +label = data_layer(name='label', size=24, height=4, width=6) + +multibox_loss = multibox_loss_layer( + input_loc=input_loc, + input_conf=input_conf, + priorbox=priorbox, + label=label, + num_classes=21, + overlap_threshold=0.5, + neg_pos_ratio=3.0, + neg_overlap=0.5, + background_id=0, + name='test_multibox_loss') + +outputs(multibox_loss)