diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 7f56737ca99906ad7a62ec2fb8c4a6c1f3d5a221..bfcc70b31defd378fba0e505b1522471ca285ac4 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -53,7 +53,6 @@ function(op_library TARGET) if (${op_library_DEPS_len} GREATER 0) set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE) endif() - if (WITH_GPU) nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) @@ -187,6 +186,36 @@ endfunction() add_subdirectory(math) add_subdirectory(nccl) +set(DEPS_OPS + cond_op + cross_entropy_op + recurrent_op + softmax_with_cross_entropy_op + softmax_op + sequence_softmax_op + sum_op + pool_op + maxout_op + unpool_op + pool_with_index_op + conv_op + conv_transpose_op + nccl_op + sequence_conv_op + sequence_pool_op + lod_rank_table_op + lod_tensor_to_array_op + array_to_lod_tensor_op + max_sequence_len_op + lstm_op + gru_op + adagrad_op + sgd_op + save_op + load_op + send_op + recv_op + detection_output_op) if(WITH_GPU) op_library(nccl_op DEPS nccl_common) else() @@ -210,6 +239,7 @@ op_library(cond_op DEPS framework_proto tensor net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_op DEPS softmax) +op_library(detection_output_op DEPS softmax) op_library(sequence_softmax_op DEPS softmax) op_library(sum_op DEPS selected_rows_functor) op_library(sgd_op DEPS selected_rows_functor) diff --git a/paddle/operators/detection_output_op.cc b/paddle/operators/detection_output_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..ea44cd32678d7e8a5836c1886cf9c1b4961970aa --- /dev/null +++ b/paddle/operators/detection_output_op.cc @@ -0,0 +1,89 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/detection_output_op.h" +namespace paddle { +namespace operators { + +class DetectionOutputOpMaker : public framework::OpProtoAndCheckerMaker { + public: + DetectionOutputOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Loc", + "(Tensor) The input tensor of detection_output operator." + "The input predict locations" + "The format of input tensor is kNCHW. Where K is priorbox point " + "numbers," + "N is How many boxes are there on each point, " + "C is 4, H and W both are 1."); + AddInput("Conf", + "(Tensor) The input tensor of detection_output operator." + "The input priorbox confidence." + "The format of input tensor is kNCHW. Where K is priorbox point " + "numbers," + "N is How many boxes are there on each point, " + "C is the number of classes, H and W both are 1."); + AddInput("PriorBox", + "(Tensor) The input tensor of detection_output operator." + "The format of input tensor is the position and variance " + "of the boxes"); + AddOutput("Out", + "(Tensor) The output tensor of detection_output operator."); + AddAttr("background_label_id", "(int), The background class index."); + AddAttr("num_classes", "(int), The number of the classification."); + AddAttr("nms_threshold", + "(float), The Non-maximum suppression threshold."); + AddAttr("confidence_threshold", + "(float), The classification confidence threshold."); + AddAttr("top_k", "(int), The bbox number kept of the layer’s output."); + AddAttr("nms_top_k", + "(int), The bbox number kept of the NMS’s output."); + AddComment(R"DOC( + detection output for SSD(single shot multibox detector) + Apply the NMS to the output of network and compute the predict + bounding box location. The output’s shape of this layer could + be zero if there is no valid bounding box. + )DOC"); + } +}; + +class DetectionOutputOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Loc"), + "Input(X) of DetectionOutputOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Conf"), + "Input(X) of DetectionOutputOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasInput("PriorBox"), + "Input(X) of DetectionOutputOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of DetectionOutputOp should not be null."); + std::vector output_shape({1, 7}); + ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(detection_output, ops::DetectionOutputOp, + ops::DetectionOutputOpMaker); +REGISTER_OP_CPU_KERNEL( + detection_output, + ops::DetectionOutputKernel, + ops::DetectionOutputKernel); diff --git a/paddle/operators/detection_output_op.cu.cc b/paddle/operators/detection_output_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..4a6560e0492c559afd06e5152c34fab545b7ce61 --- /dev/null +++ b/paddle/operators/detection_output_op.cu.cc @@ -0,0 +1,21 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/detection_output_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + detection_output, + ops::DetectionOutputKernel, + ops::DetectionOutputKernel); diff --git a/paddle/operators/detection_output_op.h b/paddle/operators/detection_output_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f8abd5b6406f05747b87fcfd464baeb705a7f7f2 --- /dev/null +++ b/paddle/operators/detection_output_op.h @@ -0,0 +1,167 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/framework/tensor.h" +#include "paddle/operators/math/detection_util.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/softmax.h" +#include "paddle/operators/strided_memcpy.h" +namespace paddle { +namespace operators { +template +inline void transpose_fun(const framework::ExecutionContext& context, + const framework::Tensor& src, + framework::Tensor* dst) { + int input_nums = src.dims()[0]; + int offset = 0; + for (int j = 0; j < input_nums; ++j) { + framework::Tensor in_p_tensor = src.Slice(j, j + 1); + std::vector shape_vec( + {in_p_tensor.dims()[0], in_p_tensor.dims()[1], in_p_tensor.dims()[3], + in_p_tensor.dims()[4], in_p_tensor.dims()[2]}); + framework::DDim shape(framework::make_ddim(shape_vec)); + framework::Tensor in_p_tensor_transpose; + in_p_tensor_transpose.mutable_data(shape, context.GetPlace()); + std::vector shape_axis({0, 1, 3, 4, 2}); + math::Transpose trans5; + trans5(context.template device_context(), in_p_tensor, + &in_p_tensor_transpose, shape_axis); + auto dst_stride = framework::stride(dst->dims()); + auto src_stride = framework::stride(in_p_tensor_transpose.dims()); + StridedMemcpy(context.device_context(), in_p_tensor_transpose.data(), + src_stride, in_p_tensor_transpose.dims(), dst_stride, + dst->data() + offset); + offset += in_p_tensor_transpose.dims()[4] * src_stride[4]; + } +} +template +class DetectionOutputKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* in_loc = context.Input("Loc"); + const framework::Tensor* in_conf = context.Input("Conf"); + const framework::Tensor* in_priorbox = + context.Input("PriorBox"); + auto* out = context.Output("Out"); + int num_classes = context.template Attr("num_classes"); + int top_k = context.template Attr("top_k"); + int nms_top_k = context.template Attr("nms_top_k"); + int background_label_id = context.template Attr("background_label_id"); + float nms_threshold = context.template Attr("nms_threshold"); + float confidence_threshold = + context.template Attr("confidence_threshold"); + size_t batch_size = in_conf->dims()[1]; + int conf_sum_size = in_conf->numel(); + // for softmax + std::vector conf_shape_softmax_vec( + {conf_sum_size / num_classes, num_classes}); + framework::DDim conf_shape_softmax( + framework::make_ddim(conf_shape_softmax_vec)); + // for knchw => nhwc + std::vector loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3], + in_loc->dims()[4], + in_loc->dims()[2] * in_loc->dims()[0]}); + std::vector conf_shape_vec( + {1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4], + in_conf->dims()[2] * in_conf->dims()[0]}); + framework::DDim loc_shape(framework::make_ddim(loc_shape_vec)); + framework::DDim conf_shape(framework::make_ddim(conf_shape_vec)); + framework::Tensor loc_tensor; + framework::Tensor conf_tensor; + loc_tensor.mutable_data(loc_shape, context.GetPlace()); + conf_tensor.mutable_data(conf_shape, context.GetPlace()); + // for cpu + framework::Tensor loc_cpu; + framework::Tensor conf_cpu; + framework::Tensor priorbox_cpu; + const T* priorbox_data = in_priorbox->data(); + transpose_fun(context, *in_loc, &loc_tensor); + transpose_fun(context, *in_conf, &conf_tensor); + conf_tensor.Resize(conf_shape_softmax); + math::SoftmaxFunctor()( + context.template device_context(), &conf_tensor, + &conf_tensor); + T* loc_data = loc_tensor.data(); + T* conf_data = conf_tensor.data(); + if (platform::is_gpu_place(context.GetPlace())) { + loc_cpu.mutable_data(loc_tensor.dims(), platform::CPUPlace()); + framework::CopyFrom(loc_tensor, platform::CPUPlace(), + context.device_context(), &loc_cpu); + loc_data = loc_cpu.data(); + conf_cpu.mutable_data(conf_tensor.dims(), platform::CPUPlace()); + framework::CopyFrom(conf_tensor, platform::CPUPlace(), + context.device_context(), &conf_cpu); + conf_data = conf_cpu.data(); + priorbox_cpu.mutable_data(in_priorbox->dims(), platform::CPUPlace()); + framework::CopyFrom(*in_priorbox, platform::CPUPlace(), + context.device_context(), &priorbox_cpu); + priorbox_data = priorbox_cpu.data(); + } + // get decode bboxes + size_t num_priors = in_priorbox->numel() / 8; + std::vector>> all_decoded_bboxes; + for (size_t n = 0; n < batch_size; ++n) { + std::vector> decoded_bboxes; + for (size_t i = 0; i < num_priors; ++i) { + size_t prior_offset = i * 8; + size_t loc_pred_offset = n * num_priors * 4 + i * 4; + std::vector> prior_bbox_vec; + math::GetBBoxFromPriorData(priorbox_data + prior_offset, 1, + prior_bbox_vec); + std::vector> prior_bbox_var; + math::GetBBoxVarFromPriorData(priorbox_data + prior_offset, 1, + prior_bbox_var); + std::vector loc_pred_data; + for (size_t j = 0; j < 4; ++j) + loc_pred_data.push_back(*(loc_data + loc_pred_offset + j)); + math::BBox bbox = math::DecodeBBoxWithVar( + prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data); + decoded_bboxes.push_back(bbox); + } + all_decoded_bboxes.push_back(decoded_bboxes); + } + std::vector>> all_indices; + int num_kept = math::GetDetectionIndices( + conf_data, num_priors, num_classes, background_label_id, batch_size, + confidence_threshold, nms_top_k, nms_threshold, top_k, + all_decoded_bboxes, &all_indices); + + if (num_kept <= 0) { + std::vector out_shape_vec({0, 0}); + framework::DDim out_shape(framework::make_ddim(out_shape_vec)); + out->Resize(out_shape); + return; + } + std::vector out_shape_vec({num_kept, 7}); + framework::DDim out_shape(framework::make_ddim(out_shape_vec)); + out->mutable_data(out_shape, context.GetPlace()); + framework::Tensor out_cpu; + T* out_data = out->data(); + if (platform::is_gpu_place(context.GetPlace())) { + out_cpu.mutable_data(out->dims(), platform::CPUPlace()); + out_data = out_cpu.data(); + } + math::GetDetectionOutput(conf_data, num_kept, num_priors, num_classes, + batch_size, all_indices, all_decoded_bboxes, + out_data); + if (platform::is_gpu_place(context.GetPlace())) { + framework::CopyFrom(out_cpu, platform::CUDAPlace(), + context.device_context(), out); + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/detection_util.h b/paddle/operators/math/detection_util.h new file mode 100644 index 0000000000000000000000000000000000000000..e3a3ef2badc37924d866ded8ee7a7338fbc4b2d2 --- /dev/null +++ b/paddle/operators/math/detection_util.h @@ -0,0 +1,300 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once +#include +#include "paddle/framework/selected_rows.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { +namespace math { +template +struct BBox { + BBox(T x_min, T y_min, T x_max, T y_max) + : x_min(x_min), + y_min(y_min), + x_max(x_max), + y_max(y_max), + is_difficult(false) {} + + BBox() {} + + T get_width() const { return x_max - x_min; } + + T get_height() const { return y_max - y_min; } + + T get_center_x() const { return (x_min + x_max) / 2; } + + T get_center_y() const { return (y_min + y_max) / 2; } + + T get_area() const { return get_width() * get_height(); } + + // coordinate of bounding box + T x_min; + T y_min; + T x_max; + T y_max; + // whether difficult object (e.g. object with heavy occlusion is difficult) + bool is_difficult; +}; +// KNCHW ==> NHWC +// template +template +void GetBBoxFromPriorData(const T* prior_data, const size_t num_bboxes, + std::vector>& bbox_vec); +template +void GetBBoxVarFromPriorData(const T* prior_data, const size_t num, + std::vector>& var_vec); +template +BBox DecodeBBoxWithVar(BBox& prior_bbox, + const std::vector& prior_bbox_var, + const std::vector& loc_pred_data); +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2); +template +bool SortScorePairDescend(const std::pair>& pair1, + const std::pair>& pair2); +template +T jaccard_overlap(const BBox& bbox1, const BBox& bbox2); + +template +void ApplyNmsFast(const std::vector>& bboxes, const T* conf_score_data, + size_t class_idx, size_t top_k, T conf_threshold, + T nms_threshold, size_t num_priors, size_t num_classes, + std::vector* indices); +template +int GetDetectionIndices( + const T* conf_data, const size_t num_priors, const size_t num_classes, + const size_t background_label_id, const size_t batch_size, + const T conf_threshold, const size_t nms_top_k, const T nms_threshold, + const size_t top_k, + const std::vector>>& all_decoded_bboxes, + std::vector>>* all_detection_indices); +template +BBox ClipBBox(const BBox& bbox); +template +void GetDetectionOutput( + const T* conf_data, const size_t num_kept, const size_t num_priors, + const size_t num_classes, const size_t batch_size, + const std::vector>>& all_indices, + const std::vector>>& all_decoded_bboxes, T* out_data); +template +void GetBBoxFromPriorData(const T* prior_data, const size_t num_bboxes, + std::vector>& bbox_vec) { + size_t out_offset = bbox_vec.size(); + bbox_vec.resize(bbox_vec.size() + num_bboxes); + for (size_t i = 0; i < num_bboxes; ++i) { + BBox bbox; + bbox.x_min = *(prior_data + i * 8); + bbox.y_min = *(prior_data + i * 8 + 1); + bbox.x_max = *(prior_data + i * 8 + 2); + bbox.y_max = *(prior_data + i * 8 + 3); + bbox_vec[out_offset + i] = bbox; + } +} +template +void GetBBoxVarFromPriorData(const T* prior_data, const size_t num, + std::vector>& var_vec) { + size_t out_offset = var_vec.size(); + var_vec.resize(var_vec.size() + num); + for (size_t i = 0; i < num; ++i) { + std::vector var; + var.push_back(*(prior_data + i * 8 + 4)); + var.push_back(*(prior_data + i * 8 + 5)); + var.push_back(*(prior_data + i * 8 + 6)); + var.push_back(*(prior_data + i * 8 + 7)); + var_vec[out_offset + i] = var; + } +} +template +BBox DecodeBBoxWithVar(BBox& prior_bbox, + const std::vector& prior_bbox_var, + const std::vector& loc_pred_data) { + T prior_bbox_width = prior_bbox.get_width(); + T prior_bbox_height = prior_bbox.get_height(); + T prior_bbox_center_x = prior_bbox.get_center_x(); + T prior_bbox_center_y = prior_bbox.get_center_y(); + + T decoded_bbox_center_x = + prior_bbox_var[0] * loc_pred_data[0] * prior_bbox_width + + prior_bbox_center_x; + T decoded_bbox_center_y = + prior_bbox_var[1] * loc_pred_data[1] * prior_bbox_height + + prior_bbox_center_y; + T decoded_bbox_width = + std::exp(prior_bbox_var[2] * loc_pred_data[2]) * prior_bbox_width; + T decoded_bbox_height = + std::exp(prior_bbox_var[3] * loc_pred_data[3]) * prior_bbox_height; + + BBox decoded_bbox; + decoded_bbox.x_min = decoded_bbox_center_x - decoded_bbox_width / 2; + decoded_bbox.y_min = decoded_bbox_center_y - decoded_bbox_height / 2; + decoded_bbox.x_max = decoded_bbox_center_x + decoded_bbox_width / 2; + decoded_bbox.y_max = decoded_bbox_center_y + decoded_bbox_height / 2; + + return decoded_bbox; +} +template +bool SortScorePairDescend(const std::pair& pair1, + const std::pair& pair2) { + return pair1.first > pair2.first; +} +template +T jaccard_overlap(const BBox& bbox1, const BBox& bbox2) { + if (bbox2.x_min > bbox1.x_max || bbox2.x_max < bbox1.x_min || + bbox2.y_min > bbox1.y_max || bbox2.y_max < bbox1.y_min) { + return 0.0; + } else { + T inter_x_min = std::max(bbox1.x_min, bbox2.x_min); + T inter_y_min = std::max(bbox1.y_min, bbox2.y_min); + T interX_max = std::min(bbox1.x_max, bbox2.x_max); + T interY_max = std::min(bbox1.y_max, bbox2.y_max); + + T inter_width = interX_max - inter_x_min; + T inter_height = interY_max - inter_y_min; + T inter_area = inter_width * inter_height; + + T bbox_area1 = bbox1.get_area(); + T bbox_area2 = bbox2.get_area(); + + return inter_area / (bbox_area1 + bbox_area2 - inter_area); + } +} + +template +void ApplyNmsFast(const std::vector>& bboxes, const T* conf_score_data, + size_t class_idx, size_t top_k, T conf_threshold, + T nms_threshold, size_t num_priors, size_t num_classes, + std::vector* indices) { + std::vector> scores; + for (size_t i = 0; i < num_priors; ++i) { + size_t conf_offset = i * num_classes + class_idx; + if (conf_score_data[conf_offset] > conf_threshold) + scores.push_back(std::make_pair(conf_score_data[conf_offset], i)); + } + std::stable_sort(scores.begin(), scores.end(), + SortScorePairDescend); + if (top_k > 0 && top_k < scores.size()) scores.resize(top_k); + while (scores.size() > 0) { + const size_t idx = scores.front().second; + bool keep = true; + for (size_t i = 0; i < indices->size(); ++i) { + if (keep) { + const size_t saved_idx = (*indices)[i]; + T overlap = jaccard_overlap(bboxes[idx], bboxes[saved_idx]); + keep = overlap <= nms_threshold; + } else { + break; + } + } + if (keep) indices->push_back(idx); + scores.erase(scores.begin()); + } +} +template +int GetDetectionIndices( + const T* conf_data, const size_t num_priors, const size_t num_classes, + const size_t background_label_id, const size_t batch_size, + const T conf_threshold, const size_t nms_top_k, const T nms_threshold, + const size_t top_k, + const std::vector>>& all_decoded_bboxes, + std::vector>>* all_detection_indices) { + int total_keep_num = 0; + for (size_t n = 0; n < batch_size; ++n) { + const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; + size_t num_detected = 0; + std::map> indices; + size_t conf_offset = n * num_priors * num_classes; + for (size_t c = 0; c < num_classes; ++c) { + if (c == background_label_id) continue; + ApplyNmsFast(decoded_bboxes, conf_data + conf_offset, c, nms_top_k, + conf_threshold, nms_threshold, num_priors, num_classes, + &(indices[c])); + num_detected += indices[c].size(); + } + if (top_k > 0 && num_detected > top_k) { + // std::vector> score_index_pairs; + std::vector>> score_index_pairs; + for (size_t c = 0; c < num_classes; ++c) { + const std::vector& label_indices = indices[c]; + for (size_t i = 0; i < label_indices.size(); ++i) { + size_t idx = label_indices[i]; + score_index_pairs.push_back( + std::make_pair((conf_data + conf_offset)[idx * num_classes + c], + std::make_pair(c, idx))); + } + } + std::sort(score_index_pairs.begin(), score_index_pairs.end(), + SortScorePairDescend>); + score_index_pairs.resize(top_k); + std::map> new_indices; + for (size_t i = 0; i < score_index_pairs.size(); ++i) { + size_t label = score_index_pairs[i].second.first; + size_t idx = score_index_pairs[i].second.second; + new_indices[label].push_back(idx); + } + all_detection_indices->push_back(new_indices); + total_keep_num += top_k; + } else { + all_detection_indices->push_back(indices); + total_keep_num += num_detected; + } + } + return total_keep_num; +} +template +BBox ClipBBox(const BBox& bbox) { + T one = static_cast(1.0); + T zero = static_cast(0.0); + BBox clipped_bbox; + clipped_bbox.x_min = std::max(std::min(bbox.x_min, one), zero); + clipped_bbox.y_min = std::max(std::min(bbox.y_min, one), zero); + clipped_bbox.x_max = std::max(std::min(bbox.x_max, one), zero); + clipped_bbox.y_max = std::max(std::min(bbox.y_max, one), zero); + return clipped_bbox; +} +template +void GetDetectionOutput( + const T* conf_data, const size_t num_kept, const size_t num_priors, + const size_t num_classes, const size_t batch_size, + const std::vector>>& all_indices, + const std::vector>>& all_decoded_bboxes, T* out_data) { + size_t count = 0; + for (size_t n = 0; n < batch_size; ++n) { + for (std::map>::const_iterator it = + all_indices[n].begin(); + it != all_indices[n].end(); ++it) { + size_t label = it->first; + const std::vector& indices = it->second; + const std::vector>& decoded_bboxes = all_decoded_bboxes[n]; + for (size_t i = 0; i < indices.size(); ++i) { + size_t idx = indices[i]; + size_t conf_offset = n * num_priors * num_classes + idx * num_classes; + out_data[count * 7] = n; + out_data[count * 7 + 1] = label; + out_data[count * 7 + 2] = (conf_data + conf_offset)[label]; + BBox clipped_bbox = ClipBBox(decoded_bboxes[idx]); + out_data[count * 7 + 3] = clipped_bbox.x_min; + out_data[count * 7 + 4] = clipped_bbox.y_min; + out_data[count * 7 + 5] = clipped_bbox.x_max; + out_data[count * 7 + 6] = clipped_bbox.y_max; + ++count; + } + } + } +} +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/fluid/tests/test_detection_output_op.py b/python/paddle/v2/fluid/tests/test_detection_output_op.py new file mode 100644 index 0000000000000000000000000000000000000000..080a9743b0182cb7e6dd0030fc306a7f82510a05 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_detection_output_op.py @@ -0,0 +1,57 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestUnpoolOp(OpTest): + def setUp(self): + self.op_type = "detection_output" + self.init_test_case() + + #loc.shape ((1, 4, 4, 1, 1)) + #conf.shape ((1, 4, 2, 1, 1)) + + loc = np.array([[[[[0.1]], [[0.1]], [[0.1]], [[0.1]]], + [[[0.1]], [[0.1]], [[0.1]], [[0.1]]], + [[[0.1]], [[0.1]], [[0.1]], [[0.1]]], + [[[0.1]], [[0.1]], [[0.1]], [[0.1]]]]]) + conf = np.array([[[[[0.1]], [[0.9]]], [[[0.2]], [[0.8]]], + [[[0.3]], [[0.7]]], [[[0.4]], [[0.6]]]]]) + priorbox = np.array([ + 0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.6, 0.6, 0.1, + 0.1, 0.2, 0.2, 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, 0.4, 0.4, + 0.8, 0.8, 0.1, 0.1, 0.2, 0.2 + ]) + + output = np.array([ + 0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031 + ]) + self.inputs = { + 'Loc': loc.astype('float32'), + 'Conf': conf.astype('float32'), + 'PriorBox': priorbox.astype('float32') + } + self.attrs = { + 'num_classes': self.num_classes, + 'top_k': self.top_k, + 'nms_top_k': self.nms_top_k, + 'background_label_id': self.background_label_id, + 'nms_threshold': self.nms_threshold, + 'confidence_threshold': self.confidence_threshold, + } + self.outputs = {'Out': output.astype('float32')} + + def test_check_output(self): + self.check_output() + + def init_test_case(self): + self.num_classes = 2 + self.top_k = 10 + self.nms_top_k = 20 + self.background_label_id = 0 + self.nms_threshold = 0.01 + self.confidence_threshold = 0.01 + + +if __name__ == '__main__': + unittest.main()