提交 664e19cc 编写于 作者: J juncaipeng 提交者: Xiaoyang LI

rewrite multiclass_nms according to fluid, test=develop (#1945)

* add ops for faster rcnn

* disable test for generate_proposals and roi_align, test=develop

* remove .swp file

* remove log in tensor slice

* finish the unit test for roi_align, test=develop

* add box_clip op and fix tensor slice bug

* remove add four op twice

* rewrite the implement for box_coder and sequence_expand, add faster_rcnn_test, test=develop

* fix test bug of box_clip in x86 server, test=develop

* rewrite multiclass_nms according to fluid, test=develop

* fix param load bug in box_coder and multiclass_nms op, test=develop

* fix value transfor error in multiclass_nms, test=develop
上级 088d741f
......@@ -78,19 +78,13 @@ void TestModel(const std::vector<Place>& valid_places,
auto* out = predictor.GetOutput(0);
auto* out_data = out->data<float>();
LOG(INFO) << "==========output data===============";
LOG(INFO) << out->dims();
for (int i = 0; i < out->numel(); i++) {
// LOG(INFO) << out_data[i];
LOG(INFO) << out_data[i];
}
/*
ASSERT_EQ(out->dims()[1], 6);
ASSERT_EQ(out->lod().size(), 1);
ASSERT_EQ(out->lod()[0].size(), 2);
ASSERT_EQ(out->lod()[0][0], 0);
ASSERT_EQ(out->lod()[0][1], 100);
*/
}
TEST(MobileNetV1_YoloV3, test_arm) {
TEST(Faster_RCNN, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......
......@@ -6,4 +6,4 @@ add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_k
add_kernel(multiclass_nms_compute_host Host basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps})
lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any)
lite_cc_test(test_multiclass_nms_compute_host SRCS multiclass_nms_compute_test.cc DEPS multiclass_nms_compute_host any)
#lite_cc_test(test_multiclass_nms_compute_host SRCS multiclass_nms_compute_test.cc DEPS multiclass_nms_compute_host any)
......@@ -22,329 +22,365 @@ namespace lite {
namespace kernels {
namespace host {
template <typename dtype>
static bool sort_score_pair_descend(const std::pair<float, dtype>& pair1,
const std::pair<float, dtype>& pair2) {
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
template <typename dtype>
void get_max_score_index(const dtype* scores,
int num,
float threshold,
int top_k,
std::vector<std::pair<dtype, int>>* score_index_vec) {
//! Generate index score pairs.
for (int i = 0; i < num; ++i) {
template <class T>
static void GetMaxScoreIndex(const std::vector<T>& scores,
const T threshold,
int top_k,
std::vector<std::pair<T, int>>* sorted_indices) {
for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] > threshold) {
score_index_vec->push_back(std::make_pair(scores[i], i));
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
//! Sort the score pair according to the scores in descending order
std::stable_sort(score_index_vec->begin(),
score_index_vec->end(),
sort_score_pair_descend<int>);
//! Keep top_k scores if needed.
if (top_k > -1 && top_k < score_index_vec->size()) {
score_index_vec->resize(top_k);
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(),
sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
template <typename dtype>
dtype bbox_size(const dtype* bbox, bool normalized = true) {
if (bbox[2] < bbox[0] || bbox[3] < bbox[1]) {
// If bbox is invalid (e.g. xmax < xmin or ymax < ymin), return 0.
return dtype(0.);
template <class T>
static T BBoxArea(const T* box, const bool normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return static_cast<T>(0.);
} else {
const dtype width = bbox[2] - bbox[0];
const dtype height = bbox[3] - bbox[1];
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return width * height;
return w * h;
} else {
// If bbox is not within range [0, 1].
return (width + 1) * (height + 1);
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
template <typename dtype>
dtype jaccard_overlap(const dtype* bbox1, const dtype* bbox2) {
if (bbox2[0] > bbox1[2] || bbox2[2] < bbox1[0] || bbox2[1] > bbox1[3] ||
bbox2[3] < bbox1[1]) {
return dtype(0.);
template <class T>
static T JaccardOverlap(const T* box1, const T* box2, const bool normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return static_cast<T>(0.);
} else {
const dtype inter_xmin = std::max(bbox1[0], bbox2[0]);
const dtype inter_ymin = std::max(bbox1[1], bbox2[1]);
const dtype inter_xmax = std::min(bbox1[2], bbox2[2]);
const dtype inter_ymax = std::min(bbox1[3], bbox2[3]);
const dtype inter_width = inter_xmax - inter_xmin;
const dtype inter_height = inter_ymax - inter_ymin;
const dtype inter_size = inter_width * inter_height;
const dtype bbox1_size = bbox_size(bbox1);
const dtype bbox2_size = bbox_size(bbox2);
return inter_size / (bbox1_size + bbox2_size - inter_size);
const T inter_xmin = std::max(box1[0], box2[0]);
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
T norm = normalized ? static_cast<T>(0.) : static_cast<T>(1.);
T inter_w = inter_xmax - inter_xmin + norm;
T inter_h = inter_ymax - inter_ymin + norm;
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <typename dtype>
void apply_nms_fast(const dtype* bboxes,
const dtype* scores,
int num,
float score_threshold,
float nms_threshold,
float eta,
int top_k,
std::vector<int>* indices) {
// Get top_k scores (with corresponding indices).
std::vector<std::pair<dtype, int>> score_index_vec;
get_max_score_index(scores, num, score_threshold, top_k, &score_index_vec);
template <class T>
T PolyIoU(const T* box1,
const T* box2,
const size_t box_size,
const bool normalized) {
LOG(FATAL) << "PolyIoU not implement.";
}
// Do nms.
float adaptive_threshold = nms_threshold;
indices->clear();
template <class T>
void SliceOneClass(const Tensor& items,
const int class_id,
Tensor* one_class_item) {
T* item_data = one_class_item->mutable_data<T>();
const T* items_data = items.data<T>();
const int64_t num_item = items.dims()[0];
const int64_t class_num = items.dims()[1];
if (items.dims().size() == 3) {
int64_t item_size = items.dims()[2];
for (int i = 0; i < num_item; ++i) {
std::memcpy(item_data + i * item_size,
items_data + i * class_num * item_size + class_id * item_size,
sizeof(T) * item_size);
}
} else {
for (int i = 0; i < num_item; ++i) {
item_data[i] = items_data[i * class_num + class_id];
}
}
}
while (score_index_vec.size() != 0) {
const int idx = score_index_vec.front().second;
template <typename T>
void NMSFast(const Tensor& bbox,
const Tensor& scores,
const T score_threshold,
const T nms_threshold,
const T eta,
const int64_t top_k,
std::vector<int>* selected_indices,
const bool normalized) {
// The total boxes for each instance.
int64_t num_boxes = bbox.dims()[0];
// 4: [xmin ymin xmax ymax]
// 8: [x1 y1 x2 y2 x3 y3 x4 y4]
// 16, 24, or 32: [x1 y1 x2 y2 ... xn yn], n = 8, 12 or 16
int64_t box_size = bbox.dims()[1];
std::vector<T> scores_data(num_boxes);
std::copy_n(scores.data<T>(), num_boxes, scores_data.begin());
std::vector<std::pair<T, int>> sorted_indices;
GetMaxScoreIndex(scores_data, score_threshold, top_k, &sorted_indices);
selected_indices->clear();
T adaptive_threshold = nms_threshold;
const T* bbox_data = bbox.data<T>();
while (sorted_indices.size() != 0) {
const int idx = sorted_indices.front().second;
bool keep = true;
for (int k = 0; k < indices->size(); ++k) {
for (size_t k = 0; k < selected_indices->size(); ++k) {
if (keep) {
const int kept_idx = (*indices)[k];
float overlap =
jaccard_overlap(bboxes + idx * 4, bboxes + kept_idx * 4);
const int kept_idx = (*selected_indices)[k];
T overlap = T(0.);
// 4: [xmin ymin xmax ymax]
if (box_size == 4) {
overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
normalized);
}
// 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32
if (box_size == 8 || box_size == 16 || box_size == 24 ||
box_size == 32) {
overlap = PolyIoU<T>(bbox_data + idx * box_size,
bbox_data + kept_idx * box_size,
box_size,
normalized);
}
keep = overlap <= adaptive_threshold;
} else {
break;
}
}
if (keep) {
indices->push_back(idx);
selected_indices->push_back(idx);
}
score_index_vec.erase(score_index_vec.begin());
sorted_indices.erase(sorted_indices.begin());
if (keep && eta < 1 && adaptive_threshold > 0.5) {
adaptive_threshold *= eta;
}
}
}
template <typename dtype>
void multiclass_nms(const dtype* bbox_cpu_data,
const dtype* conf_cpu_data,
std::vector<dtype>* result,
const std::vector<int>& priors,
int class_num,
int background_id,
int keep_topk,
int nms_topk,
float conf_thresh,
float nms_thresh,
float nms_eta,
bool share_location) {
int num_kept = 0;
std::vector<std::map<int, std::vector<int>>> all_indices;
int64_t conf_offset = 0;
int64_t bbox_offset = 0;
for (int i = 0; i < priors.size(); ++i) {
std::map<int, std::vector<int>> indices;
int num_det = 0;
int num_priors = priors[i];
int conf_idx = class_num * conf_offset;
int bbox_idx =
share_location ? bbox_offset * 4 : bbox_offset * 4 * class_num;
template <typename T>
void MultiClassNMS(const operators::MulticlassNmsParam& param,
const Tensor& scores,
const Tensor& bboxes,
const int scores_size,
std::map<int, std::vector<int>>* indices,
int* num_nmsed_out) {
int64_t background_label = param.background_label;
int64_t nms_top_k = param.nms_top_k;
int64_t keep_top_k = param.keep_top_k;
bool normalized = param.normalized;
T nms_threshold = static_cast<T>(param.nms_threshold);
T nms_eta = static_cast<T>(param.nms_eta);
T score_threshold = static_cast<T>(param.score_threshold);
int num_det = 0;
int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1];
Tensor bbox_slice, score_slice;
for (int64_t c = 0; c < class_num; ++c) {
if (c == background_label) continue;
if (scores_size == 3) {
score_slice = scores.Slice<T>(c, c + 1);
bbox_slice = bboxes;
} else {
score_slice.Resize({scores.dims()[0], 1});
bbox_slice.Resize({scores.dims()[0], 4});
SliceOneClass<T>(scores, c, &score_slice);
SliceOneClass<T>(bboxes, c, &bbox_slice);
}
NMSFast(bbox_slice,
score_slice,
score_threshold,
nms_threshold,
nms_eta,
nms_top_k,
&((*indices)[c]),
normalized);
if (scores_size == 2) {
std::stable_sort((*indices)[c].begin(), (*indices)[c].end());
}
num_det += (*indices)[c].size();
}
for (int c = 0; c < class_num; ++c) {
if (c == background_id) {
// Ignore background class
continue;
*num_nmsed_out = num_det;
const T* scores_data = scores.data<T>();
if (keep_top_k > -1 && num_det > keep_top_k) {
const T* sdata;
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (const auto& it : *indices) {
int label = it.first;
if (scores_size == 3) {
sdata = scores_data + label * scores.dims()[1];
} else {
score_slice.Resize({scores.dims()[0], 1});
SliceOneClass<T>(scores, label, &score_slice);
sdata = score_slice.data<T>();
}
const dtype* cur_conf_data = conf_cpu_data + conf_idx + c * num_priors;
const dtype* cur_bbox_data = bbox_cpu_data + bbox_idx;
if (!share_location) {
cur_bbox_data += c * num_priors * 4;
const std::vector<int>& label_indices = it.second;
for (size_t j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
score_index_pairs.push_back(
std::make_pair(sdata[idx], std::make_pair(label, idx)));
}
apply_nms_fast(cur_bbox_data,
cur_conf_data,
num_priors,
conf_thresh,
nms_thresh,
nms_eta,
nms_topk,
&(indices[c]));
num_det += indices[c].size();
}
if (keep_topk > -1 && num_det > keep_topk) {
std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
for (auto it = indices.begin(); it != indices.end(); ++it) {
int label = it->first;
const std::vector<int>& label_indices = it->second;
for (int j = 0; j < label_indices.size(); ++j) {
int idx = label_indices[j];
float score = conf_cpu_data[conf_idx + label * num_priors + idx];
score_index_pairs.push_back(
std::make_pair(score, std::make_pair(label, idx)));
}
}
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(),
score_index_pairs.end(),
sort_score_pair_descend<std::pair<int, int>>);
score_index_pairs.resize(keep_topk);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (int j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
// Keep top k results per image.
std::stable_sort(score_index_pairs.begin(),
score_index_pairs.end(),
SortScorePairDescend<std::pair<int, int>>);
score_index_pairs.resize(keep_top_k);
// Store the new indices.
std::map<int, std::vector<int>> new_indices;
for (size_t j = 0; j < score_index_pairs.size(); ++j) {
int label = score_index_pairs[j].second.first;
int idx = score_index_pairs[j].second.second;
new_indices[label].push_back(idx);
}
if (scores_size == 2) {
for (const auto& it : new_indices) {
int label = it.first;
std::stable_sort(new_indices[label].begin(), new_indices[label].end());
}
all_indices.push_back(new_indices);
num_kept += keep_topk;
} else {
all_indices.push_back(indices);
num_kept += num_det;
}
conf_offset += num_priors;
bbox_offset += num_priors;
new_indices.swap(*indices);
*num_nmsed_out = keep_top_k;
}
}
if (num_kept == 0) {
(*result).clear();
return;
} else {
(*result).resize(num_kept * 7);
template <typename T>
void MultiClassOutput(const Tensor& scores,
const Tensor& bboxes,
const std::map<int, std::vector<int>>& selected_indices,
const int scores_size,
Tensor* outs) {
int64_t class_num = scores.dims()[1];
int64_t predict_dim = scores.dims()[1];
int64_t box_size = bboxes.dims()[1];
if (scores_size == 2) {
box_size = bboxes.dims()[2];
}
int64_t out_dim = box_size + 2;
auto* scores_data = scores.data<T>();
auto* bboxes_data = bboxes.data<T>();
auto* odata = outs->mutable_data<T>();
const T* sdata;
Tensor bbox;
bbox.Resize({scores.dims()[0], box_size});
int count = 0;
conf_offset = 0;
bbox_offset = 0;
for (int i = 0; i < priors.size(); ++i) {
int num_priors = priors[i];
int conf_idx = class_num * conf_offset;
int bbox_idx =
share_location ? bbox_offset * 4 : bbox_offset * 4 * class_num;
for (auto it = all_indices[i].begin(); it != all_indices[i].end(); ++it) {
int label = it->first;
std::vector<int>& indices = it->second;
const dtype* cur_conf_data =
conf_cpu_data + conf_idx + label * num_priors;
const dtype* cur_bbox_data = bbox_cpu_data + bbox_idx;
if (!share_location) {
cur_bbox_data += label * num_priors * 4;
}
for (int j = 0; j < indices.size(); ++j) {
int idx = indices[j];
(*result)[count * 7] = i;
(*result)[count * 7 + 1] = label;
(*result)[count * 7 + 2] = cur_conf_data[idx];
for (int k = 0; k < 4; ++k) {
(*result)[count * 7 + 3 + k] = cur_bbox_data[idx * 4 + k];
}
++count;
for (const auto& it : selected_indices) {
int label = it.first;
const std::vector<int>& indices = it.second;
if (scores_size == 2) {
SliceOneClass<T>(bboxes, label, &bbox);
} else {
sdata = scores_data + label * predict_dim;
}
for (size_t j = 0; j < indices.size(); ++j) {
int idx = indices[j];
odata[count * out_dim] = label; // label
const T* bdata;
if (scores_size == 3) {
bdata = bboxes_data + idx * box_size;
odata[count * out_dim + 1] = sdata[idx]; // score
} else {
bdata = bbox.data<T>() + idx * box_size;
odata[count * out_dim + 1] = *(scores_data + idx * class_num + label);
}
// xmin, ymin, xmax, ymax or multi-points coordinates
std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T));
count++;
}
conf_offset += num_priors;
bbox_offset += num_priors;
}
}
void MulticlassNmsCompute::Run() {
auto& param = Param<operators::MulticlassNmsParam>();
// bbox shape : N, M, 4
// scores shape : N, C, M
const float* bbox_data = param.bbox_data->data<float>();
const float* conf_data = param.conf_data->data<float>();
CHECK_EQ(param.bbox_data->dims().production() % 4, 0);
std::vector<float> result;
int N = param.bbox_data->dims()[0];
int M = param.bbox_data->dims()[1];
std::vector<int> priors(N, M);
int class_num = param.conf_data->dims()[1];
int background_label = param.background_label;
int keep_top_k = param.keep_top_k;
int nms_top_k = param.nms_top_k;
float score_threshold = param.score_threshold;
float nms_threshold = param.nms_threshold;
float nms_eta = param.nms_eta;
bool share_location = param.share_location;
auto* boxes = param.bboxes;
auto* scores = param.scores;
auto* outs = param.out;
multiclass_nms(bbox_data,
conf_data,
&result,
priors,
class_num,
background_label,
keep_top_k,
nms_top_k,
score_threshold,
nms_threshold,
nms_eta,
share_location);
auto score_dims = scores->dims();
auto score_size = score_dims.size();
lite::LoD lod;
std::vector<uint64_t> lod_info;
lod_info.push_back(0);
std::vector<float> result_corrected;
int tmp_batch_id;
uint64_t num = 0;
for (int i = 0; i < result.size(); ++i) {
if (i == 0) {
tmp_batch_id = result[i];
}
if (i % 7 == 0) {
if (result[i] == tmp_batch_id) {
++num;
} else {
lod_info.push_back(num);
++num;
tmp_batch_id = result[i];
}
std::vector<std::map<int, std::vector<int>>> all_indices;
std::vector<uint64_t> batch_starts = {0};
int64_t batch_size = score_dims[0];
int64_t box_dim = boxes->dims()[2];
int64_t out_dim = box_dim + 2;
int num_nmsed_out = 0;
Tensor boxes_slice, scores_slice;
int n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1;
for (int i = 0; i < n; ++i) {
if (score_size == 3) {
scores_slice = scores->Slice<float>(i, i + 1);
scores_slice.Resize({score_dims[1], score_dims[2]});
boxes_slice = boxes->Slice<float>(i, i + 1);
boxes_slice.Resize({score_dims[2], box_dim});
} else {
result_corrected.push_back(result[i]);
auto boxes_lod = boxes->lod().back();
scores_slice = scores->Slice<float>(boxes_lod[i], boxes_lod[i + 1]);
boxes_slice = boxes->Slice<float>(boxes_lod[i], boxes_lod[i + 1]);
}
std::map<int, std::vector<int>> indices;
MultiClassNMS<float>(
param, scores_slice, boxes_slice, score_size, &indices, &num_nmsed_out);
all_indices.push_back(indices);
batch_starts.push_back(batch_starts.back() + num_nmsed_out);
}
lod_info.push_back(num);
lod.push_back(lod_info);
if (result_corrected.empty()) {
lod.clear();
lod.push_back(std::vector<uint64_t>({0, 1}));
param.out->Resize({static_cast<int64_t>(1)});
param.out->mutable_data<float>()[0] = -1.;
param.out->set_lod(lod);
uint64_t num_kept = batch_starts.back();
if (num_kept == 0) {
outs->Resize({1, 1});
float* od = outs->mutable_data<float>();
od[0] = -1;
batch_starts = {0, 1};
} else {
param.out->Resize({static_cast<int64_t>(result_corrected.size() / 6), 6});
float* out = param.out->mutable_data<float>();
std::memcpy(
out, result_corrected.data(), sizeof(float) * result_corrected.size());
param.out->set_lod(lod);
outs->Resize({static_cast<int64_t>(num_kept), out_dim});
for (int i = 0; i < n; ++i) {
if (score_size == 3) {
scores_slice = scores->Slice<float>(i, i + 1);
boxes_slice = boxes->Slice<float>(i, i + 1);
scores_slice.Resize({score_dims[1], score_dims[2]});
boxes_slice.Resize({score_dims[2], box_dim});
} else {
auto boxes_lod = boxes->lod().back();
scores_slice = scores->Slice<float>(boxes_lod[i], boxes_lod[i + 1]);
boxes_slice = boxes->Slice<float>(boxes_lod[i], boxes_lod[i + 1]);
}
int64_t s = static_cast<int64_t>(batch_starts[i]);
int64_t e = static_cast<int64_t>(batch_starts[i + 1]);
if (e > s) {
Tensor out = outs->Slice<float>(s, e);
MultiClassOutput<float>(
scores_slice, boxes_slice, all_indices[i], score_dims.size(), &out);
}
}
}
}
LoD lod;
lod.emplace_back(batch_starts);
outs->set_lod(lod);
}
} // namespace host
} // namespace kernels
} // namespace lite
......
......@@ -139,18 +139,18 @@ void apply_nms_fast(const dtype* bboxes,
template <typename dtype>
void multiclass_nms_compute_ref(const operators::MulticlassNmsParam& param,
int class_num,
const std::vector<int>& priors,
bool share_location,
std::vector<float>* result) {
const std::vector<int>& priors = param.priors;
int class_num = param.class_num;
int background_id = param.background_label;
int keep_topk = param.keep_top_k;
int nms_topk = param.nms_top_k;
float conf_thresh = param.score_threshold;
float nms_thresh = param.nms_threshold;
float nms_eta = param.nms_eta;
bool share_location = param.share_location;
const dtype* bbox_data = param.bbox_data->data<const dtype>();
const dtype* conf_data = param.conf_data->data<const dtype>();
const dtype* bbox_data = param.bboxes->data<const dtype>();
const dtype* conf_data = param.scores->data<const dtype>();
dtype* out = param.out->mutable_data<dtype>();
(*result).clear();
......@@ -325,23 +325,21 @@ TEST(multiclass_nms_host, compute) {
for (int i = 0; i < conf_dim->production(); ++i) {
conf_data[i] = i * 1. / conf_dim->production();
}
param.bbox_data = &bbox;
param.conf_data = &conf;
param.bboxes = &bbox;
param.scores = &conf;
param.out = &out;
param.priors = priors;
param.class_num = class_num;
param.background_label = background_id;
param.keep_top_k = keep_topk;
param.nms_top_k = nms_topk;
param.score_threshold = conf_thresh;
param.nms_threshold = nms_thresh;
param.nms_eta = nms_eta;
param.share_location = share_location;
multiclass_nms.SetParam(param);
multiclass_nms.Run();
auto* out_data = out.mutable_data<float>();
out_ref.clear();
multiclass_nms_compute_ref<float>(param, &out_ref);
multiclass_nms_compute_ref<float>(
param, class_num, priors, share_location, &out_ref);
EXPECT_EQ(out.dims().production(), out_ref.size());
if (out.dims().production() == out_ref.size()) {
auto* out_ref_data = out_ref.data();
......
......@@ -89,7 +89,9 @@ bool BoxCoderOpLite::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
param_.code_type = opdesc.GetAttr<std::string>("code_type");
param_.box_normalized = opdesc.GetAttr<bool>("box_normalized");
param_.axis = opdesc.GetAttr<int>("axis");
if (opdesc.HasAttr("axis")) {
param_.axis = opdesc.GetAttr<int>("axis");
}
if (opdesc.HasAttr("variance")) {
param_.variance = opdesc.GetAttr<std::vector<float>>("variance");
......
......@@ -20,34 +20,55 @@ namespace lite {
namespace operators {
bool MulticlassNmsOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.bbox_data);
CHECK_OR_FALSE(param_.conf_data);
CHECK_OR_FALSE(param_.bboxes);
CHECK_OR_FALSE(param_.scores);
CHECK_OR_FALSE(param_.out);
auto box_dims = param_.bboxes->dims();
auto score_dims = param_.scores->dims();
auto score_size = score_dims.size();
CHECK_OR_FALSE(score_size == 2 || score_size == 3);
CHECK_OR_FALSE(box_dims.size() == 3);
if (score_size == 3) {
CHECK_OR_FALSE(box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 ||
box_dims[2] == 24 || box_dims[2] == 32);
CHECK_OR_FALSE(box_dims[1] == score_dims[2]);
} else {
CHECK_OR_FALSE(box_dims[2] == 4);
CHECK_OR_FALSE(box_dims[1] == score_dims[1]);
}
return true;
}
bool MulticlassNmsOpLite::InferShape() const {
// param_.out->Resize(param_.loc_data->dims());
auto box_dims = param_.bboxes->dims();
auto score_dims = param_.scores->dims();
auto score_size = score_dims.size();
if (score_size == 3) {
param_.out->Resize({box_dims[1], box_dims[2], 3});
} else {
param_.out->Resize({-1, box_dims[2] + 2});
}
return true;
}
bool MulticlassNmsOpLite::AttachImpl(const cpp::OpDesc& opdesc,
lite::Scope* scope) {
auto Bbox_name = opdesc.Input("BBoxes").front();
auto Conf_name = opdesc.Input("Scores").front();
auto Out_name = opdesc.Output("Out").front();
param_.bbox_data = GetVar<lite::Tensor>(scope, Bbox_name);
param_.conf_data = GetVar<lite::Tensor>(scope, Conf_name);
param_.out = GetMutableVar<lite::Tensor>(scope, Out_name);
auto bboxes_name = opdesc.Input("BBoxes").front();
auto scores_name = opdesc.Input("Scores").front();
auto out_name = opdesc.Output("Out").front();
param_.bboxes = GetVar<lite::Tensor>(scope, bboxes_name);
param_.scores = GetVar<lite::Tensor>(scope, scores_name);
param_.out = GetMutableVar<lite::Tensor>(scope, out_name);
param_.background_label = opdesc.GetAttr<int>("background_label");
param_.keep_top_k = opdesc.GetAttr<int>("keep_top_k");
param_.nms_top_k = opdesc.GetAttr<int>("nms_top_k");
param_.score_threshold = opdesc.GetAttr<float>("score_threshold");
param_.nms_threshold = opdesc.GetAttr<float>("nms_threshold");
param_.nms_eta = opdesc.GetAttr<float>("nms_eta");
if (opdesc.HasAttr("share_location")) {
param_.share_location = opdesc.GetAttr<bool>("share_location");
if (opdesc.HasAttr("normalized")) {
param_.normalized = opdesc.GetAttr<bool>("normalized");
}
return true;
}
......
......@@ -499,18 +499,16 @@ struct BoxCoderParam {
/// ----------------------- multiclass_nms operators ----------------------
struct MulticlassNmsParam {
const lite::Tensor* bbox_data{};
const lite::Tensor* conf_data{};
lite::Tensor* out;
std::vector<int> priors;
int class_num;
int background_label;
const lite::Tensor* bboxes{};
const lite::Tensor* scores{};
lite::Tensor* out{};
int background_label{0};
float score_threshold{};
int nms_top_k{};
float nms_threshold{0.3};
float nms_eta{1.0};
int keep_top_k;
int nms_top_k;
float score_threshold;
float nms_threshold;
float nms_eta;
bool share_location{true};
bool normalized{true};
};
/// ----------------------- priorbox operators ----------------------
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册