提交 2bec4623 编写于 作者: J jiaopu 提交者: jackzhang235

add yolo_box in x86

上级 5009a3b6
...@@ -63,6 +63,7 @@ add_kernel(sequence_topk_avg_pooling_compute_x86 X86 basic SRCS sequence_topk_av ...@@ -63,6 +63,7 @@ add_kernel(sequence_topk_avg_pooling_compute_x86 X86 basic SRCS sequence_topk_av
add_kernel(search_fc_compute_x86 X86 basic SRCS search_fc_compute.cc DEPS ${lite_kernel_deps} search_fc) add_kernel(search_fc_compute_x86 X86 basic SRCS search_fc_compute.cc DEPS ${lite_kernel_deps} search_fc)
add_kernel(matmul_compute_x86 X86 basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps} blas) add_kernel(matmul_compute_x86 X86 basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps} blas)
add_kernel(yolo_box_compute_x86 X86 basic SRCS yolo_box_compute.cc DEPS ${lite_kernel_deps})
lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86) lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86) lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86)
...@@ -101,3 +102,5 @@ lite_cc_test(test_var_conv_2d_compute_x86 SRCS var_conv_2d_compute_test.cc DEPS ...@@ -101,3 +102,5 @@ lite_cc_test(test_var_conv_2d_compute_x86 SRCS var_conv_2d_compute_test.cc DEPS
#lite_cc_test(test_attention_padding_mask_compute_x86 SRCS attention_padding_mask_compute_test.cc DEPS attention_padding_mask_compute_x86) #lite_cc_test(test_attention_padding_mask_compute_x86 SRCS attention_padding_mask_compute_test.cc DEPS attention_padding_mask_compute_x86)
lite_cc_test(test_sequence_arithmetic_compute_x86 SRCS sequence_arithmetic_compute_test.cc DEPS sequence_arithmetic_compute_x86) lite_cc_test(test_sequence_arithmetic_compute_x86 SRCS sequence_arithmetic_compute_test.cc DEPS sequence_arithmetic_compute_x86)
lite_cc_test(test_leaky_relu_compute_x86 SRCS leaky_relu_compute_test.cc DEPS activation_compute_x86) lite_cc_test(test_leaky_relu_compute_x86 SRCS leaky_relu_compute_test.cc DEPS activation_compute_x86)
lite_cc_test(test_yolo_box_compute_x86 SRCS yolo_box_compute_test.cc DEPS
yolo_box_compute_x86)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/yolo_box_compute.h"
REGISTER_LITE_KERNEL(yolo_box,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::YoloBoxCompute,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("ImgSize",
{LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt32))})
.BindOutput("Boxes", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Scores", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include "lite/core/kernel.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
#include "lite/operators/yolo_box_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename T>
T sigmoid(T x) {
return 1.f / (1.f + expf(-x));
}
template <typename T>
void get_yolo_box(T* box,
const T* x,
const int* anchors,
int i,
int j,
int an_idx,
int grid_size,
int input_size,
int index,
int stride,
int img_height,
int img_width) {
box[0] = (i + sigmoid(x[index])) * img_height / grid_size;
box[1] = (j + sigmoid(x[index + stride])) * img_height / grid_size;
box[2] = std::exp(x[index + stride]) * anchors[2 * an_idx] * img_width /
input_size;
box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] *
img_height / input_size;
}
inline int get_entry_index(int batch,
int an_idx,
int hw_idx,
int an_num,
int an_stride,
int stride,
int entry) {
return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx;
}
template <typename T>
void calc_detection_box(T* boxes,
T* box,
const int box_idx,
const int img_height,
const int img_width) {
boxes[box_idx] = box[0] - box[2] / 2;
boxes[box_idx + 1] = box[1] - box[3] / 2;
boxes[box_idx + 2] = box[0] + box[2] / 2;
boxes[box_idx + 3] = box[1] + box[3] / 2;
boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast<float>(0);
boxes[box_idx + 1] =
boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast<float>(0);
boxes[box_idx + 2] = boxes[box_idx + 2] < img_width - 1
? boxes[box_idx + 2]
: static_cast<float>(img_width - 1);
boxes[box_idx + 3] = boxes[box_idx + 3] < img_height - 1
? boxes[box_idx + 3]
: static_cast<float>(img_height - 1);
}
template <typename T>
void calc_label_score(T* scores,
const T* input,
const int label_idx,
const int score_idx,
const int class_num,
const T conf,
const int stride) {
for (int i = 0; i < class_num; i++) {
scores[score_idx + i] = conf * sigmoid(input[label_idx + i * stride]);
}
}
class YoloBoxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::YoloBoxParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
const int n = param.X->dims()[0];
const int h = param.X->dims()[2];
const int w = param.X->dims()[3];
const int b_num = param.Boxes->dims()[1];
const int an_num = param.anchors.size() / 2;
int X_size = param.downsample_ratio * h;
const int stride = h * w;
const int an_stride = (param.class_num + 5) * stride;
auto anchors_data = param.anchors.data();
const float* X_data = param.X->data<float>();
int* ImgSize_data = param.ImgSize->mutable_data<int>();
float* Boxes_data = param.Boxes->mutable_data<float>();
float* Scores_data = param.Scores->mutable_data<float>();
float box[4];
for (int i = 0; i < n; i++) {
int img_height = ImgSize_data[2 * i];
int img_width = ImgSize_data[2 * i + 1];
for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) {
for (int l = 0; l < w; l++) {
int obj_idx =
get_entry_index(i, j, k * w + l, an_num, an_stride, stride, 4);
float conf = sigmoid(X_data[obj_idx]);
if (conf < param.conf_thresh) {
continue;
}
int box_idx =
get_entry_index(i, j, k * w + l, an_num, an_stride, stride, 0);
get_yolo_box(box,
X_data,
anchors_data,
l,
k,
j,
h,
X_size,
box_idx,
stride,
img_height,
img_width);
box_idx = (i * b_num + j * stride + k * w + l) * 4;
calc_detection_box(Boxes_data, box, box_idx, img_height, img_width);
int label_idx =
get_entry_index(i, j, k * w + l, an_num, an_stride, stride, 5);
int score_idx =
(i * b_num + j * stride + k * w + l) * param.class_num;
calc_label_score(Scores_data,
X_data,
label_idx,
score_idx,
param.class_num,
conf,
stride);
}
}
}
}
}
virtual ~YoloBoxCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/yolo_box_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
namespace test {
float sigmoid_base(float x) { return 1.f / (1.f + expf(-x)); }
void get_yolo_box_base(float* box,
const float* x,
const int* anchors,
int i,
int j,
int an_idx,
int grid_size,
int input_size,
int index,
int stride,
int img_height,
int img_width) {
box[0] = (i + sigmoid_base(x[index])) * img_width / grid_size;
box[1] = (j + sigmoid_base(x[index + stride])) * img_height / grid_size;
box[2] = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] * img_width /
input_size;
box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] *
img_height / input_size;
}
int get_entry_index_base(int batch,
int an_idx,
int hw_idx,
int an_num,
int an_stride,
int stride,
int entry) {
return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx;
}
void calc_detection_box_base(float* boxes,
float* box,
const int box_idx,
const int img_height,
const int img_width) {
boxes[box_idx] = box[0] - box[2] / 2;
boxes[box_idx + 1] = box[1] - box[3] / 2;
boxes[box_idx + 2] = box[0] + box[2] / 2;
boxes[box_idx + 3] = box[1] + box[3] / 2;
boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast<float>(0);
boxes[box_idx + 1] =
boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast<float>(0);
boxes[box_idx + 2] = boxes[box_idx + 2] < img_width - 1
? boxes[box_idx + 2]
: static_cast<float>(img_width - 1);
boxes[box_idx + 3] = boxes[box_idx + 3] < img_height - 1
? boxes[box_idx + 3]
: static_cast<float>(img_height - 1);
}
void calc_label_score_base(float* scores,
const float* input,
const int label_idx,
const int score_idx,
const int class_num,
const float conf,
const int stride) {
for (int i = 0; i < class_num; i++) {
scores[score_idx + i] = conf * sigmoid_base(input[label_idx + i * stride]);
}
}
void RunBaseline(const lite::Tensor* X,
const lite::Tensor* ImgSize,
lite::Tensor* Boxes,
lite::Tensor* Scores,
int class_num,
float conf_thresh,
int downsample_ratio,
std::vector<int> anchors) {
auto* in = X;
auto* imgsize = ImgSize;
const int n = in->dims()[0];
const int h = in->dims()[2];
const int w = in->dims()[3];
const int an_num = anchors.size() / 2;
int in_size = downsample_ratio * h;
int box_num = in->dims()[2] * in->dims()[3] * an_num;
Boxes->Resize({in->dims()[0], box_num, 4});
Scores->Resize({in->dims()[0], box_num, class_num});
auto* boxes = Boxes;
auto* scores = Scores;
const int b_num = boxes->dims()[0];
const int stride = h * w;
const int an_stride = (class_num + 5) * stride;
auto anchors_data = anchors.data();
const float* in_data = in->data<float>();
const int* imgsize_data = imgsize->data<int>();
float* boxes_data = boxes->mutable_data<float>();
float* scores_data = scores->mutable_data<float>();
float box[4];
for (int i = 0; i < n; i++) {
int img_height = imgsize_data[2 * i];
int img_width = imgsize_data[2 * i + 1];
for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) {
for (int l = 0; l < w; l++) {
int obj_idx = test::get_entry_index_base(
i, j, k * w + l, an_num, an_stride, stride, 4);
float conf = test::sigmoid_base(in_data[obj_idx]);
if (conf < conf_thresh) {
continue;
}
int box_idx = test::get_entry_index_base(
i, j, k * w + l, an_num, an_stride, stride, 0);
test::get_yolo_box_base(box,
in_data,
anchors_data,
l,
k,
j,
h,
in_size,
box_idx,
stride,
img_height,
img_width);
box_idx = (i * b_num + j * stride + k * w + l) * 4;
test::calc_detection_box_base(
boxes_data, box, box_idx, img_height, img_width);
int label_idx = test::get_entry_index_base(
i, j, k * w + l, an_num, an_stride, stride, 5);
int score_idx = (i * b_num + j * stride + k * w + l) * class_num;
test::calc_label_score_base(scores_data,
in_data,
label_idx,
score_idx,
class_num,
conf,
stride);
}
}
}
}
}
} // namespace test
TEST(yolo_box_x86, retrive_op) {
auto yolo_box =
KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>(
"yolo_box");
ASSERT_FALSE(yolo_box.empty());
ASSERT_TRUE(yolo_box.front());
}
TEST(yolo_box_x86, init) {
YoloBoxCompute<float> yolo_box;
ASSERT_EQ(yolo_box.precision(), PRECISION(kFloat));
ASSERT_EQ(yolo_box.target(), TARGET(kX86));
}
TEST(yolo_box_x86, run_test) {
lite::Tensor X, ImgSize, Boxes, Scores, Boxes_base, Scores_base;
YoloBoxCompute<float> yolo_box;
operators::YoloBoxParam param;
int s = 3, cls = 4;
int n = 1, c = s * (5 + cls), h = 16, w = 16;
param.anchors = {2, 3, 4, 5, 8, 10};
param.downsample_ratio = 2;
param.conf_thresh = 0.5;
param.class_num = cls;
int m = h * w * param.anchors.size() / 2;
X.Resize({n, c, h, w});
ImgSize.Resize({1, 2});
Boxes.Resize({n, m, 4});
Boxes_base.Resize({n, m, 4});
Scores.Resize({n, cls, m});
Scores_base.Resize({n, cls, m});
auto x_data = X.mutable_data<float>();
auto imgsize_data = ImgSize.mutable_data<float>();
auto boxes_data = Boxes.mutable_data<float>();
auto scores_data = Scores.mutable_data<float>();
auto boxes_base_data = Boxes_base.mutable_data<float>();
auto scores_base_data = Scores_base.mutable_data<float>();
for (int i = 0; i < X.dims().production(); i++) {
x_data[i] = static_cast<float>(i);
}
for (int i = 0; i < ImgSize.dims().production(); i++) {
imgsize_data[i] = static_cast<float>(i);
}
test::RunBaseline(&X,
&ImgSize,
&Boxes_base,
&Scores_base,
param.class_num,
param.conf_thresh,
param.downsample_ratio,
param.anchors);
param.X = &X;
param.ImgSize = &ImgSize;
param.Boxes = &Boxes;
param.Scores = &Scores;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>();
yolo_box.SetContext(std::move(ctx));
yolo_box.SetParam(std::move(param));
yolo_box.Run();
for (int i = 0; i < Boxes.dims().production(); i++) {
EXPECT_NEAR(boxes_data[i], boxes_base_data[i], 1e-5);
}
for (int i = 0; i < Scores.dims().production(); i++) {
EXPECT_NEAR(scores_data[i], scores_base_data[i], 1e-5);
}
}
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(yolo_box, kX86, kFloat, kNCHW, def);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册