From 848decd5216938f140e9582d4fb8f22d17a7f484 Mon Sep 17 00:00:00 2001 From: zhangwen31 Date: Fri, 25 Sep 2020 06:47:38 +0000 Subject: [PATCH] [cuda][kernel]fix: yolo_box cuda kernel updated with paddle-fluid --- lite/kernels/bm/bridges/yolo_box_op.cc | 1 + lite/kernels/cuda/yolo_box_compute.cu | 36 ++++++++++++++++------ lite/kernels/cuda/yolo_box_compute_test.cc | 36 ++++++++++++++++------ 3 files changed, 55 insertions(+), 18 deletions(-) diff --git a/lite/kernels/bm/bridges/yolo_box_op.cc b/lite/kernels/bm/bridges/yolo_box_op.cc index c1f8fa100f..0029e654ff 100644 --- a/lite/kernels/bm/bridges/yolo_box_op.cc +++ b/lite/kernels/bm/bridges/yolo_box_op.cc @@ -26,6 +26,7 @@ namespace lite { namespace subgraph { namespace bm { +// fixme: yolo box has updated, check arm kernel to get more info int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) { CHECK(ctx != nullptr); CHECK(op != nullptr); diff --git a/lite/kernels/cuda/yolo_box_compute.cu b/lite/kernels/cuda/yolo_box_compute.cu index 9ece6cec77..b50eabfba1 100644 --- a/lite/kernels/cuda/yolo_box_compute.cu +++ b/lite/kernels/cuda/yolo_box_compute.cu @@ -49,9 +49,12 @@ __host__ __device__ inline void GetYoloBox(T* box, int index, int stride, int img_height, - int img_width) { - box[0] = (i + sigmoid(x[index])) * img_width / grid_size; - box[1] = (j + sigmoid(x[index + stride])) * img_height / grid_size; + int img_width, + float scale, + float bias) { + box[0] = (i + sigmoid(x[index]) * scale + bias) * img_width / grid_size; + box[1] = (j + sigmoid(x[index + stride]) * scale + bias) * img_height / + grid_size; box[2] = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] * img_width / input_size; box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] * @@ -63,12 +66,16 @@ __host__ __device__ inline void CalcDetectionBox(T* boxes, T* box, const int box_idx, const int img_height, - const int img_width) { + const int img_width, + bool clip_bbox) { boxes[box_idx] = box[0] - box[2] / 2; 
boxes[box_idx + 1] = box[1] - box[3] / 2; boxes[box_idx + 2] = box[0] + box[2] / 2; boxes[box_idx + 3] = box[1] + box[3] / 2; + if (!clip_bbox) { + return; + } boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast(0); boxes[box_idx + 1] = boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast(0); @@ -106,7 +113,10 @@ __global__ void KeYoloBoxFw(const T* input, const int an_num, const int class_num, const int box_num, - int input_size) { + int input_size, + bool clip_bbox, + float scale, + float bias) { int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; T box[4]; @@ -141,9 +151,11 @@ __global__ void KeYoloBoxFw(const T* input, box_idx, grid_num, img_height, - img_width); + img_width, + scale, + bias); box_idx = (i * box_num + j * grid_num + k * w + l) * 4; - CalcDetectionBox(boxes, box, box_idx, img_height, img_width); + CalcDetectionBox(boxes, box, box_idx, img_height, img_width, clip_bbox); int label_idx = GetEntryIndex(i, j, k * w + l, an_num, an_stride, grid_num, 5); @@ -152,7 +164,7 @@ __global__ void KeYoloBoxFw(const T* input, scores, input, label_idx, score_idx, class_num, conf, grid_num); } } -// fixme: yolo box has updated, check arm kernel to get more info + void YoloBoxCompute::Run() { auto& param = this->Param(); auto& ctx = this->ctx_->template As(); @@ -166,6 +178,9 @@ void YoloBoxCompute::Run() { int class_num = param.class_num; float conf_thresh = param.conf_thresh; int downsample_ratio = param.downsample_ratio; + bool clip_bbox = param.clip_bbox; + float scale_x_y = param.scale_x_y; + float bias = -0.5 * (scale_x_y - 1.); const float* input = X->data(); const int* imgsize = ImgSize->data(); @@ -207,7 +222,10 @@ void YoloBoxCompute::Run() { an_num, class_num, box_num, - input_size); + input_size, + clip_bbox, + scale_x_y, + bias); cudaError_t error = cudaGetLastError(); if (error != cudaSuccess) LOG(INFO) << cudaGetErrorString(error); } diff --git a/lite/kernels/cuda/yolo_box_compute_test.cc 
b/lite/kernels/cuda/yolo_box_compute_test.cc index 994251b249..821e8cb09c 100644 --- a/lite/kernels/cuda/yolo_box_compute_test.cc +++ b/lite/kernels/cuda/yolo_box_compute_test.cc @@ -35,9 +35,12 @@ inline static void get_yolo_box(float* box, int index, int stride, int img_height, - int img_width) { - box[0] = (i + sigmoid(x[index])) * img_width / grid_size; - box[1] = (j + sigmoid(x[index + stride])) * img_height / grid_size; + int img_width, + float scale, + float bias) { + box[0] = (i + sigmoid(x[index]) * scale + bias) * img_width / grid_size; + box[1] = + (j + sigmoid(x[index + stride]) * scale + bias) * img_height / grid_size; box[2] = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] * img_width / input_size; box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] * @@ -58,12 +61,15 @@ inline static void calc_detection_box(float* boxes, float* box, const int box_idx, const int img_height, - const int img_width) { + const int img_width, + bool clip_bbox) { boxes[box_idx] = box[0] - box[2] / 2; boxes[box_idx + 1] = box[1] - box[3] / 2; boxes[box_idx + 2] = box[0] + box[2] / 2; boxes[box_idx + 3] = box[1] + box[3] / 2; - + if (!clip_bbox) { + return; + } boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast(0); boxes[box_idx + 1] = boxes[box_idx + 1] > 0 ? 
boxes[box_idx + 1] : static_cast(0); @@ -100,7 +106,10 @@ static void YoloBoxRef(const T* input, const int an_num, const int class_num, const int box_num, - int input_size) { + int input_size, + bool clip_bbox, + float scale, + float bias) { const int stride = h * w; const int an_stride = (class_num + 5) * stride; float box[4]; @@ -132,9 +141,12 @@ static void YoloBoxRef(const T* input, box_idx, stride, img_height, - img_width); + img_width, + scale, + bias); box_idx = (i * box_num + j * stride + k * w + l) * 4; - calc_detection_box(boxes, box, box_idx, img_height, img_width); + calc_detection_box( + boxes, box, box_idx, img_height, img_width, clip_bbox); int label_idx = get_entry_index(i, j, k * w + l, an_num, an_stride, stride, 5); @@ -163,6 +175,9 @@ TEST(yolo_box, normal) { param.downsample_ratio = 2; param.conf_thresh = 0.5; param.class_num = cls; + param.clip_bbox = true; + param.scale_x_y = 1.0; + float bias = -0.5 * (param.scale_x_y - 1.); int m = h * w * param.anchors.size() / 2; x.Resize({n, c, h, w}); @@ -240,7 +255,10 @@ TEST(yolo_box, normal) { param.anchors.size() / 2, cls, m, - param.downsample_ratio * h); + param.downsample_ratio * h, + param.clip_bbox, + param.scale_x_y, + bias); for (int i = 0; i < boxes.numel(); i++) { EXPECT_NEAR(boxes_cpu_data[i], boxes_ref_data[i], 1e-5); -- GitLab