提交 c816121d 编写于 作者: B baiyf 提交者: qingqing01

optimized iou_similarity_op (#10231)

上级 6d934560
......@@ -41,22 +41,24 @@ struct IOUSimilarityFunctor {
IOUSimilarityFunctor(const T* x, const T* y, T* z, int cols)
: x_(x), y_(y), z_(z), cols_(static_cast<size_t>(cols)) {}
// NOTE(review): this span comes from a rendered diff whose +/- markers and
// indentation were lost, so two versions of operator() appear interleaved.
// Which lines are old and which are new is inferred from their order and
// should be confirmed against the repository.
// Presumably the removed signature: one call handles a whole row.
inline HOSTDEVICE void operator()(size_t row_id) const {
// Presumably the added signature: one call handles a single flat index.
inline HOSTDEVICE void operator()(size_t tid) const {
// Split the flat index into a row index and a column index using cols_.
size_t row_id = tid / cols_;
size_t col_id = tid % cols_;
// Read the four consecutive values stored for row row_id of x_.
T x_min1 = x_[row_id * 4];
T y_min1 = x_[row_id * 4 + 1];
T x_max1 = x_[row_id * 4 + 2];
T y_max1 = x_[row_id * 4 + 3];
// Presumably removed: loop visiting every column for the current row,
// reading the four consecutive values stored for entry i of y_.
for (size_t i = 0; i < cols_; ++i) {
T x_min2 = y_[i * 4];
T y_min2 = y_[i * 4 + 1];
T x_max2 = y_[i * 4 + 2];
T y_max2 = y_[i * 4 + 3];
// Presumably added: read only the y_ entry selected by col_id.
T x_min2 = y_[col_id * 4];
T y_min2 = y_[col_id * 4 + 1];
T x_max2 = y_[col_id * 4 + 2];
T y_max2 = y_[col_id * 4 + 3];
// Pass both sets of four coordinates to IOUSimilarity (defined outside this
// view) and keep the returned value.
T sim = IOUSimilarity(x_min1, y_min1, x_max1, y_max1, x_min2, y_min2,
x_max2, y_max2);
// Presumably removed: store inside the loop, indexed by the loop variable.
z_[row_id * cols_ + i] = sim;
}
// Presumably added: single store at offset row_id * cols_ + col_id of z_.
z_[row_id * cols_ + col_id] = sim;
}
const T* x_;
const T* y_;
......@@ -81,7 +83,7 @@ class IOUSimilarityKernel : public framework::OpKernel<T> {
out->mutable_data<T>(ctx.GetPlace()), y_n);
platform::ForRange<DeviceContext> for_range(
static_cast<const DeviceContext&>(ctx.device_context()), x_n);
static_cast<const DeviceContext&>(ctx.device_context()), x_n * y_n);
for_range(functor);
}
}; // namespace operators
......
......@@ -14,6 +14,7 @@
import unittest
import numpy as np
import numpy.random as random
import sys
import math
from op_test import OpTest
......@@ -25,14 +26,27 @@ class TestIOUSimilarityOp(OpTest):
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost, so two versions of the fixture appear back to back.
# Which lines are old and which are new is inferred from their order and
# should be confirmed against the repository.
def setUp(self):
self.op_type = "iou_similarity"
# Presumably the removed fixture: two fixed boxes for X, three for Y, and a
# hand-written expected output of shape (2, 3).
self.boxes1 = np.array(
[[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]).astype('float32')
self.boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
[0.0, 0.0, 20.0, 20.0]]).astype('float32')
self.output = np.array(
[[2.0 / 16.0, 0, 6.0 / 400.0],
[1.0 / 16.0, 0.0, 5.0 / 400.0]]).astype('float32')
# Presumably the added fixture: random float32 inputs of shape (2, 4) and
# (3, 4).
# NOTE(review): random.rand draws each coordinate independently, so the "min"
# value is not guaranteed to be below the "max" value; confirm the reference
# computation below still matches the operator for such rows.
self.boxes1 = random.rand(2, 4).astype('float32')
self.boxes2 = random.rand(3, 4).astype('float32')
# Only allocates a (2, 3) float32 array; every entry is overwritten by the
# loop below.
self.output = random.rand(2, 3).astype('float32')
# Reference computation: fill output[row, col] for every pair of one row of
# boxes1 and one row of boxes2.
for row in range(self.boxes1.shape[0]):
for col in range(self.boxes2.shape[0]):
# Each row is unpacked in the order xmin, ymin, xmax, ymax.
xmin1, ymin1, xmax1, ymax1 = self.boxes1[row]
xmin2, ymin2, xmax2, ymax2 = self.boxes2[col]
area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
# Bounds of the overlap rectangle: smallest max and largest min on each axis.
inter_xmax = min(xmax1, xmax2)
inter_ymax = min(ymax1, ymax2)
inter_xmin = max(xmin1, xmin2)
inter_ymin = max(ymin1, ymin2)
inter_height = inter_ymax - inter_ymin
inter_width = inter_xmax - inter_xmin
# Clamp negative extents to zero so non-overlapping pairs get zero area.
inter_height = max(inter_height, 0)
inter_width = max(inter_width, 0)
inter_area = inter_width * inter_height
union_area = area1 + area2 - inter_area
# The stored score is the overlap area divided by the union area.
sim_score = inter_area / union_area
self.output[row, col] = sim_score
# Feed the boxes as operator inputs 'X' and 'Y'; the expected result is 'Out'.
self.inputs = {'X': self.boxes1, 'Y': self.boxes2}
self.outputs = {'Out': self.output}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册