未验证 提交 0947171b 编写于 作者: C cnn 提交者: GitHub

[dev] s2anet fix export and deploy (#2919)

* s2anet fix export and deploy

* remove redundant code

* fix cpp for s2anet deploy

* fix bug of get_categories

* rename poly_to_rbox to poly2rbox

* add some comment for function
上级 912833f2
......@@ -9,7 +9,7 @@ TrainReader:
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: true
drop_last: true
......@@ -22,7 +22,7 @@ EvalReader:
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
......@@ -36,7 +36,7 @@ TestReader:
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
......@@ -27,7 +27,7 @@ parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
if parent_path not in sys.path:
sys.path.append(parent_path)
from ppdet.modeling.bbox_utils import poly_to_rbox
from ppdet.modeling.bbox_utils import poly2rbox
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
......@@ -118,7 +118,7 @@ def dota_2_coco(image_dir,
# rbox or bbox
if is_obb:
polys = [single_obj_poly]
rboxs = poly_to_rbox(polys)
rboxs = poly2rbox(polys)
rbox = rboxs[0].tolist()
single_obj['bbox'] = rbox
single_obj['area'] = rbox[2] * rbox[3]
......
......@@ -51,7 +51,8 @@ std::vector<int> GenerateColorMap(int num_class);
cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<ObjectResult>& results,
const std::vector<std::string>& lable_list,
const std::vector<int>& colormap);
const std::vector<int>& colormap,
const bool is_rbox);
class ObjectDetector {
......@@ -120,7 +121,8 @@ class ObjectDetector {
// Postprocess result
void Postprocess(
const cv::Mat& raw_mat,
std::vector<ObjectResult>* result);
std::vector<ObjectResult>* result,
bool is_rbox);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
......
......@@ -195,23 +195,42 @@ void PredictVideo(const std::string& video_path,
// Capture all frames and do inference
cv::Mat frame;
int frame_id = 0;
bool is_rbox = false;
while (capture.read(frame)) {
if (frame.empty()) {
break;
}
det->Predict(frame, 0.5, 0, 1, &result, &det_times);
cv::Mat out_im = PaddleDetection::VisualizeResult(
frame, result, labels, colormap);
for (const auto& item : result) {
printf("In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d %d]\n",
frame_id,
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
if (item.rect.size() > 6){
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
}
else{
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
cv::Mat out_im = PaddleDetection::VisualizeResult(
frame, result, labels, colormap, is_rbox);
video_out.write(out_im);
frame_id += 1;
}
......@@ -231,24 +250,41 @@ void PredictImage(const std::vector<std::string> all_img_list,
// Store all detected result
std::vector<PaddleDetection::ObjectResult> result;
std::vector<double> det_times;
bool is_rbox = false;
if (run_benchmark) {
det->Predict(im, threshold, 10, 10, &result, &det_times);
} else {
det->Predict(im, 0.5, 0, 1, &result, &det_times);
for (const auto& item : result) {
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
if (item.rect.size() > 6){
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
}
else{
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
// Visualization result
auto labels = det->GetLabelList();
auto colormap = PaddleDetection::GenerateColorMap(labels.size());
cv::Mat vis_img = PaddleDetection::VisualizeResult(
im, result, labels, colormap);
im, result, labels, colormap, is_rbox);
std::vector<int> compression_params;
compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
......
......@@ -94,13 +94,10 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<ObjectResult>& results,
const std::vector<std::string>& lable_list,
const std::vector<int>& colormap) {
const std::vector<int>& colormap,
const bool is_rbox=false) {
cv::Mat vis_img = img.clone();
for (int i = 0; i < results.size(); ++i) {
int w = results[i].rect[1] - results[i].rect[0];
int h = results[i].rect[3] - results[i].rect[2];
cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
// Configure color and text size
std::ostringstream oss;
oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
......@@ -120,17 +117,37 @@ cv::Mat VisualizeResult(const cv::Mat& img,
thickness,
nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
if (is_rbox)
{
// Draw object, text, and background
for (int k=0; k<4; k++)
{
cv::Point pt1 = cv::Point(results[i].rect[(k*2)%8],
results[i].rect[(k*2+1)%8]);
cv::Point pt2 = cv::Point(results[i].rect[(k*2+2)%8],
results[i].rect[(k*2+3)%8]);
cv::line(vis_img, pt1, pt2, roi_color, 2);
}
}
else
{
int w = results[i].rect[1] - results[i].rect[0];
int h = results[i].rect[3] - results[i].rect[2];
cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
// Draw roi object, text, and background
cv::rectangle(vis_img, roi, roi_color, 2);
}
origin.x = results[i].rect[0];
origin.y = results[i].rect[1];
// Configure text background
cv::Rect text_back = cv::Rect(results[i].rect[0],
results[i].rect[2] - text_size.height,
text_size.width,
text_size.height);
// Draw roi object, text, and background
cv::rectangle(vis_img, roi, roi_color, 2);
// Draw text, and background
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
......@@ -152,7 +169,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
void ObjectDetector::Postprocess(
const cv::Mat& raw_mat,
std::vector<ObjectResult>* result) {
std::vector<ObjectResult>* result,
bool is_rbox=false) {
result->clear();
int rh = 1;
int rw = 1;
......@@ -161,24 +179,52 @@ void ObjectDetector::Postprocess(
rw = raw_mat.cols;
}
int total_size = output_data_.size() / 6;
for (int j = 0; j < total_size; ++j) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
// Confidence score
float score = output_data_[1 + j * 6];
int xmin = (output_data_[2 + j * 6] * rw);
int ymin = (output_data_[3 + j * 6] * rh);
int xmax = (output_data_[4 + j * 6] * rw);
int ymax = (output_data_[5 + j * 6] * rh);
int wd = xmax - xmin;
int hd = ymax - ymin;
if (score > threshold_ && class_id > -1) {
ObjectResult result_item;
result_item.rect = {xmin, xmax, ymin, ymax};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
if (is_rbox)
{
int total_size = output_data_.size() / 10;
for (int j = 0; j < total_size; ++j) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
// Confidence score
float score = output_data_[1 + j * 10];
int x1 = (output_data_[2 + j * 10] * rw);
int y1 = (output_data_[3 + j * 10] * rh);
int x2 = (output_data_[4 + j * 10] * rw);
int y2 = (output_data_[5 + j * 10] * rh);
int x3 = (output_data_[6 + j * 10] * rw);
int y3 = (output_data_[7 + j * 10] * rh);
int x4 = (output_data_[8 + j * 10] * rw);
int y4 = (output_data_[9 + j * 10] * rh);
if (score > threshold_ && class_id > -1) {
ObjectResult result_item;
result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
}
}
}
else
{
int total_size = output_data_.size() / 6;
for (int j = 0; j < total_size; ++j) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
// Confidence score
float score = output_data_[1 + j * 6];
int xmin = (output_data_[2 + j * 6] * rw);
int ymin = (output_data_[3 + j * 6] * rh);
int xmax = (output_data_[4 + j * 6] * rw);
int ymax = (output_data_[5 + j * 6] * rh);
int wd = xmax - xmin;
int hd = ymax - ymin;
if (score > threshold_ && class_id > -1) {
ObjectResult result_item;
result_item.rect = {xmin, xmax, ymin, ymax};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
}
}
}
}
......@@ -231,6 +277,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
out_tensor->CopyToCpu(output_data_.data());
}
bool is_rbox = false;
auto inference_start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++)
{
......@@ -244,6 +291,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
for (int j = 0; j < output_shape.size(); ++j) {
output_size *= output_shape[j];
}
is_rbox = output_shape[output_shape.size()-1] % 10 == 0;
if (output_size < 6) {
std::cerr << "[WARNING] No object detected." << std::endl;
......@@ -254,7 +302,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now();
// Postprocessing result
Postprocess(im, result);
Postprocess(im, result, is_rbox);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
......@@ -263,6 +311,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
times->push_back(double(inference_diff.count() / repeats * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
}
std::vector<int> GenerateColorMap(int num_class) {
......
......@@ -37,6 +37,7 @@ SUPPORT_MODELS = {
'FCOS',
'SOLOv2',
'TTFNet',
'S2ANet',
}
......
......@@ -130,22 +130,29 @@ def draw_box(im, np_boxes, labels, threshold=0.5):
for dt in np_boxes:
clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
w = xmax - xmin
h = ymax - ymin
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color = tuple(clsid2color[clsid])
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
if len(bbox) == 4:
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
elif len(bbox) == 8:
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill=color)
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
# draw label
text = "{} {:.4f}".format(labels[clsid], score)
......
......@@ -33,7 +33,7 @@ logger = setup_logger(__name__)
__all__ = [
'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
'Gt2TTFTarget', 'Gt2Solov2Target', 'RboxPadBatch'
'Gt2TTFTarget', 'Gt2Solov2Target'
]
......@@ -87,6 +87,12 @@ class PadBatch(BaseOperator):
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
# ploy to rbox
polys = data['gt_rbox2poly']
rbox = bbox_utils.poly2rbox(polys)
data['gt_rbox'] = rbox
return samples
......@@ -739,111 +745,3 @@ class Gt2Solov2Target(BaseOperator):
data['grid_order{}'.format(idx)] = gt_grid_order
return samples
@register_op
class RboxPadBatch(BaseOperator):
"""
Pad a batch of samples so they can be divisible by a stride.
The layout of each image should be 'CHW'. And convert poly to rbox.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, pad_gt=False):
super(RboxPadBatch, self).__init__()
self.pad_to_stride = pad_to_stride
self.pad_gt = pad_gt
def __call__(self, samples, context=None):
"""
Args:
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride
max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0)
if coarsest_stride > 0:
max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
max_shape[2] = int(
np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
for data in samples:
im = data['image']
im_c, im_h, im_w = im.shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
data['image'] = padding_im
if 'semantic' in data and data['semantic'] is not None:
semantic = data['semantic']
padding_sem = np.zeros(
(1, max_shape[1], max_shape[2]), dtype=np.float32)
padding_sem[:, :im_h, :im_w] = semantic
data['semantic'] = padding_sem
if 'gt_segm' in data and data['gt_segm'] is not None:
gt_segm = data['gt_segm']
padding_segm = np.zeros(
(gt_segm.shape[0], max_shape[1], max_shape[2]),
dtype=np.uint8)
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
if self.pad_gt:
gt_num = []
if 'gt_poly' in data and data['gt_poly'] is not None and len(data[
'gt_poly']) > 0:
pad_mask = True
else:
pad_mask = False
if pad_mask:
poly_num = []
poly_part_num = []
point_num = []
for data in samples:
gt_num.append(data['gt_bbox'].shape[0])
if pad_mask:
poly_num.append(len(data['gt_poly']))
for poly in data['gt_poly']:
poly_part_num.append(int(len(poly)))
for p_p in poly:
point_num.append(int(len(p_p) / 2))
gt_num_max = max(gt_num)
for i, sample in enumerate(samples):
assert 'gt_rbox' in sample
assert 'gt_rbox2poly' in sample
gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
is_crowd_data = np.ones([gt_num_max], dtype=np.int32)
if pad_mask:
poly_num_max = max(poly_num)
poly_part_num_max = max(poly_part_num)
point_num_max = max(point_num)
gt_masks_data = -np.ones(
[poly_num_max, poly_part_num_max, point_num_max, 2],
dtype=np.float32)
gt_num = sample['gt_bbox'].shape[0]
gt_box_data[0:gt_num, :] = sample['gt_bbox']
gt_class_data[0:gt_num] = np.squeeze(sample['gt_class'])
is_crowd_data[0:gt_num] = np.squeeze(sample['is_crowd'])
if pad_mask:
for j, poly in enumerate(sample['gt_poly']):
for k, p_p in enumerate(poly):
pp_np = np.array(p_p).reshape(-1, 2)
gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
sample['gt_poly'] = gt_masks_data
sample['gt_bbox'] = gt_box_data
sample['gt_class'] = gt_class_data
sample['is_crowd'] = is_crowd_data
# ploy to rbox
polys = sample['gt_rbox2poly']
rbox = bbox_utils.poly_to_rbox(polys)
sample['gt_rbox'] = rbox
return samples
......@@ -2007,7 +2007,7 @@ class Rbox2Poly(BaseOperator):
x2 = x_ctr + width / 2.0
y2 = y_ctr + height / 2.0
sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
polys = bbox_utils.rbox2poly(rrects)
polys = bbox_utils.rbox2poly_np(rrects)
sample['gt_rbox2poly'] = polys
return sample
......
......@@ -376,7 +376,8 @@ class Trainer(object):
imid2path = self.dataset.get_imid2path()
anno_file = self.dataset.get_anno()
clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)
clsid2catid, catid2name = get_categories(
self.cfg.metric, anno_file=anno_file)
# Run Infer
self.status['mode'] = 'test'
......
......@@ -263,146 +263,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
return iou
def rect2rbox(bboxes):
"""
:param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
:return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
"""
bboxes = bboxes.reshape(-1, 4)
num_boxes = bboxes.shape[0]
x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
edges1 = np.abs(bboxes[:, 2] - bboxes[:, 0])
edges2 = np.abs(bboxes[:, 3] - bboxes[:, 1])
angles = np.zeros([num_boxes], dtype=bboxes.dtype)
inds = edges1 < edges2
rboxes = np.stack((x_ctr, y_ctr, edges1, edges2, angles), axis=1)
rboxes[inds, 2] = edges2[inds]
rboxes[inds, 3] = edges1[inds]
rboxes[inds, 4] = np.pi / 2.0
return rboxes
def delta2rbox(Rrois,
deltas,
means=[0, 0, 0, 0, 0],
stds=[1, 1, 1, 1, 1],
wh_ratio_clip=1e-6):
"""
:param Rrois: (cx, cy, w, h, theta)
:param deltas: (dx, dy, dw, dh, dtheta)
:param means:
:param stds:
:param wh_ratio_clip:
:return:
"""
deltas = paddle.reshape(deltas, [-1, deltas.shape[-1]])
denorm_deltas = deltas * stds + means
dx = denorm_deltas[:, 0]
dy = denorm_deltas[:, 1]
dw = denorm_deltas[:, 2]
dh = denorm_deltas[:, 3]
dangle = denorm_deltas[:, 4]
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
Rroi_x = Rrois[:, 0]
Rroi_y = Rrois[:, 1]
Rroi_w = Rrois[:, 2]
Rroi_h = Rrois[:, 3]
Rroi_angle = Rrois[:, 4]
gx = dx * Rroi_w * paddle.cos(Rroi_angle) - dy * Rroi_h * paddle.sin(
Rroi_angle) + Rroi_x
gy = dx * Rroi_w * paddle.sin(Rroi_angle) + dy * Rroi_h * paddle.cos(
Rroi_angle) + Rroi_y
gw = Rroi_w * dw.exp()
gh = Rroi_h * dh.exp()
ga = np.pi * dangle + Rroi_angle
ga = (ga + np.pi / 4) % np.pi - np.pi / 4
ga = paddle.to_tensor(ga)
gw = paddle.to_tensor(gw, dtype='float32')
gh = paddle.to_tensor(gh, dtype='float32')
bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
return bboxes
def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
"""
Args:
proposals:
gt:
means: 1x5
stds: 1x5
Returns:
"""
proposals = proposals.astype(np.float64)
PI = np.pi
gt_widths = gt[..., 2]
gt_heights = gt[..., 3]
gt_angle = gt[..., 4]
proposals_widths = proposals[..., 2]
proposals_heights = proposals[..., 3]
proposals_angle = proposals[..., 4]
coord = gt[..., 0:2] - proposals[..., 0:2]
dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
* coord[..., 1]) / proposals_widths
dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
* coord[..., 1]) / proposals_heights
dw = np.log(gt_widths / proposals_widths)
dh = np.log(gt_heights / proposals_heights)
da = (gt_angle - proposals_angle)
da = (da + PI / 4) % PI - PI / 4
da /= PI
deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
means = np.array(means, dtype=deltas.dtype)
stds = np.array(stds, dtype=deltas.dtype)
deltas = (deltas - means) / stds
deltas = deltas.astype(np.float32)
return deltas
def bbox_decode(bbox_preds,
anchors,
means=[0, 0, 0, 0, 0],
stds=[1, 1, 1, 1, 1]):
"""decode bbox from deltas
Args:
bbox_preds: [N,H,W,5]
anchors: [H*W,5]
return:
bboxes: [N,H,W,5]
"""
num_imgs, H, W, _ = bbox_preds.shape
bboxes_list = []
for img_id in range(num_imgs):
bbox_pred = bbox_preds[img_id]
# bbox_pred.shape=[5,H,W]
bbox_delta = bbox_pred
bboxes = delta2rbox(
anchors, bbox_delta, means, stds, wh_ratio_clip=1e-6)
bboxes = paddle.reshape(bboxes, [H, W, 5])
bboxes_list.append(bboxes)
return paddle.stack(bboxes_list, axis=0)
def poly_to_rbox(polys):
def poly2rbox(polys):
"""
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
to
......@@ -479,37 +340,16 @@ def get_best_begin_point_single(coordinate):
return np.array(combinate[force_flag]).reshape(8)
def rbox2poly_single(rrect):
"""
rrect:[x_ctr,y_ctr,w,h,angle]
to
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
"""
x_ctr, y_ctr, width, height, angle = rrect[:5]
tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
# rect 2x4
rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
R = np.array([[np.cos(angle), -np.sin(angle)],
[np.sin(angle), np.cos(angle)]])
# poly
poly = R.dot(rect)
x0, x1, x2, x3 = poly[0, :4] + x_ctr
y0, y1, y2, y3 = poly[1, :4] + y_ctr
poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
poly = get_best_begin_point_single(poly)
return poly
def rbox2poly(rrects):
def rbox2poly_np(rrects):
"""
rrect:[x_ctr,y_ctr,w,h,angle]
to
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
"""
polys = []
rrects = rrects.numpy()
for i in range(rrects.shape[0]):
rrect = rrects[i]
# x_ctr, y_ctr, width, height, angle = rrect[:5]
x_ctr = rrect[0]
y_ctr = rrect[1]
width = rrect[2]
......@@ -529,13 +369,13 @@ def rbox2poly(rrects):
return polys
def pd_rbox2poly(rrects):
def rbox2poly(rrects):
"""
rrect:[x_ctr,y_ctr,w,h,angle]
to
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
"""
N = rrects.shape[0]
N = paddle.shape(rrects)[0]
x_ctr = rrects[:, 0]
y_ctr = rrects[:, 1]
......@@ -561,14 +401,10 @@ def pd_rbox2poly(rrects):
polys = paddle.transpose(polys, [2, 1, 0])
polys = paddle.reshape(polys, [-1, N])
polys = paddle.transpose(polys, [1, 0])
polys[:, 0] += x_ctr
polys[:, 2] += x_ctr
polys[:, 4] += x_ctr
polys[:, 6] += x_ctr
polys[:, 1] += y_ctr
polys[:, 3] += y_ctr
polys[:, 5] += y_ctr
polys[:, 7] += y_ctr
tmp = paddle.stack(
[x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
polys = polys + tmp
return polys
......
此差异已折叠。
......@@ -17,7 +17,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly, pd_rbox2poly
from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly, rbox2poly
try:
from collections.abc import Sequence
except Exception:
......@@ -214,7 +214,7 @@ class FCOSPostProcess(object):
@register
class S2ANetBBoxPostProcess(object):
class S2ANetBBoxPostProcess(nn.Layer):
__shared__ = ['num_classes']
__inject__ = ['nms']
......@@ -225,41 +225,43 @@ class S2ANetBBoxPostProcess(object):
self.min_bbox_size = min_bbox_size
self.nms = nms
self.origin_shape_list = []
self.fake_pred_cls_score_bbox = paddle.to_tensor(
np.array(
[[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
dtype='float32'))
self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
def __call__(self, pred_scores, pred_bboxes):
def forward(self, pred_scores, pred_bboxes):
"""
pred_scores : [N, M] score
pred_bboxes : [N, 5] xc, yc, w, h, a
im_shape : [N, 2] im_shape
scale_factor : [N, 2] scale_factor
"""
pred_ploys = pd_rbox2poly(pred_bboxes)
pred_ploys = paddle.reshape(
pred_ploys, [1, pred_ploys.shape[0], pred_ploys.shape[1]])
pred_ploys0 = rbox2poly(pred_bboxes)
pred_ploys = paddle.unsqueeze(pred_ploys0, axis=0)
pred_scores = paddle.to_tensor(pred_scores)
# pred_scores [NA, 16] --> [16, NA]
pred_scores = paddle.transpose(pred_scores, [1, 0])
pred_scores = paddle.reshape(
pred_scores, [1, pred_scores.shape[0], pred_scores.shape[1]])
pred_scores0 = paddle.transpose(pred_scores, [1, 0])
pred_scores = paddle.unsqueeze(pred_scores0, axis=0)
pred_cls_score_bbox, bbox_num, _ = self.nms(pred_ploys, pred_scores,
self.num_classes)
# Prevent empty bbox_pred from decode or NMS.
# Bboxes and score before NMS may be empty due to the score threshold.
if pred_cls_score_bbox.shape[0] == 0:
pred_cls_score_bbox = paddle.to_tensor(
np.array(
[[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[
1] <= 1:
pred_cls_score_bbox = self.fake_pred_cls_score_bbox
bbox_num = self.fake_bbox_num
pred_cls_score_bbox = paddle.reshape(pred_cls_score_bbox, [-1, 10])
assert pred_cls_score_bbox.shape[1] == 10
return pred_cls_score_bbox, bbox_num
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
"""
Rescale, clip and filter the bbox from the output of NMS to
get final prediction.
Args:
bboxes(Tensor): bboxes [N, 10]
bbox_num(Tensor): bbox_num
......@@ -270,6 +272,7 @@ class S2ANetBBoxPostProcess(object):
including labels, scores and bboxes. The size of
bboxes are corresponding to the original image.
"""
assert bboxes.shape[1] == 10
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
origin_shape_list = []
......@@ -292,7 +295,7 @@ class S2ANetBBoxPostProcess(object):
# bboxes: [N, 10], label, score, bbox
pred_label_score = bboxes[:, 0:2]
pred_bbox = bboxes[:, 2:10:1]
pred_bbox = bboxes[:, 2:]
# rescale bbox to original image
scaled_bbox = pred_bbox / scale_factor_list
......
......@@ -16,7 +16,6 @@ import paddle
from ppdet.core.workspace import register, serializable
from .target import rpn_anchor_target, generate_proposal_target, generate_mask_target, libra_generate_proposal_target
from ppdet.modeling import bbox_utils
import numpy as np
......@@ -283,13 +282,58 @@ class RBoxAssigner(object):
"""
if anchors.ndim == 3:
anchors = anchors.reshape(-1, anchor.shape[-1])
anchors = anchors.reshape(-1, anchors.shape[-1])
assert anchors.ndim == 2
anchor_num = anchors.shape[0]
anchor_valid = np.ones((anchor_num), np.uint8)
anchor_inds = np.arange(anchor_num)
return anchor_inds
def rbox2delta(self,
proposals,
gt,
means=[0, 0, 0, 0, 0],
stds=[1, 1, 1, 1, 1]):
"""
Args:
proposals: tensor [N, 5]
gt: gt [N, 5]
means: means [5]
stds: stds [5]
Returns:
"""
proposals = proposals.astype(np.float64)
PI = np.pi
gt_widths = gt[..., 2]
gt_heights = gt[..., 3]
gt_angle = gt[..., 4]
proposals_widths = proposals[..., 2]
proposals_heights = proposals[..., 3]
proposals_angle = proposals[..., 4]
coord = gt[..., 0:2] - proposals[..., 0:2]
dx = (np.cos(proposals[..., 4]) * coord[..., 0] +
np.sin(proposals[..., 4]) * coord[..., 1]) / proposals_widths
dy = (-np.sin(proposals[..., 4]) * coord[..., 0] +
np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights
dw = np.log(gt_widths / proposals_widths)
dh = np.log(gt_heights / proposals_heights)
da = (gt_angle - proposals_angle)
da = (da + PI / 4) % PI - PI / 4
da /= PI
deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
means = np.array(means, dtype=deltas.dtype)
stds = np.array(stds, dtype=deltas.dtype)
deltas = (deltas - means) / stds
deltas = deltas.astype(np.float32)
return deltas
def assign_anchor(self,
anchors,
gt_bboxes,
......@@ -405,8 +449,8 @@ class RBoxAssigner(object):
#print('ancho target pos_inds', pos_inds, len(pos_inds))
pos_sampled_gt_boxes = gt_bboxes[anchor_gt_bbox_inds[pos_inds]]
if len(pos_inds) > 0:
pos_bbox_targets = bbox_utils.rbox2delta(pos_sampled_anchors,
pos_sampled_gt_boxes)
pos_bbox_targets = self.rbox2delta(pos_sampled_anchors,
pos_sampled_gt_boxes)
bbox_targets[pos_inds, :] = pos_bbox_targets
bbox_weights[pos_inds, :] = 1.0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册