Unverified commit 0947171b, authored by cnn, committed by GitHub

[dev] s2anet fix export and deploy (#2919)

* s2anet fix export and deploy

* remove redundant code

* fix cpp for s2anet deploy

* fix bug of get_categories

* rename poly_to_rbox to poly2rbox

* add some comment for function
Parent 912833f2
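For context, a minimal sketch (not part of this commit) of the two detection layouts that the deploy code below learns to tell apart: horizontal boxes come back as 6 columns, rotated (S2ANet) boxes as 10 columns. The helper name and sample values are illustrative only.

```python
import numpy as np

def print_detections(np_boxes):
    # Horizontal boxes: [class_id, score, xmin, ymin, xmax, ymax]            -> 6 columns
    # Rotated boxes:    [class_id, score, x1, y1, x2, y2, x3, y3, x4, y4]    -> 10 columns
    is_rbox = np_boxes.shape[-1] == 10
    for dt in np_boxes:
        clsid, score, coords = int(dt[0]), dt[1], dt[2:]
        kind = 'rbox' if is_rbox else 'bbox'
        print('class_id:{:d} confidence:{:.4f} {}:{}'.format(
            clsid, score, kind, coords.tolist()))

# example: one horizontal box and one rotated box
print_detections(np.array([[0, 0.98, 10, 20, 110, 220]], dtype='float32'))
print_detections(np.array(
    [[1, 0.87, 10, 10, 100, 20, 95, 60, 5, 50]], dtype='float32'))
```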
@@ -9,7 +9,7 @@ TrainReader:
     - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
     - Permute: {}
   batch_transforms:
-    - RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
+    - PadBatch: {pad_to_stride: 32}
   batch_size: 1
   shuffle: true
   drop_last: true
@@ -22,7 +22,7 @@ EvalReader:
     - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
     - Permute: {}
   batch_transforms:
-    - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
+    - PadBatch: {pad_to_stride: 32}
   batch_size: 1
   shuffle: false
   drop_last: false
@@ -36,7 +36,7 @@ TestReader:
     - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
     - Permute: {}
   batch_transforms:
-    - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
+    - PadBatch: {pad_to_stride: 32}
   batch_size: 1
   shuffle: false
   drop_last: false
@@ -27,7 +27,7 @@ parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
 if parent_path not in sys.path:
     sys.path.append(parent_path)
-from ppdet.modeling.bbox_utils import poly_to_rbox
+from ppdet.modeling.bbox_utils import poly2rbox
 from ppdet.utils.logger import setup_logger
 logger = setup_logger(__name__)
@@ -118,7 +118,7 @@ def dota_2_coco(image_dir,
             # rbox or bbox
             if is_obb:
                 polys = [single_obj_poly]
-                rboxs = poly_to_rbox(polys)
+                rboxs = poly2rbox(polys)
                 rbox = rboxs[0].tolist()
                 single_obj['bbox'] = rbox
                 single_obj['area'] = rbox[2] * rbox[3]
......
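A hedged usage sketch for the renamed helper (it assumes a PaddleDetection checkout where `ppdet.modeling.bbox_utils.poly2rbox` exists, as after this commit); the rectangle values are made up.

```python
from ppdet.modeling.bbox_utils import poly2rbox

# One axis-aligned 100x50 rectangle given as 4 corner points (x0,y0,...,x3,y3).
polys = [[0.0, 0.0, 100.0, 0.0, 100.0, 50.0, 0.0, 50.0]]
rboxs = poly2rbox(polys)      # -> array of [x_ctr, y_ctr, w, h, angle]
rbox = rboxs[0].tolist()
print(rbox)                   # roughly [50.0, 25.0, 100.0, 50.0, 0.0]
area = rbox[2] * rbox[3]      # same area formula as the converter above
```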
@@ -51,7 +51,8 @@ std::vector<int> GenerateColorMap(int num_class);
 cv::Mat VisualizeResult(const cv::Mat& img,
                         const std::vector<ObjectResult>& results,
                         const std::vector<std::string>& lable_list,
-                        const std::vector<int>& colormap);
+                        const std::vector<int>& colormap,
+                        const bool is_rbox);
 class ObjectDetector {
@@ -120,7 +121,8 @@ class ObjectDetector {
   // Postprocess result
   void Postprocess(
       const cv::Mat& raw_mat,
-      std::vector<ObjectResult>* result);
+      std::vector<ObjectResult>* result,
+      bool is_rbox);
   std::shared_ptr<Predictor> predictor_;
   Preprocessor preprocessor_;
......
@@ -195,23 +195,42 @@ void PredictVideo(const std::string& video_path,
   // Capture all frames and do inference
   cv::Mat frame;
   int frame_id = 0;
+  bool is_rbox = false;
   while (capture.read(frame)) {
     if (frame.empty()) {
       break;
     }
     det->Predict(frame, 0.5, 0, 1, &result, &det_times);
-    cv::Mat out_im = PaddleDetection::VisualizeResult(
-        frame, result, labels, colormap);
     for (const auto& item : result) {
-      printf("In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d %d]\n",
-          frame_id,
-          item.class_id,
-          item.confidence,
-          item.rect[0],
-          item.rect[1],
-          item.rect[2],
-          item.rect[3]);
+      if (item.rect.size() > 6) {
+        is_rbox = true;
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3],
+               item.rect[4],
+               item.rect[5],
+               item.rect[6],
+               item.rect[7]);
+      } else {
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3]);
+      }
     }
+    cv::Mat out_im = PaddleDetection::VisualizeResult(
+        frame, result, labels, colormap, is_rbox);
     video_out.write(out_im);
     frame_id += 1;
   }
@@ -231,24 +250,41 @@ void PredictImage(const std::vector<std::string> all_img_list,
   // Store all detected result
   std::vector<PaddleDetection::ObjectResult> result;
   std::vector<double> det_times;
+  bool is_rbox = false;
   if (run_benchmark) {
     det->Predict(im, threshold, 10, 10, &result, &det_times);
   } else {
     det->Predict(im, 0.5, 0, 1, &result, &det_times);
     for (const auto& item : result) {
-      printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
-          item.class_id,
-          item.confidence,
-          item.rect[0],
-          item.rect[1],
-          item.rect[2],
-          item.rect[3]);
+      if (item.rect.size() > 6) {
+        is_rbox = true;
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3],
+               item.rect[4],
+               item.rect[5],
+               item.rect[6],
+               item.rect[7]);
+      } else {
+        printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
+               item.class_id,
+               item.confidence,
+               item.rect[0],
+               item.rect[1],
+               item.rect[2],
+               item.rect[3]);
+      }
     }
     // Visualization result
     auto labels = det->GetLabelList();
     auto colormap = PaddleDetection::GenerateColorMap(labels.size());
     cv::Mat vis_img = PaddleDetection::VisualizeResult(
-        im, result, labels, colormap);
+        im, result, labels, colormap, is_rbox);
     std::vector<int> compression_params;
     compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
     compression_params.push_back(95);
......
@@ -94,13 +94,10 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
 cv::Mat VisualizeResult(const cv::Mat& img,
                         const std::vector<ObjectResult>& results,
                         const std::vector<std::string>& lable_list,
-                        const std::vector<int>& colormap) {
+                        const std::vector<int>& colormap,
+                        const bool is_rbox=false) {
   cv::Mat vis_img = img.clone();
   for (int i = 0; i < results.size(); ++i) {
-    int w = results[i].rect[1] - results[i].rect[0];
-    int h = results[i].rect[3] - results[i].rect[2];
-    cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
     // Configure color and text size
     std::ostringstream oss;
     oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
@@ -120,17 +117,37 @@ cv::Mat VisualizeResult(const cv::Mat& img,
                 thickness,
                 nullptr);
     cv::Point origin;
-    origin.x = roi.x;
-    origin.y = roi.y;
+
+    if (is_rbox) {
+      // Draw object, text, and background
+      for (int k = 0; k < 4; k++) {
+        cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
+                                  results[i].rect[(k * 2 + 1) % 8]);
+        cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
+                                  results[i].rect[(k * 2 + 3) % 8]);
+        cv::line(vis_img, pt1, pt2, roi_color, 2);
+      }
+    } else {
+      int w = results[i].rect[1] - results[i].rect[0];
+      int h = results[i].rect[3] - results[i].rect[2];
+      cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
+      // Draw roi object, text, and background
+      cv::rectangle(vis_img, roi, roi_color, 2);
+    }
+
+    origin.x = results[i].rect[0];
+    origin.y = results[i].rect[1];
     // Configure text background
     cv::Rect text_back = cv::Rect(results[i].rect[0],
                                   results[i].rect[2] - text_size.height,
                                   text_size.width,
                                   text_size.height);
-    // Draw roi object, text, and background
-    cv::rectangle(vis_img, roi, roi_color, 2);
+    // Draw text, and background
     cv::rectangle(vis_img, text_back, roi_color, -1);
     cv::putText(vis_img,
                 text,
@@ -152,7 +169,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
 void ObjectDetector::Postprocess(
     const cv::Mat& raw_mat,
-    std::vector<ObjectResult>* result) {
+    std::vector<ObjectResult>* result,
+    bool is_rbox=false) {
   result->clear();
   int rh = 1;
   int rw = 1;
@@ -161,24 +179,52 @@ void ObjectDetector::Postprocess(
     rw = raw_mat.cols;
   }
-  int total_size = output_data_.size() / 6;
-  for (int j = 0; j < total_size; ++j) {
-    // Class id
-    int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
-    // Confidence score
-    float score = output_data_[1 + j * 6];
-    int xmin = (output_data_[2 + j * 6] * rw);
-    int ymin = (output_data_[3 + j * 6] * rh);
-    int xmax = (output_data_[4 + j * 6] * rw);
-    int ymax = (output_data_[5 + j * 6] * rh);
-    int wd = xmax - xmin;
-    int hd = ymax - ymin;
-    if (score > threshold_ && class_id > -1) {
-      ObjectResult result_item;
-      result_item.rect = {xmin, xmax, ymin, ymax};
-      result_item.class_id = class_id;
-      result_item.confidence = score;
-      result->push_back(result_item);
+  if (is_rbox) {
+    int total_size = output_data_.size() / 10;
+    for (int j = 0; j < total_size; ++j) {
+      // Class id
+      int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
+      // Confidence score
+      float score = output_data_[1 + j * 10];
+      int x1 = (output_data_[2 + j * 10] * rw);
+      int y1 = (output_data_[3 + j * 10] * rh);
+      int x2 = (output_data_[4 + j * 10] * rw);
+      int y2 = (output_data_[5 + j * 10] * rh);
+      int x3 = (output_data_[6 + j * 10] * rw);
+      int y3 = (output_data_[7 + j * 10] * rh);
+      int x4 = (output_data_[8 + j * 10] * rw);
+      int y4 = (output_data_[9 + j * 10] * rh);
+      if (score > threshold_ && class_id > -1) {
+        ObjectResult result_item;
+        result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
+        result_item.class_id = class_id;
+        result_item.confidence = score;
+        result->push_back(result_item);
+      }
+    }
+  } else {
+    int total_size = output_data_.size() / 6;
+    for (int j = 0; j < total_size; ++j) {
+      // Class id
+      int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
+      // Confidence score
+      float score = output_data_[1 + j * 6];
+      int xmin = (output_data_[2 + j * 6] * rw);
+      int ymin = (output_data_[3 + j * 6] * rh);
+      int xmax = (output_data_[4 + j * 6] * rw);
+      int ymax = (output_data_[5 + j * 6] * rh);
+      int wd = xmax - xmin;
+      int hd = ymax - ymin;
+      if (score > threshold_ && class_id > -1) {
+        ObjectResult result_item;
+        result_item.rect = {xmin, xmax, ymin, ymax};
+        result_item.class_id = class_id;
+        result_item.confidence = score;
+        result->push_back(result_item);
+      }
     }
   }
 }
@@ -231,6 +277,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
     out_tensor->CopyToCpu(output_data_.data());
   }
+  bool is_rbox = false;
   auto inference_start = std::chrono::steady_clock::now();
   for (int i = 0; i < repeats; i++)
   {
@@ -244,6 +291,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
     for (int j = 0; j < output_shape.size(); ++j) {
       output_size *= output_shape[j];
     }
+    is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;
     if (output_size < 6) {
       std::cerr << "[WARNING] No object detected." << std::endl;
@@ -254,7 +302,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
   auto inference_end = std::chrono::steady_clock::now();
   auto postprocess_start = std::chrono::steady_clock::now();
   // Postprocessing result
-  Postprocess(im, result);
+  Postprocess(im, result, is_rbox);
   auto postprocess_end = std::chrono::steady_clock::now();
   std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
@@ -263,6 +311,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
   times->push_back(double(inference_diff.count() / repeats * 1000));
   std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));
 }
 std::vector<int> GenerateColorMap(int num_class) {
......
@@ -37,6 +37,7 @@ SUPPORT_MODELS = {
     'FCOS',
     'SOLOv2',
     'TTFNet',
+    'S2ANet',
 }
......
@@ -130,22 +130,29 @@ def draw_box(im, np_boxes, labels, threshold=0.5):
     for dt in np_boxes:
         clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
-        xmin, ymin, xmax, ymax = bbox
-        print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
-              'right_bottom:[{:.2f},{:.2f}]'.format(
-                  int(clsid), score, xmin, ymin, xmax, ymax))
-        w = xmax - xmin
-        h = ymax - ymin
         if clsid not in clsid2color:
             clsid2color[clsid] = color_list[clsid]
         color = tuple(clsid2color[clsid])
-        # draw bbox
-        draw.line(
-            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
-             (xmin, ymin)],
-            width=draw_thickness,
-            fill=color)
+        if len(bbox) == 4:
+            xmin, ymin, xmax, ymax = bbox
+            print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
+                  'right_bottom:[{:.2f},{:.2f}]'.format(
+                      int(clsid), score, xmin, ymin, xmax, ymax))
+            # draw bbox
+            draw.line(
+                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+                 (xmin, ymin)],
+                width=draw_thickness,
+                fill=color)
+        elif len(bbox) == 8:
+            x1, y1, x2, y2, x3, y3, x4, y4 = bbox
+            draw.line(
+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+                width=2,
+                fill=color)
+            xmin = min(x1, x2, x3, x4)
+            ymin = min(y1, y2, y3, y4)
         # draw label
         text = "{} {:.4f}".format(labels[clsid], score)
......
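A hedged, standalone sketch of the rotated-box drawing path added above: given 8 corner coordinates, the polygon is closed by repeating the first point. The image size and box values are made up for illustration (requires Pillow).

```python
from PIL import Image, ImageDraw

im = Image.new('RGB', (200, 200), (255, 255, 255))
draw = ImageDraw.Draw(im)

bbox = [30, 40, 150, 20, 170, 120, 50, 140]   # x1, y1, ..., x4, y4 of one rotated box
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
    [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
    width=2,
    fill=(255, 0, 0))
im.save('rbox_demo.jpg')
```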
@@ -33,7 +33,7 @@ logger = setup_logger(__name__)
 __all__ = [
     'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
-    'Gt2TTFTarget', 'Gt2Solov2Target', 'RboxPadBatch'
+    'Gt2TTFTarget', 'Gt2Solov2Target'
 ]
@@ -87,6 +87,12 @@ class PadBatch(BaseOperator):
                 padding_segm[:, :im_h, :im_w] = gt_segm
                 data['gt_segm'] = padding_segm
+
+            if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
+                # poly to rbox
+                polys = data['gt_rbox2poly']
+                rbox = bbox_utils.poly2rbox(polys)
+                data['gt_rbox'] = rbox
         return samples
@@ -739,111 +745,3 @@ class Gt2Solov2Target(BaseOperator):
             data['grid_order{}'.format(idx)] = gt_grid_order
         return samples
-
-@register_op
-class RboxPadBatch(BaseOperator):
-    """
-    Pad a batch of samples so they can be divisible by a stride.
-    The layout of each image should be 'CHW'. And convert poly to rbox.
-    Args:
-        pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
-            height and width is divisible by `pad_to_stride`.
-    """
-
-    def __init__(self, pad_to_stride=0, pad_gt=False):
-        super(RboxPadBatch, self).__init__()
-        self.pad_to_stride = pad_to_stride
-        self.pad_gt = pad_gt
-
-    def __call__(self, samples, context=None):
-        """
-        Args:
-            samples (list): a batch of sample, each is dict.
-        """
-        coarsest_stride = self.pad_to_stride
-        max_shape = np.array([data['image'].shape for data in samples]).max(
-            axis=0)
-        if coarsest_stride > 0:
-            max_shape[1] = int(
-                np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
-            max_shape[2] = int(
-                np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
-        for data in samples:
-            im = data['image']
-            im_c, im_h, im_w = im.shape[:]
-            padding_im = np.zeros(
-                (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
-            padding_im[:, :im_h, :im_w] = im
-            data['image'] = padding_im
-            if 'semantic' in data and data['semantic'] is not None:
-                semantic = data['semantic']
-                padding_sem = np.zeros(
-                    (1, max_shape[1], max_shape[2]), dtype=np.float32)
-                padding_sem[:, :im_h, :im_w] = semantic
-                data['semantic'] = padding_sem
-            if 'gt_segm' in data and data['gt_segm'] is not None:
-                gt_segm = data['gt_segm']
-                padding_segm = np.zeros(
-                    (gt_segm.shape[0], max_shape[1], max_shape[2]),
-                    dtype=np.uint8)
-                padding_segm[:, :im_h, :im_w] = gt_segm
-                data['gt_segm'] = padding_segm
-        if self.pad_gt:
-            gt_num = []
-            if 'gt_poly' in data and data['gt_poly'] is not None and len(data[
-                    'gt_poly']) > 0:
-                pad_mask = True
-            else:
-                pad_mask = False
-
-            if pad_mask:
-                poly_num = []
-                poly_part_num = []
-                point_num = []
-            for data in samples:
-                gt_num.append(data['gt_bbox'].shape[0])
-                if pad_mask:
-                    poly_num.append(len(data['gt_poly']))
-                    for poly in data['gt_poly']:
-                        poly_part_num.append(int(len(poly)))
-                        for p_p in poly:
-                            point_num.append(int(len(p_p) / 2))
-            gt_num_max = max(gt_num)
-
-            for i, sample in enumerate(samples):
-                assert 'gt_rbox' in sample
-                assert 'gt_rbox2poly' in sample
-                gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
-                gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
-                is_crowd_data = np.ones([gt_num_max], dtype=np.int32)
-
-                if pad_mask:
-                    poly_num_max = max(poly_num)
-                    poly_part_num_max = max(poly_part_num)
-                    point_num_max = max(point_num)
-                    gt_masks_data = -np.ones(
-                        [poly_num_max, poly_part_num_max, point_num_max, 2],
-                        dtype=np.float32)
-
-                gt_num = sample['gt_bbox'].shape[0]
-                gt_box_data[0:gt_num, :] = sample['gt_bbox']
-                gt_class_data[0:gt_num] = np.squeeze(sample['gt_class'])
-                is_crowd_data[0:gt_num] = np.squeeze(sample['is_crowd'])
-
-                if pad_mask:
-                    for j, poly in enumerate(sample['gt_poly']):
-                        for k, p_p in enumerate(poly):
-                            pp_np = np.array(p_p).reshape(-1, 2)
-                            gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
-                    sample['gt_poly'] = gt_masks_data
-
-                sample['gt_bbox'] = gt_box_data
-                sample['gt_class'] = gt_class_data
-                sample['is_crowd'] = is_crowd_data
-                # ploy to rbox
-                polys = sample['gt_rbox2poly']
-                rbox = bbox_utils.poly_to_rbox(polys)
-                sample['gt_rbox'] = rbox
-
-        return samples
@@ -2007,7 +2007,7 @@ class Rbox2Poly(BaseOperator):
         x2 = x_ctr + width / 2.0
         y2 = y_ctr + height / 2.0
         sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
-        polys = bbox_utils.rbox2poly(rrects)
+        polys = bbox_utils.rbox2poly_np(rrects)
         sample['gt_rbox2poly'] = polys
         return sample
......
@@ -376,7 +376,8 @@ class Trainer(object):
         imid2path = self.dataset.get_imid2path()
         anno_file = self.dataset.get_anno()
-        clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)
+        clsid2catid, catid2name = get_categories(
+            self.cfg.metric, anno_file=anno_file)
         # Run Infer
         self.status['mode'] = 'test'
......
@@ -263,146 +263,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
     return iou
-
-def rect2rbox(bboxes):
-    """
-    :param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
-    :return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
-    """
-    bboxes = bboxes.reshape(-1, 4)
-    num_boxes = bboxes.shape[0]
-    x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
-    y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
-    edges1 = np.abs(bboxes[:, 2] - bboxes[:, 0])
-    edges2 = np.abs(bboxes[:, 3] - bboxes[:, 1])
-    angles = np.zeros([num_boxes], dtype=bboxes.dtype)
-    inds = edges1 < edges2
-    rboxes = np.stack((x_ctr, y_ctr, edges1, edges2, angles), axis=1)
-    rboxes[inds, 2] = edges2[inds]
-    rboxes[inds, 3] = edges1[inds]
-    rboxes[inds, 4] = np.pi / 2.0
-    return rboxes
-
-def delta2rbox(Rrois,
-               deltas,
-               means=[0, 0, 0, 0, 0],
-               stds=[1, 1, 1, 1, 1],
-               wh_ratio_clip=1e-6):
-    """
-    :param Rrois: (cx, cy, w, h, theta)
-    :param deltas: (dx, dy, dw, dh, dtheta)
-    :param means:
-    :param stds:
-    :param wh_ratio_clip:
-    :return:
-    """
-    deltas = paddle.reshape(deltas, [-1, deltas.shape[-1]])
-    denorm_deltas = deltas * stds + means
-    dx = denorm_deltas[:, 0]
-    dy = denorm_deltas[:, 1]
-    dw = denorm_deltas[:, 2]
-    dh = denorm_deltas[:, 3]
-    dangle = denorm_deltas[:, 4]
-    max_ratio = np.abs(np.log(wh_ratio_clip))
-    dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
-    dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
-    Rroi_x = Rrois[:, 0]
-    Rroi_y = Rrois[:, 1]
-    Rroi_w = Rrois[:, 2]
-    Rroi_h = Rrois[:, 3]
-    Rroi_angle = Rrois[:, 4]
-    gx = dx * Rroi_w * paddle.cos(Rroi_angle) - dy * Rroi_h * paddle.sin(
-        Rroi_angle) + Rroi_x
-    gy = dx * Rroi_w * paddle.sin(Rroi_angle) + dy * Rroi_h * paddle.cos(
-        Rroi_angle) + Rroi_y
-    gw = Rroi_w * dw.exp()
-    gh = Rroi_h * dh.exp()
-    ga = np.pi * dangle + Rroi_angle
-    ga = (ga + np.pi / 4) % np.pi - np.pi / 4
-    ga = paddle.to_tensor(ga)
-    gw = paddle.to_tensor(gw, dtype='float32')
-    gh = paddle.to_tensor(gh, dtype='float32')
-    bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
-    return bboxes
-
-def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
-    """
-    Args:
-        proposals:
-        gt:
-        means: 1x5
-        stds: 1x5
-    Returns:
-    """
-    proposals = proposals.astype(np.float64)
-    PI = np.pi
-    gt_widths = gt[..., 2]
-    gt_heights = gt[..., 3]
-    gt_angle = gt[..., 4]
-    proposals_widths = proposals[..., 2]
-    proposals_heights = proposals[..., 3]
-    proposals_angle = proposals[..., 4]
-    coord = gt[..., 0:2] - proposals[..., 0:2]
-    dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
-          * coord[..., 1]) / proposals_widths
-    dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
-          * coord[..., 1]) / proposals_heights
-    dw = np.log(gt_widths / proposals_widths)
-    dh = np.log(gt_heights / proposals_heights)
-    da = (gt_angle - proposals_angle)
-    da = (da + PI / 4) % PI - PI / 4
-    da /= PI
-    deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
-    means = np.array(means, dtype=deltas.dtype)
-    stds = np.array(stds, dtype=deltas.dtype)
-    deltas = (deltas - means) / stds
-    deltas = deltas.astype(np.float32)
-    return deltas
-
-def bbox_decode(bbox_preds,
-                anchors,
-                means=[0, 0, 0, 0, 0],
-                stds=[1, 1, 1, 1, 1]):
-    """decode bbox from deltas
-    Args:
-        bbox_preds: [N,H,W,5]
-        anchors: [H*W,5]
-    return:
-        bboxes: [N,H,W,5]
-    """
-    num_imgs, H, W, _ = bbox_preds.shape
-    bboxes_list = []
-    for img_id in range(num_imgs):
-        bbox_pred = bbox_preds[img_id]
-        # bbox_pred.shape=[5,H,W]
-        bbox_delta = bbox_pred
-        bboxes = delta2rbox(
-            anchors, bbox_delta, means, stds, wh_ratio_clip=1e-6)
-        bboxes = paddle.reshape(bboxes, [H, W, 5])
-        bboxes_list.append(bboxes)
-    return paddle.stack(bboxes_list, axis=0)
-
-def poly_to_rbox(polys):
+def poly2rbox(polys):
     """
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
     to
@@ -479,37 +340,16 @@ def get_best_begin_point_single(coordinate):
     return np.array(combinate[force_flag]).reshape(8)
-
-def rbox2poly_single(rrect):
-    """
-    rrect:[x_ctr,y_ctr,w,h,angle]
-    to
-    poly:[x0,y0,x1,y1,x2,y2,x3,y3]
-    """
-    x_ctr, y_ctr, width, height, angle = rrect[:5]
-    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
-    # rect 2x4
-    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
-    R = np.array([[np.cos(angle), -np.sin(angle)],
-                  [np.sin(angle), np.cos(angle)]])
-    # poly
-    poly = R.dot(rect)
-    x0, x1, x2, x3 = poly[0, :4] + x_ctr
-    y0, y1, y2, y3 = poly[1, :4] + y_ctr
-    poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
-    poly = get_best_begin_point_single(poly)
-    return poly
-
-def rbox2poly(rrects):
+def rbox2poly_np(rrects):
     """
     rrect:[x_ctr,y_ctr,w,h,angle]
     to
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
     """
     polys = []
-    rrects = rrects.numpy()
     for i in range(rrects.shape[0]):
         rrect = rrects[i]
-        # x_ctr, y_ctr, width, height, angle = rrect[:5]
         x_ctr = rrect[0]
         y_ctr = rrect[1]
         width = rrect[2]
@@ -529,13 +369,13 @@ def rbox2poly(rrects):
     return polys
-
-def pd_rbox2poly(rrects):
+def rbox2poly(rrects):
     """
     rrect:[x_ctr,y_ctr,w,h,angle]
     to
     poly:[x0,y0,x1,y1,x2,y2,x3,y3]
     """
-    N = rrects.shape[0]
+    N = paddle.shape(rrects)[0]
     x_ctr = rrects[:, 0]
     y_ctr = rrects[:, 1]
@@ -561,14 +401,10 @@ def pd_rbox2poly(rrects):
     polys = paddle.transpose(polys, [2, 1, 0])
     polys = paddle.reshape(polys, [-1, N])
     polys = paddle.transpose(polys, [1, 0])
-    polys[:, 0] += x_ctr
-    polys[:, 2] += x_ctr
-    polys[:, 4] += x_ctr
-    polys[:, 6] += x_ctr
-    polys[:, 1] += y_ctr
-    polys[:, 3] += y_ctr
-    polys[:, 5] += y_ctr
-    polys[:, 7] += y_ctr
+
+    tmp = paddle.stack(
+        [x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
+    polys = polys + tmp
     return polys
......
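For reference, a standalone numpy check of the rbox-to-polygon math kept in `rbox2poly_np` (the same rotation-matrix construction as the code above); the box values are illustrative.

```python
import numpy as np

x_ctr, y_ctr, width, height, angle = 50.0, 25.0, 100.0, 50.0, np.pi / 6
tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
R = np.array([[np.cos(angle), -np.sin(angle)],
              [np.sin(angle), np.cos(angle)]])
poly = R.dot(rect)
xs = poly[0, :] + x_ctr
ys = poly[1, :] + y_ctr
print(np.stack([xs, ys], axis=1))   # the 4 corner points of the rotated box
```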
This diff is collapsed.
@@ -17,7 +17,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from ppdet.core.workspace import register
-from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly, pd_rbox2poly
+from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly
 try:
     from collections.abc import Sequence
 except Exception:
@@ -214,7 +214,7 @@ class FCOSPostProcess(object):
 @register
-class S2ANetBBoxPostProcess(object):
+class S2ANetBBoxPostProcess(nn.Layer):
     __shared__ = ['num_classes']
     __inject__ = ['nms']
@@ -225,41 +225,43 @@ class S2ANetBBoxPostProcess(object):
         self.min_bbox_size = min_bbox_size
         self.nms = nms
         self.origin_shape_list = []
+        self.fake_pred_cls_score_bbox = paddle.to_tensor(
+            np.array(
+                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
+                dtype='float32'))
+        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))

-    def __call__(self, pred_scores, pred_bboxes):
+    def forward(self, pred_scores, pred_bboxes):
         """
         pred_scores : [N, M]  score
         pred_bboxes : [N, 5]  xc, yc, w, h, a
         im_shape : [N, 2]  im_shape
         scale_factor : [N, 2]  scale_factor
         """
-        pred_ploys = pd_rbox2poly(pred_bboxes)
-        pred_ploys = paddle.reshape(
-            pred_ploys, [1, pred_ploys.shape[0], pred_ploys.shape[1]])
+        pred_ploys0 = rbox2poly(pred_bboxes)
+        pred_ploys = paddle.unsqueeze(pred_ploys0, axis=0)

-        pred_scores = paddle.to_tensor(pred_scores)
         # pred_scores [NA, 16] --> [16, NA]
-        pred_scores = paddle.transpose(pred_scores, [1, 0])
-        pred_scores = paddle.reshape(
-            pred_scores, [1, pred_scores.shape[0], pred_scores.shape[1]])
+        pred_scores0 = paddle.transpose(pred_scores, [1, 0])
+        pred_scores = paddle.unsqueeze(pred_scores0, axis=0)
         pred_cls_score_bbox, bbox_num, _ = self.nms(pred_ploys, pred_scores,
                                                     self.num_classes)
         # Prevent empty bbox_pred from decode or NMS.
         # Bboxes and score before NMS may be empty due to the score threshold.
-        if pred_cls_score_bbox.shape[0] == 0:
-            pred_cls_score_bbox = paddle.to_tensor(
-                np.array(
-                    [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
-            bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
+        if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[
+                1] <= 1:
+            pred_cls_score_bbox = self.fake_pred_cls_score_bbox
+            bbox_num = self.fake_bbox_num
+
+        pred_cls_score_bbox = paddle.reshape(pred_cls_score_bbox, [-1, 10])
+        assert pred_cls_score_bbox.shape[1] == 10
         return pred_cls_score_bbox, bbox_num

     def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
         """
         Rescale, clip and filter the bbox from the output of NMS to
         get final prediction.
         Args:
             bboxes(Tensor): bboxes [N, 10]
             bbox_num(Tensor): bbox_num
@@ -270,6 +272,7 @@ class S2ANetBBoxPostProcess(object):
             including labels, scores and bboxes. The size of
             bboxes are corresponding to the original image.
         """
+        assert bboxes.shape[1] == 10
         origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
         origin_shape_list = []
@@ -292,7 +295,7 @@ class S2ANetBBoxPostProcess(object):
         # bboxes: [N, 10], label, score, bbox
         pred_label_score = bboxes[:, 0:2]
-        pred_bbox = bboxes[:, 2:10:1]
+        pred_bbox = bboxes[:, 2:]
         # rescale bbox to original image
         scaled_bbox = pred_bbox / scale_factor_list
......
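A hedged, plain-numpy sketch of the "fake bbox" fallback introduced above: when NMS returns an empty or degenerate result, one placeholder row (label -1, zero scores and coordinates) is substituted, presumably so the exported graph always produces a well-formed [N, 10] output. The function name here is hypothetical.

```python
import numpy as np

fake_pred_cls_score_bbox = np.array(
    [[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32')

def pad_empty(pred_cls_score_bbox):
    # Mirror the shape checks used in S2ANetBBoxPostProcess.forward.
    if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[1] <= 1:
        return fake_pred_cls_score_bbox, np.array([1], dtype='int32')
    return pred_cls_score_bbox, np.array(
        [pred_cls_score_bbox.shape[0]], dtype='int32')

boxes, num = pad_empty(np.zeros((0, 10), dtype='float32'))
print(boxes.shape, num)    # (1, 10) [1]
```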
@@ -16,7 +16,6 @@ import paddle
 from ppdet.core.workspace import register, serializable
 from .target import rpn_anchor_target, generate_proposal_target, generate_mask_target, libra_generate_proposal_target
-from ppdet.modeling import bbox_utils
 import numpy as np
@@ -283,13 +282,58 @@ class RBoxAssigner(object):
         """
         if anchors.ndim == 3:
-            anchors = anchors.reshape(-1, anchor.shape[-1])
+            anchors = anchors.reshape(-1, anchors.shape[-1])
         assert anchors.ndim == 2

         anchor_num = anchors.shape[0]
         anchor_valid = np.ones((anchor_num), np.uint8)
         anchor_inds = np.arange(anchor_num)
         return anchor_inds

+    def rbox2delta(self,
+                   proposals,
+                   gt,
+                   means=[0, 0, 0, 0, 0],
+                   stds=[1, 1, 1, 1, 1]):
+        """
+        Args:
+            proposals: tensor [N, 5]
+            gt: gt [N, 5]
+            means: means [5]
+            stds: stds [5]
+        Returns:
+        """
+        proposals = proposals.astype(np.float64)
+        PI = np.pi
+        gt_widths = gt[..., 2]
+        gt_heights = gt[..., 3]
+        gt_angle = gt[..., 4]
+
+        proposals_widths = proposals[..., 2]
+        proposals_heights = proposals[..., 3]
+        proposals_angle = proposals[..., 4]
+
+        coord = gt[..., 0:2] - proposals[..., 0:2]
+        dx = (np.cos(proposals[..., 4]) * coord[..., 0] +
+              np.sin(proposals[..., 4]) * coord[..., 1]) / proposals_widths
+        dy = (-np.sin(proposals[..., 4]) * coord[..., 0] +
+              np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights
+        dw = np.log(gt_widths / proposals_widths)
+        dh = np.log(gt_heights / proposals_heights)
+        da = (gt_angle - proposals_angle)
+
+        da = (da + PI / 4) % PI - PI / 4
+        da /= PI
+
+        deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
+        means = np.array(means, dtype=deltas.dtype)
+        stds = np.array(stds, dtype=deltas.dtype)
+        deltas = (deltas - means) / stds
+        deltas = deltas.astype(np.float32)
+        return deltas
+
     def assign_anchor(self,
                       anchors,
                       gt_bboxes,
@@ -405,8 +449,8 @@ class RBoxAssigner(object):
         #print('ancho target pos_inds', pos_inds, len(pos_inds))
         pos_sampled_gt_boxes = gt_bboxes[anchor_gt_bbox_inds[pos_inds]]
         if len(pos_inds) > 0:
-            pos_bbox_targets = bbox_utils.rbox2delta(pos_sampled_anchors,
-                                                     pos_sampled_gt_boxes)
+            pos_bbox_targets = self.rbox2delta(pos_sampled_anchors,
+                                               pos_sampled_gt_boxes)
             bbox_targets[pos_inds, :] = pos_bbox_targets
             bbox_weights[pos_inds, :] = 1.0
......