Unverified commit 0947171b, authored by cnn, committed by GitHub

[dev] s2anet fix export and deploy (#2919)

* s2anet fix export and deploy

* remove redundant code

* fix cpp for s2anet deploy

* fix bug of get_categories

* rename poly_to_rbox to poly2rbox

* add some comment for function
Parent 912833f2
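Throughout this diff, a poly is the 8-value corner form [x0, y0, x1, y1, x2, y2, x3, y3] and an rbox is the 5-value rotated-box form [x_ctr, y_ctr, w, h, angle]; poly2rbox (renamed from poly_to_rbox) maps the former to the latter. A purely illustrative sketch of such a conversion using OpenCV's minAreaRect — not the ppdet implementation, whose angle convention and normalization differ:

import numpy as np
import cv2

# Four corner points of a (possibly rotated) quadrilateral, values hypothetical.
poly = np.array([[10, 10], [110, 20], [100, 70], [0, 60]], dtype=np.float32)
(cx, cy), (w, h), angle_deg = cv2.minAreaRect(poly)      # minimum-area rotated rectangle
rbox = [cx, cy, w, h, np.deg2rad(angle_deg)]             # [x_ctr, y_ctr, w, h, angle]
print(rbox)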
......@@ -9,7 +9,7 @@ TrainReader:
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: true
drop_last: true
......@@ -22,7 +22,7 @@ EvalReader:
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
......@@ -36,7 +36,7 @@ TestReader:
- NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- Permute: {}
batch_transforms:
- RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
- PadBatch: {pad_to_stride: 32}
batch_size: 1
shuffle: false
drop_last: false
......@@ -27,7 +27,7 @@ parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
if parent_path not in sys.path:
sys.path.append(parent_path)
from ppdet.modeling.bbox_utils import poly_to_rbox
from ppdet.modeling.bbox_utils import poly2rbox
from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
......@@ -118,7 +118,7 @@ def dota_2_coco(image_dir,
# rbox or bbox
if is_obb:
polys = [single_obj_poly]
rboxs = poly_to_rbox(polys)
rboxs = poly2rbox(polys)
rbox = rboxs[0].tolist()
single_obj['bbox'] = rbox
single_obj['area'] = rbox[2] * rbox[3]
......
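With the rename applied, dota_2_coco stores the rotated box directly in the COCO-style record and takes the area from the rbox width and height. Roughly (values hypothetical, field names as in the hunk above):

rbox = [60.0, 40.0, 100.0, 50.0, 0.1]                    # poly2rbox(polys)[0].tolist(): [x_ctr, y_ctr, w, h, angle]
single_obj = {'bbox': rbox, 'area': rbox[2] * rbox[3]}   # area = w * h
print(single_obj)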
......@@ -51,7 +51,8 @@ std::vector<int> GenerateColorMap(int num_class);
cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<ObjectResult>& results,
const std::vector<std::string>& lable_list,
const std::vector<int>& colormap);
const std::vector<int>& colormap,
const bool is_rbox);
class ObjectDetector {
......@@ -120,7 +121,8 @@ class ObjectDetector {
// Postprocess result
void Postprocess(
const cv::Mat& raw_mat,
std::vector<ObjectResult>* result);
std::vector<ObjectResult>* result,
bool is_rbox);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
......
......@@ -195,23 +195,42 @@ void PredictVideo(const std::string& video_path,
// Capture all frames and do inference
cv::Mat frame;
int frame_id = 0;
bool is_rbox = false;
while (capture.read(frame)) {
if (frame.empty()) {
break;
}
det->Predict(frame, 0.5, 0, 1, &result, &det_times);
cv::Mat out_im = PaddleDetection::VisualizeResult(
frame, result, labels, colormap);
for (const auto& item : result) {
printf("In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d %d]\n",
frame_id,
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
if (item.rect.size() > 6){
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
}
else{
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
cv::Mat out_im = PaddleDetection::VisualizeResult(
frame, result, labels, colormap, is_rbox);
video_out.write(out_im);
frame_id += 1;
}
......@@ -231,24 +250,41 @@ void PredictImage(const std::vector<std::string> all_img_list,
// Store all detected result
std::vector<PaddleDetection::ObjectResult> result;
std::vector<double> det_times;
bool is_rbox = false;
if (run_benchmark) {
det->Predict(im, threshold, 10, 10, &result, &det_times);
} else {
det->Predict(im, 0.5, 0, 1, &result, &det_times);
for (const auto& item : result) {
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
if (item.rect.size() > 6){
is_rbox = true;
printf("class=%d confidence=%.4f rect=[%d %d %d %d %d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3],
item.rect[4],
item.rect[5],
item.rect[6],
item.rect[7]);
}
else{
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
item.class_id,
item.confidence,
item.rect[0],
item.rect[1],
item.rect[2],
item.rect[3]);
}
}
// Visualization result
auto labels = det->GetLabelList();
auto colormap = PaddleDetection::GenerateColorMap(labels.size());
cv::Mat vis_img = PaddleDetection::VisualizeResult(
im, result, labels, colormap);
im, result, labels, colormap, is_rbox);
std::vector<int> compression_params;
compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
......
......@@ -94,13 +94,10 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<ObjectResult>& results,
const std::vector<std::string>& lable_list,
const std::vector<int>& colormap) {
const std::vector<int>& colormap,
const bool is_rbox=false) {
cv::Mat vis_img = img.clone();
for (int i = 0; i < results.size(); ++i) {
int w = results[i].rect[1] - results[i].rect[0];
int h = results[i].rect[3] - results[i].rect[2];
cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
// Configure color and text size
std::ostringstream oss;
oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
......@@ -120,17 +117,37 @@ cv::Mat VisualizeResult(const cv::Mat& img,
thickness,
nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
if (is_rbox)
{
// Draw object, text, and background
for (int k=0; k<4; k++)
{
cv::Point pt1 = cv::Point(results[i].rect[(k*2)%8],
results[i].rect[(k*2+1)%8]);
cv::Point pt2 = cv::Point(results[i].rect[(k*2+2)%8],
results[i].rect[(k*2+3)%8]);
cv::line(vis_img, pt1, pt2, roi_color, 2);
}
}
else
{
int w = results[i].rect[1] - results[i].rect[0];
int h = results[i].rect[3] - results[i].rect[2];
cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h);
// Draw roi object, text, and background
cv::rectangle(vis_img, roi, roi_color, 2);
}
origin.x = results[i].rect[0];
origin.y = results[i].rect[1];
// Configure text background
cv::Rect text_back = cv::Rect(results[i].rect[0],
results[i].rect[2] - text_size.height,
text_size.width,
text_size.height);
// Draw roi object, text, and background
cv::rectangle(vis_img, roi, roi_color, 2);
// Draw text, and background
cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img,
text,
......@@ -152,7 +169,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
void ObjectDetector::Postprocess(
const cv::Mat& raw_mat,
std::vector<ObjectResult>* result) {
std::vector<ObjectResult>* result,
bool is_rbox=false) {
result->clear();
int rh = 1;
int rw = 1;
......@@ -161,24 +179,52 @@ void ObjectDetector::Postprocess(
rw = raw_mat.cols;
}
int total_size = output_data_.size() / 6;
for (int j = 0; j < total_size; ++j) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
// Confidence score
float score = output_data_[1 + j * 6];
int xmin = (output_data_[2 + j * 6] * rw);
int ymin = (output_data_[3 + j * 6] * rh);
int xmax = (output_data_[4 + j * 6] * rw);
int ymax = (output_data_[5 + j * 6] * rh);
int wd = xmax - xmin;
int hd = ymax - ymin;
if (score > threshold_ && class_id > -1) {
ObjectResult result_item;
result_item.rect = {xmin, xmax, ymin, ymax};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
if (is_rbox)
{
int total_size = output_data_.size() / 10;
for (int j = 0; j < total_size; ++j) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
// Confidence score
float score = output_data_[1 + j * 10];
int x1 = (output_data_[2 + j * 10] * rw);
int y1 = (output_data_[3 + j * 10] * rh);
int x2 = (output_data_[4 + j * 10] * rw);
int y2 = (output_data_[5 + j * 10] * rh);
int x3 = (output_data_[6 + j * 10] * rw);
int y3 = (output_data_[7 + j * 10] * rh);
int x4 = (output_data_[8 + j * 10] * rw);
int y4 = (output_data_[9 + j * 10] * rh);
if (score > threshold_ && class_id > -1) {
ObjectResult result_item;
result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
}
}
}
else
{
int total_size = output_data_.size() / 6;
for (int j = 0; j < total_size; ++j) {
// Class id
int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
// Confidence score
float score = output_data_[1 + j * 6];
int xmin = (output_data_[2 + j * 6] * rw);
int ymin = (output_data_[3 + j * 6] * rh);
int xmax = (output_data_[4 + j * 6] * rw);
int ymax = (output_data_[5 + j * 6] * rh);
int wd = xmax - xmin;
int hd = ymax - ymin;
if (score > threshold_ && class_id > -1) {
ObjectResult result_item;
result_item.rect = {xmin, xmax, ymin, ymax};
result_item.class_id = class_id;
result_item.confidence = score;
result->push_back(result_item);
}
}
}
}
......@@ -231,6 +277,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
out_tensor->CopyToCpu(output_data_.data());
}
bool is_rbox = false;
auto inference_start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++)
{
......@@ -244,6 +291,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
for (int j = 0; j < output_shape.size(); ++j) {
output_size *= output_shape[j];
}
is_rbox = output_shape[output_shape.size()-1] % 10 == 0;
if (output_size < 6) {
std::cerr << "[WARNING] No object detected." << std::endl;
......@@ -254,7 +302,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now();
// Postprocessing result
Postprocess(im, result);
Postprocess(im, result, is_rbox);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
......@@ -263,6 +311,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
times->push_back(double(inference_diff.count() / repeats * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
}
std::vector<int> GenerateColorMap(int num_class) {
......
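The Postprocess above keys its behaviour off the width of the raw output tensor: 6 columns ([class, score, xmin, ymin, xmax, ymax]) for axis-aligned models, 10 columns ([class, score, x1, y1, ..., x4, y4]) for S2ANet, with is_rbox inferred from the last output dimension being divisible by 10. A small Python sketch of the same parsing (layout as in the diff; the image-size rescaling by rw/rh is omitted, and the threshold value is hypothetical):

import numpy as np

def parse_output(output, threshold=0.5):
    # output: 2-D array whose row width is 6 (axis-aligned) or 10 (rotated)
    is_rbox = output.shape[-1] % 10 == 0
    results = []
    for row in output:
        class_id, score = int(round(row[0])), float(row[1])
        if score > threshold and class_id > -1:
            results.append({'class_id': class_id,
                            'confidence': score,
                            'rect': row[2:].tolist()})   # 4 or 8 coordinates
    return results, is_rbox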
......@@ -37,6 +37,7 @@ SUPPORT_MODELS = {
'FCOS',
'SOLOv2',
'TTFNet',
'S2ANet',
}
......
......@@ -130,22 +130,29 @@ def draw_box(im, np_boxes, labels, threshold=0.5):
for dt in np_boxes:
clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
w = xmax - xmin
h = ymax - ymin
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color = tuple(clsid2color[clsid])
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
if len(bbox) == 4:
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
elif len(bbox) == 8:
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill=color)
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
# draw label
text = "{} {:.4f}".format(labels[clsid], score)
......
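After this change draw_box accepts rows carrying either 4 or 8 coordinates after the class id and score. A usage-shaped sketch for a rotated-box model (values hypothetical):

import numpy as np

# One detection per row: [class_id, score, x1, y1, x2, y2, x3, y3, x4, y4]
np_boxes = np.array([[0, 0.93, 10, 10, 110, 20, 100, 70, 0, 60]], dtype=np.float32)
# draw_box(im, np_boxes, labels, threshold=0.5) then takes the len(bbox) == 8 branch above.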
......@@ -33,7 +33,7 @@ logger = setup_logger(__name__)
__all__ = [
'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
'Gt2TTFTarget', 'Gt2Solov2Target', 'RboxPadBatch'
'Gt2TTFTarget', 'Gt2Solov2Target'
]
......@@ -87,6 +87,12 @@ class PadBatch(BaseOperator):
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
if 'gt_rbox2poly' in data and data['gt_rbox2poly'] is not None:
# poly to rbox
polys = data['gt_rbox2poly']
rbox = bbox_utils.poly2rbox(polys)
data['gt_rbox'] = rbox
return samples
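PadBatch now derives gt_rbox on the fly whenever a sample carries gt_rbox2poly, which is what makes the dedicated RboxPadBatch below removable. A minimal sketch of that conversion step, assuming poly2rbox returns an [N, 5] array as the dota_2_coco hunk above suggests:

import numpy as np
from ppdet.modeling.bbox_utils import poly2rbox   # import path as elsewhere in this diff

# One rotated ground-truth box given as 4 corners, values hypothetical.
polys = np.array([[10., 10., 110., 20., 100., 70., 0., 60.]], dtype=np.float32)  # [N, 8]
gt_rbox = poly2rbox(polys)   # [N, 5] (x_ctr, y_ctr, w, h, angle); stored as data['gt_rbox']
print(gt_rbox.shape)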
......@@ -739,111 +745,3 @@ class Gt2Solov2Target(BaseOperator):
data['grid_order{}'.format(idx)] = gt_grid_order
return samples
@register_op
class RboxPadBatch(BaseOperator):
"""
Pad a batch of samples so they can be divisible by a stride.
The layout of each image should be 'CHW'. And convert poly to rbox.
Args:
pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
height and width is divisible by `pad_to_stride`.
"""
def __init__(self, pad_to_stride=0, pad_gt=False):
super(RboxPadBatch, self).__init__()
self.pad_to_stride = pad_to_stride
self.pad_gt = pad_gt
def __call__(self, samples, context=None):
"""
Args:
samples (list): a batch of sample, each is dict.
"""
coarsest_stride = self.pad_to_stride
max_shape = np.array([data['image'].shape for data in samples]).max(
axis=0)
if coarsest_stride > 0:
max_shape[1] = int(
np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
max_shape[2] = int(
np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
for data in samples:
im = data['image']
im_c, im_h, im_w = im.shape[:]
padding_im = np.zeros(
(im_c, max_shape[1], max_shape[2]), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
data['image'] = padding_im
if 'semantic' in data and data['semantic'] is not None:
semantic = data['semantic']
padding_sem = np.zeros(
(1, max_shape[1], max_shape[2]), dtype=np.float32)
padding_sem[:, :im_h, :im_w] = semantic
data['semantic'] = padding_sem
if 'gt_segm' in data and data['gt_segm'] is not None:
gt_segm = data['gt_segm']
padding_segm = np.zeros(
(gt_segm.shape[0], max_shape[1], max_shape[2]),
dtype=np.uint8)
padding_segm[:, :im_h, :im_w] = gt_segm
data['gt_segm'] = padding_segm
if self.pad_gt:
gt_num = []
if 'gt_poly' in data and data['gt_poly'] is not None and len(data[
'gt_poly']) > 0:
pad_mask = True
else:
pad_mask = False
if pad_mask:
poly_num = []
poly_part_num = []
point_num = []
for data in samples:
gt_num.append(data['gt_bbox'].shape[0])
if pad_mask:
poly_num.append(len(data['gt_poly']))
for poly in data['gt_poly']:
poly_part_num.append(int(len(poly)))
for p_p in poly:
point_num.append(int(len(p_p) / 2))
gt_num_max = max(gt_num)
for i, sample in enumerate(samples):
assert 'gt_rbox' in sample
assert 'gt_rbox2poly' in sample
gt_box_data = -np.ones([gt_num_max, 4], dtype=np.float32)
gt_class_data = -np.ones([gt_num_max], dtype=np.int32)
is_crowd_data = np.ones([gt_num_max], dtype=np.int32)
if pad_mask:
poly_num_max = max(poly_num)
poly_part_num_max = max(poly_part_num)
point_num_max = max(point_num)
gt_masks_data = -np.ones(
[poly_num_max, poly_part_num_max, point_num_max, 2],
dtype=np.float32)
gt_num = sample['gt_bbox'].shape[0]
gt_box_data[0:gt_num, :] = sample['gt_bbox']
gt_class_data[0:gt_num] = np.squeeze(sample['gt_class'])
is_crowd_data[0:gt_num] = np.squeeze(sample['is_crowd'])
if pad_mask:
for j, poly in enumerate(sample['gt_poly']):
for k, p_p in enumerate(poly):
pp_np = np.array(p_p).reshape(-1, 2)
gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
sample['gt_poly'] = gt_masks_data
sample['gt_bbox'] = gt_box_data
sample['gt_class'] = gt_class_data
sample['is_crowd'] = is_crowd_data
# ploy to rbox
polys = sample['gt_rbox2poly']
rbox = bbox_utils.poly_to_rbox(polys)
sample['gt_rbox'] = rbox
return samples
......@@ -2007,7 +2007,7 @@ class Rbox2Poly(BaseOperator):
x2 = x_ctr + width / 2.0
y2 = y_ctr + height / 2.0
sample['gt_bbox'] = np.stack([x1, y1, x2, y2], axis=1)
polys = bbox_utils.rbox2poly(rrects)
polys = bbox_utils.rbox2poly_np(rrects)
sample['gt_rbox2poly'] = polys
return sample
......
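The Rbox2Poly operator now calls the numpy variant rbox2poly_np; the name rbox2poly is reserved for the paddle implementation used at inference time. The underlying geometry, restated here from the rbox2poly_single code removed further down, is a 2-D rotation of the axis-aligned corners followed by a shift to the box centre (the helper name below is hypothetical, and the library additionally re-orders the start corner via get_best_begin_point_single):

import numpy as np

def rbox_to_corners(rrect):
    # rrect: [x_ctr, y_ctr, w, h, angle]  ->  poly: [x0, y0, x1, y1, x2, y2, x3, y3]
    x_ctr, y_ctr, width, height, angle = rrect[:5]
    tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
    rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])   # 2x4 corner matrix
    R = np.array([[np.cos(angle), -np.sin(angle)],
                  [np.sin(angle),  np.cos(angle)]])                         # rotation matrix
    poly = R.dot(rect)                                                      # rotate about the origin
    xs, ys = poly[0, :] + x_ctr, poly[1, :] + y_ctr                         # shift to the centre
    return np.stack([xs, ys], axis=1).reshape(-1).astype(np.float32)

print(rbox_to_corners([60., 40., 100., 50., 0.1]))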
......@@ -376,7 +376,8 @@ class Trainer(object):
imid2path = self.dataset.get_imid2path()
anno_file = self.dataset.get_anno()
clsid2catid, catid2name = get_categories(self.cfg.metric, anno_file)
clsid2catid, catid2name = get_categories(
self.cfg.metric, anno_file=anno_file)
# Run Infer
self.status['mode'] = 'test'
......
......@@ -263,146 +263,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
return iou
def rect2rbox(bboxes):
"""
:param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
:return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
"""
bboxes = bboxes.reshape(-1, 4)
num_boxes = bboxes.shape[0]
x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
edges1 = np.abs(bboxes[:, 2] - bboxes[:, 0])
edges2 = np.abs(bboxes[:, 3] - bboxes[:, 1])
angles = np.zeros([num_boxes], dtype=bboxes.dtype)
inds = edges1 < edges2
rboxes = np.stack((x_ctr, y_ctr, edges1, edges2, angles), axis=1)
rboxes[inds, 2] = edges2[inds]
rboxes[inds, 3] = edges1[inds]
rboxes[inds, 4] = np.pi / 2.0
return rboxes
def delta2rbox(Rrois,
deltas,
means=[0, 0, 0, 0, 0],
stds=[1, 1, 1, 1, 1],
wh_ratio_clip=1e-6):
"""
:param Rrois: (cx, cy, w, h, theta)
:param deltas: (dx, dy, dw, dh, dtheta)
:param means:
:param stds:
:param wh_ratio_clip:
:return:
"""
deltas = paddle.reshape(deltas, [-1, deltas.shape[-1]])
denorm_deltas = deltas * stds + means
dx = denorm_deltas[:, 0]
dy = denorm_deltas[:, 1]
dw = denorm_deltas[:, 2]
dh = denorm_deltas[:, 3]
dangle = denorm_deltas[:, 4]
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
Rroi_x = Rrois[:, 0]
Rroi_y = Rrois[:, 1]
Rroi_w = Rrois[:, 2]
Rroi_h = Rrois[:, 3]
Rroi_angle = Rrois[:, 4]
gx = dx * Rroi_w * paddle.cos(Rroi_angle) - dy * Rroi_h * paddle.sin(
Rroi_angle) + Rroi_x
gy = dx * Rroi_w * paddle.sin(Rroi_angle) + dy * Rroi_h * paddle.cos(
Rroi_angle) + Rroi_y
gw = Rroi_w * dw.exp()
gh = Rroi_h * dh.exp()
ga = np.pi * dangle + Rroi_angle
ga = (ga + np.pi / 4) % np.pi - np.pi / 4
ga = paddle.to_tensor(ga)
gw = paddle.to_tensor(gw, dtype='float32')
gh = paddle.to_tensor(gh, dtype='float32')
bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
return bboxes
def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
"""
Args:
proposals:
gt:
means: 1x5
stds: 1x5
Returns:
"""
proposals = proposals.astype(np.float64)
PI = np.pi
gt_widths = gt[..., 2]
gt_heights = gt[..., 3]
gt_angle = gt[..., 4]
proposals_widths = proposals[..., 2]
proposals_heights = proposals[..., 3]
proposals_angle = proposals[..., 4]
coord = gt[..., 0:2] - proposals[..., 0:2]
dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
* coord[..., 1]) / proposals_widths
dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
* coord[..., 1]) / proposals_heights
dw = np.log(gt_widths / proposals_widths)
dh = np.log(gt_heights / proposals_heights)
da = (gt_angle - proposals_angle)
da = (da + PI / 4) % PI - PI / 4
da /= PI
deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
means = np.array(means, dtype=deltas.dtype)
stds = np.array(stds, dtype=deltas.dtype)
deltas = (deltas - means) / stds
deltas = deltas.astype(np.float32)
return deltas
def bbox_decode(bbox_preds,
anchors,
means=[0, 0, 0, 0, 0],
stds=[1, 1, 1, 1, 1]):
"""decode bbox from deltas
Args:
bbox_preds: [N,H,W,5]
anchors: [H*W,5]
return:
bboxes: [N,H,W,5]
"""
num_imgs, H, W, _ = bbox_preds.shape
bboxes_list = []
for img_id in range(num_imgs):
bbox_pred = bbox_preds[img_id]
# bbox_pred.shape=[5,H,W]
bbox_delta = bbox_pred
bboxes = delta2rbox(
anchors, bbox_delta, means, stds, wh_ratio_clip=1e-6)
bboxes = paddle.reshape(bboxes, [H, W, 5])
bboxes_list.append(bboxes)
return paddle.stack(bboxes_list, axis=0)
def poly_to_rbox(polys):
def poly2rbox(polys):
"""
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
to
......@@ -479,37 +340,16 @@ def get_best_begin_point_single(coordinate):
return np.array(combinate[force_flag]).reshape(8)
def rbox2poly_single(rrect):
"""
rrect:[x_ctr,y_ctr,w,h,angle]
to
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
"""
x_ctr, y_ctr, width, height, angle = rrect[:5]
tl_x, tl_y, br_x, br_y = -width / 2, -height / 2, width / 2, height / 2
# rect 2x4
rect = np.array([[tl_x, br_x, br_x, tl_x], [tl_y, tl_y, br_y, br_y]])
R = np.array([[np.cos(angle), -np.sin(angle)],
[np.sin(angle), np.cos(angle)]])
# poly
poly = R.dot(rect)
x0, x1, x2, x3 = poly[0, :4] + x_ctr
y0, y1, y2, y3 = poly[1, :4] + y_ctr
poly = np.array([x0, y0, x1, y1, x2, y2, x3, y3], dtype=np.float32)
poly = get_best_begin_point_single(poly)
return poly
def rbox2poly(rrects):
def rbox2poly_np(rrects):
"""
rrect:[x_ctr,y_ctr,w,h,angle]
to
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
"""
polys = []
rrects = rrects.numpy()
for i in range(rrects.shape[0]):
rrect = rrects[i]
# x_ctr, y_ctr, width, height, angle = rrect[:5]
x_ctr = rrect[0]
y_ctr = rrect[1]
width = rrect[2]
......@@ -529,13 +369,13 @@ def rbox2poly(rrects):
return polys
def pd_rbox2poly(rrects):
def rbox2poly(rrects):
"""
rrect:[x_ctr,y_ctr,w,h,angle]
to
poly:[x0,y0,x1,y1,x2,y2,x3,y3]
"""
N = rrects.shape[0]
N = paddle.shape(rrects)[0]
x_ctr = rrects[:, 0]
y_ctr = rrects[:, 1]
......@@ -561,14 +401,10 @@ def pd_rbox2poly(rrects):
polys = paddle.transpose(polys, [2, 1, 0])
polys = paddle.reshape(polys, [-1, N])
polys = paddle.transpose(polys, [1, 0])
polys[:, 0] += x_ctr
polys[:, 2] += x_ctr
polys[:, 4] += x_ctr
polys[:, 6] += x_ctr
polys[:, 1] += y_ctr
polys[:, 3] += y_ctr
polys[:, 5] += y_ctr
polys[:, 7] += y_ctr
tmp = paddle.stack(
[x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr, x_ctr, y_ctr], axis=1)
polys = polys + tmp
return polys
......
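Two details of the rewritten rbox2poly are worth noting for export: the box count is read with paddle.shape(rrects)[0], a run-time tensor, rather than the Python-level rrects.shape[0], and the in-place slice additions were replaced by adding a stacked centre tensor, presumably to keep the op a single traceable expression. A minimal sketch of the shape difference (dynamic-graph session, for illustration only):

import paddle

x = paddle.rand([3, 5])
static_n = x.shape[0]            # plain Python int, fixed at trace/export time
dynamic_n = paddle.shape(x)[0]   # tensor evaluated at run time, so the exported graph keeps a variable box count
print(static_n, int(dynamic_n))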
......@@ -17,26 +17,21 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import Normal, Constant
from ppdet.core.workspace import register
from ppdet.modeling import ops
from ppdet.modeling import bbox_utils
from ppdet.modeling.proposal_generator.target_layer import RBoxAssigner
import numpy as np
class S2ANetAnchorGenerator(object):
class S2ANetAnchorGenerator(nn.Layer):
"""
S2ANetAnchorGenerator by np
AnchorGenerator by paddle
"""
def __init__(self,
base_size=8,
scales=1.0,
ratios=1.0,
scale_major=True,
ctr=None):
def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
super(S2ANetAnchorGenerator, self).__init__()
self.base_size = base_size
self.scales = scales
self.ratios = ratios
self.scales = paddle.to_tensor(scales)
self.ratios = paddle.to_tensor(ratios)
self.scale_major = scale_major
self.ctr = ctr
self.base_anchors = self.gen_base_anchors()
......@@ -54,7 +49,7 @@ class S2ANetAnchorGenerator(object):
else:
x_ctr, y_ctr = self.ctr
h_ratios = np.sqrt(self.ratios)
h_ratios = paddle.sqrt(self.ratios)
w_ratios = 1 / h_ratios
if self.scale_major:
ws = (w * w_ratios[:] * self.scales[:]).reshape([-1])
......@@ -63,61 +58,51 @@ class S2ANetAnchorGenerator(object):
ws = (w * self.scales[:] * w_ratios[:]).reshape([-1])
hs = (h * self.scales[:] * h_ratios[:]).reshape([-1])
# yapf: disable
base_anchors = np.stack(
base_anchors = paddle.stack(
[
x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
],
axis=-1)
base_anchors = np.round(base_anchors)
# yapf: enable
base_anchors = paddle.round(base_anchors)
return base_anchors
def _meshgrid(self, x, y, row_major=True):
xx, yy = np.meshgrid(x, y)
xx = xx.reshape(-1)
yy = yy.reshape(-1)
yy, xx = paddle.meshgrid(x, y)
yy = yy.reshape([-1])
xx = xx.reshape([-1])
if row_major:
return xx, yy
else:
return yy, xx
def grid_anchors(self, featmap_size, stride=16):
def forward(self, featmap_size, stride=16):
# featmap_size*stride project it to original area
base_anchors = self.base_anchors
feat_h, feat_w = featmap_size
shift_x = np.arange(0, feat_w, 1, 'int32') * stride
shift_y = np.arange(0, feat_h, 1, 'int32') * stride
feat_h = featmap_size[0]
feat_w = featmap_size[1]
shift_x = paddle.arange(0, feat_w, 1, 'int32') * stride
shift_y = paddle.arange(0, feat_h, 1, 'int32') * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
shifts = np.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
# shifts = shifts.type_as(base_anchors)
# first feat_w elements correspond to the first row of shifts
# add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
# shifted anchors (K, A, 4), reshape to (K*A, 4)
#all_anchors = base_anchors[:, :] + shifts[:, :]
all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
# all_anchors = all_anchors.reshape([-1, 4])
# first A rows correspond to A anchors of (0, 0) in feature map,
# then (0, 1), (0, 2), ...
shifts = paddle.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
all_anchors = base_anchors[:, :] + shifts[:, :]
all_anchors = all_anchors.reshape([feat_h * feat_w, 4])
return all_anchors
def valid_flags(self, featmap_size, valid_size):
feat_h, feat_w = featmap_size
valid_h, valid_w = valid_size
assert valid_h <= feat_h and valid_w <= feat_w
valid_x = np.zeros([feat_w], dtype='uint8')
valid_y = np.zeros([feat_h], dtype='uint8')
valid_x = paddle.zeros([feat_w], dtype='uint8')
valid_y = paddle.zeros([feat_h], dtype='uint8')
valid_x[:valid_w] = 1
valid_y[:valid_h] = 1
valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
valid = valid_xx & valid_yy
valid = valid.reshape([-1])
# valid = valid[:, None].expand(
# [valid.size(0), self.num_base_anchors]).reshape([-1])
valid = valid[:, None].expand(
[valid.size(0), self.num_base_anchors]).reshape([-1])
return valid
......@@ -240,8 +225,8 @@ class S2ANetHead(nn.Layer):
anchor_strides=[8, 16, 32, 64, 128],
anchor_scales=[4],
anchor_ratios=[1.0],
target_means=(.0, .0, .0, .0, .0),
target_stds=(1.0, 1.0, 1.0, 1.0, 1.0),
target_means=0.0,
target_stds=1.0,
align_conv_type='AlignConv',
align_conv_size=3,
use_sigmoid_cls=True,
......@@ -278,6 +263,8 @@ class S2ANetHead(nn.Layer):
self.anchor_generators.append(
S2ANetAnchorGenerator(anchor_base, anchor_scales,
anchor_ratios))
self.anchor_generators = paddle.nn.LayerList(self.anchor_generators)
self.add_sublayer('s2anet_anchor_gen', self.anchor_generators)
self.fam_cls_convs = nn.Sequential()
self.fam_reg_convs = nn.Sequential()
......@@ -412,9 +399,9 @@ class S2ANetHead(nn.Layer):
weight_attr=ParamAttr(initializer=Normal(0.0, 0.01)),
bias_attr=ParamAttr(initializer=Constant(0)))
self.base_anchors = dict()
self.featmap_sizes = dict()
self.base_anchors = dict()
self.featmap_size_list = []
self.init_anchors_list = []
self.rbox_anchors_list = []
self.refine_anchor_list = []
def forward(self, feats):
......@@ -424,13 +411,27 @@ class S2ANetHead(nn.Layer):
odm_reg_branch_list = []
odm_cls_branch_list = []
self.featmap_sizes = dict()
self.base_anchors = dict()
fam_reg1_branch_list = []
self.featmap_size_list = []
self.init_anchors_list = []
self.rbox_anchors_list = []
self.refine_anchor_list = []
for i, feat in enumerate(feats):
fam_cls_feat = self.fam_cls_convs(feat)
# prepare anchor
featmap_size = paddle.shape(feat)[-2:]
self.featmap_size_list.append(featmap_size)
init_anchors = self.anchor_generators[i](featmap_size,
self.anchor_strides[i])
init_anchors = paddle.reshape(
init_anchors, [featmap_size[0] * featmap_size[1], 4])
self.init_anchors_list.append(init_anchors)
rbox_anchors = self.rect2rbox(init_anchors)
self.rbox_anchors_list.append(rbox_anchors)
fam_cls_feat = self.fam_cls_convs(feat)
fam_cls = self.fam_cls(fam_cls_feat)
# [N, CLS, H, W] --> [N, H, W, CLS]
fam_cls = fam_cls.transpose([0, 2, 3, 1])
......@@ -446,29 +447,13 @@ class S2ANetHead(nn.Layer):
fam_reg_reshape = paddle.reshape(fam_reg, [fam_reg.shape[0], -1, 5])
fam_reg_branch_list.append(fam_reg_reshape)
# prepare anchor
featmap_size = feat.shape[-2:]
self.featmap_sizes[i] = featmap_size
init_anchors = self.anchor_generators[i].grid_anchors(
featmap_size, self.anchor_strides[i])
init_anchors = bbox_utils.rect2rbox(init_anchors)
self.base_anchors[(i, featmap_size[0])] = init_anchors
# refine anchors
fam_reg1 = fam_reg.clone()
fam_reg1.stop_gradient = True
pd_target_means = paddle.to_tensor(
np.array(
self.target_means, dtype=np.float32), dtype='float32')
pd_target_stds = paddle.to_tensor(
np.array(
self.target_stds, dtype=np.float32), dtype='float32')
pd_init_anchors = paddle.to_tensor(
np.array(
init_anchors, dtype=np.float32), dtype='float32')
refine_anchor = bbox_utils.bbox_decode(
fam_reg1, pd_init_anchors, pd_target_means, pd_target_stds)
rbox_anchors.stop_gradient = True
fam_reg1_branch_list.append(fam_reg1)
refine_anchor = self.bbox_decode(
fam_reg1, rbox_anchors, self.target_stds, self.target_means)
self.refine_anchor_list.append(refine_anchor)
if self.align_conv_type == 'AlignConv':
......@@ -508,6 +493,87 @@ class S2ANetHead(nn.Layer):
odm_cls_branch_list, odm_reg_branch_list)
return self.s2anet_head_out
def rect2rbox(self, bboxes):
"""
:param bboxes: shape (n, 4) (xmin, ymin, xmax, ymax)
:return: dbboxes: shape (n, 5) (x_ctr, y_ctr, w, h, angle)
"""
num_boxes = paddle.shape(bboxes)[0]
x_ctr = (bboxes[:, 2] + bboxes[:, 0]) / 2.0
y_ctr = (bboxes[:, 3] + bboxes[:, 1]) / 2.0
edges1 = paddle.abs(bboxes[:, 2] - bboxes[:, 0])
edges2 = paddle.abs(bboxes[:, 3] - bboxes[:, 1])
rbox_w = paddle.maximum(edges1, edges2)
rbox_h = paddle.minimum(edges1, edges2)
# set angle
inds = edges1 < edges2
inds = paddle.cast(inds, 'int32')
inds1 = inds * paddle.arange(0, num_boxes)
rboxes_angle = inds1 * np.pi / 2.0
rboxes = paddle.stack(
(x_ctr, y_ctr, rbox_w, rbox_h, rboxes_angle), axis=1)
return rboxes
# deltas to rbox
def delta2rbox(self, rrois, deltas, means, stds, wh_ratio_clip=1e-6):
"""
:param rrois: (cx, cy, w, h, theta)
:param deltas: (dx, dy, dw, dh, dtheta)
:param means: means of anchor
:param stds: stds of anchor
:param wh_ratio_clip: clip threshold of wh_ratio
:return:
"""
deltas = paddle.reshape(deltas, [-1, 5])
rrois = paddle.reshape(rrois, [-1, 5])
pd_means = paddle.ones(shape=[5]) * means
pd_stds = paddle.ones(shape=[5]) * stds
denorm_deltas = deltas * pd_stds + pd_means
dx = denorm_deltas[:, 0]
dy = denorm_deltas[:, 1]
dw = denorm_deltas[:, 2]
dh = denorm_deltas[:, 3]
dangle = denorm_deltas[:, 4]
max_ratio = np.abs(np.log(wh_ratio_clip))
dw = paddle.clip(dw, min=-max_ratio, max=max_ratio)
dh = paddle.clip(dh, min=-max_ratio, max=max_ratio)
rroi_x = rrois[:, 0]
rroi_y = rrois[:, 1]
rroi_w = rrois[:, 2]
rroi_h = rrois[:, 3]
rroi_angle = rrois[:, 4]
gx = dx * rroi_w * paddle.cos(rroi_angle) - dy * rroi_h * paddle.sin(
rroi_angle) + rroi_x
gy = dx * rroi_w * paddle.sin(rroi_angle) + dy * rroi_h * paddle.cos(
rroi_angle) + rroi_y
gw = rroi_w * dw.exp()
gh = rroi_h * dh.exp()
ga = np.pi * dangle + rroi_angle
ga = (ga + np.pi / 4) % np.pi - np.pi / 4
bboxes = paddle.stack([gx, gy, gw, gh, ga], axis=-1)
return bboxes
def bbox_decode(self, bbox_preds, anchors, stds, means, wh_ratio_clip=1e-6):
"""decode bbox from deltas
Args:
bbox_preds: bbox_preds, shape=[N,H,W,5]
anchors: anchors, shape=[H,W,5]
return:
bboxes: return decoded bboxes, shape=[N*H*W,5]
"""
num_imgs, H, W, _ = bbox_preds.shape
bbox_delta = paddle.reshape(bbox_preds, [-1, 5])
bboxes = self.delta2rbox(anchors, bbox_delta, means, stds,
wh_ratio_clip)
return bboxes
def get_prediction(self, nms_pre):
refine_anchors = self.refine_anchor_list
fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = self.s2anet_head_out
......@@ -518,6 +584,7 @@ class S2ANetHead(nn.Layer):
nms_pre,
cls_out_channels=self.cls_out_channels,
use_sigmoid_cls=self.use_sigmoid_cls)
return pred_scores, pred_bboxes
def smooth_l1_loss(self, pred, label, delta=1.0 / 9.0):
......@@ -536,172 +603,130 @@ class S2ANetHead(nn.Layer):
return loss
def get_fam_loss(self, fam_target, s2anet_head_out):
(labels, label_weights, bbox_targets, bbox_weights, pos_inds,
neg_inds) = fam_target
fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = s2anet_head_out
fam_cls_losses = []
fam_bbox_losses = []
st_idx = 0
featmap_sizes = [self.featmap_sizes[e] for e in self.featmap_sizes]
(feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
pos_inds, neg_inds) = fam_target
fam_cls_score, fam_bbox_pred = s2anet_head_out
# step1: sample count
num_total_samples = len(pos_inds) + len(
neg_inds) if self.sampling else len(pos_inds)
num_total_samples = max(1, num_total_samples)
for idx, feat_size in enumerate(featmap_sizes):
feat_anchor_num = feat_size[0] * feat_size[1]
# step1: get data
feat_labels = labels[st_idx:st_idx + feat_anchor_num]
feat_label_weights = label_weights[st_idx:st_idx + feat_anchor_num]
feat_bbox_targets = bbox_targets[st_idx:st_idx + feat_anchor_num, :]
feat_bbox_weights = bbox_weights[st_idx:st_idx + feat_anchor_num, :]
st_idx += feat_anchor_num
# step2: calc cls loss
feat_labels = feat_labels.reshape(-1)
feat_label_weights = feat_label_weights.reshape(-1)
fam_cls_score = fam_cls_branch_list[idx]
fam_cls_score = paddle.squeeze(fam_cls_score, axis=0)
fam_cls_score1 = fam_cls_score
# gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
feat_labels = paddle.to_tensor(feat_labels)
feat_labels_one_hot = paddle.nn.functional.one_hot(
feat_labels, self.cls_out_channels + 1)
feat_labels_one_hot = feat_labels_one_hot[:, 1:]
feat_labels_one_hot.stop_gradient = True
num_total_samples = paddle.to_tensor(
num_total_samples, dtype='float32', stop_gradient=True)
fam_cls = F.sigmoid_focal_loss(
fam_cls_score1,
feat_labels_one_hot,
normalizer=num_total_samples,
reduction='none')
feat_label_weights = feat_label_weights.reshape(
feat_label_weights.shape[0], 1)
feat_label_weights = np.repeat(
feat_label_weights, self.cls_out_channels, axis=1)
feat_label_weights = paddle.to_tensor(
feat_label_weights, stop_gradient=True)
fam_cls = fam_cls * feat_label_weights
fam_cls_total = paddle.sum(fam_cls)
fam_cls_losses.append(fam_cls_total)
# step3: regression loss
fam_bbox_pred = fam_reg_branch_list[idx]
feat_bbox_targets = paddle.to_tensor(
feat_bbox_targets, dtype='float32', stop_gradient=True)
feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
fam_bbox_pred = fam_reg_branch_list[idx]
fam_bbox_pred = paddle.squeeze(fam_bbox_pred, axis=0)
fam_bbox_pred = paddle.reshape(fam_bbox_pred, [-1, 5])
fam_bbox = self.smooth_l1_loss(fam_bbox_pred, feat_bbox_targets)
loss_weight = paddle.to_tensor(
self.reg_loss_weight, dtype='float32', stop_gradient=True)
fam_bbox = paddle.multiply(fam_bbox, loss_weight)
feat_bbox_weights = paddle.to_tensor(
feat_bbox_weights, stop_gradient=True)
fam_bbox = fam_bbox * feat_bbox_weights
fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
fam_bbox_losses.append(fam_bbox_total)
fam_cls_loss = paddle.add_n(fam_cls_losses)
# step2: calc cls loss
feat_labels = feat_labels.reshape(-1)
feat_label_weights = feat_label_weights.reshape(-1)
fam_cls_score = paddle.squeeze(fam_cls_score, axis=0)
fam_cls_score1 = fam_cls_score
# gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
feat_labels = feat_labels + 1
feat_labels = paddle.to_tensor(feat_labels)
feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
feat_labels_one_hot = feat_labels_one_hot[:, 1:]
feat_labels_one_hot.stop_gradient = True
num_total_samples = paddle.to_tensor(
num_total_samples, dtype='float32', stop_gradient=True)
fam_cls = F.sigmoid_focal_loss(
fam_cls_score1,
feat_labels_one_hot,
normalizer=num_total_samples,
reduction='none')
feat_label_weights = feat_label_weights.reshape(
feat_label_weights.shape[0], 1)
feat_label_weights = np.repeat(
feat_label_weights, self.cls_out_channels, axis=1)
feat_label_weights = paddle.to_tensor(
feat_label_weights, stop_gradient=True)
fam_cls = fam_cls * feat_label_weights
fam_cls_total = paddle.sum(fam_cls)
# step3: regression loss
feat_bbox_targets = paddle.to_tensor(
feat_bbox_targets, dtype='float32', stop_gradient=True)
feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
fam_bbox_pred = paddle.squeeze(fam_bbox_pred, axis=0)
fam_bbox_pred = paddle.reshape(fam_bbox_pred, [-1, 5])
fam_bbox = self.smooth_l1_loss(fam_bbox_pred, feat_bbox_targets)
loss_weight = paddle.to_tensor(
self.reg_loss_weight, dtype='float32', stop_gradient=True)
fam_bbox = paddle.multiply(fam_bbox, loss_weight)
feat_bbox_weights = paddle.to_tensor(
feat_bbox_weights, stop_gradient=True)
fam_bbox = fam_bbox * feat_bbox_weights
fam_bbox_total = paddle.sum(fam_bbox) / num_total_samples
fam_cls_loss_weight = paddle.to_tensor(
self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
fam_cls_loss = fam_cls_loss * fam_cls_loss_weight
fam_reg_loss = paddle.add_n(fam_bbox_losses)
fam_cls_loss = fam_cls_total * fam_cls_loss_weight
fam_reg_loss = paddle.add_n(fam_bbox_total)
return fam_cls_loss, fam_reg_loss
def get_odm_loss(self, odm_target, s2anet_head_out):
(labels, label_weights, bbox_targets, bbox_weights, pos_inds,
neg_inds) = odm_target
fam_cls_branch_list, fam_reg_branch_list, odm_cls_branch_list, odm_reg_branch_list = s2anet_head_out
odm_cls_losses = []
odm_bbox_losses = []
st_idx = 0
featmap_sizes = [self.featmap_sizes[e] for e in self.featmap_sizes]
(feat_labels, feat_label_weights, feat_bbox_targets, feat_bbox_weights,
pos_inds, neg_inds) = odm_target
odm_cls_score, odm_bbox_pred = s2anet_head_out
# step1: sample count
num_total_samples = len(pos_inds) + len(
neg_inds) if self.sampling else len(pos_inds)
num_total_samples = max(1, num_total_samples)
for idx, feat_size in enumerate(featmap_sizes):
feat_anchor_num = feat_size[0] * feat_size[1]
# step1: get data
feat_labels = labels[st_idx:st_idx + feat_anchor_num]
feat_label_weights = label_weights[st_idx:st_idx + feat_anchor_num]
feat_bbox_targets = bbox_targets[st_idx:st_idx + feat_anchor_num, :]
feat_bbox_weights = bbox_weights[st_idx:st_idx + feat_anchor_num, :]
st_idx += feat_anchor_num
# step2: calc cls loss
feat_labels = feat_labels.reshape(-1)
feat_label_weights = feat_label_weights.reshape(-1)
odm_cls_score = odm_cls_branch_list[idx]
odm_cls_score = paddle.squeeze(odm_cls_score, axis=0)
odm_cls_score1 = odm_cls_score
# gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
feat_labels = paddle.to_tensor(feat_labels)
feat_labels_one_hot = paddle.nn.functional.one_hot(
feat_labels, self.cls_out_channels + 1)
feat_labels_one_hot = feat_labels_one_hot[:, 1:]
feat_labels_one_hot.stop_gradient = True
num_total_samples = paddle.to_tensor(
num_total_samples, dtype='float32', stop_gradient=True)
odm_cls = F.sigmoid_focal_loss(
odm_cls_score1,
feat_labels_one_hot,
normalizer=num_total_samples,
reduction='none')
feat_label_weights = feat_label_weights.reshape(
feat_label_weights.shape[0], 1)
feat_label_weights = np.repeat(
feat_label_weights, self.cls_out_channels, axis=1)
feat_label_weights = paddle.to_tensor(feat_label_weights)
feat_label_weights.stop_gradient = True
odm_cls = odm_cls * feat_label_weights
odm_cls_total = paddle.sum(odm_cls)
odm_cls_losses.append(odm_cls_total)
# # step3: regression loss
feat_bbox_targets = paddle.to_tensor(
feat_bbox_targets, dtype='float32')
feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
feat_bbox_targets.stop_gradient = True
odm_bbox_pred = odm_reg_branch_list[idx]
odm_bbox_pred = paddle.squeeze(odm_bbox_pred, axis=0)
odm_bbox_pred = paddle.reshape(odm_bbox_pred, [-1, 5])
odm_bbox = self.smooth_l1_loss(odm_bbox_pred, feat_bbox_targets)
loss_weight = paddle.to_tensor(
self.reg_loss_weight, dtype='float32', stop_gradient=True)
odm_bbox = paddle.multiply(odm_bbox, loss_weight)
feat_bbox_weights = paddle.to_tensor(
feat_bbox_weights, stop_gradient=True)
odm_bbox = odm_bbox * feat_bbox_weights
odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
odm_bbox_losses.append(odm_bbox_total)
odm_cls_loss = paddle.add_n(odm_cls_losses)
# step2: calc cls loss
feat_labels = feat_labels.reshape(-1)
feat_label_weights = feat_label_weights.reshape(-1)
odm_cls_score = paddle.squeeze(odm_cls_score, axis=0)
odm_cls_score1 = odm_cls_score
# gt_classes 0~14(data), feat_labels 0~14, sigmoid_focal_loss need class>=1
# for debug 0426
feat_labels = feat_labels + 1
feat_labels = paddle.to_tensor(feat_labels)
feat_labels_one_hot = F.one_hot(feat_labels, self.cls_out_channels + 1)
feat_labels_one_hot = feat_labels_one_hot[:, 1:]
feat_labels_one_hot.stop_gradient = True
num_total_samples = paddle.to_tensor(
num_total_samples, dtype='float32', stop_gradient=True)
odm_cls = F.sigmoid_focal_loss(
odm_cls_score1,
feat_labels_one_hot,
normalizer=num_total_samples,
reduction='none')
feat_label_weights = feat_label_weights.reshape(
feat_label_weights.shape[0], 1)
feat_label_weights = np.repeat(
feat_label_weights, self.cls_out_channels, axis=1)
feat_label_weights = paddle.to_tensor(
feat_label_weights, stop_gradient=True)
odm_cls = odm_cls * feat_label_weights
odm_cls_total = paddle.sum(odm_cls)
# step3: regression loss
feat_bbox_targets = paddle.to_tensor(
feat_bbox_targets, dtype='float32', stop_gradient=True)
feat_bbox_targets = paddle.reshape(feat_bbox_targets, [-1, 5])
odm_bbox_pred = paddle.squeeze(odm_bbox_pred, axis=0)
odm_bbox_pred = paddle.reshape(odm_bbox_pred, [-1, 5])
odm_bbox = self.smooth_l1_loss(odm_bbox_pred, feat_bbox_targets)
loss_weight = paddle.to_tensor(
self.reg_loss_weight, dtype='float32', stop_gradient=True)
odm_bbox = paddle.multiply(odm_bbox, loss_weight)
feat_bbox_weights = paddle.to_tensor(
feat_bbox_weights, stop_gradient=True)
odm_bbox = odm_bbox * feat_bbox_weights
odm_bbox_total = paddle.sum(odm_bbox) / num_total_samples
odm_cls_loss_weight = paddle.to_tensor(
self.cls_loss_weight[1], dtype='float32', stop_gradient=True)
odm_cls_loss = odm_cls_loss * odm_cls_loss_weight
odm_reg_loss = paddle.add_n(odm_bbox_losses)
self.cls_loss_weight[0], dtype='float32', stop_gradient=True)
odm_cls_loss = odm_cls_total * odm_cls_loss_weight
odm_reg_loss = paddle.add_n(odm_bbox_total)
return odm_cls_loss, odm_reg_loss
def get_loss(self, inputs):
......@@ -723,46 +748,38 @@ class S2ANetHead(nn.Layer):
is_crowd = inputs['is_crowd'][im_id].numpy()
gt_labels = gt_labels + 1
# featmap_sizes
featmap_sizes = [self.featmap_sizes[e] for e in self.featmap_sizes]
anchors_list, valid_flag_list = self.get_init_anchors(featmap_sizes,
np_im_shape)
anchors_list_all = []
for ii, anchor in enumerate(anchors_list):
anchor = anchor.reshape(-1, 4)
anchor = bbox_utils.rect2rbox(anchor)
anchors_list_all.extend(anchor)
anchors_list_all = np.array(anchors_list_all)
# get im_feat
fam_cls_feats_list = [e[im_id] for e in self.s2anet_head_out[0]]
fam_reg_feats_list = [e[im_id] for e in self.s2anet_head_out[1]]
odm_cls_feats_list = [e[im_id] for e in self.s2anet_head_out[2]]
odm_reg_feats_list = [e[im_id] for e in self.s2anet_head_out[3]]
im_s2anet_head_out = (fam_cls_feats_list, fam_reg_feats_list,
odm_cls_feats_list, odm_reg_feats_list)
# FAM
im_fam_target = self.anchor_assign(anchors_list_all, gt_bboxes,
gt_labels, is_crowd)
if im_fam_target is not None:
for idx, rbox_anchors in enumerate(self.rbox_anchors_list):
rbox_anchors = rbox_anchors.numpy()
rbox_anchors = rbox_anchors.reshape(-1, 5)
im_fam_target = self.anchor_assign(rbox_anchors, gt_bboxes,
gt_labels, is_crowd)
# feat
fam_cls_feat = self.s2anet_head_out[0][idx][im_id]
fam_reg_feat = self.s2anet_head_out[1][idx][im_id]
im_s2anet_fam_feat = (fam_cls_feat, fam_reg_feat)
im_fam_cls_loss, im_fam_reg_loss = self.get_fam_loss(
im_fam_target, im_s2anet_head_out)
im_fam_target, im_s2anet_fam_feat)
fam_cls_loss_lst.append(im_fam_cls_loss)
fam_reg_loss_lst.append(im_fam_reg_loss)
# ODM
refine_anchors_list, valid_flag_list = self.get_refine_anchors(
featmap_sizes, image_shape=np_im_shape)
refine_anchors_list = np.array(refine_anchors_list)
im_odm_target = self.anchor_assign(refine_anchors_list, gt_bboxes,
gt_labels, is_crowd)
for idx, refine_anchors in enumerate(self.refine_anchor_list):
refine_anchors = refine_anchors.numpy()
refine_anchors = refine_anchors.reshape(-1, 5)
im_odm_target = self.anchor_assign(refine_anchors, gt_bboxes,
gt_labels, is_crowd)
odm_cls_feat = self.s2anet_head_out[2][idx][im_id]
odm_reg_feat = self.s2anet_head_out[3][idx][im_id]
if im_odm_target is not None:
im_s2anet_odm_feat = (odm_cls_feat, odm_reg_feat)
im_odm_cls_loss, im_odm_reg_loss = self.get_odm_loss(
im_odm_target, im_s2anet_head_out)
im_odm_target, im_s2anet_odm_feat)
odm_cls_loss_lst.append(im_odm_cls_loss)
odm_reg_loss_lst.append(im_odm_reg_loss)
fam_cls_loss = paddle.add_n(fam_cls_loss_lst)
fam_reg_loss = paddle.add_n(fam_reg_loss_lst)
odm_cls_loss = paddle.add_n(odm_cls_loss_lst)
......@@ -774,65 +791,6 @@ class S2ANetHead(nn.Layer):
'odm_reg_loss': odm_reg_loss
}
def get_init_anchors(self, featmap_sizes, image_shape):
"""Get anchors according to feature map sizes.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
image_shape (list[dict]): Image meta info.
Returns:
tuple: anchors of each image, valid flags of each image
"""
num_levels = len(featmap_sizes)
# since feature map sizes of all images are the same, we only compute
# anchors for one time
anchor_list = []
for i in range(num_levels):
anchors = self.anchor_generators[i].grid_anchors(
featmap_sizes[i], self.anchor_strides[i])
anchor_list.append(anchors)
# for each image, we compute valid flags of multi level anchors
valid_flag_list = []
for i in range(num_levels):
anchor_stride = self.anchor_strides[i]
feat_h, feat_w = featmap_sizes[i]
h, w = image_shape
valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
flags = self.anchor_generators[i].valid_flags(
(feat_h, feat_w), (valid_feat_h, valid_feat_w))
valid_flag_list.append(flags)
return anchor_list, valid_flag_list
def get_refine_anchors(self, featmap_sizes, image_shape):
num_levels = len(featmap_sizes)
refine_anchors_list = []
for i in range(num_levels):
refine_anchor = self.refine_anchor_list[i]
refine_anchor = paddle.squeeze(refine_anchor, axis=0)
refine_anchor = refine_anchor.numpy()
refine_anchor = np.reshape(refine_anchor,
[-1, refine_anchor.shape[-1]])
refine_anchors_list.extend(refine_anchor)
# for each image, we compute valid flags of multi level anchors
valid_flag_list = []
for i in range(num_levels):
anchor_stride = self.anchor_strides[i]
feat_h, feat_w = featmap_sizes[i]
h, w = image_shape
valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
flags = self.anchor_generators[i].valid_flags(
(feat_h, feat_w), (valid_feat_h, valid_feat_w))
valid_flag_list.append(flags)
return refine_anchors_list, valid_flag_list
def get_bboxes(self, cls_score_list, bbox_pred_list, mlvl_anchors, nms_pre,
cls_out_channels, use_sigmoid_cls):
assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
......@@ -866,14 +824,8 @@ class S2ANetHead(nn.Layer):
bbox_pred = paddle.gather(bbox_pred, topk_inds)
scores = paddle.gather(scores, topk_inds)
pd_target_means = paddle.to_tensor(
np.array(
self.target_means, dtype=np.float32), dtype='float32')
pd_target_stds = paddle.to_tensor(
np.array(
self.target_stds, dtype=np.float32), dtype='float32')
bboxes = bbox_utils.delta2rbox(anchors, bbox_pred, pd_target_means,
pd_target_stds)
bboxes = self.delta2rbox(anchors, bbox_pred, self.target_means,
self.target_stds)
mlvl_bboxes.append(bboxes)
mlvl_scores.append(scores)
......
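S2ANetAnchorGenerator is now an nn.Layer whose forward(featmap_size, stride) builds grid anchors inside the traced graph. The mechanics this diff ports from numpy to paddle are the usual base-anchor-plus-shift construction; a small numpy restatement (base anchor values hypothetical):

import numpy as np

def grid_anchors(base_anchors, feat_h, feat_w, stride):
    # base_anchors: [A, 4] anchors defined at the feature-map origin
    shift_x = np.arange(0, feat_w) * stride
    shift_y = np.arange(0, feat_h) * stride
    xx, yy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack([xx.ravel(), yy.ravel(), xx.ravel(), yy.ravel()], axis=-1)   # [K, 4]
    # add A anchors to K shifts -> [K, A, 4], then flatten to [K*A, 4]
    return (base_anchors[None, :, :] + shifts[:, None, :]).reshape(-1, 4)

base = np.array([[-16., -16., 16., 16.]])   # one square base anchor, hypothetical
print(grid_anchors(base, feat_h=2, feat_w=3, stride=8).shape)   # (6, 4)

The paddle version adds base_anchors and shifts directly and reshapes to [feat_h * feat_w, 4], which assumes a single base anchor per location; that matches the defaults used here (anchor_scales=[4], anchor_ratios=[1.0]).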
......@@ -17,7 +17,7 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly, pd_rbox2poly
from ppdet.modeling.bbox_utils import nonempty_bbox, rbox2poly
try:
from collections.abc import Sequence
except Exception:
......@@ -214,7 +214,7 @@ class FCOSPostProcess(object):
@register
class S2ANetBBoxPostProcess(object):
class S2ANetBBoxPostProcess(nn.Layer):
__shared__ = ['num_classes']
__inject__ = ['nms']
......@@ -225,41 +225,43 @@ class S2ANetBBoxPostProcess(object):
self.min_bbox_size = min_bbox_size
self.nms = nms
self.origin_shape_list = []
self.fake_pred_cls_score_bbox = paddle.to_tensor(
np.array(
[[-1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
dtype='float32'))
self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
def __call__(self, pred_scores, pred_bboxes):
def forward(self, pred_scores, pred_bboxes):
"""
pred_scores : [N, M] score
pred_bboxes : [N, 5] xc, yc, w, h, a
im_shape : [N, 2] im_shape
scale_factor : [N, 2] scale_factor
"""
pred_ploys = pd_rbox2poly(pred_bboxes)
pred_ploys = paddle.reshape(
pred_ploys, [1, pred_ploys.shape[0], pred_ploys.shape[1]])
pred_ploys0 = rbox2poly(pred_bboxes)
pred_ploys = paddle.unsqueeze(pred_ploys0, axis=0)
pred_scores = paddle.to_tensor(pred_scores)
# pred_scores [NA, 16] --> [16, NA]
pred_scores = paddle.transpose(pred_scores, [1, 0])
pred_scores = paddle.reshape(
pred_scores, [1, pred_scores.shape[0], pred_scores.shape[1]])
pred_scores0 = paddle.transpose(pred_scores, [1, 0])
pred_scores = paddle.unsqueeze(pred_scores0, axis=0)
pred_cls_score_bbox, bbox_num, _ = self.nms(pred_ploys, pred_scores,
self.num_classes)
# Prevent empty bbox_pred from decode or NMS.
# Bboxes and score before NMS may be empty due to the score threshold.
if pred_cls_score_bbox.shape[0] == 0:
pred_cls_score_bbox = paddle.to_tensor(
np.array(
[[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[
1] <= 1:
pred_cls_score_bbox = self.fake_pred_cls_score_bbox
bbox_num = self.fake_bbox_num
pred_cls_score_bbox = paddle.reshape(pred_cls_score_bbox, [-1, 10])
assert pred_cls_score_bbox.shape[1] == 10
return pred_cls_score_bbox, bbox_num
def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
"""
Rescale, clip and filter the bbox from the output of NMS to
get final prediction.
Args:
bboxes(Tensor): bboxes [N, 10]
bbox_num(Tensor): bbox_num
......@@ -270,6 +272,7 @@ class S2ANetBBoxPostProcess(object):
including labels, scores and bboxes. The size of
bboxes are corresponding to the original image.
"""
assert bboxes.shape[1] == 10
origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
origin_shape_list = []
......@@ -292,7 +295,7 @@ class S2ANetBBoxPostProcess(object):
# bboxes: [N, 10], label, score, bbox
pred_label_score = bboxes[:, 0:2]
pred_bbox = bboxes[:, 2:10:1]
pred_bbox = bboxes[:, 2:]
# rescale bbox to original image
scaled_bbox = pred_bbox / scale_factor_list
......
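S2ANetBBoxPostProcess is now an nn.Layer and pre-builds a fake 1x10 result ([label=-1, score=0, eight zero coordinates]) plus a fake bbox_num of 1, which forward() returns whenever NMS yields nothing usable; that keeps every output a fixed-width [N, 10] tensor, as the deployed graph expects. In spirit (shapes from the diff, wrapper name hypothetical):

import numpy as np
import paddle

fake_pred = paddle.to_tensor(np.array([[-1, 0., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype='float32'))  # [1, 10]
fake_num = paddle.to_tensor(np.array([1], dtype='int32'))

def pad_empty(pred_cls_score_bbox, bbox_num):
    # mirrors the guard in forward(): fall back to the fake tensors when NMS returns nothing
    if pred_cls_score_bbox.shape[0] <= 0 or pred_cls_score_bbox.shape[1] <= 1:
        return fake_pred, fake_num
    return paddle.reshape(pred_cls_score_bbox, [-1, 10]), bbox_num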
......@@ -16,7 +16,6 @@ import paddle
from ppdet.core.workspace import register, serializable
from .target import rpn_anchor_target, generate_proposal_target, generate_mask_target, libra_generate_proposal_target
from ppdet.modeling import bbox_utils
import numpy as np
......@@ -283,13 +282,58 @@ class RBoxAssigner(object):
"""
if anchors.ndim == 3:
anchors = anchors.reshape(-1, anchor.shape[-1])
anchors = anchors.reshape(-1, anchors.shape[-1])
assert anchors.ndim == 2
anchor_num = anchors.shape[0]
anchor_valid = np.ones((anchor_num), np.uint8)
anchor_inds = np.arange(anchor_num)
return anchor_inds
def rbox2delta(self,
proposals,
gt,
means=[0, 0, 0, 0, 0],
stds=[1, 1, 1, 1, 1]):
"""
Args:
proposals: tensor [N, 5]
gt: gt [N, 5]
means: means [5]
stds: stds [5]
Returns:
"""
proposals = proposals.astype(np.float64)
PI = np.pi
gt_widths = gt[..., 2]
gt_heights = gt[..., 3]
gt_angle = gt[..., 4]
proposals_widths = proposals[..., 2]
proposals_heights = proposals[..., 3]
proposals_angle = proposals[..., 4]
coord = gt[..., 0:2] - proposals[..., 0:2]
dx = (np.cos(proposals[..., 4]) * coord[..., 0] +
np.sin(proposals[..., 4]) * coord[..., 1]) / proposals_widths
dy = (-np.sin(proposals[..., 4]) * coord[..., 0] +
np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights
dw = np.log(gt_widths / proposals_widths)
dh = np.log(gt_heights / proposals_heights)
da = (gt_angle - proposals_angle)
da = (da + PI / 4) % PI - PI / 4
da /= PI
deltas = np.stack([dx, dy, dw, dh, da], axis=-1)
means = np.array(means, dtype=deltas.dtype)
stds = np.array(stds, dtype=deltas.dtype)
deltas = (deltas - means) / stds
deltas = deltas.astype(np.float32)
return deltas
def assign_anchor(self,
anchors,
gt_bboxes,
......@@ -405,8 +449,8 @@ class RBoxAssigner(object):
#print('ancho target pos_inds', pos_inds, len(pos_inds))
pos_sampled_gt_boxes = gt_bboxes[anchor_gt_bbox_inds[pos_inds]]
if len(pos_inds) > 0:
pos_bbox_targets = bbox_utils.rbox2delta(pos_sampled_anchors,
pos_sampled_gt_boxes)
pos_bbox_targets = self.rbox2delta(pos_sampled_anchors,
pos_sampled_gt_boxes)
bbox_targets[pos_inds, :] = pos_bbox_targets
bbox_weights[pos_inds, :] = 1.0
......
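rbox2delta now lives on RBoxAssigner itself (the free function with the same body was removed from bbox_utils above). For intuition, with zero-angle boxes the formulas reduce to the familiar axis-aligned deltas plus a normalized angle term; a tiny numeric check (values hypothetical, angle wrap omitted since both angles are zero):

import numpy as np

proposals = np.array([[50., 50., 100., 40., 0.0]])   # [x_ctr, y_ctr, w, h, angle]
gt        = np.array([[60., 55., 120., 50., 0.0]])

coord = gt[..., :2] - proposals[..., :2]
dx = coord[..., 0] / proposals[..., 2]               # cos(0)=1, sin(0)=0
dy = coord[..., 1] / proposals[..., 3]
dw = np.log(gt[..., 2] / proposals[..., 2])
dh = np.log(gt[..., 3] / proposals[..., 3])
da = (gt[..., 4] - proposals[..., 4]) / np.pi
print(dx, dy, dw, dh, da)                            # [0.1] [0.125] [~0.182] [~0.223] [0.]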