未验证 提交 c103d025 编写于 作者: W wangguanzhong 提交者: GitHub

Clean fluid (#6075)

* clean fluid

* mv static to legacy

* remove yolo box

* revert legacy dir

* revert static link

* update in_dynamic_mode

* clean iou_similarity, collect_fpn_proposals, bipartite_match
上级 ba3ebe20
...@@ -23,7 +23,7 @@ else: ...@@ -23,7 +23,7 @@ else:
import numpy as np import numpy as np
from paddle.io import DataLoader, DistributedBatchSampler from paddle.io import DataLoader, DistributedBatchSampler
from paddle.fluid.dataloader.collate import default_collate_fn from .utils import default_collate_fn
from ppdet.core.workspace import register from ppdet.core.workspace import register
from . import transform from . import transform
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import numbers
import numpy as np
try:
from collections.abc import Sequence, Mapping
except:
from collections import Sequence, Mapping
def default_collate_fn(batch):
    """
    Default batch collating function for :code:`paddle.io.DataLoader`.

    Takes a list of samples and recursively merges them into one batched
    structure. The type of the first sample decides how the whole batch
    is handled:

    - ``numpy.ndarray``: samples are stacked along a new leading axis.
    - number (``numbers.Number``): samples are packed into a 1-D
      ``numpy.ndarray``.
    - ``str`` / ``bytes``: the batch is returned unchanged.
    - mapping (e.g. ``dict``): every key of the first sample is collated
      separately and a ``dict`` is returned.
    - sequence (e.g. ``list`` / ``tuple``): samples are transposed so that
      each field is collated separately and a ``list`` is returned.

    e.g. for the following input data:

        [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
         {'image': np.array(shape=[3, 224, 224]), 'label': 3},
         {'image': np.array(shape=[3, 224, 224]), 'label': 4},
         {'image': np.array(shape=[3, 224, 224]), 'label': 5},]

    the result is:

        {'image': np.array(shape=[4, 3, 224, 224]),
         'label': np.array([1, 3, 4, 5])}

    Args:
        batch(list of sample data): a non-empty list of samples; the
            first element is inspected to pick the collating rule.

    Returns:
        Batched data: the samples merged as described above.

    Raises:
        RuntimeError: if sequence samples do not all have the same number
            of fields.
        TypeError: if the sample type is none of the supported types.
    """
    sample = batch[0]

    if isinstance(sample, np.ndarray):
        return np.stack(batch, axis=0)

    if isinstance(sample, numbers.Number):
        return np.array(batch)

    # Must be tested before Sequence: str and bytes are Sequences as well.
    if isinstance(sample, (str, bytes)):
        return batch

    if isinstance(sample, Mapping):
        return {
            key: default_collate_fn([d[key] for d in batch])
            for key in sample
        }

    if isinstance(sample, Sequence):
        sample_fields_num = len(sample)
        if any(len(item) != sample_fields_num for item in batch):
            raise RuntimeError(
                "fields number not same among samples in a batch")
        # zip(*batch) regroups the samples field by field.
        return [default_collate_fn(fields) for fields in zip(*batch)]

    raise TypeError("batch data can only contain: numpy.ndarray, dict, "
                    "list, number, str, but got {}".format(type(sample)))
...@@ -23,7 +23,6 @@ import random ...@@ -23,7 +23,6 @@ import random
import paddle import paddle
import paddle.nn.functional as F import paddle.nn.functional as F
import paddle.distributed as dist import paddle.distributed as dist
from ppdet.modeling.ops import paddle_distributed_is_initialized
__all__ = ['YOLOX'] __all__ = ['YOLOX']
......
...@@ -22,8 +22,7 @@ import paddle.nn as nn ...@@ -22,8 +22,7 @@ import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ..ops import iou_similarity from ..bbox_utils import iou_similarity, batch_iou_similarity
from ..bbox_utils import iou_similarity as batch_iou_similarity
from ..bbox_utils import bbox_center from ..bbox_utils import bbox_center
from .utils import (check_points_inside_bboxes, compute_max_iou_anchor, from .utils import (check_points_inside_bboxes, compute_max_iou_anchor,
compute_max_iou_gt) compute_max_iou_gt)
......
...@@ -21,7 +21,7 @@ import paddle.nn as nn ...@@ -21,7 +21,7 @@ import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ..bbox_utils import iou_similarity from ..bbox_utils import batch_iou_similarity
from .utils import (gather_topk_anchors, check_points_inside_bboxes, from .utils import (gather_topk_anchors, check_points_inside_bboxes,
compute_max_iou_anchor) compute_max_iou_anchor)
......
...@@ -482,8 +482,7 @@ class BasicLayer(nn.Layer): ...@@ -482,8 +482,7 @@ class BasicLayer(nn.Layer):
# calculate attention mask for SW-MSA # calculate attention mask for SW-MSA
Hp = int(np.ceil(H / self.window_size)) * self.window_size Hp = int(np.ceil(H / self.window_size)) * self.window_size
Wp = int(np.ceil(W / self.window_size)) * self.window_size Wp = int(np.ceil(W / self.window_size)) * self.window_size
img_mask = paddle.fluid.layers.zeros( img_mask = paddle.zeros([1, Hp, Wp, 1], dtype='float32') # 1 Hp Wp 1
[1, Hp, Wp, 1], dtype='float32') # 1 Hp Wp 1
h_slices = (slice(0, -self.window_size), h_slices = (slice(0, -self.window_size),
slice(-self.window_size, -self.shift_size), slice(-self.window_size, -self.shift_size),
slice(-self.shift_size, None)) slice(-self.shift_size, None))
......
...@@ -278,8 +278,8 @@ def decode_yolo(box, anchor, downsample_ratio): ...@@ -278,8 +278,8 @@ def decode_yolo(box, anchor, downsample_ratio):
return [x1, y1, w1, h1] return [x1, y1, w1, h1]
def iou_similarity(box1, box2, eps=1e-9): def batch_iou_similarity(box1, box2, eps=1e-9):
"""Calculate iou of box1 and box2 """Calculate iou of box1 and box2 in batch
Args: Args:
box1 (Tensor): box with the shape [N, M1, 4] box1 (Tensor): box with the shape [N, M1, 4]
...@@ -866,3 +866,26 @@ def bbox2delta_v2(src_boxes, ...@@ -866,3 +866,26 @@ def bbox2delta_v2(src_boxes,
stds = paddle.to_tensor(stds, place=src_boxes.place) stds = paddle.to_tensor(stds, place=src_boxes.place)
deltas = (deltas - means) / stds deltas = (deltas - means) / stds
return deltas return deltas
def iou_similarity(box1, box2, eps=1e-10):
    """Compute the pairwise IoU between two sets of boxes.

    Args:
        box1 (Tensor): boxes with the shape [M1, 4]
        box2 (Tensor): boxes with the shape [M2, 4]
        eps (float): small constant added to the union so the division
            never hits zero. Default: 1e-10
    Return:
        iou (Tensor): iou between box1 and box2 with the shape [M1, M2]
    """
    # Add broadcast axes so each box of box1 is paired with each box of box2.
    lhs = box1.unsqueeze(1)  # [M1, 4] -> [M1, 1, 4]
    rhs = box2.unsqueeze(0)  # [M2, 4] -> [1, M2, 4]

    # The first two columns are used as one corner, the last two as the other.
    lhs_min, lhs_max = lhs[:, :, 0:2], lhs[:, :, 2:4]
    rhs_min, rhs_max = rhs[:, :, 0:2], rhs[:, :, 2:4]

    # Individual areas; negative extents are clipped to zero.
    area1 = (lhs_max - lhs_min).clip(0).prod(-1)
    area2 = (rhs_max - rhs_min).clip(0).prod(-1)

    # Intersection rectangle: largest min corner, smallest max corner.
    inter_min = paddle.maximum(lhs_min, rhs_min)
    inter_max = paddle.minimum(lhs_max, rhs_max)
    overlap = (inter_max - inter_min).clip(0).prod(-1)

    return overlap / (area1 + area2 - overlap + eps)
...@@ -23,7 +23,6 @@ import paddle.nn as nn ...@@ -23,7 +23,6 @@ import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from paddle import ParamAttr from paddle import ParamAttr
from paddle.nn.initializer import Normal, Constant from paddle.nn.initializer import Normal, Constant
from paddle.fluid.dygraph import parallel_helper
from ppdet.modeling.ops import get_static_shape from ppdet.modeling.ops import get_static_shape
from ..initializer import normal_ from ..initializer import normal_
...@@ -726,8 +725,7 @@ class PicoHeadV2(GFLHead): ...@@ -726,8 +725,7 @@ class PicoHeadV2(GFLHead):
loss_dfl = paddle.zeros([1]) loss_dfl = paddle.zeros([1])
avg_factor = flatten_assigned_scores.sum() avg_factor = flatten_assigned_scores.sum()
if paddle.fluid.core.is_compiled_with_dist( if paddle.distributed.get_world_size() > 1:
) and parallel_helper._is_parallel_ctx_initialized():
paddle.distributed.all_reduce(avg_factor) paddle.distributed.all_reduce(avg_factor)
avg_factor = paddle.clip( avg_factor = paddle.clip(
avg_factor / paddle.distributed.get_world_size(), min=1) avg_factor / paddle.distributed.get_world_size(), min=1)
......
...@@ -22,7 +22,7 @@ from ..losses import GIoULoss ...@@ -22,7 +22,7 @@ from ..losses import GIoULoss
from ..initializer import bias_init_with_prob, constant_, normal_ from ..initializer import bias_init_with_prob, constant_, normal_
from ..assigners.utils import generate_anchors_for_grid_cell from ..assigners.utils import generate_anchors_for_grid_cell
from ppdet.modeling.backbones.cspresnet import ConvBNLayer from ppdet.modeling.backbones.cspresnet import ConvBNLayer
from ppdet.modeling.ops import get_static_shape, paddle_distributed_is_initialized, get_act_fn from ppdet.modeling.ops import get_static_shape, get_act_fn
from ppdet.modeling.layers import MultiClassNMS from ppdet.modeling.layers import MultiClassNMS
__all__ = ['PPYOLOEHead'] __all__ = ['PPYOLOEHead']
...@@ -343,7 +343,7 @@ class PPYOLOEHead(nn.Layer): ...@@ -343,7 +343,7 @@ class PPYOLOEHead(nn.Layer):
loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l) loss_cls = self._focal_loss(pred_scores, assigned_scores, alpha_l)
assigned_scores_sum = assigned_scores.sum() assigned_scores_sum = assigned_scores.sum()
if paddle_distributed_is_initialized(): if paddle.distributed.get_world_size() > 1:
paddle.distributed.all_reduce(assigned_scores_sum) paddle.distributed.all_reduce(assigned_scores_sum)
assigned_scores_sum = paddle.clip( assigned_scores_sum = paddle.clip(
assigned_scores_sum / paddle.distributed.get_world_size(), assigned_scores_sum / paddle.distributed.get_world_size(),
......
...@@ -554,10 +554,15 @@ class YOLOBox(object): ...@@ -554,10 +554,15 @@ class YOLOBox(object):
origin_shape = im_shape / scale_factor origin_shape = im_shape / scale_factor
origin_shape = paddle.cast(origin_shape, 'int32') origin_shape = paddle.cast(origin_shape, 'int32')
for i, head_out in enumerate(yolo_head_out): for i, head_out in enumerate(yolo_head_out):
boxes, scores = ops.yolo_box(head_out, origin_shape, anchors[i], boxes, scores = paddle.vision.ops.yolo_box(
self.num_classes, self.conf_thresh, head_out,
origin_shape,
anchors[i],
self.num_classes,
self.conf_thresh,
self.downsample_ratio // 2**i, self.downsample_ratio // 2**i,
self.clip_bbox, self.scale_x_y) self.clip_bbox,
scale_x_y=self.scale_x_y)
boxes_list.append(boxes) boxes_list.append(boxes)
scores_list.append(paddle.transpose(scores, perm=[0, 2, 1])) scores_list.append(paddle.transpose(scores, perm=[0, 2, 1]))
yolo_boxes = paddle.concat(boxes_list, axis=1) yolo_boxes = paddle.concat(boxes_list, axis=1)
...@@ -622,94 +627,6 @@ class SSDBox(object): ...@@ -622,94 +627,6 @@ class SSDBox(object):
return output_boxes, output_scores return output_boxes, output_scores
@register
@serializable
class AnchorGrid(object):
    """Generate anchor grids for every level of a feature pyramid.

    Args:
        image_size (int or list): input image size, may be a single integer or
            list of [h, w]. Default: 512
        min_level (int): min level of the feature pyramid. Default: 3
        max_level (int): max level of the feature pyramid. Default: 7
        anchor_base_scale: base anchor scale. Default: 4
        num_scales: number of anchor scales. Default: 3
        aspect_ratios: aspect ratios. default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
    """

    def __init__(self,
                 image_size=512,
                 min_level=3,
                 max_level=7,
                 anchor_base_scale=4,
                 num_scales=3,
                 aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
        super(AnchorGrid, self).__init__()
        # A single integer is expanded to a two-element [size, size] list.
        self.image_size = ([image_size, image_size]
                           if isinstance(image_size, Integral) else image_size)
        for side in self.image_size:
            assert side % 2**max_level == 0, \
                "image size should be multiple of the max level stride"
        self.min_level = min_level
        self.max_level = max_level
        self.anchor_base_scale = anchor_base_scale
        self.num_scales = num_scales
        self.aspect_ratios = aspect_ratios

    @property
    def base_cell(self):
        """Anchor cell from ``make_cell``, computed once and then cached."""
        if hasattr(self, '_base_cell'):
            return self._base_cell
        self._base_cell = self.make_cell()
        return self._base_cell

    def make_cell(self):
        """Return one row of [-w/2, -h/2, w/2, h/2] per scale/ratio pair."""
        octave_scales = np.array(
            [2**(i / self.num_scales) for i in range(self.num_scales)])
        ratio_table = np.array(self.aspect_ratios)
        half_w = 0.5 * np.outer(octave_scales, ratio_table[:, 0]).reshape(-1, 1)
        half_h = 0.5 * np.outer(octave_scales, ratio_table[:, 1]).reshape(-1, 1)
        return np.hstack((-half_w, -half_h, half_w, half_h))

    def make_grid(self, stride):
        """Shift the scaled base cell to every grid center for ``stride``.

        Returns an array reshaped to (-1, 4).
        """
        scaled_cell = self.base_cell * stride * self.anchor_base_scale
        first_center = stride // 2
        xs = np.arange(first_center, self.image_size[1], stride)
        ys = np.arange(first_center, self.image_size[0], stride)
        grid_x, grid_y = np.meshgrid(xs, ys)
        flat_x = grid_x.flatten()
        flat_y = grid_y.flatten()
        # (x, y, x, y) so the shift applies to both corners of each anchor.
        centers = np.stack((flat_x, flat_y, flat_x, flat_y), axis=-1)
        return (scaled_cell + centers[:, np.newaxis, :]).reshape(-1, 4)

    def generate(self):
        """Return a list with one anchor array per level (stride 2**level)."""
        grids = []
        for level in range(self.min_level, self.max_level + 1):
            grids.append(self.make_grid(2**level))
        return grids

    def __call__(self):
        """Create (once) and return the per-level anchor parameters."""
        if hasattr(self, '_anchor_vars'):
            return self._anchor_vars

        helper = LayerHelper('anchor_grid')
        created = []
        for idx, level in enumerate(range(self.min_level, self.max_level + 1)):
            grid = self.make_grid(2**level)
            param = helper.create_parameter(
                attr=ParamAttr(name='anchors_{}'.format(idx)),
                shape=grid.shape,
                dtype='float32',
                stop_gradient=True,
                default_initializer=NumpyArrayInitializer(grid))
            param.persistable = True
            created.append(param)
        self._anchor_vars = created
        return self._anchor_vars
@register @register
@serializable @serializable
class FCOSBox(object): class FCOSBox(object):
......
...@@ -20,8 +20,7 @@ import paddle ...@@ -20,8 +20,7 @@ import paddle
import paddle.nn as nn import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ..ops import iou_similarity from ..bbox_utils import iou_similarity, bbox2delta
from ..bbox_utils import bbox2delta
__all__ = ['SSDLoss'] __all__ = ['SSDLoss']
......
...@@ -21,7 +21,7 @@ import paddle.nn as nn ...@@ -21,7 +21,7 @@ import paddle.nn as nn
import paddle.nn.functional as F import paddle.nn.functional as F
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ..bbox_utils import decode_yolo, xywh2xyxy, iou_similarity from ..bbox_utils import decode_yolo, xywh2xyxy, batch_iou_similarity
__all__ = ['YOLOv3Loss'] __all__ = ['YOLOv3Loss']
...@@ -80,7 +80,7 @@ class YOLOv3Loss(nn.Layer): ...@@ -80,7 +80,7 @@ class YOLOv3Loss(nn.Layer):
gwh = gbox[:, :, 0:2] + gbox[:, :, 2:4] * 0.5 gwh = gbox[:, :, 0:2] + gbox[:, :, 2:4] * 0.5
gbox = paddle.concat([gxy, gwh], axis=-1) gbox = paddle.concat([gxy, gwh], axis=-1)
iou = iou_similarity(pbox, gbox) iou = batch_iou_similarity(pbox, gbox)
iou.stop_gradient = True iou.stop_gradient = True
iou_max = iou.max(2) # [N, M1] iou_max = iou.max(2) # [N, M1]
iou_mask = paddle.cast(iou_max <= self.ignore_thresh, dtype=pbox.dtype) iou_mask = paddle.cast(iou_max <= self.ignore_thresh, dtype=pbox.dtype)
......
...@@ -19,23 +19,17 @@ from paddle import ParamAttr ...@@ -19,23 +19,17 @@ from paddle import ParamAttr
from paddle.regularizer import L2Decay from paddle.regularizer import L2Decay
from paddle import _C_ops from paddle import _C_ops
from paddle.fluid.framework import Variable, in_dygraph_mode from paddle import in_dynamic_mode
from paddle.fluid import core from paddle.common_ops_import import Variable, LayerHelper, check_variable_and_dtype, check_type, check_dtype
from paddle.fluid.dygraph import parallel_helper
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype
__all__ = [ __all__ = [
'roi_pool', 'roi_pool',
'roi_align', 'roi_align',
'prior_box', 'prior_box',
'generate_proposals', 'generate_proposals',
'iou_similarity',
'box_coder', 'box_coder',
'yolo_box',
'multiclass_nms', 'multiclass_nms',
'distribute_fpn_proposals', 'distribute_fpn_proposals',
'collect_fpn_proposals',
'matrix_nms', 'matrix_nms',
'batch_norm', 'batch_norm',
'mish', 'mish',
...@@ -189,7 +183,7 @@ def roi_pool(input, ...@@ -189,7 +183,7 @@ def roi_pool(input,
output_size = (output_size, output_size) output_size = (output_size, output_size)
pooled_height, pooled_width = output_size pooled_height, pooled_width = output_size
if in_dygraph_mode(): if in_dynamic_mode():
assert rois_num is not None, "rois_num should not be None in dygraph mode." assert rois_num is not None, "rois_num should not be None in dygraph mode."
pool_out, argmaxes = _C_ops.roi_pool( pool_out, argmaxes = _C_ops.roi_pool(
input, rois, rois_num, "pooled_height", pooled_height, input, rois, rois_num, "pooled_height", pooled_height,
...@@ -296,7 +290,7 @@ def roi_align(input, ...@@ -296,7 +290,7 @@ def roi_align(input,
pooled_height, pooled_width = output_size pooled_height, pooled_width = output_size
if in_dygraph_mode(): if in_dynamic_mode():
assert rois_num is not None, "rois_num should not be None in dygraph mode." assert rois_num is not None, "rois_num should not be None in dygraph mode."
align_out = _C_ops.roi_align( align_out = _C_ops.roi_align(
input, rois, rois_num, "pooled_height", pooled_height, input, rois, rois_num, "pooled_height", pooled_height,
...@@ -332,183 +326,6 @@ def roi_align(input, ...@@ -332,183 +326,6 @@ def roi_align(input,
return align_out return align_out
@paddle.jit.not_to_static
def iou_similarity(x, y, box_normalized=True, name=None):
    """
    Compute the pairwise intersection-over-union (IOU) of two box lists
    through the ``iou_similarity`` operator.

    Box list 'X' should be a LoDTensor and 'Y' is a common Tensor,
    boxes in 'Y' are shared by all instances of the batched inputs of X.
    For two boxes A and B:

        IOU(A, B) = area(A & B) / (area(A) + area(B) - area(A & B))

    Args:
        x (Tensor): Box list X, a 2-D Tensor with shape [N, 4] holding N
            boxes, each given as [xmin, ymin, xmax, ymax]. [xmin, ymin] is
            the left top coordinate and [xmax, ymax] the right bottom
            coordinate of the box. The data type is float32 or float64.
        y (Tensor): Box list Y holding M boxes, each given as
            [xmin, ymin, xmax, ymax] with the same corner convention as
            ``x``. The data type is float32 or float64.
        box_normalized(bool): Whether to treat the priorbox as a normalized
            box. Set true by default.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        Tensor: The output of iou_similarity op, a tensor with shape [N, M]
            representing pairwise iou scores. The data type is same with x.

    Examples:
        .. code-block:: python

            import paddle
            from ppdet.modeling import ops
            paddle.enable_static()

            x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
            y = paddle.static.data(name='y', shape=[None, 4], dtype='float32')
            iou = ops.iou_similarity(x=x, y=y)
    """
    # Dynamic graph: call the operator directly.
    if in_dygraph_mode():
        return _C_ops.iou_similarity(x, y, 'box_normalized', box_normalized)

    # Static graph: declare the output variable and append the op.
    helper = LayerHelper("iou_similarity", **locals())
    iou = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(
        type="iou_similarity",
        inputs={"X": x,
                "Y": y},
        outputs={"Out": iou},
        attrs={"box_normalized": box_normalized})
    return iou
@paddle.jit.not_to_static
def collect_fpn_proposals(multi_rois,
                          multi_scores,
                          min_level,
                          max_level,
                          post_nms_top_n,
                          rois_num_per_level=None,
                          name=None):
    """
    **This OP only supports LoDTensor as input**. Concat multi-level RoIs
    (Region of Interest) and select N RoIs with respect to multi_scores.
    This operation performs the following steps:

    1. Choose num_level RoIs and scores as input:
       num_level = max_level - min_level + 1
    2. Concat multi-level RoIs and scores
    3. Sort scores and select post_nms_top_n scores
    4. Gather RoIs by selected indices from scores
    5. Re-sort RoIs by corresponding batch_id

    Args:
        multi_rois(list): List of RoIs to collect. Element in list is 2-D
            LoDTensor with shape [N, 4] and data type is float32 or float64,
            N is the number of RoIs.
        multi_scores(list): List of scores of RoIs to collect. Element in list
            is 2-D LoDTensor with shape [N, 1] and data type is float32 or
            float64, N is the number of RoIs.
        min_level(int): The lowest level of FPN layer to collect
        max_level(int): The highest level of FPN layer to collect
        post_nms_top_n(int): The number of selected RoIs
        rois_num_per_level(list, optional): The List of RoIs' numbers.
            Each element is 1-D Tensor which contains the RoIs' number of each
            image on each level and the shape is [B] and data type is
            int32, B is the number of images. If it is not None then return
            a 1-D Tensor contains the output RoIs' number of each image and
            the shape is [B]. Required (asserted) in dygraph mode.
            Default: None
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually name is no need to set and
            None by default.

    Returns:
        tuple:
            fpn_rois(Variable): 2-D LoDTensor with shape [N, 4] and data type is
            float32 or float64. Selected RoIs.

            rois_num(Tensor): 1-D Tensor contains the RoIs's number of each
            image. The shape is [B] and data type is int32. B is the number of
            images. ``None`` in static mode when ``rois_num_per_level`` is
            not given.

    Examples:
        .. code-block:: python

            import paddle
            from ppdet.modeling import ops
            paddle.enable_static()
            multi_rois = []
            multi_scores = []
            for i in range(4):
                multi_rois.append(paddle.static.data(
                    name='roi_'+str(i), shape=[None, 4], dtype='float32', lod_level=1))
            for i in range(4):
                multi_scores.append(paddle.static.data(
                    name='score_'+str(i), shape=[None, 1], dtype='float32', lod_level=1))

            fpn_rois = ops.collect_fpn_proposals(
                multi_rois=multi_rois,
                multi_scores=multi_scores,
                min_level=2,
                max_level=5,
                post_nms_top_n=2000)
    """
    check_type(multi_rois, 'multi_rois', list, 'collect_fpn_proposals')
    check_type(multi_scores, 'multi_scores', list, 'collect_fpn_proposals')

    # Only the first (max_level - min_level + 1) entries are used.
    num_lvl = max_level - min_level + 1
    input_rois = multi_rois[:num_lvl]
    input_scores = multi_scores[:num_lvl]

    # Dynamic graph: call the operator directly.
    if in_dygraph_mode():
        assert rois_num_per_level is not None, "rois_num_per_level should not be None in dygraph mode."
        collected, collected_num = _C_ops.collect_fpn_proposals(
            input_rois, input_scores, rois_num_per_level, 'post_nms_topN',
            post_nms_top_n)
        return collected, collected_num

    # Static graph: declare outputs and append the op to the program.
    helper = LayerHelper('collect_fpn_proposals', **locals())
    roi_dtype = helper.input_dtype('multi_rois')
    check_dtype(roi_dtype, 'multi_rois', ['float32', 'float64'],
                'collect_fpn_proposals')

    fpn_rois = helper.create_variable_for_type_inference(roi_dtype)
    fpn_rois.stop_gradient = True

    op_inputs = {
        'MultiLevelRois': input_rois,
        'MultiLevelScores': input_scores,
    }
    op_outputs = {'FpnRois': fpn_rois}

    # The per-image RoI count is only produced when per-level counts are given.
    fpn_rois_num = None
    if rois_num_per_level is not None:
        op_inputs['MultiLevelRoIsNum'] = rois_num_per_level
        fpn_rois_num = helper.create_variable_for_type_inference(dtype='int32')
        fpn_rois_num.stop_gradient = True
        op_outputs['RoisNum'] = fpn_rois_num

    helper.append_op(
        type='collect_fpn_proposals',
        inputs=op_inputs,
        outputs=op_outputs,
        attrs={'post_nms_topN': post_nms_top_n})
    return fpn_rois, fpn_rois_num
@paddle.jit.not_to_static @paddle.jit.not_to_static
def distribute_fpn_proposals(fpn_rois, def distribute_fpn_proposals(fpn_rois,
min_level, min_level,
...@@ -587,7 +404,7 @@ def distribute_fpn_proposals(fpn_rois, ...@@ -587,7 +404,7 @@ def distribute_fpn_proposals(fpn_rois,
""" """
num_lvl = max_level - min_level + 1 num_lvl = max_level - min_level + 1
if in_dygraph_mode(): if in_dynamic_mode():
assert rois_num is not None, "rois_num should not be None in dygraph mode." assert rois_num is not None, "rois_num should not be None in dygraph mode."
attrs = ('min_level', min_level, 'max_level', max_level, 'refer_level', attrs = ('min_level', min_level, 'max_level', max_level, 'refer_level',
refer_level, 'refer_scale', refer_scale, 'pixel_offset', refer_level, 'refer_scale', refer_scale, 'pixel_offset',
...@@ -638,143 +455,6 @@ def distribute_fpn_proposals(fpn_rois, ...@@ -638,143 +455,6 @@ def distribute_fpn_proposals(fpn_rois,
return multi_rois, restore_ind, rois_num_per_level return multi_rois, restore_ind, rois_num_per_level
@paddle.jit.not_to_static
def yolo_box(
        x,
        origin_shape,
        anchors,
        class_num,
        conf_thresh,
        downsample_ratio,
        clip_bbox=True,
        scale_x_y=1.,
        name=None, ):
    """
    This operator generates YOLO detection boxes from output of YOLOv3 network.

    The output of previous network is in shape [N, C, H, W], while H and W
    should be the same, H and W specify the grid size, each grid point predict
    given number boxes, this given number, which following will be represented as S,
    is specified by the number of anchors. In the second dimension(the channel
    dimension), C should be equal to S * (5 + class_num), class_num is the object
    category number of source dataset(such as 80 in coco dataset), so the
    second(channel) dimension, apart from 4 box location coordinates x, y, w, h,
    also includes confidence score of the box and class one-hot key of each anchor
    box.

    Assume the 4 location coordinates are t_x, t_y, t_w, t_h, the box
    predictions should be as follows:

        b_x = sigmoid(t_x) + c_x
        b_y = sigmoid(t_y) + c_y
        b_w = p_w * exp(t_w)
        b_h = p_h * exp(t_h)

    in the equation above, c_x, c_y is the left top corner of current grid
    and p_w, p_h is specified by anchors.

    The logistic regression value of the 5th channel of each anchor prediction boxes
    represents the confidence score of each prediction box, and the logistic
    regression value of the last :attr:`class_num` channels of each anchor prediction
    boxes represents the classification scores. Boxes with confidence scores less than
    :attr:`conf_thresh` should be ignored, and box final scores is the product of
    confidence scores and classification scores.

        score_pred = score_conf * score_class

    Args:
        x (Tensor): The input tensor of YoloBox operator is a 4-D tensor with shape of [N, C, H, W].
            The second dimension(C) stores box locations, confidence score and
            classification one-hot keys of each anchor box. Generally, X should be the output of YOLOv3 network.
            The data type is float32 or float64.
        origin_shape (Tensor): The image size tensor of YoloBox operator, This is a 2-D tensor with shape of [N, 2].
            This tensor holds height and width of each input image used for resizing output box in input image
            scale. The data type is int32.
        anchors (list|tuple): The anchor width and height, it will be parsed pair by pair.
        class_num (int): The number of classes to predict.
        conf_thresh (float): The confidence scores threshold of detection boxes. Boxes with confidence scores
            under threshold should be ignored.
        downsample_ratio (int): The downsample ratio from network input to YoloBox operator input,
            so 32, 16, 8 should be set for the first, second, and third YoloBox operators.
        clip_bbox (bool): Whether clip output bounding box in Input(ImgSize) boundary. Default true.
        scale_x_y (float): Scale the center point of decoded bounding box. Default 1.0.
        name (string): The default value is None. Normally there is no need
            for user to set this property. For more information,
            please refer to :ref:`api_guide_Name`

    Returns:
        boxes Tensor: A 3-D tensor with shape [N, M, 4], the coordinates of boxes, N is the batch num,
            M is output box number, and the 3rd dimension stores [xmin, ymin, xmax, ymax] coordinates of boxes.
        scores Tensor: A 3-D tensor with shape [N, M, :attr:`class_num`], the scores of boxes, N is the batch num,
            M is output box number.

    Raises:
        TypeError: Attr anchors of yolo box must be list or tuple
        TypeError: Attr class_num of yolo box must be an integer
        TypeError: Attr conf_thresh of yolo box must be a float number

    Examples:
        .. code-block:: python

            import paddle
            from ppdet.modeling import ops

            paddle.enable_static()
            x = paddle.static.data(name='x', shape=[None, 255, 13, 13], dtype='float32')
            origin_shape = paddle.static.data(name='origin_shape', shape=[None, 2], dtype='int32')
            anchors = [10, 13, 16, 30, 33, 23]
            boxes, scores = ops.yolo_box(x=x, origin_shape=origin_shape, class_num=80, anchors=anchors,
                                         conf_thresh=0.01, downsample_ratio=32)
    """
    # Built first so locals() holds exactly the call arguments.
    helper = LayerHelper('yolo_box', **locals())

    if not isinstance(anchors, (list, tuple)):
        raise TypeError("Attr anchors of yolo_box must be list or tuple")
    if not isinstance(class_num, int):
        raise TypeError("Attr class_num of yolo_box must be an integer")
    if not isinstance(conf_thresh, float):
        # The message previously named a non-existent `ignore_thresh` attr.
        raise TypeError("Attr conf_thresh of yolo_box must be a float number")

    # Dynamic graph: attributes are passed as a flat name/value sequence.
    if in_dygraph_mode():
        attrs = ('anchors', anchors, 'class_num', class_num, 'conf_thresh',
                 conf_thresh, 'downsample_ratio', downsample_ratio, 'clip_bbox',
                 clip_bbox, 'scale_x_y', scale_x_y)
        boxes, scores = _C_ops.yolo_box(x, origin_shape, *attrs)
        return boxes, scores

    # Static graph: declare the two outputs and append the op.
    boxes = helper.create_variable_for_type_inference(dtype=x.dtype)
    scores = helper.create_variable_for_type_inference(dtype=x.dtype)
    attrs = {
        "anchors": anchors,
        "class_num": class_num,
        "conf_thresh": conf_thresh,
        "downsample_ratio": downsample_ratio,
        "clip_bbox": clip_bbox,
        "scale_x_y": scale_x_y,
    }
    helper.append_op(
        type='yolo_box',
        inputs={
            "X": x,
            "ImgSize": origin_shape,
        },
        outputs={
            'Boxes': boxes,
            'Scores': scores,
        },
        attrs=attrs)
    return boxes, scores
@paddle.jit.not_to_static @paddle.jit.not_to_static
def prior_box(input, def prior_box(input,
image, image,
...@@ -877,7 +557,7 @@ def prior_box(input, ...@@ -877,7 +557,7 @@ def prior_box(input,
max_sizes = [max_sizes] max_sizes = [max_sizes]
cur_max_sizes = max_sizes cur_max_sizes = max_sizes
if in_dygraph_mode(): if in_dynamic_mode():
attrs = ('min_sizes', min_sizes, 'aspect_ratios', aspect_ratios, attrs = ('min_sizes', min_sizes, 'aspect_ratios', aspect_ratios,
'variances', variance, 'flip', flip, 'clip', clip, 'step_w', 'variances', variance, 'flip', flip, 'clip', clip, 'step_w',
steps[0], 'step_h', steps[1], 'offset', offset, steps[0], 'step_h', steps[1], 'offset', offset,
...@@ -1022,7 +702,7 @@ def multiclass_nms(bboxes, ...@@ -1022,7 +702,7 @@ def multiclass_nms(bboxes,
""" """
helper = LayerHelper('multiclass_nms3', **locals()) helper = LayerHelper('multiclass_nms3', **locals())
if in_dygraph_mode(): if in_dynamic_mode():
attrs = ('background_label', background_label, 'score_threshold', attrs = ('background_label', background_label, 'score_threshold',
score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold', score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold',
nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta, nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta,
...@@ -1163,7 +843,7 @@ def matrix_nms(bboxes, ...@@ -1163,7 +843,7 @@ def matrix_nms(bboxes,
check_type(gaussian_sigma, 'gaussian_sigma', float, 'matrix_nms') check_type(gaussian_sigma, 'gaussian_sigma', float, 'matrix_nms')
check_type(background_label, 'background_label', int, 'matrix_nms') check_type(background_label, 'background_label', int, 'matrix_nms')
if in_dygraph_mode(): if in_dynamic_mode():
attrs = ('background_label', background_label, 'score_threshold', attrs = ('background_label', background_label, 'score_threshold',
score_threshold, 'post_threshold', post_threshold, 'nms_top_k', score_threshold, 'post_threshold', post_threshold, 'nms_top_k',
nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian', nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian',
...@@ -1208,111 +888,6 @@ def matrix_nms(bboxes, ...@@ -1208,111 +888,6 @@ def matrix_nms(bboxes,
return output, rois_num, index return output, rois_num, index
def bipartite_match(dist_matrix,
                    match_type=None,
                    dist_threshold=None,
                    name=None):
    """
    Run the ``bipartite_match`` operator on a distance matrix.

    This operator implements a greedy bipartite matching algorithm, which is
    used to obtain the matching with the maximum distance based on the input
    distance matrix. For an input 2-D matrix, the bipartite matching algorithm
    can find the matched column for each row (matched means the largest
    distance), and can also find the matched row for each column. This
    operator only calculates matched indices from column to row. For each
    instance, the number of matched indices is the column number of the input
    distance matrix. **The OP only supports CPU**.

    There are two outputs, matched indices and distance.
    In short, the algorithm matches the best (maximum distance) row entity to
    each column entity, and the matched indices are not duplicated in each row
    of ColToRowMatchIndices. If a column entity is not matched to any row
    entity, -1 is set in ColToRowMatchIndices.

    NOTE: the input DistMat can be a LoDTensor (with LoD) or a Tensor.
    If it is a LoDTensor with LoD, the height of ColToRowMatchIndices is the
    batch size. If it is a Tensor, the height of ColToRowMatchIndices is 1.

    NOTE: This API is a very low level API. It is used by :code:`ssd_loss`
    layer. Please consider using :code:`ssd_loss` instead.

    Args:
        dist_matrix(Tensor): This input is a 2-D LoDTensor with shape
            [K, M]. The data type is float32 or float64. It is the pair-wise
            distance matrix between the entities represented by each row and
            each column. For example, assume one entity is A with shape [K]
            and another entity is B with shape [M]. dist_matrix[i][j] is the
            distance between A[i] and B[j]. The bigger the distance is, the
            better matched the pair is. NOTE: This tensor can contain LoD
            information to represent a batch of inputs. One instance of this
            batch can contain a different number of entities.
        match_type(str, optional): The type of matching method, should be
            'bipartite' or 'per_prediction'. None ('bipartite') by default.
        dist_threshold(float32, optional): If `match_type` is 'per_prediction',
            this threshold determines the extra matching bboxes based
            on the maximum distance, 0.5 by default.
        name(str, optional): For detailed information, please refer
            to :ref:`api_guide_Name`. Usually there is no need to set it;
            None by default.

    Returns:
        Tuple:
        matched_indices(Tensor): A 2-D Tensor with shape [N, M]. The data
        type is int32. N is the batch size. If match_indices[i][j] is -1, it
        means B[j] does not match any entity in the i-th instance.
        Otherwise, it means B[j] is matched to row
        match_indices[i][j] in the i-th instance. The row number of the
        i-th instance is saved in match_indices[i][j].
        matched_distance(Tensor): A 2-D Tensor with shape [N, M]. The data
        type is float32. N is the batch size. If match_indices[i][j] is -1,
        match_distance[i][j] is also -1.0. Otherwise, assume
        match_distance[i][j] = d, and the row offsets of each instance
        are called LoD. Then match_distance[i][j] =
        dist_matrix[d+LoD[i]][j].

    Examples:
        .. code-block:: python

            import paddle
            from ppdet.modeling import ops
            from ppdet.modeling.utils import iou_similarity
            paddle.enable_static()
            x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
            y = paddle.static.data(name='y', shape=[None, 4], dtype='float32')
            iou = iou_similarity(x=x, y=y)
            matched_indices, matched_dist = ops.bipartite_match(iou)
    """
    check_variable_and_dtype(dist_matrix, 'dist_matrix',
                             ['float32', 'float64'], 'bipartite_match')

    # Dynamic-graph path: invoke the operator directly and return its outputs.
    # NOTE(review): match_type and dist_threshold are forwarded unchanged, so
    # the None defaults reach the op as None. The documented defaults
    # ('bipartite', 0.5) are presumably applied by the op itself - confirm.
    if in_dygraph_mode():
        match_indices, match_distance = _C_ops.bipartite_match(
            dist_matrix, "match_type", match_type, "dist_threshold",
            dist_threshold)
        return match_indices, match_distance

    # Static-graph path: locals() is forwarded to LayerHelper, and at this
    # point it holds only the four arguments. Do not bind new locals above
    # this line.
    helper = LayerHelper('bipartite_match', **locals())
    # Output placeholders: indices are int32, distances follow the input dtype.
    match_indices = helper.create_variable_for_type_inference(dtype='int32')
    match_distance = helper.create_variable_for_type_inference(
        dtype=dist_matrix.dtype)
    helper.append_op(
        type='bipartite_match',
        inputs={'DistMat': dist_matrix},
        attrs={
            'match_type': match_type,
            'dist_threshold': dist_threshold,
        },
        outputs={
            'ColToRowMatchIndices': match_indices,
            'ColToRowMatchDist': match_distance
        })
    return match_indices, match_distance
@paddle.jit.not_to_static @paddle.jit.not_to_static
def box_coder(prior_box, def box_coder(prior_box,
prior_box_var, prior_box_var,
...@@ -1425,7 +1000,7 @@ def box_coder(prior_box, ...@@ -1425,7 +1000,7 @@ def box_coder(prior_box,
check_variable_and_dtype(target_box, 'target_box', ['float32', 'float64'], check_variable_and_dtype(target_box, 'target_box', ['float32', 'float64'],
'box_coder') 'box_coder')
if in_dygraph_mode(): if in_dynamic_mode():
if isinstance(prior_box_var, Variable): if isinstance(prior_box_var, Variable):
output_box = _C_ops.box_coder( output_box = _C_ops.box_coder(
prior_box, prior_box_var, target_box, "code_type", code_type, prior_box, prior_box_var, target_box, "code_type", code_type,
...@@ -1550,7 +1125,7 @@ def generate_proposals(scores, ...@@ -1550,7 +1125,7 @@ def generate_proposals(scores,
rois, roi_probs = ops.generate_proposals(scores, bbox_deltas, rois, roi_probs = ops.generate_proposals(scores, bbox_deltas,
im_shape, anchors, variances) im_shape, anchors, variances)
""" """
if in_dygraph_mode(): if in_dynamic_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode." assert return_rois_num, "return_rois_num should be True in dygraph mode."
attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n, attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta, 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
...@@ -1656,8 +1231,3 @@ def get_static_shape(tensor): ...@@ -1656,8 +1231,3 @@ def get_static_shape(tensor):
shape = paddle.shape(tensor) shape = paddle.shape(tensor)
shape.stop_gradient = True shape.stop_gradient = True
return shape return shape
def paddle_distributed_is_initialized():
    """Report whether the distributed parallel context has been initialized.

    Returns the falsy result of ``core.is_compiled_with_dist()`` when that
    check fails; otherwise returns whatever
    ``parallel_helper._is_parallel_ctx_initialized()`` reports.
    """
    compiled_with_dist = core.is_compiled_with_dist()
    if not compiled_with_dist:
        # Mirror short-circuit ``and``: hand back the falsy value itself.
        return compiled_with_dist
    return parallel_helper._is_parallel_ctx_initialized()
...@@ -18,9 +18,7 @@ import unittest ...@@ -18,9 +18,7 @@ import unittest
import contextlib import contextlib
import paddle import paddle
import paddle.fluid as fluid from paddle.static import Program
from paddle.fluid.framework import Program
from paddle.fluid import core
class LayerTest(unittest.TestCase): class LayerTest(unittest.TestCase):
...@@ -35,19 +33,17 @@ class LayerTest(unittest.TestCase): ...@@ -35,19 +33,17 @@ class LayerTest(unittest.TestCase):
def _get_place(self, force_to_use_cpu=False): def _get_place(self, force_to_use_cpu=False):
# this option for ops that only have cpu kernel # this option for ops that only have cpu kernel
if force_to_use_cpu: if force_to_use_cpu:
return core.CPUPlace() return 'cpu'
else: else:
if core.is_compiled_with_cuda(): return paddle.device.get_device()
return core.CUDAPlace(0)
return core.CPUPlace()
@contextlib.contextmanager @contextlib.contextmanager
def static_graph(self): def static_graph(self):
paddle.enable_static() paddle.enable_static()
scope = fluid.core.Scope() scope = paddle.static.Scope()
program = Program() program = Program()
with fluid.scope_guard(scope): with paddle.static.scope_guard(scope):
with fluid.program_guard(program): with paddle.static.program_guard(program):
paddle.seed(self.seed) paddle.seed(self.seed)
paddle.framework.random._manual_program_seed(self.seed) paddle.framework.random._manual_program_seed(self.seed)
yield yield
...@@ -57,9 +53,9 @@ class LayerTest(unittest.TestCase): ...@@ -57,9 +53,9 @@ class LayerTest(unittest.TestCase):
fetch_list, fetch_list,
with_lod=False, with_lod=False,
force_to_use_cpu=False): force_to_use_cpu=False):
exe = fluid.Executor(self._get_place(force_to_use_cpu)) exe = paddle.static.Executor(self._get_place(force_to_use_cpu))
exe.run(fluid.default_startup_program()) exe.run(paddle.static.default_startup_program())
return exe.run(fluid.default_main_program(), return exe.run(paddle.static.default_main_program(),
feed=feed, feed=feed,
fetch_list=fetch_list, fetch_list=fetch_list,
return_numpy=(not with_lod)) return_numpy=(not with_lod))
...@@ -67,8 +63,8 @@ class LayerTest(unittest.TestCase): ...@@ -67,8 +63,8 @@ class LayerTest(unittest.TestCase):
@contextlib.contextmanager @contextlib.contextmanager
def dynamic_graph(self, force_to_use_cpu=False): def dynamic_graph(self, force_to_use_cpu=False):
paddle.disable_static() paddle.disable_static()
with fluid.dygraph.guard( place = self._get_place(force_to_use_cpu=force_to_use_cpu)
self._get_place(force_to_use_cpu=force_to_use_cpu)): paddle.device.set_device(place)
paddle.seed(self.seed) paddle.seed(self.seed)
paddle.framework.random._manual_program_seed(self.seed) paddle.framework.random._manual_program_seed(self.seed)
yield yield
...@@ -23,8 +23,6 @@ import unittest ...@@ -23,8 +23,6 @@ import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import base
import ppdet.modeling.ops as ops import ppdet.modeling.ops as ops
from ppdet.modeling.tests.test_base import LayerTest from ppdet.modeling.tests.test_base import LayerTest
...@@ -50,127 +48,6 @@ def softmax(x): ...@@ -50,127 +48,6 @@ def softmax(x):
return exps / np.sum(exps) return exps / np.sum(exps)
class TestCollectFpnProposals(LayerTest):
    """Exercises ``ops.collect_fpn_proposals`` in static and dynamic modes."""

    def test_collect_fpn_proposals(self):
        """Static-graph and dynamic-graph outputs must be element-wise equal."""
        num_levels = 4
        multi_bboxes_np, multi_scores_np, rois_num_per_level_np = [], [], []
        for _ in range(num_levels):
            # Draw boxes before scores for each level so the global NumPy
            # random stream is consumed in the same order as before.
            multi_bboxes_np.append(np.random.rand(5, 4).astype('float32'))
            multi_scores_np.append(np.random.rand(5, 1).astype('float32'))
            rois_num_per_level_np.append(np.array([2, 3]).astype('int32'))

        with self.static_graph():
            multi_bboxes, multi_scores, rois_num_per_level = [], [], []
            for level in range(num_levels):
                suffix = str(level)
                multi_bboxes.append(
                    paddle.static.data(
                        name='rois' + suffix,
                        shape=[5, 4],
                        dtype='float32',
                        lod_level=1))
                multi_scores.append(
                    paddle.static.data(
                        name='scores' + suffix,
                        shape=[5, 1],
                        dtype='float32',
                        lod_level=1))
                rois_num_per_level.append(
                    paddle.static.data(
                        name='rois_num' + suffix,
                        shape=[None],
                        dtype='int32'))

            fpn_rois, rois_num = ops.collect_fpn_proposals(
                multi_bboxes,
                multi_scores,
                2,
                5,
                10,
                rois_num_per_level=rois_num_per_level)

            # Feed keys must match the names given to paddle.static.data above.
            feed = {}
            for level in range(num_levels):
                suffix = str(level)
                feed['rois' + suffix] = multi_bboxes_np[level]
                feed['scores' + suffix] = multi_scores_np[level]
                feed['rois_num' + suffix] = rois_num_per_level_np[level]

            fpn_rois_fetched, rois_num_fetched = self.get_static_graph_result(
                feed=feed, fetch_list=[fpn_rois, rois_num], with_lod=True)
            fpn_rois_stat = np.array(fpn_rois_fetched)
            rois_num_stat = np.array(rois_num_fetched)

        with self.dynamic_graph():
            multi_bboxes_dy, multi_scores_dy, rois_num_per_level_dy = [], [], []
            per_level_inputs = zip(multi_bboxes_np, multi_scores_np,
                                   rois_num_per_level_np)
            for boxes, scores, counts in per_level_inputs:
                multi_bboxes_dy.append(base.to_variable(boxes))
                multi_scores_dy.append(base.to_variable(scores))
                rois_num_per_level_dy.append(base.to_variable(counts))

            fpn_rois_var, rois_num_var = ops.collect_fpn_proposals(
                multi_bboxes_dy,
                multi_scores_dy,
                2,
                5,
                10,
                rois_num_per_level=rois_num_per_level_dy)
            fpn_rois_dy = fpn_rois_var.numpy()
            rois_num_dy = rois_num_var.numpy()

        self.assertTrue(np.array_equal(fpn_rois_stat, fpn_rois_dy))
        self.assertTrue(np.array_equal(rois_num_stat, rois_num_dy))

    def test_collect_fpn_proposals_error(self):
        """Two invalid input combinations must each raise ``TypeError``."""

        def generate_input(bbox_type, score_type, name):
            # Build four per-level (rois, scores) static inputs whose names
            # carry ``name`` so they do not clash with other variables.
            level_bboxes, level_scores = [], []
            for level in range(4):
                suffix = name + str(level)
                level_bboxes.append(
                    paddle.static.data(
                        name='rois' + suffix,
                        shape=[10, 4],
                        dtype=bbox_type,
                        lod_level=1))
                level_scores.append(
                    paddle.static.data(
                        name='scores' + suffix,
                        shape=[10, 1],
                        dtype=score_type,
                        lod_level=1))
            return level_bboxes, level_scores

        with self.static_graph():
            # Case 1 inputs: single variables instead of per-level lists.
            bbox1 = paddle.static.data(
                name='rois', shape=[5, 10, 4], dtype='float32', lod_level=1)
            score1 = paddle.static.data(
                name='scores', shape=[5, 10, 1], dtype='float32', lod_level=1)
            # Case 2 inputs: per-level lists whose rois are declared as int32.
            bbox2, score2 = generate_input('int32', 'float32', '2')

            shared_kwargs = dict(min_level=2, max_level=5, post_nms_top_n=2000)
            self.assertRaises(
                TypeError,
                ops.collect_fpn_proposals,
                multi_rois=bbox1,
                multi_scores=score1,
                **shared_kwargs)
            self.assertRaises(
                TypeError,
                ops.collect_fpn_proposals,
                multi_rois=bbox2,
                multi_scores=score2,
                **shared_kwargs)

        paddle.disable_static()
class TestDistributeFpnProposals(LayerTest): class TestDistributeFpnProposals(LayerTest):
def test_distribute_fpn_proposals(self): def test_distribute_fpn_proposals(self):
rois_np = np.random.rand(10, 4).astype('float32') rois_np = np.random.rand(10, 4).astype('float32')
...@@ -200,8 +77,8 @@ class TestDistributeFpnProposals(LayerTest): ...@@ -200,8 +77,8 @@ class TestDistributeFpnProposals(LayerTest):
output_stat_np.append(output_np) output_stat_np.append(output_np)
with self.dynamic_graph(): with self.dynamic_graph():
rois_dy = base.to_variable(rois_np) rois_dy = paddle.to_tensor(rois_np)
rois_num_dy = base.to_variable(rois_num_np) rois_num_dy = paddle.to_tensor(rois_num_np)
multi_rois_dy, restore_ind_dy, rois_num_per_level_dy = ops.distribute_fpn_proposals( multi_rois_dy, restore_ind_dy, rois_num_per_level_dy = ops.distribute_fpn_proposals(
fpn_rois=rois_dy, fpn_rois=rois_dy,
min_level=2, min_level=2,
...@@ -266,9 +143,9 @@ class TestROIAlign(LayerTest): ...@@ -266,9 +143,9 @@ class TestROIAlign(LayerTest):
with_lod=False) with_lod=False)
with self.dynamic_graph(): with self.dynamic_graph():
inputs_dy = base.to_variable(inputs_np) inputs_dy = paddle.to_tensor(inputs_np)
rois_dy = base.to_variable(rois_np) rois_dy = paddle.to_tensor(rois_np)
rois_num_dy = base.to_variable(rois_num_np) rois_num_dy = paddle.to_tensor(rois_num_np)
output_dy = ops.roi_align( output_dy = ops.roi_align(
input=inputs_dy, input=inputs_dy,
...@@ -326,9 +203,9 @@ class TestROIPool(LayerTest): ...@@ -326,9 +203,9 @@ class TestROIPool(LayerTest):
with_lod=False) with_lod=False)
with self.dynamic_graph(): with self.dynamic_graph():
inputs_dy = base.to_variable(inputs_np) inputs_dy = paddle.to_tensor(inputs_np)
rois_dy = base.to_variable(rois_np) rois_dy = paddle.to_tensor(rois_np)
rois_num_dy = base.to_variable(rois_num_np) rois_num_dy = paddle.to_tensor(rois_num_np)
output_dy, _ = ops.roi_pool( output_dy, _ = ops.roi_pool(
input=inputs_dy, input=inputs_dy,
...@@ -355,134 +232,6 @@ class TestROIPool(LayerTest): ...@@ -355,134 +232,6 @@ class TestROIPool(LayerTest):
paddle.disable_static() paddle.disable_static()
class TestIoUSimilarity(LayerTest):
    """Compares ``ops.iou_similarity`` between static and dynamic graphs."""

    def test_iou_similarity(self):
        """Both execution modes must produce identical IoU matrices."""
        batch, channels, height, width = 2, 12, 20, 20
        # Never read afterwards; kept so the global NumPy RNG is advanced
        # exactly as before (make_rois may draw from it - not visible here).
        _unused_inputs = np.random.rand(batch, channels, height,
                                        width).astype('float32')
        pooled_size = (7, 7)
        x_np = make_rois(height, width, [20], pooled_size)
        y_np = make_rois(height, width, [10], pooled_size)

        with self.static_graph():
            x = paddle.static.data(name='x', shape=[20, 4], dtype='float32')
            y = paddle.static.data(name='y', shape=[10, 4], dtype='float32')
            iou = ops.iou_similarity(x=x, y=y)
            feed = {'x': x_np, 'y': y_np}
            # Single-element unpacking: exactly one fetched result is expected.
            (iou_np, ) = self.get_static_graph_result(
                feed=feed, fetch_list=[iou], with_lod=False)

        with self.dynamic_graph():
            iou_dy = ops.iou_similarity(
                x=base.to_variable(x_np), y=base.to_variable(y_np))
            iou_dy_np = iou_dy.numpy()

        self.assertTrue(np.array_equal(iou_np, iou_dy_np))
class TestBipartiteMatch(LayerTest):
    """Compares ``ops.bipartite_match`` between static and dynamic graphs."""

    def test_bipartite_match(self):
        """Matched indices and distances must agree across both modes."""
        distance = np.random.random((20, 10)).astype('float32')
        match_options = dict(match_type='per_prediction', dist_threshold=0.5)

        with self.static_graph():
            x = paddle.static.data(name='x', shape=[20, 10], dtype='float32')
            match_indices, match_dist = ops.bipartite_match(x, **match_options)
            indices_static, dist_static = self.get_static_graph_result(
                feed={'x': distance},
                fetch_list=[match_indices, match_dist],
                with_lod=False)

        with self.dynamic_graph():
            indices_var, dist_var = ops.bipartite_match(
                base.to_variable(distance), **match_options)
            indices_dynamic = indices_var.numpy()
            dist_dynamic = dist_var.numpy()

        # Indices are checked first, then distances.
        compared_pairs = ((indices_static, indices_dynamic),
                          (dist_static, dist_dynamic))
        for static_value, dynamic_value in compared_pairs:
            self.assertTrue(np.array_equal(static_value, dynamic_value))
class TestYoloBox(LayerTest):
    """Checks ``ops.yolo_box`` in static and dynamic graph modes."""

    def test_yolo_box(self):
        """Boxes and scores must be identical in both execution modes."""
        # x shape [N C H W], C=K * (5 + class_num), class_num=10, K=2
        np_x = np.random.random([1, 30, 7, 7]).astype('float32')
        np_origin_shape = np.array([[608, 608]], dtype='int32')
        anchors = (10, 13, 30, 13)
        class_num, conf_thresh, downsample_ratio = 10, 0.01, 32
        scale_x_y = 1.2

        # static
        with self.static_graph():
            x = paddle.static.data(
                name='x', shape=[1, 30, 7, 7], dtype='float32')
            origin_shape = paddle.static.data(
                name='origin_shape', shape=[1, 2], dtype='int32')
            # A fresh anchor list is handed to every call.
            boxes, scores = ops.yolo_box(
                x,
                origin_shape,
                list(anchors),
                class_num,
                conf_thresh,
                downsample_ratio,
                scale_x_y=scale_x_y)
            feed = {'x': np_x, 'origin_shape': np_origin_shape}
            boxes_np, scores_np = self.get_static_graph_result(
                feed=feed, fetch_list=[boxes, scores], with_lod=False)

        # dygraph
        with self.dynamic_graph():
            x_dy = fluid.layers.assign(np_x)
            origin_shape_dy = fluid.layers.assign(np_origin_shape)
            boxes_dy, scores_dy = ops.yolo_box(
                x_dy,
                origin_shape_dy,
                list(anchors),
                class_num,
                conf_thresh,
                downsample_ratio,
                scale_x_y=scale_x_y)
            boxes_dy_np = boxes_dy.numpy()
            scores_dy_np = scores_dy.numpy()

        self.assertTrue(np.array_equal(boxes_np, boxes_dy_np))
        self.assertTrue(np.array_equal(scores_np, scores_dy_np))

    def test_yolo_box_error(self):
        """A float ``class_num`` (10.123) must raise ``TypeError``."""
        with self.static_graph():
            # x shape [N C H W], C=K * (5 + class_num), class_num=10, K=2
            x = paddle.static.data(
                name='x', shape=[1, 30, 7, 7], dtype='float32')
            origin_shape = paddle.static.data(
                name='origin_shape', shape=[1, 2], dtype='int32')

            positional_args = (x, origin_shape, [10, 13, 30, 13], 10.123, 0.01,
                               32)
            self.assertRaises(
                TypeError, ops.yolo_box, *positional_args, scale_x_y=1.2)

        paddle.disable_static()
class TestPriorBox(LayerTest): class TestPriorBox(LayerTest):
def test_prior_box(self): def test_prior_box(self):
input_np = np.random.rand(2, 10, 32, 32).astype('float32') input_np = np.random.rand(2, 10, 32, 32).astype('float32')
...@@ -509,8 +258,8 @@ class TestPriorBox(LayerTest): ...@@ -509,8 +258,8 @@ class TestPriorBox(LayerTest):
with_lod=False) with_lod=False)
with self.dynamic_graph(): with self.dynamic_graph():
inputs_dy = base.to_variable(input_np) inputs_dy = paddle.to_tensor(input_np)
image_dy = base.to_variable(image_np) image_dy = paddle.to_tensor(image_np)
box_dy, var_dy = ops.prior_box( box_dy, var_dy = ops.prior_box(
input=inputs_dy, input=inputs_dy,
...@@ -582,9 +331,9 @@ class TestMulticlassNms(LayerTest): ...@@ -582,9 +331,9 @@ class TestMulticlassNms(LayerTest):
nms_rois_num_np = np.array(nms_rois_num_np) nms_rois_num_np = np.array(nms_rois_num_np)
with self.dynamic_graph(): with self.dynamic_graph():
boxes_dy = base.to_variable(boxes_np) boxes_dy = paddle.to_tensor(boxes_np)
scores_dy = base.to_variable(scores_np) scores_dy = paddle.to_tensor(scores_np)
rois_num_dy = base.to_variable(rois_num_np) rois_num_dy = paddle.to_tensor(rois_num_np)
out_dy, index_dy, nms_rois_num_dy = ops.multiclass_nms( out_dy, index_dy, nms_rois_num_dy = ops.multiclass_nms(
bboxes=boxes_dy, bboxes=boxes_dy,
...@@ -666,8 +415,8 @@ class TestMatrixNMS(LayerTest): ...@@ -666,8 +415,8 @@ class TestMatrixNMS(LayerTest):
with_lod=True) with_lod=True)
with self.dynamic_graph(): with self.dynamic_graph():
boxes_dy = base.to_variable(boxes_np) boxes_dy = paddle.to_tensor(boxes_np)
scores_dy = base.to_variable(scores_np) scores_dy = paddle.to_tensor(scores_np)
out_dy, index_dy, _ = ops.matrix_nms( out_dy, index_dy, _ = ops.matrix_nms(
bboxes=boxes_dy, bboxes=boxes_dy,
...@@ -737,9 +486,9 @@ class TestBoxCoder(LayerTest): ...@@ -737,9 +486,9 @@ class TestBoxCoder(LayerTest):
# dygraph # dygraph
with self.dynamic_graph(): with self.dynamic_graph():
prior_box_dy = base.to_variable(prior_box_np) prior_box_dy = paddle.to_tensor(prior_box_np)
prior_box_var_dy = base.to_variable(prior_box_var_np) prior_box_var_dy = paddle.to_tensor(prior_box_var_np)
target_box_dy = base.to_variable(target_box_np) target_box_dy = paddle.to_tensor(target_box_np)
boxes_dy = ops.box_coder( boxes_dy = ops.box_coder(
prior_box=prior_box_dy, prior_box=prior_box_dy,
...@@ -808,11 +557,11 @@ class TestGenerateProposals(LayerTest): ...@@ -808,11 +557,11 @@ class TestGenerateProposals(LayerTest):
with_lod=True) with_lod=True)
with self.dynamic_graph(): with self.dynamic_graph():
scores_dy = base.to_variable(scores_np) scores_dy = paddle.to_tensor(scores_np)
bbox_deltas_dy = base.to_variable(bbox_deltas_np) bbox_deltas_dy = paddle.to_tensor(bbox_deltas_np)
im_shape_dy = base.to_variable(im_shape_np) im_shape_dy = paddle.to_tensor(im_shape_np)
anchors_dy = base.to_variable(anchors_np) anchors_dy = paddle.to_tensor(anchors_np)
variances_dy = base.to_variable(variances_np) variances_dy = paddle.to_tensor(variances_np)
rois, roi_probs, rois_num = ops.generate_proposals( rois, roi_probs, rois_num = ops.generate_proposals(
scores_dy, scores_dy,
bbox_deltas_dy, bbox_deltas_dy,
......
...@@ -17,7 +17,7 @@ from __future__ import division ...@@ -17,7 +17,7 @@ from __future__ import division
import unittest import unittest
import paddle import paddle
from paddle import fluid import paddle.nn.functional as F
# add python path of PaddleDetection to sys.path # add python path of PaddleDetection to sys.path
import os import os
import sys import sys
...@@ -27,19 +27,9 @@ if parent_path not in sys.path: ...@@ -27,19 +27,9 @@ if parent_path not in sys.path:
from ppdet.modeling.losses import YOLOv3Loss from ppdet.modeling.losses import YOLOv3Loss
from ppdet.data.transform.op_helper import jaccard_overlap from ppdet.data.transform.op_helper import jaccard_overlap
from ppdet.modeling.bbox_utils import iou_similarity
import numpy as np import numpy as np
np.random.seed(0)
def _split_ioup(output, an_num, num_classes):
    """
    Separate the predicted-IoU channels from the rest of the feature map.

    Channels ``[0, an_num)`` along axis 1 are passed through a sigmoid and
    returned as the predicted IoU. Channels
    ``[an_num, an_num * (num_classes + 6))`` are returned unchanged.
    """
    iou_logits = fluid.layers.slice(
        output, axes=[1], starts=[0], ends=[an_num])
    ioup = fluid.layers.sigmoid(iou_logits)

    remainder_end = an_num * (num_classes + 6)
    oriout = fluid.layers.slice(
        output, axes=[1], starts=[an_num], ends=[remainder_end])
    return ioup, oriout
def _split_output(output, an_num, num_classes): def _split_output(output, an_num, num_classes):
...@@ -47,31 +37,31 @@ def _split_output(output, an_num, num_classes): ...@@ -47,31 +37,31 @@ def _split_output(output, an_num, num_classes):
Split output feature map to x, y, w, h, objectness, classification Split output feature map to x, y, w, h, objectness, classification
along channel dimension along channel dimension
""" """
x = fluid.layers.strided_slice( x = paddle.strided_slice(
output, output,
axes=[1], axes=[1],
starts=[0], starts=[0],
ends=[output.shape[1]], ends=[output.shape[1]],
strides=[5 + num_classes]) strides=[5 + num_classes])
y = fluid.layers.strided_slice( y = paddle.strided_slice(
output, output,
axes=[1], axes=[1],
starts=[1], starts=[1],
ends=[output.shape[1]], ends=[output.shape[1]],
strides=[5 + num_classes]) strides=[5 + num_classes])
w = fluid.layers.strided_slice( w = paddle.strided_slice(
output, output,
axes=[1], axes=[1],
starts=[2], starts=[2],
ends=[output.shape[1]], ends=[output.shape[1]],
strides=[5 + num_classes]) strides=[5 + num_classes])
h = fluid.layers.strided_slice( h = paddle.strided_slice(
output, output,
axes=[1], axes=[1],
starts=[3], starts=[3],
ends=[output.shape[1]], ends=[output.shape[1]],
strides=[5 + num_classes]) strides=[5 + num_classes])
obj = fluid.layers.strided_slice( obj = paddle.strided_slice(
output, output,
axes=[1], axes=[1],
starts=[4], starts=[4],
...@@ -81,14 +71,12 @@ def _split_output(output, an_num, num_classes): ...@@ -81,14 +71,12 @@ def _split_output(output, an_num, num_classes):
stride = output.shape[1] // an_num stride = output.shape[1] // an_num
for m in range(an_num): for m in range(an_num):
clss.append( clss.append(
fluid.layers.slice( paddle.slice(
output, output,
axes=[1], axes=[1],
starts=[stride * m + 5], starts=[stride * m + 5],
ends=[stride * m + 5 + num_classes])) ends=[stride * m + 5 + num_classes]))
cls = fluid.layers.transpose( cls = paddle.transpose(paddle.stack(clss, axis=1), perm=[0, 1, 3, 4, 2])
fluid.layers.stack(
clss, axis=1), perm=[0, 1, 3, 4, 2])
return (x, y, w, h, obj, cls) return (x, y, w, h, obj, cls)
...@@ -104,7 +92,7 @@ def _split_target(target): ...@@ -104,7 +92,7 @@ def _split_target(target):
th = target[:, :, 3, :, :] th = target[:, :, 3, :, :]
tscale = target[:, :, 4, :, :] tscale = target[:, :, 4, :, :]
tobj = target[:, :, 5, :, :] tobj = target[:, :, 5, :, :]
tcls = fluid.layers.transpose(target[:, :, 6:, :, :], perm=[0, 1, 3, 4, 2]) tcls = paddle.transpose(target[:, :, 6:, :, :], perm=[0, 1, 3, 4, 2])
tcls.stop_gradient = True tcls.stop_gradient = True
return (tx, ty, tw, th, tscale, tobj, tcls) return (tx, ty, tw, th, tscale, tobj, tcls)
...@@ -115,9 +103,9 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes, ...@@ -115,9 +103,9 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes,
# objectness loss will be ignored, process as follows: # objectness loss will be ignored, process as follows:
# 1. get pred bbox, which is same with YOLOv3 infer mode, use yolo_box here # 1. get pred bbox, which is same with YOLOv3 infer mode, use yolo_box here
# NOTE: img_size is set as 1.0 to get noramlized pred bbox # NOTE: img_size is set as 1.0 to get noramlized pred bbox
bbox, prob = fluid.layers.yolo_box( bbox, prob = paddle.vision.ops.yolo_box(
x=output, x=output,
img_size=fluid.layers.ones( img_size=paddle.ones(
shape=[batch_size, 2], dtype="int32"), shape=[batch_size, 2], dtype="int32"),
anchors=anchors, anchors=anchors,
class_num=num_classes, class_num=num_classes,
...@@ -128,8 +116,8 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes, ...@@ -128,8 +116,8 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes,
# 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox # 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox
# and gt bbox in each sample # and gt bbox in each sample
if batch_size > 1: if batch_size > 1:
preds = fluid.layers.split(bbox, batch_size, dim=0) preds = paddle.split(bbox, batch_size, axis=0)
gts = fluid.layers.split(gt_box, batch_size, dim=0) gts = paddle.split(gt_box, batch_size, axis=0)
else: else:
preds = [bbox] preds = [bbox]
gts = [gt_box] gts = [gt_box]
...@@ -142,7 +130,7 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes, ...@@ -142,7 +130,7 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes,
y = box[:, 1] y = box[:, 1]
w = box[:, 2] w = box[:, 2]
h = box[:, 3] h = box[:, 3]
return fluid.layers.stack( return paddle.stack(
[ [
x - w / 2., x - w / 2.,
y - h / 2., y - h / 2.,
...@@ -150,28 +138,29 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes, ...@@ -150,28 +138,29 @@ def _calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors, num_classes,
y + h / 2., y + h / 2.,
], axis=1) ], axis=1)
pred = fluid.layers.squeeze(pred, axes=[0]) pred = paddle.squeeze(pred, axis=[0])
gt = box_xywh2xyxy(fluid.layers.squeeze(gt, axes=[0])) gt = box_xywh2xyxy(paddle.squeeze(gt, axis=[0]))
ious.append(fluid.layers.iou_similarity(pred, gt)) ious.append(iou_similarity(pred, gt))
iou = fluid.layers.stack(ious, axis=0) iou = paddle.stack(ious, axis=0)
# 3. Get iou_mask by IoU between gt bbox and prediction bbox, # 3. Get iou_mask by IoU between gt bbox and prediction bbox,
# Get obj_mask by tobj(holds gt_score), calculate objectness loss # Get obj_mask by tobj(holds gt_score), calculate objectness loss
max_iou = fluid.layers.reduce_max(iou, dim=-1) max_iou = paddle.max(iou, axis=-1)
iou_mask = fluid.layers.cast(max_iou <= ignore_thresh, dtype="float32") iou_mask = paddle.cast(max_iou <= ignore_thresh, dtype="float32")
output_shape = fluid.layers.shape(output) output_shape = paddle.shape(output)
an_num = len(anchors) // 2 an_num = len(anchors) // 2
iou_mask = fluid.layers.reshape(iou_mask, (-1, an_num, output_shape[2], iou_mask = paddle.reshape(iou_mask, (-1, an_num, output_shape[2],
output_shape[3])) output_shape[3]))
iou_mask.stop_gradient = True iou_mask.stop_gradient = True
# NOTE: tobj holds gt_score, obj_mask holds object existence mask # NOTE: tobj holds gt_score, obj_mask holds object existence mask
obj_mask = fluid.layers.cast(tobj > 0., dtype="float32") obj_mask = paddle.cast(tobj > 0., dtype="float32")
obj_mask.stop_gradient = True obj_mask.stop_gradient = True
# For positive objectness grids, objectness loss should be calculated # For positive objectness grids, objectness loss should be calculated
# For negative objectness grids, objectness loss is calculated only iou_mask == 1.0 # For negative objectness grids, objectness loss is calculated only iou_mask == 1.0
loss_obj = fluid.layers.sigmoid_cross_entropy_with_logits(obj, obj_mask) obj_sigmoid = F.sigmoid(obj)
loss_obj_pos = fluid.layers.reduce_sum(loss_obj * tobj, dim=[1, 2, 3]) loss_obj = F.binary_cross_entropy(obj_sigmoid, obj_mask, reduction='none')
loss_obj_neg = fluid.layers.reduce_sum( loss_obj_pos = paddle.sum(loss_obj * tobj, axis=[1, 2, 3])
loss_obj * (1.0 - obj_mask) * iou_mask, dim=[1, 2, 3]) loss_obj_neg = paddle.sum(loss_obj * (1.0 - obj_mask) * iou_mask,
axis=[1, 2, 3])
return loss_obj_pos, loss_obj_neg return loss_obj_pos, loss_obj_neg
...@@ -194,45 +183,48 @@ def fine_grained_loss(output, ...@@ -194,45 +183,48 @@ def fine_grained_loss(output,
scale_x_y = scale_x_y scale_x_y = scale_x_y
if (abs(scale_x_y - 1.0) < eps): if (abs(scale_x_y - 1.0) < eps):
loss_x = fluid.layers.sigmoid_cross_entropy_with_logits( x = F.sigmoid(x)
x, tx) * tscale_tobj y = F.sigmoid(y)
loss_x = fluid.layers.reduce_sum(loss_x, dim=[1, 2, 3]) loss_x = F.binary_cross_entropy(x, tx, reduction='none') * tscale_tobj
loss_y = fluid.layers.sigmoid_cross_entropy_with_logits( loss_x = paddle.sum(loss_x, axis=[1, 2, 3])
y, ty) * tscale_tobj loss_y = F.binary_cross_entropy(y, ty, reduction='none') * tscale_tobj
loss_y = fluid.layers.reduce_sum(loss_y, dim=[1, 2, 3]) loss_y = paddle.sum(loss_y, axis=[1, 2, 3])
else: else:
dx = scale_x_y * fluid.layers.sigmoid(x) - 0.5 * (scale_x_y - 1.0) dx = scale_x_y * F.sigmoid(x) - 0.5 * (scale_x_y - 1.0)
dy = scale_x_y * fluid.layers.sigmoid(y) - 0.5 * (scale_x_y - 1.0) dy = scale_x_y * F.sigmoid(y) - 0.5 * (scale_x_y - 1.0)
loss_x = fluid.layers.abs(dx - tx) * tscale_tobj loss_x = paddle.abs(dx - tx) * tscale_tobj
loss_x = fluid.layers.reduce_sum(loss_x, dim=[1, 2, 3]) loss_x = paddle.sum(loss_x, axis=[1, 2, 3])
loss_y = fluid.layers.abs(dy - ty) * tscale_tobj loss_y = paddle.abs(dy - ty) * tscale_tobj
loss_y = fluid.layers.reduce_sum(loss_y, dim=[1, 2, 3]) loss_y = paddle.sum(loss_y, axis=[1, 2, 3])
# NOTE: we refined loss function of (w, h) as L1Loss # NOTE: we refined loss function of (w, h) as L1Loss
loss_w = fluid.layers.abs(w - tw) * tscale_tobj loss_w = paddle.abs(w - tw) * tscale_tobj
loss_w = fluid.layers.reduce_sum(loss_w, dim=[1, 2, 3]) loss_w = paddle.sum(loss_w, axis=[1, 2, 3])
loss_h = fluid.layers.abs(h - th) * tscale_tobj loss_h = paddle.abs(h - th) * tscale_tobj
loss_h = fluid.layers.reduce_sum(loss_h, dim=[1, 2, 3]) loss_h = paddle.sum(loss_h, axis=[1, 2, 3])
loss_obj_pos, loss_obj_neg = _calc_obj_loss( loss_obj_pos, loss_obj_neg = _calc_obj_loss(
output, obj, tobj, gt_box, batch_size, anchors, num_classes, downsample, output, obj, tobj, gt_box, batch_size, anchors, num_classes, downsample,
ignore_thresh, scale_x_y) ignore_thresh, scale_x_y)
loss_cls = fluid.layers.sigmoid_cross_entropy_with_logits(cls, tcls) cls = F.sigmoid(cls)
loss_cls = fluid.layers.elementwise_mul(loss_cls, tobj, axis=0) loss_cls = F.binary_cross_entropy(cls, tcls, reduction='none')
loss_cls = fluid.layers.reduce_sum(loss_cls, dim=[1, 2, 3, 4]) tobj = paddle.unsqueeze(tobj, axis=-1)
loss_cls = paddle.multiply(loss_cls, tobj)
loss_cls = paddle.sum(loss_cls, axis=[1, 2, 3, 4])
loss_xys = fluid.layers.reduce_mean(loss_x + loss_y) loss_xys = paddle.mean(loss_x + loss_y)
loss_whs = fluid.layers.reduce_mean(loss_w + loss_h) loss_whs = paddle.mean(loss_w + loss_h)
loss_objs = fluid.layers.reduce_mean(loss_obj_pos + loss_obj_neg) loss_objs = paddle.mean(loss_obj_pos + loss_obj_neg)
loss_clss = fluid.layers.reduce_mean(loss_cls) loss_clss = paddle.mean(loss_cls)
losses_all = { losses_all = {
"loss_xy": fluid.layers.sum(loss_xys), "loss_xy": paddle.sum(loss_xys),
"loss_wh": fluid.layers.sum(loss_whs), "loss_wh": paddle.sum(loss_whs),
"loss_loc": fluid.layers.sum(loss_xys) + fluid.layers.sum(loss_whs), "loss_loc": paddle.sum(loss_xys) + paddle.sum(loss_whs),
"loss_obj": fluid.layers.sum(loss_objs), "loss_obj": paddle.sum(loss_objs),
"loss_cls": fluid.layers.sum(loss_clss), "loss_cls": paddle.sum(loss_clss),
} }
return losses_all, x, y, tx, ty return losses_all, x, y, tx, ty
......
...@@ -20,7 +20,7 @@ import sys ...@@ -20,7 +20,7 @@ import sys
import paddle import paddle
import six import six
import paddle.version as fluid_version import paddle.version as paddle_version
from .logger import setup_logger from .logger import setup_logger
logger = setup_logger(__name__) logger = setup_logger(__name__)
...@@ -97,8 +97,8 @@ def check_version(version='2.0'): ...@@ -97,8 +97,8 @@ def check_version(version='2.0'):
"Please make sure the version is good with your code.".format(version) "Please make sure the version is good with your code.".format(version)
version_installed = [ version_installed = [
fluid_version.major, fluid_version.minor, fluid_version.patch, paddle_version.major, paddle_version.minor, paddle_version.patch,
fluid_version.rc paddle_version.rc
] ]
if version_installed == ['0', '0', '0', '0']: if version_installed == ['0', '0', '0', '0']:
return return
......
...@@ -26,17 +26,31 @@ extern "C" { ...@@ -26,17 +26,31 @@ extern "C" {
*/ */
JNIEXPORT jlong JNICALL JNIEXPORT jlong JNICALL
Java_com_baidu_paddledetection_detection_Native_nativeInit( Java_com_baidu_paddledetection_detection_Native_nativeInit(
JNIEnv *env, jclass thiz, jstring jModelDir, jstring jLabelPath, JNIEnv *env,
jint cpuThreadNum, jstring jCPUPowerMode, jint inputWidth, jint inputHeight, jclass thiz,
jfloatArray jInputMean, jfloatArray jInputStd, jfloat scoreThreshold) { jstring jModelDir,
jstring jLabelPath,
jint cpuThreadNum,
jstring jCPUPowerMode,
jint inputWidth,
jint inputHeight,
jfloatArray jInputMean,
jfloatArray jInputStd,
jfloat scoreThreshold) {
std::string modelDir = jstring_to_cpp_string(env, jModelDir); std::string modelDir = jstring_to_cpp_string(env, jModelDir);
std::string labelPath = jstring_to_cpp_string(env, jLabelPath); std::string labelPath = jstring_to_cpp_string(env, jLabelPath);
std::string cpuPowerMode = jstring_to_cpp_string(env, jCPUPowerMode); std::string cpuPowerMode = jstring_to_cpp_string(env, jCPUPowerMode);
std::vector<float> inputMean = jfloatarray_to_float_vector(env, jInputMean); std::vector<float> inputMean = jfloatarray_to_float_vector(env, jInputMean);
std::vector<float> inputStd = jfloatarray_to_float_vector(env, jInputStd); std::vector<float> inputStd = jfloatarray_to_float_vector(env, jInputStd);
return reinterpret_cast<jlong>( return reinterpret_cast<jlong>(new Pipeline(modelDir,
new Pipeline(modelDir, labelPath, cpuThreadNum, cpuPowerMode, inputWidth, labelPath,
inputHeight, inputMean, inputStd, scoreThreshold)); cpuThreadNum,
cpuPowerMode,
inputWidth,
inputHeight,
inputMean,
inputStd,
scoreThreshold));
} }
/* /*
...@@ -45,8 +59,9 @@ Java_com_baidu_paddledetection_detection_Native_nativeInit( ...@@ -45,8 +59,9 @@ Java_com_baidu_paddledetection_detection_Native_nativeInit(
* Signature: (J)Z * Signature: (J)Z
*/ */
JNIEXPORT jboolean JNICALL JNIEXPORT jboolean JNICALL
Java_com_baidu_paddledetection_detection_Native_nativeRelease( Java_com_baidu_paddledetection_detection_Native_nativeRelease(JNIEnv *env,
JNIEnv *env, jclass thiz, jlong ctx) { jclass thiz,
jlong ctx) {
if (ctx == 0) { if (ctx == 0) {
return JNI_FALSE; return JNI_FALSE;
} }
...@@ -62,15 +77,21 @@ Java_com_baidu_paddledetection_detection_Native_nativeRelease( ...@@ -62,15 +77,21 @@ Java_com_baidu_paddledetection_detection_Native_nativeRelease(
*/ */
JNIEXPORT jboolean JNICALL JNIEXPORT jboolean JNICALL
Java_com_baidu_paddledetection_detection_Native_nativeProcess( Java_com_baidu_paddledetection_detection_Native_nativeProcess(
JNIEnv *env, jclass thiz, jlong ctx, jint inTextureId, jint outTextureId, JNIEnv *env,
jint textureWidth, jint textureHeight, jstring jsavedImagePath) { jclass thiz,
jlong ctx,
jint inTextureId,
jint outTextureId,
jint textureWidth,
jint textureHeight,
jstring jsavedImagePath) {
if (ctx == 0) { if (ctx == 0) {
return JNI_FALSE; return JNI_FALSE;
} }
std::string savedImagePath = jstring_to_cpp_string(env, jsavedImagePath); std::string savedImagePath = jstring_to_cpp_string(env, jsavedImagePath);
Pipeline *pipeline = reinterpret_cast<Pipeline *>(ctx); Pipeline *pipeline = reinterpret_cast<Pipeline *>(ctx);
return pipeline->Process(inTextureId, outTextureId, textureWidth, return pipeline->Process(
textureHeight, savedImagePath); inTextureId, outTextureId, textureWidth, textureHeight, savedImagePath);
} }
#ifdef __cplusplus #ifdef __cplusplus
......
...@@ -50,8 +50,8 @@ inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) { ...@@ -50,8 +50,8 @@ inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) {
env->GetMethodID(strClass, "<init>", "([BLjava/lang/String;)V"); env->GetMethodID(strClass, "<init>", "([BLjava/lang/String;)V");
jbyteArray bytes = env->NewByteArray(strlen(data)); jbyteArray bytes = env->NewByteArray(strlen(data));
env->SetByteArrayRegion(bytes, 0, strlen(data), env->SetByteArrayRegion(
reinterpret_cast<const jbyte *>(data)); bytes, 0, strlen(data), reinterpret_cast<const jbyte *>(data));
jstring encoding = env->NewStringUTF("UTF-8"); jstring encoding = env->NewStringUTF("UTF-8");
jstring res = (jstring)( jstring res = (jstring)(
...@@ -64,21 +64,24 @@ inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) { ...@@ -64,21 +64,24 @@ inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) {
return res; return res;
} }
inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env, const float *buf, inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env,
const float *buf,
int64_t len) { int64_t len) {
jfloatArray result = env->NewFloatArray(len); jfloatArray result = env->NewFloatArray(len);
env->SetFloatArrayRegion(result, 0, len, buf); env->SetFloatArrayRegion(result, 0, len, buf);
return result; return result;
} }
inline jintArray cpp_array_to_jintarray(JNIEnv *env, const int *buf, inline jintArray cpp_array_to_jintarray(JNIEnv *env,
const int *buf,
int64_t len) { int64_t len) {
jintArray result = env->NewIntArray(len); jintArray result = env->NewIntArray(len);
env->SetIntArrayRegion(result, 0, len, buf); env->SetIntArrayRegion(result, 0, len, buf);
return result; return result;
} }
inline jbyteArray cpp_array_to_jbytearray(JNIEnv *env, const int8_t *buf, inline jbyteArray cpp_array_to_jbytearray(JNIEnv *env,
const int8_t *buf,
int64_t len) { int64_t len) {
jbyteArray result = env->NewByteArray(len); jbyteArray result = env->NewByteArray(len);
env->SetByteArrayRegion(result, 0, len, buf); env->SetByteArrayRegion(result, 0, len, buf);
......
...@@ -14,13 +14,20 @@ ...@@ -14,13 +14,20 @@
#include "Pipeline.h" #include "Pipeline.h"
Detector::Detector(const std::string &modelDir, const std::string &labelPath, Detector::Detector(const std::string &modelDir,
const int cpuThreadNum, const std::string &cpuPowerMode, const std::string &labelPath,
int inputWidth, int inputHeight, const int cpuThreadNum,
const std::string &cpuPowerMode,
int inputWidth,
int inputHeight,
const std::vector<float> &inputMean, const std::vector<float> &inputMean,
const std::vector<float> &inputStd, float scoreThreshold) const std::vector<float> &inputStd,
: inputWidth_(inputWidth), inputHeight_(inputHeight), inputMean_(inputMean), float scoreThreshold)
inputStd_(inputStd), scoreThreshold_(scoreThreshold) { : inputWidth_(inputWidth),
inputHeight_(inputHeight),
inputMean_(inputMean),
inputStd_(inputStd),
scoreThreshold_(scoreThreshold) {
paddle::lite_api::MobileConfig config; paddle::lite_api::MobileConfig config;
config.set_model_from_file(modelDir + "/model.nb"); config.set_model_from_file(modelDir + "/model.nb");
config.set_threads(cpuThreadNum); config.set_threads(cpuThreadNum);
...@@ -71,13 +78,16 @@ void Detector::Preprocess(const cv::Mat &rgbaImage) { ...@@ -71,13 +78,16 @@ void Detector::Preprocess(const cv::Mat &rgbaImage) {
inputTensor->Resize(inputShape); inputTensor->Resize(inputShape);
auto inputData = inputTensor->mutable_data<float>(); auto inputData = inputTensor->mutable_data<float>();
cv::Mat resizedRGBAImage; cv::Mat resizedRGBAImage;
cv::resize(rgbaImage, resizedRGBAImage, cv::resize(
cv::Size(inputShape[3], inputShape[2])); rgbaImage, resizedRGBAImage, cv::Size(inputShape[3], inputShape[2]));
cv::Mat resizedRGBImage; cv::Mat resizedRGBImage;
cv::cvtColor(resizedRGBAImage, resizedRGBImage, cv::COLOR_BGRA2RGB); cv::cvtColor(resizedRGBAImage, resizedRGBImage, cv::COLOR_BGRA2RGB);
resizedRGBImage.convertTo(resizedRGBImage, CV_32FC3, 1.0 / 255.0f); resizedRGBImage.convertTo(resizedRGBImage, CV_32FC3, 1.0 / 255.0f);
NHWC3ToNC3HW(reinterpret_cast<const float *>(resizedRGBImage.data), inputData, NHWC3ToNC3HW(reinterpret_cast<const float *>(resizedRGBImage.data),
inputMean_.data(), inputStd_.data(), inputShape[3], inputData,
inputMean_.data(),
inputStd_.data(),
inputShape[3],
inputShape[2]); inputShape[2]);
// Set the size of input image // Set the size of input image
auto sizeTensor = predictor_->GetInput(1); auto sizeTensor = predictor_->GetInput(1);
...@@ -97,8 +107,7 @@ void Detector::Postprocess(std::vector<RESULT> *results) { ...@@ -97,8 +107,7 @@ void Detector::Postprocess(std::vector<RESULT> *results) {
auto class_id = static_cast<int>(round(outputData[i])); auto class_id = static_cast<int>(round(outputData[i]));
// Confidence score // Confidence score
auto score = outputData[i + 1]; auto score = outputData[i + 1];
if (score < scoreThreshold_) if (score < scoreThreshold_) continue;
continue;
RESULT object; RESULT object;
object.class_name = class_id >= 0 && class_id < labelList_.size() object.class_name = class_id >= 0 && class_id < labelList_.size()
? labelList_[class_id] ? labelList_[class_id]
...@@ -115,8 +124,10 @@ void Detector::Postprocess(std::vector<RESULT> *results) { ...@@ -115,8 +124,10 @@ void Detector::Postprocess(std::vector<RESULT> *results) {
} }
} }
void Detector::Predict(const cv::Mat &rgbaImage, std::vector<RESULT> *results, void Detector::Predict(const cv::Mat &rgbaImage,
double *preprocessTime, double *predictTime, std::vector<RESULT> *results,
double *preprocessTime,
double *predictTime,
double *postprocessTime) { double *postprocessTime) {
auto t = GetCurrentTime(); auto t = GetCurrentTime();
...@@ -136,13 +147,23 @@ void Detector::Predict(const cv::Mat &rgbaImage, std::vector<RESULT> *results, ...@@ -136,13 +147,23 @@ void Detector::Predict(const cv::Mat &rgbaImage, std::vector<RESULT> *results,
LOGD("Detector postprocess costs %f ms", *postprocessTime); LOGD("Detector postprocess costs %f ms", *postprocessTime);
} }
Pipeline::Pipeline(const std::string &modelDir, const std::string &labelPath, Pipeline::Pipeline(const std::string &modelDir,
const int cpuThreadNum, const std::string &cpuPowerMode, const std::string &labelPath,
int inputWidth, int inputHeight, const int cpuThreadNum,
const std::string &cpuPowerMode,
int inputWidth,
int inputHeight,
const std::vector<float> &inputMean, const std::vector<float> &inputMean,
const std::vector<float> &inputStd, float scoreThreshold) { const std::vector<float> &inputStd,
detector_.reset(new Detector(modelDir, labelPath, cpuThreadNum, cpuPowerMode, float scoreThreshold) {
inputWidth, inputHeight, inputMean, inputStd, detector_.reset(new Detector(modelDir,
labelPath,
cpuThreadNum,
cpuPowerMode,
inputWidth,
inputHeight,
inputMean,
inputStd,
scoreThreshold)); scoreThreshold));
} }
...@@ -169,15 +190,24 @@ void Pipeline::VisualizeResults(const std::vector<RESULT> &results, ...@@ -169,15 +190,24 @@ void Pipeline::VisualizeResults(const std::vector<RESULT> &results,
cv::Point2d(boundingBox.x, cv::Point2d(boundingBox.x,
boundingBox.y - round(textSize.height * 1.25f)), boundingBox.y - round(textSize.height * 1.25f)),
cv::Point2d(boundingBox.x + boundingBox.width, boundingBox.y), cv::Point2d(boundingBox.x + boundingBox.width, boundingBox.y),
object.fill_color, -1); object.fill_color,
cv::putText(*rgbaImage, text, cv::Point2d(boundingBox.x, boundingBox.y), -1);
fontFace, fontScale, cv::Scalar(255, 255, 255), fontThickness); cv::putText(*rgbaImage,
text,
cv::Point2d(boundingBox.x, boundingBox.y),
fontFace,
fontScale,
cv::Scalar(255, 255, 255),
fontThickness);
} }
} }
void Pipeline::VisualizeStatus(double readGLFBOTime, double writeGLTextureTime, void Pipeline::VisualizeStatus(double readGLFBOTime,
double preprocessTime, double predictTime, double writeGLTextureTime,
double postprocessTime, cv::Mat *rgbaImage) { double preprocessTime,
double predictTime,
double postprocessTime,
cv::Mat *rgbaImage) {
char text[255]; char text[255];
cv::Scalar fontColor = cv::Scalar(255, 255, 255); cv::Scalar fontColor = cv::Scalar(255, 255, 255);
int fontFace = cv::FONT_HERSHEY_PLAIN; int fontFace = cv::FONT_HERSHEY_PLAIN;
...@@ -188,47 +218,54 @@ void Pipeline::VisualizeStatus(double readGLFBOTime, double writeGLTextureTime, ...@@ -188,47 +218,54 @@ void Pipeline::VisualizeStatus(double readGLFBOTime, double writeGLTextureTime,
cv::getTextSize(text, fontFace, fontScale, fontThickness, nullptr); cv::getTextSize(text, fontFace, fontScale, fontThickness, nullptr);
textSize.height *= 1.25f; textSize.height *= 1.25f;
cv::Point2d offset(10, textSize.height + 15); cv::Point2d offset(10, textSize.height + 15);
cv::putText(*rgbaImage, text, offset, fontFace, fontScale, fontColor, cv::putText(
fontThickness); *rgbaImage, text, offset, fontFace, fontScale, fontColor, fontThickness);
sprintf(text, "Write GLTexture time: %.1f ms", writeGLTextureTime); sprintf(text, "Write GLTexture time: %.1f ms", writeGLTextureTime);
offset.y += textSize.height; offset.y += textSize.height;
cv::putText(*rgbaImage, text, offset, fontFace, fontScale, fontColor, cv::putText(
fontThickness); *rgbaImage, text, offset, fontFace, fontScale, fontColor, fontThickness);
sprintf(text, "Preprocess time: %.1f ms", preprocessTime); sprintf(text, "Preprocess time: %.1f ms", preprocessTime);
offset.y += textSize.height; offset.y += textSize.height;
cv::putText(*rgbaImage, text, offset, fontFace, fontScale, fontColor, cv::putText(
fontThickness); *rgbaImage, text, offset, fontFace, fontScale, fontColor, fontThickness);
sprintf(text, "Predict time: %.1f ms", predictTime); sprintf(text, "Predict time: %.1f ms", predictTime);
offset.y += textSize.height; offset.y += textSize.height;
cv::putText(*rgbaImage, text, offset, fontFace, fontScale, fontColor, cv::putText(
fontThickness); *rgbaImage, text, offset, fontFace, fontScale, fontColor, fontThickness);
sprintf(text, "Postprocess time: %.1f ms", postprocessTime); sprintf(text, "Postprocess time: %.1f ms", postprocessTime);
offset.y += textSize.height; offset.y += textSize.height;
cv::putText(*rgbaImage, text, offset, fontFace, fontScale, fontColor, cv::putText(
fontThickness); *rgbaImage, text, offset, fontFace, fontScale, fontColor, fontThickness);
} }
bool Pipeline::Process(int inTexureId, int outTextureId, int textureWidth, bool Pipeline::Process(int inTexureId,
int textureHeight, std::string savedImagePath) { int outTextureId,
int textureWidth,
int textureHeight,
std::string savedImagePath) {
static double readGLFBOTime = 0, writeGLTextureTime = 0; static double readGLFBOTime = 0, writeGLTextureTime = 0;
double preprocessTime = 0, predictTime = 0, postprocessTime = 0; double preprocessTime = 0, predictTime = 0, postprocessTime = 0;
// Read pixels from FBO texture to CV image // Read pixels from FBO texture to CV image
cv::Mat rgbaImage; cv::Mat rgbaImage;
CreateRGBAImageFromGLFBOTexture(textureWidth, textureHeight, &rgbaImage, CreateRGBAImageFromGLFBOTexture(
&readGLFBOTime); textureWidth, textureHeight, &rgbaImage, &readGLFBOTime);
// Feed the image, run inference and parse the results // Feed the image, run inference and parse the results
std::vector<RESULT> results; std::vector<RESULT> results;
detector_->Predict(rgbaImage, &results, &preprocessTime, &predictTime, detector_->Predict(
&postprocessTime); rgbaImage, &results, &preprocessTime, &predictTime, &postprocessTime);
// Visualize the objects to the origin image // Visualize the objects to the origin image
VisualizeResults(results, &rgbaImage); VisualizeResults(results, &rgbaImage);
// Visualize the status(performance data) to the origin image // Visualize the status(performance data) to the origin image
VisualizeStatus(readGLFBOTime, writeGLTextureTime, preprocessTime, VisualizeStatus(readGLFBOTime,
predictTime, postprocessTime, &rgbaImage); writeGLTextureTime,
preprocessTime,
predictTime,
postprocessTime,
&rgbaImage);
// Dump modified image if savedImagePath is set // Dump modified image if savedImagePath is set
if (!savedImagePath.empty()) { if (!savedImagePath.empty()) {
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
#pragma once #pragma once
#include "Utils.h"
#include "paddle_api.h"
#include <EGL/egl.h> #include <EGL/egl.h>
#include <GLES2/gl2.h> #include <GLES2/gl2.h>
#include <opencv2/core.hpp> #include <opencv2/core.hpp>
...@@ -24,6 +22,8 @@ ...@@ -24,6 +22,8 @@
#include <opencv2/imgproc.hpp> #include <opencv2/imgproc.hpp>
#include <string> #include <string>
#include <vector> #include <vector>
#include "Utils.h"
#include "paddle_api.h"
struct RESULT { struct RESULT {
std::string class_name; std::string class_name;
...@@ -36,24 +36,30 @@ struct RESULT { ...@@ -36,24 +36,30 @@ struct RESULT {
}; };
class Detector { class Detector {
public: public:
explicit Detector(const std::string &modelDir, const std::string &labelPath, explicit Detector(const std::string &modelDir,
const int cpuThreadNum, const std::string &cpuPowerMode, const std::string &labelPath,
int inputWidth, int inputHeight, const int cpuThreadNum,
const std::string &cpuPowerMode,
int inputWidth,
int inputHeight,
const std::vector<float> &inputMean, const std::vector<float> &inputMean,
const std::vector<float> &inputStd, float scoreThreshold); const std::vector<float> &inputStd,
float scoreThreshold);
void Predict(const cv::Mat &rgbImage, std::vector<RESULT> *results, void Predict(const cv::Mat &rgbImage,
double *preprocessTime, double *predictTime, std::vector<RESULT> *results,
double *preprocessTime,
double *predictTime,
double *postprocessTime); double *postprocessTime);
private: private:
std::vector<std::string> LoadLabelList(const std::string &path); std::vector<std::string> LoadLabelList(const std::string &path);
std::vector<cv::Scalar> GenerateColorMap(int numOfClasses); std::vector<cv::Scalar> GenerateColorMap(int numOfClasses);
void Preprocess(const cv::Mat &rgbaImage); void Preprocess(const cv::Mat &rgbaImage);
void Postprocess(std::vector<RESULT> *results); void Postprocess(std::vector<RESULT> *results);
private: private:
int inputWidth_; int inputWidth_;
int inputHeight_; int inputHeight_;
std::vector<float> inputMean_; std::vector<float> inputMean_;
...@@ -65,36 +71,58 @@ private: ...@@ -65,36 +71,58 @@ private:
}; };
class Pipeline { class Pipeline {
public: public:
Pipeline(const std::string &modelDir, const std::string &labelPath, Pipeline(const std::string &modelDir,
const int cpuThreadNum, const std::string &cpuPowerMode, const std::string &labelPath,
int inputWidth, int inputHeight, const std::vector<float> &inputMean, const int cpuThreadNum,
const std::vector<float> &inputStd, float scoreThreshold); const std::string &cpuPowerMode,
int inputWidth,
int inputHeight,
const std::vector<float> &inputMean,
const std::vector<float> &inputStd,
float scoreThreshold);
bool Process(int inTextureId, int outTextureId, int textureWidth, bool Process(int inTextureId,
int textureHeight, std::string savedImagePath); int outTextureId,
int textureWidth,
int textureHeight,
std::string savedImagePath);
private: private:
// Read pixels from FBO texture to CV image // Read pixels from FBO texture to CV image
void CreateRGBAImageFromGLFBOTexture(int textureWidth, int textureHeight, void CreateRGBAImageFromGLFBOTexture(int textureWidth,
int textureHeight,
cv::Mat *rgbaImage, cv::Mat *rgbaImage,
double *readGLFBOTime) { double *readGLFBOTime) {
auto t = GetCurrentTime(); auto t = GetCurrentTime();
rgbaImage->create(textureHeight, textureWidth, CV_8UC4); rgbaImage->create(textureHeight, textureWidth, CV_8UC4);
glReadPixels(0, 0, textureWidth, textureHeight, GL_RGBA, GL_UNSIGNED_BYTE, glReadPixels(0,
0,
textureWidth,
textureHeight,
GL_RGBA,
GL_UNSIGNED_BYTE,
rgbaImage->data); rgbaImage->data);
*readGLFBOTime = GetElapsedTime(t); *readGLFBOTime = GetElapsedTime(t);
LOGD("Read from FBO texture costs %f ms", *readGLFBOTime); LOGD("Read from FBO texture costs %f ms", *readGLFBOTime);
} }
// Write back to texture2D // Write back to texture2D
void WriteRGBAImageBackToGLTexture(const cv::Mat &rgbaImage, int textureId, void WriteRGBAImageBackToGLTexture(const cv::Mat &rgbaImage,
int textureId,
double *writeGLTextureTime) { double *writeGLTextureTime) {
auto t = GetCurrentTime(); auto t = GetCurrentTime();
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, textureId); glBindTexture(GL_TEXTURE_2D, textureId);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, rgbaImage.cols, rgbaImage.rows, glTexSubImage2D(GL_TEXTURE_2D,
GL_RGBA, GL_UNSIGNED_BYTE, rgbaImage.data); 0,
0,
0,
rgbaImage.cols,
rgbaImage.rows,
GL_RGBA,
GL_UNSIGNED_BYTE,
rgbaImage.data);
*writeGLTextureTime = GetElapsedTime(t); *writeGLTextureTime = GetElapsedTime(t);
LOGD("Write back to texture2D costs %f ms", *writeGLTextureTime); LOGD("Write back to texture2D costs %f ms", *writeGLTextureTime);
} }
...@@ -103,10 +131,13 @@ private: ...@@ -103,10 +131,13 @@ private:
void VisualizeResults(const std::vector<RESULT> &results, cv::Mat *rgbaImage); void VisualizeResults(const std::vector<RESULT> &results, cv::Mat *rgbaImage);
// Visualize the status(performance data) to origin image // Visualize the status(performance data) to origin image
void VisualizeStatus(double readGLFBOTime, double writeGLTextureTime, void VisualizeStatus(double readGLFBOTime,
double preprocessTime, double predictTime, double writeGLTextureTime,
double postprocessTime, cv::Mat *rgbaImage); double preprocessTime,
double predictTime,
double postprocessTime,
cv::Mat *rgbaImage);
private: private:
std::shared_ptr<Detector> detector_; std::shared_ptr<Detector> detector_;
}; };
...@@ -17,13 +17,16 @@ ...@@ -17,13 +17,16 @@
int64_t ShapeProduction(const std::vector<int64_t> &shape) { int64_t ShapeProduction(const std::vector<int64_t> &shape) {
int64_t res = 1; int64_t res = 1;
for (auto i : shape) for (auto i : shape) res *= i;
res *= i;
return res; return res;
} }
void NHWC3ToNC3HW(const float *src, float *dst, const float *mean, void NHWC3ToNC3HW(const float *src,
const float *std, int width, int height) { float *dst,
const float *mean,
const float *std,
int width,
int height) {
int size = height * width; int size = height * width;
float32x4_t vmean0 = vdupq_n_f32(mean ? mean[0] : 0.0f); float32x4_t vmean0 = vdupq_n_f32(mean ? mean[0] : 0.0f);
float32x4_t vmean1 = vdupq_n_f32(mean ? mean[1] : 0.0f); float32x4_t vmean1 = vdupq_n_f32(mean ? mean[1] : 0.0f);
...@@ -58,8 +61,12 @@ void NHWC3ToNC3HW(const float *src, float *dst, const float *mean, ...@@ -58,8 +61,12 @@ void NHWC3ToNC3HW(const float *src, float *dst, const float *mean,
} }
} }
void NHWC1ToNC1HW(const float *src, float *dst, const float *mean, void NHWC1ToNC1HW(const float *src,
const float *std, int width, int height) { float *dst,
const float *mean,
const float *std,
int width,
int height) {
int size = height * width; int size = height * width;
float32x4_t vmean = vdupq_n_f32(mean ? mean[0] : 0.0f); float32x4_t vmean = vdupq_n_f32(mean ? mean[0] : 0.0f);
float32x4_t vscale = vdupq_n_f32(std ? (1.0f / std[0]) : 1.0f); float32x4_t vscale = vdupq_n_f32(std ? (1.0f / std[0]) : 1.0f);
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
#pragma once #pragma once
#include "paddle_api.h"
#include <android/log.h> #include <android/log.h>
#include <fstream> #include <fstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle_api.h"
#define TAG "JNI" #define TAG "JNI"
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__) #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
...@@ -85,8 +85,16 @@ inline paddle::lite_api::PowerMode ParsePowerMode(std::string mode) { ...@@ -85,8 +85,16 @@ inline paddle::lite_api::PowerMode ParsePowerMode(std::string mode) {
return paddle::lite_api::LITE_POWER_NO_BIND; return paddle::lite_api::LITE_POWER_NO_BIND;
} }
void NHWC3ToNC3HW(const float *src, float *dst, const float *mean, void NHWC3ToNC3HW(const float *src,
const float *std, int width, int height); float *dst,
const float *mean,
const float *std,
int width,
int height);
void NHWC1ToNC1HW(const float *src, float *dst, const float *mean, void NHWC1ToNC1HW(const float *src,
const float *std, int width, int height); float *dst,
const float *mean,
const float *std,
int width,
int height);
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <vector>
#include <string>
#include <map> #include <map>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h" #include "yaml-cpp/yaml.h"
...@@ -47,8 +47,7 @@ class ConfigPaser { ...@@ -47,8 +47,7 @@ class ConfigPaser {
mode_ = config["mode"].as<std::string>(); mode_ = config["mode"].as<std::string>();
} else { } else {
std::cerr << "Please set mode, " std::cerr << "Please set mode, "
<< "support value : fluid/trt_fp16/trt_fp32." << "support value : fluid/trt_fp16/trt_fp32." << std::endl;
<< std::endl;
return false; return false;
} }
...@@ -110,4 +109,3 @@ class ConfigPaser { ...@@ -110,4 +109,3 @@ class ConfigPaser {
}; };
} // namespace PaddleDetection } // namespace PaddleDetection
...@@ -14,21 +14,20 @@ ...@@ -14,21 +14,20 @@
#pragma once #pragma once
#include <string> #include <ctime>
#include <vector>
#include <memory> #include <memory>
#include <string>
#include <utility> #include <utility>
#include <ctime> #include <vector>
#include <opencv2/core/core.hpp> #include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp> #include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT #include "paddle_inference_api.h" // NOLINT
#include "include/preprocess_op.h"
#include "include/config_parser.h" #include "include/config_parser.h"
#include "include/preprocess_op.h"
namespace PaddleDetection { namespace PaddleDetection {
// Object Detection Result // Object Detection Result
...@@ -41,40 +40,42 @@ struct ObjectResult { ...@@ -41,40 +40,42 @@ struct ObjectResult {
float confidence; float confidence;
}; };
// Generate visualization colormap for each class // Generate visualization colormap for each class
std::vector<int> GenerateColorMap(int num_class); std::vector<int> GenerateColorMap(int num_class);
// Visualization Detection Result // Visualization Detection Result
cv::Mat VisualizeResult(const cv::Mat& img, cv::Mat VisualizeResult(const cv::Mat& img,
const std::vector<ObjectResult>& results, const std::vector<ObjectResult>& results,
const std::vector<std::string>& lable_list, const std::vector<std::string>& lable_list,
const std::vector<int>& colormap); const std::vector<int>& colormap);
class ObjectDetector { class ObjectDetector {
public: public:
explicit ObjectDetector(const std::string& model_dir, explicit ObjectDetector(const std::string& model_dir,
const std::string& device, const std::string& device,
const std::string& run_mode="fluid", const std::string& run_mode = "fluid",
const int gpu_id=0, const int gpu_id = 0,
bool trt_calib_mode=false) { bool trt_calib_mode = false) {
config_.load_config(model_dir); config_.load_config(model_dir);
threshold_ = config_.draw_threshold_; threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_, config_.arch_); preprocessor_.Init(config_.preprocess_info_, config_.arch_);
LoadModel(model_dir, device, config_.min_subgraph_size_, 1, run_mode, gpu_id, trt_calib_mode); LoadModel(model_dir,
device,
config_.min_subgraph_size_,
1,
run_mode,
gpu_id,
trt_calib_mode);
} }
// Load Paddle inference model // Load Paddle inference model
void LoadModel( void LoadModel(const std::string& model_dir,
const std::string& model_dir,
const std::string& device, const std::string& device,
const int min_subgraph_size, const int min_subgraph_size,
const int batch_size = 1, const int batch_size = 1,
const std::string& run_mode = "fluid", const std::string& run_mode = "fluid",
const int gpu_id=0, const int gpu_id = 0,
bool trt_calib_mode=false); bool trt_calib_mode = false);
// Run predictor // Run predictor
void Predict(const cv::Mat& im, void Predict(const cv::Mat& im,
...@@ -93,9 +94,7 @@ class ObjectDetector { ...@@ -93,9 +94,7 @@ class ObjectDetector {
// Preprocess image and copy data to input buffer // Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat); void Preprocess(const cv::Mat& image_mat);
// Postprocess result // Postprocess result
void Postprocess( void Postprocess(const cv::Mat& raw_mat, std::vector<ObjectResult>* result);
const cv::Mat& raw_mat,
std::vector<ObjectResult>* result);
std::unique_ptr<paddle::PaddlePredictor> predictor_; std::unique_ptr<paddle::PaddlePredictor> predictor_;
Preprocessor preprocessor_; Preprocessor preprocessor_;
......
...@@ -16,15 +16,15 @@ ...@@ -16,15 +16,15 @@
#include <yaml-cpp/yaml.h> #include <yaml-cpp/yaml.h>
#include <vector>
#include <string>
#include <utility>
#include <memory> #include <memory>
#include <string>
#include <unordered_map> #include <unordered_map>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp> #include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp> #include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleDetection { namespace PaddleDetection {
...@@ -50,7 +50,7 @@ class PreprocessOp { ...@@ -50,7 +50,7 @@ class PreprocessOp {
virtual void Run(cv::Mat* im, ImageBlob* data) = 0; virtual void Run(cv::Mat* im, ImageBlob* data) = 0;
}; };
class InitInfo : public PreprocessOp{ class InitInfo : public PreprocessOp {
public: public:
virtual void Init(const YAML::Node& item, const std::string& arch) {} virtual void Init(const YAML::Node& item, const std::string& arch) {}
virtual void Run(cv::Mat* im, ImageBlob* data); virtual void Run(cv::Mat* im, ImageBlob* data);
...@@ -166,4 +166,3 @@ class Preprocessor { ...@@ -166,4 +166,3 @@ class Preprocessor {
}; };
} // namespace PaddleDetection } // namespace PaddleDetection
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
#include <glog/logging.h> #include <glog/logging.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <sys/types.h>
#include <sys/stat.h>
#include <algorithm>
#ifdef _WIN32 #ifdef _WIN32
#include <direct.h> #include <direct.h>
...@@ -29,25 +29,35 @@ ...@@ -29,25 +29,35 @@
#include <sys/stat.h> #include <sys/stat.h>
#endif #endif
#include "include/object_detector.h"
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include "include/object_detector.h"
DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(image_file, "", "Path of input image"); DEFINE_string(image_file, "", "Path of input image");
DEFINE_string(video_path, "", "Path of input video"); DEFINE_string(video_path, "", "Path of input video");
DEFINE_bool(use_gpu, false, "Deprecated, please use `--device` to set the device you want to run."); DEFINE_bool(
DEFINE_string(device, "CPU", "Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."); use_gpu,
false,
"Deprecated, please use `--device` to set the device you want to run.");
DEFINE_string(device,
"CPU",
"Choose the device you want to run, it can be: CPU/GPU/XPU, "
"default is CPU.");
DEFINE_bool(use_camera, false, "Use camera or not"); DEFINE_bool(use_camera, false, "Use camera or not");
DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)"); DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_int32(camera_id, -1, "Device id of camera to predict"); DEFINE_int32(camera_id, -1, "Device id of camera to predict");
DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark"); DEFINE_bool(run_benchmark,
false,
"Whether to predict a image_file repeatedly for benchmark");
DEFINE_double(threshold, 0.5, "Threshold of score."); DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files."); DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_bool(trt_calib_mode, false, "If the model is produced by TRT offline quantitative calibration, trt_calib_mode need to set True"); DEFINE_bool(trt_calib_mode,
false,
"If the model is produced by TRT offline quantitative calibration, "
"trt_calib_mode need to set True");
static std::string DirName(const std::string &filepath) { static std::string DirName(const std::string& filepath) {
auto pos = filepath.rfind(OS_PATH_SEP); auto pos = filepath.rfind(OS_PATH_SEP);
if (pos == std::string::npos) { if (pos == std::string::npos) {
return ""; return "";
...@@ -55,7 +65,7 @@ static std::string DirName(const std::string &filepath) { ...@@ -55,7 +65,7 @@ static std::string DirName(const std::string &filepath) {
return filepath.substr(0, pos); return filepath.substr(0, pos);
} }
static bool PathExists(const std::string& path){ static bool PathExists(const std::string& path) {
#ifdef _WIN32 #ifdef _WIN32
struct _stat buffer; struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0); return (_stat(path.c_str(), &buffer) == 0);
...@@ -92,9 +102,9 @@ void PredictVideo(const std::string& video_path, ...@@ -92,9 +102,9 @@ void PredictVideo(const std::string& video_path,
PaddleDetection::ObjectDetector* det) { PaddleDetection::ObjectDetector* det) {
// Open video // Open video
cv::VideoCapture capture; cv::VideoCapture capture;
if (FLAGS_camera_id != -1){ if (FLAGS_camera_id != -1) {
capture.open(FLAGS_camera_id); capture.open(FLAGS_camera_id);
}else{ } else {
capture.open(video_path.c_str()); capture.open(video_path.c_str());
} }
if (!capture.isOpened()) { if (!capture.isOpened()) {
...@@ -131,10 +141,12 @@ void PredictVideo(const std::string& video_path, ...@@ -131,10 +141,12 @@ void PredictVideo(const std::string& video_path,
break; break;
} }
det->Predict(frame, 0.5, 0, 1, false, &result); det->Predict(frame, 0.5, 0, 1, false, &result);
cv::Mat out_im = PaddleDetection::VisualizeResult( cv::Mat out_im =
frame, result, labels, colormap); PaddleDetection::VisualizeResult(frame, result, labels, colormap);
for (const auto& item : result) { for (const auto& item : result) {
printf("In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d %d]\n", printf(
"In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d "
"%d]\n",
frame_id, frame_id,
item.class_id, item.class_id,
item.confidence, item.confidence,
...@@ -159,11 +171,9 @@ void PredictImage(const std::string& image_path, ...@@ -159,11 +171,9 @@ void PredictImage(const std::string& image_path,
cv::Mat im = cv::imread(image_path, 1); cv::Mat im = cv::imread(image_path, 1);
// Store all detected result // Store all detected result
std::vector<PaddleDetection::ObjectResult> result; std::vector<PaddleDetection::ObjectResult> result;
if (run_benchmark) if (run_benchmark) {
{
det->Predict(im, threshold, 100, 100, run_benchmark, &result); det->Predict(im, threshold, 100, 100, run_benchmark, &result);
}else } else {
{
det->Predict(im, 0.5, 0, 1, run_benchmark, &result); det->Predict(im, 0.5, 0, 1, run_benchmark, &result);
for (const auto& item : result) { for (const auto& item : result) {
printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n",
...@@ -177,8 +187,8 @@ void PredictImage(const std::string& image_path, ...@@ -177,8 +187,8 @@ void PredictImage(const std::string& image_path,
// Visualization result // Visualization result
auto labels = det->GetLabelList(); auto labels = det->GetLabelList();
auto colormap = PaddleDetection::GenerateColorMap(labels.size()); auto colormap = PaddleDetection::GenerateColorMap(labels.size());
cv::Mat vis_img = PaddleDetection::VisualizeResult( cv::Mat vis_img =
im, result, labels, colormap); PaddleDetection::VisualizeResult(im, result, labels, colormap);
std::vector<int> compression_params; std::vector<int> compression_params;
compression_params.push_back(CV_IMWRITE_JPEG_QUALITY); compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
compression_params.push_back(95); compression_params.push_back(95);
...@@ -195,30 +205,39 @@ void PredictImage(const std::string& image_path, ...@@ -195,30 +205,39 @@ void PredictImage(const std::string& image_path,
int main(int argc, char** argv) { int main(int argc, char** argv) {
// Parsing command-line // Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true); google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir.empty() if (FLAGS_model_dir.empty() ||
|| (FLAGS_image_file.empty() && FLAGS_video_path.empty())) { (FLAGS_image_file.empty() && FLAGS_video_path.empty())) {
std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ " std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
<< "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl; << "--image_file=/PATH/TO/INPUT/IMAGE/" << std::endl;
return -1; return -1;
} }
if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" ||
|| FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
std::cout << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; std::cout
<< "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1; return -1;
} }
transform(FLAGS_device.begin(),FLAGS_device.end(),FLAGS_device.begin(),::toupper); transform(FLAGS_device.begin(),
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || FLAGS_device == "XPU")) { FLAGS_device.end(),
FLAGS_device.begin(),
::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
FLAGS_device == "XPU")) {
std::cout << "device should be 'CPU', 'GPU' or 'XPU'."; std::cout << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1; return -1;
} }
if (FLAGS_use_gpu) { if (FLAGS_use_gpu) {
std::cout << "Deprecated, please use `--device` to set the device you want to run."; std::cout << "Deprecated, please use `--device` to set the device you want "
"to run.";
return -1; return -1;
} }
// Load model and create a object detector // Load model and create a object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_device, PaddleDetection::ObjectDetector det(FLAGS_model_dir,
FLAGS_run_mode, FLAGS_gpu_id, FLAGS_trt_calib_mode); FLAGS_device,
FLAGS_run_mode,
FLAGS_gpu_id,
FLAGS_trt_calib_mode);
// Do inference on input video or image // Do inference on input video or image
if (!FLAGS_video_path.empty() || FLAGS_use_camera) { if (!FLAGS_video_path.empty() || FLAGS_use_camera) {
PredictVideo(FLAGS_video_path, &det); PredictVideo(FLAGS_video_path, &det);
...@@ -226,7 +245,11 @@ int main(int argc, char** argv) { ...@@ -226,7 +245,11 @@ int main(int argc, char** argv) {
if (!PathExists(FLAGS_output_dir)) { if (!PathExists(FLAGS_output_dir)) {
MkDirs(FLAGS_output_dir); MkDirs(FLAGS_output_dir);
} }
PredictImage(FLAGS_image_file, FLAGS_threshold, FLAGS_run_benchmark, &det, FLAGS_output_dir); PredictImage(FLAGS_image_file,
FLAGS_threshold,
FLAGS_run_benchmark,
&det,
FLAGS_output_dir);
} }
return 0; return 0;
} }
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
// limitations under the License. // limitations under the License.
#include <sstream> #include <sstream>
// for setprecision // for setprecision
#include <iomanip>
#include <chrono> #include <chrono>
#include <iomanip>
#include "include/object_detector.h" #include "include/object_detector.h"
namespace PaddleDetection { namespace PaddleDetection {
...@@ -41,18 +41,18 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -41,18 +41,18 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
} else if (run_mode == "trt_int8") { } else if (run_mode == "trt_int8") {
precision = paddle::AnalysisConfig::Precision::kInt8; precision = paddle::AnalysisConfig::Precision::kInt8;
} else { } else {
printf("run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'"); printf(
"run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'");
} }
config.EnableTensorRtEngine( config.EnableTensorRtEngine(1 << 10,
1 << 10,
batch_size, batch_size,
min_subgraph_size, min_subgraph_size,
precision, precision,
false, false,
trt_calib_mode); trt_calib_mode);
} }
} else if (device == "XPU"){ } else if (device == "XPU") {
config.EnableXpu(10*1024*1024); config.EnableXpu(10 * 1024 * 1024);
} else { } else {
config.DisableGpu(); config.DisableGpu();
} }
...@@ -88,11 +88,8 @@ cv::Mat VisualizeResult(const cv::Mat& img, ...@@ -88,11 +88,8 @@ cv::Mat VisualizeResult(const cv::Mat& img,
int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL; int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
double font_scale = 0.5f; double font_scale = 0.5f;
float thickness = 0.5; float thickness = 0.5;
cv::Size text_size = cv::getTextSize(text, cv::Size text_size =
font_face, cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
font_scale,
thickness,
nullptr);
cv::Point origin; cv::Point origin;
origin.x = roi.x; origin.x = roi.x;
origin.y = roi.y; origin.y = roi.y;
...@@ -124,8 +121,7 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) { ...@@ -124,8 +121,7 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
preprocessor_.Run(&im, &inputs_); preprocessor_.Run(&im, &inputs_);
} }
void ObjectDetector::Postprocess( void ObjectDetector::Postprocess(const cv::Mat& raw_mat,
const cv::Mat& raw_mat,
std::vector<ObjectResult>* result) { std::vector<ObjectResult>* result) {
result->clear(); result->clear();
int rh = 1; int rh = 1;
...@@ -189,8 +185,7 @@ void ObjectDetector::Predict(const cv::Mat& im, ...@@ -189,8 +185,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
} }
} }
// Run predictor // Run predictor
for (int i = 0; i < warmup; i++) for (int i = 0; i < warmup; i++) {
{
predictor_->ZeroCopyRun(); predictor_->ZeroCopyRun();
// Get output tensor // Get output tensor
auto output_names = predictor_->GetOutputNames(); auto output_names = predictor_->GetOutputNames();
...@@ -210,8 +205,7 @@ void ObjectDetector::Predict(const cv::Mat& im, ...@@ -210,8 +205,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
} }
auto start = std::chrono::steady_clock::now(); auto start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++) for (int i = 0; i < repeats; i++) {
{
predictor_->ZeroCopyRun(); predictor_->ZeroCopyRun();
// Get output tensor // Get output tensor
auto output_names = predictor_->GetOutputNames(); auto output_names = predictor_->GetOutputNames();
...@@ -234,7 +228,7 @@ void ObjectDetector::Predict(const cv::Mat& im, ...@@ -234,7 +228,7 @@ void ObjectDetector::Predict(const cv::Mat& im,
float ms = diff.count() / repeats * 1000; float ms = diff.count() / repeats * 1000;
printf("Inference: %f ms per batch image\n", ms); printf("Inference: %f ms per batch image\n", ms);
// Postprocessing result // Postprocessing result
if(!run_benchmark) { if (!run_benchmark) {
Postprocess(im, result); Postprocess(im, result);
} }
} }
......
...@@ -12,28 +12,19 @@ ...@@ -12,28 +12,19 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <vector>
#include <string> #include <string>
#include <vector>
#include "include/preprocess_op.h" #include "include/preprocess_op.h"
namespace PaddleDetection { namespace PaddleDetection {
void InitInfo::Run(cv::Mat* im, ImageBlob* data) { void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
data->ori_im_size_ = { data->ori_im_size_ = {static_cast<int>(im->rows), static_cast<int>(im->cols)};
static_cast<int>(im->rows),
static_cast<int>(im->cols)
};
data->ori_im_size_f_ = { data->ori_im_size_f_ = {
static_cast<float>(im->rows), static_cast<float>(im->rows), static_cast<float>(im->cols), 1.0};
static_cast<float>(im->cols),
1.0
};
data->eval_im_size_f_ = { data->eval_im_size_f_ = {
static_cast<float>(im->rows), static_cast<float>(im->rows), static_cast<float>(im->cols), 1.0};
static_cast<float>(im->cols),
1.0
};
data->scale_factor_f_ = {1., 1., 1., 1.}; data->scale_factor_f_ = {1., 1., 1., 1.};
} }
...@@ -46,11 +37,11 @@ void Normalize::Run(cv::Mat* im, ImageBlob* data) { ...@@ -46,11 +37,11 @@ void Normalize::Run(cv::Mat* im, ImageBlob* data) {
for (int h = 0; h < im->rows; h++) { for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) { for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] = im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean_[0] ) / scale_[0]; (im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
im->at<cv::Vec3f>(h, w)[1] = im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean_[1] ) / scale_[1]; (im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
im->at<cv::Vec3f>(h, w)[2] = im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean_[2] ) / scale_[2]; (im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
} }
} }
} }
...@@ -63,7 +54,8 @@ void Permute::Run(cv::Mat* im, ImageBlob* data) { ...@@ -63,7 +54,8 @@ void Permute::Run(cv::Mat* im, ImageBlob* data) {
float* base = (data->im_data_).data(); float* base = (data->im_data_).data();
for (int i = 0; i < rc; ++i) { for (int i = 0; i < rc; ++i) {
int cur_c = to_bgr_ ? rc - i - 1 : i; int cur_c = to_bgr_ ? rc - i - 1 : i;
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + cur_c * rh * rw), i); cv::extractChannel(
*im, cv::Mat(rh, rw, CV_32FC1, base + cur_c * rh * rw), i);
} }
} }
...@@ -73,8 +65,7 @@ void Resize::Run(cv::Mat* im, ImageBlob* data) { ...@@ -73,8 +65,7 @@ void Resize::Run(cv::Mat* im, ImageBlob* data) {
*im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_); *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
if (max_size_ != 0 && !image_shape_.empty()) { if (max_size_ != 0 && !image_shape_.empty()) {
// Padding the image with 0 border // Padding the image with 0 border
cv::copyMakeBorder( cv::copyMakeBorder(*im,
*im,
*im, *im,
0, 0,
max_size_ - im->rows, max_size_ - im->rows,
...@@ -83,17 +74,13 @@ void Resize::Run(cv::Mat* im, ImageBlob* data) { ...@@ -83,17 +74,13 @@ void Resize::Run(cv::Mat* im, ImageBlob* data) {
cv::BORDER_CONSTANT, cv::BORDER_CONSTANT,
cv::Scalar(0)); cv::Scalar(0));
} }
data->eval_im_size_f_ = { data->eval_im_size_f_ = {static_cast<float>(im->rows),
static_cast<float>(im->rows),
static_cast<float>(im->cols), static_cast<float>(im->cols),
resize_scale.first resize_scale.first};
}; data->scale_factor_f_ = {resize_scale.first,
data->scale_factor_f_ = {
resize_scale.first,
resize_scale.second, resize_scale.second,
resize_scale.first, resize_scale.first,
resize_scale.second resize_scale.second};
};
} }
std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) { std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
...@@ -132,23 +119,14 @@ void PadStride::Run(cv::Mat* im, ImageBlob* data) { ...@@ -132,23 +119,14 @@ void PadStride::Run(cv::Mat* im, ImageBlob* data) {
int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_; int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_; int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
cv::copyMakeBorder( cv::copyMakeBorder(
*im, *im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
*im,
0,
nh - rh,
0,
nw - rw,
cv::BORDER_CONSTANT,
cv::Scalar(0));
(data->eval_im_size_f_)[0] = static_cast<float>(im->rows); (data->eval_im_size_f_)[0] = static_cast<float>(im->rows);
(data->eval_im_size_f_)[1] = static_cast<float>(im->cols); (data->eval_im_size_f_)[1] = static_cast<float>(im->cols);
} }
// Preprocessor op running order // Preprocessor op running order
const std::vector<std::string> Preprocessor::RUN_ORDER = { const std::vector<std::string> Preprocessor::RUN_ORDER = {
"InitInfo", "Resize", "Normalize", "PadStride", "Permute" "InitInfo", "Resize", "Normalize", "PadStride", "Permute"};
};
void Preprocessor::Run(cv::Mat* im, ImageBlob* data) { void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
for (const auto& name : RUN_ORDER) { for (const auto& name : RUN_ORDER) {
......
...@@ -12,17 +12,16 @@ ...@@ -12,17 +12,16 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <chrono>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <vector>
#include <chrono>
#include <numeric> #include <numeric>
#include <vector>
#include "opencv2/core.hpp" #include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp" #include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp" #include "opencv2/imgproc.hpp"
#include "paddle_api.h" // NOLINT #include "paddle_api.h" // NOLINT
using namespace paddle::lite_api; // NOLINT using namespace paddle::lite_api; // NOLINT
using namespace std; using namespace std;
...@@ -57,13 +56,14 @@ void PrintBenchmarkLog(std::vector<double> det_time, ...@@ -57,13 +56,14 @@ void PrintBenchmarkLog(std::vector<double> det_time,
std::cout << "---------------- Perf info ---------------------" << std::endl; std::cout << "---------------- Perf info ---------------------" << std::endl;
std::cout << "Total number of predicted data: " << img_num std::cout << "Total number of predicted data: " << img_num
<< " and total time spent(s): " << " and total time spent(s): "
<< std::accumulate(det_time.begin(), det_time.end(), 0) << std::endl; << std::accumulate(det_time.begin(), det_time.end(), 0)
<< std::endl;
std::cout << "preproce_time(ms): " << det_time[0] / img_num std::cout << "preproce_time(ms): " << det_time[0] / img_num
<< ", inference_time(ms): " << det_time[1] / img_num << ", inference_time(ms): " << det_time[1] / img_num
<< ", postprocess_time(ms): " << det_time[2] << std::endl; << ", postprocess_time(ms): " << det_time[2] << std::endl;
} }
std::vector<std::string> LoadLabels(const std::string &path) { std::vector<std::string> LoadLabels(const std::string& path) {
std::ifstream file; std::ifstream file;
std::vector<std::string> labels; std::vector<std::string> labels;
file.open(path); file.open(path);
...@@ -96,18 +96,17 @@ std::vector<std::string> ReadDict(std::string path) { ...@@ -96,18 +96,17 @@ std::vector<std::string> ReadDict(std::string path) {
return m_vec; return m_vec;
} }
std::vector<std::string> split(const std::string &str, std::vector<std::string> split(const std::string& str,
const std::string &delim) { const std::string& delim) {
std::vector<std::string> res; std::vector<std::string> res;
if ("" == str) if ("" == str) return res;
return res; char* strs = new char[str.length() + 1];
char *strs = new char[str.length() + 1];
std::strcpy(strs, str.c_str()); std::strcpy(strs, str.c_str());
char *d = new char[delim.length() + 1]; char* d = new char[delim.length() + 1];
std::strcpy(d, delim.c_str()); std::strcpy(d, delim.c_str());
char *p = std::strtok(strs, d); char* p = std::strtok(strs, d);
while (p) { while (p) {
string s = p; string s = p;
res.push_back(s); res.push_back(s);
...@@ -128,7 +127,7 @@ std::map<std::string, std::string> LoadConfigTxt(std::string config_path) { ...@@ -128,7 +127,7 @@ std::map<std::string, std::string> LoadConfigTxt(std::string config_path) {
return dict; return dict;
} }
void PrintConfig(const std::map<std::string, std::string> &config) { void PrintConfig(const std::map<std::string, std::string>& config) {
std::cout << "=======PaddleDetection lite demo config======" << std::endl; std::cout << "=======PaddleDetection lite demo config======" << std::endl;
for (auto iter = config.begin(); iter != config.end(); iter++) { for (auto iter = config.begin(); iter != config.end(); iter++) {
std::cout << iter->first << " : " << iter->second << std::endl; std::cout << iter->first << " : " << iter->second << std::endl;
...@@ -136,7 +135,6 @@ void PrintConfig(const std::map<std::string, std::string> &config) { ...@@ -136,7 +135,6 @@ void PrintConfig(const std::map<std::string, std::string> &config) {
std::cout << "===End of PaddleDetection lite demo config===" << std::endl; std::cout << "===End of PaddleDetection lite demo config===" << std::endl;
} }
// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up // fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
void neon_mean_scale(const float* din, void neon_mean_scale(const float* din,
float* dout, float* dout,
...@@ -186,7 +184,7 @@ std::vector<Object> visualize_result( ...@@ -186,7 +184,7 @@ std::vector<Object> visualize_result(
int count, int count,
float thresh, float thresh,
cv::Mat& image, cv::Mat& image,
const std::vector<std::string> &class_names) { const std::vector<std::string>& class_names) {
if (data == nullptr) { if (data == nullptr) {
std::cerr << "[ERROR] data can not be nullptr\n"; std::cerr << "[ERROR] data can not be nullptr\n";
exit(1); exit(1);
...@@ -258,54 +256,59 @@ std::shared_ptr<PaddlePredictor> LoadModel(std::string model_file, ...@@ -258,54 +256,59 @@ std::shared_ptr<PaddlePredictor> LoadModel(std::string model_file,
} }
ImageBlob prepare_imgdata(const cv::Mat& img, ImageBlob prepare_imgdata(const cv::Mat& img,
std::map<std::string, std::map<std::string, std::string> config) {
std::string> config) {
ImageBlob img_data; ImageBlob img_data;
std::vector<int> target_size_; std::vector<int> target_size_;
std::vector<std::string> size_str = split(config.at("Resize"), ","); std::vector<std::string> size_str = split(config.at("Resize"), ",");
transform(size_str.begin(), size_str.end(), back_inserter(target_size_), transform(size_str.begin(),
[](std::string const& s){return stoi(s);}); size_str.end(),
back_inserter(target_size_),
[](std::string const& s) { return stoi(s); });
int width = target_size_[0]; int width = target_size_[0];
int height = target_size_[1]; int height = target_size_[1];
img_data.im_shape_ = { img_data.im_shape_ = {static_cast<int>(target_size_[0]),
static_cast<int>(target_size_[0]), static_cast<int>(target_size_[1])};
static_cast<int>(target_size_[1])
};
std::vector<float> mean_; std::vector<float> mean_;
std::vector<float> scale_; std::vector<float> scale_;
std::vector<std::string> mean_str = split(config.at("mean"), ","); std::vector<std::string> mean_str = split(config.at("mean"), ",");
std::vector<std::string> std_str = split(config.at("std"), ","); std::vector<std::string> std_str = split(config.at("std"), ",");
transform(mean_str.begin(), mean_str.end(), back_inserter(mean_), transform(mean_str.begin(),
[](std::string const& s){return stof(s);}); mean_str.end(),
transform(std_str.begin(), std_str.end(), back_inserter(scale_), back_inserter(mean_),
[](std::string const& s){return stof(s);}); [](std::string const& s) { return stof(s); });
transform(std_str.begin(),
std_str.end(),
back_inserter(scale_),
[](std::string const& s) { return stof(s); });
img_data.mean_ = mean_; img_data.mean_ = mean_;
img_data.scale_ = scale_; img_data.scale_ = scale_;
return img_data; return img_data;
} }
void preprocess(const cv::Mat& img, const ImageBlob img_data, float* data) { void preprocess(const cv::Mat& img, const ImageBlob img_data, float* data) {
cv::Mat rgb_img; cv::Mat rgb_img;
cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB); cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
cv::resize( cv::resize(rgb_img,
rgb_img, rgb_img, cv::Size(img_data.im_shape_[0],img_data.im_shape_[1]), rgb_img,
0.f, 0.f, cv::INTER_CUBIC); cv::Size(img_data.im_shape_[0], img_data.im_shape_[1]),
0.f,
0.f,
cv::INTER_CUBIC);
cv::Mat imgf; cv::Mat imgf;
rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f); rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
const float* dimg = reinterpret_cast<const float*>(imgf.data); const float* dimg = reinterpret_cast<const float*>(imgf.data);
neon_mean_scale( neon_mean_scale(dimg,
dimg, data, int(img_data.im_shape_[0] * img_data.im_shape_[1]), data,
img_data.mean_, img_data.scale_); int(img_data.im_shape_[0] * img_data.im_shape_[1]),
img_data.mean_,
img_data.scale_);
} }
void RunModel(std::map<std::string, std::string> config, void RunModel(std::map<std::string, std::string> config,
std::string img_path, std::string img_path,
const int repeats, const int repeats,
std::vector<double>* times) { std::vector<double>* times) {
std::string model_file = config.at("model_file"); std::string model_file = config.at("model_file");
std::string label_path = config.at("label_path"); std::string label_path = config.at("label_path");
// Load Labels // Load Labels
...@@ -334,14 +337,12 @@ void RunModel(std::map<std::string, std::string> config, ...@@ -334,14 +337,12 @@ void RunModel(std::map<std::string, std::string> config,
// 2. Run predictor // 2. Run predictor
// warm up // warm up
for (int i = 0; i < repeats / 2; i++) for (int i = 0; i < repeats / 2; i++) {
{
predictor->Run(); predictor->Run();
} }
auto inference_start = std::chrono::steady_clock::now(); auto inference_start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++) for (int i = 0; i < repeats; i++) {
{
predictor->Run(); predictor->Run();
} }
auto inference_end = std::chrono::steady_clock::now(); auto inference_end = std::chrono::steady_clock::now();
......
...@@ -530,7 +530,7 @@ def predict_video(detector, camera_id): ...@@ -530,7 +530,7 @@ def predict_video(detector, camera_id):
fps = 30 fps = 30
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v') fourcc = cv2.VideoWriter_fourcc(* 'mp4v')
if not os.path.exists(FLAGS.output_dir): if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir) os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name) out_path = os.path.join(FLAGS.output_dir, video_name)
...@@ -660,6 +660,8 @@ if __name__ == '__main__': ...@@ -660,6 +660,8 @@ if __name__ == '__main__':
assert FLAGS.device in ['CPU', 'GPU', 'XPU' assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU" ], "device should be CPU, GPU or XPU"
assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device" assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"
assert not (FLAGS.enable_mkldnn==False and FLAGS.enable_mkldnn_bfloat16==True),"To turn on mkldnn_bfloat, please set both enable_mkldnn and enable_mkldnn_bfloat16 True" assert not (
FLAGS.enable_mkldnn == False and FLAGS.enable_mkldnn_bfloat16 == True
), "To turn on mkldnn_bfloat, please set both enable_mkldnn and enable_mkldnn_bfloat16 True"
main() main()
...@@ -18,7 +18,7 @@ namespace operators { ...@@ -18,7 +18,7 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
class BottomPoolOp : public framework::OperatorWithKernel { class BottomPoolOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
...@@ -27,7 +27,7 @@ public: ...@@ -27,7 +27,7 @@ public:
ctx->ShareDim("X", /*->*/ "Output"); ctx->ShareDim("X", /*->*/ "Output");
} }
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(), return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
...@@ -36,10 +36,9 @@ protected: ...@@ -36,10 +36,9 @@ protected:
}; };
class BottomPoolOpMaker : public framework::OpProtoAndCheckerMaker { class BottomPoolOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X", "Input with shape (batch, C, H, W)");
"Input with shape (batch, C, H, W)");
AddOutput("MaxMap", "Max map with index of maximum value of input"); AddOutput("MaxMap", "Max map with index of maximum value of input");
AddOutput("Output", "output with same shape as input(X)"); AddOutput("Output", "output with same shape as input(X)");
AddComment( AddComment(
...@@ -52,10 +51,10 @@ The output has the same shape with input. ...@@ -52,10 +51,10 @@ The output has the same shape with input.
}; };
class BottomPoolOpGrad : public framework::OperatorWithKernel { class BottomPoolOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null"); PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null");
...@@ -75,10 +74,10 @@ protected: ...@@ -75,10 +74,10 @@ protected:
template <typename T> template <typename T>
class BottomPoolGradDescMaker : public framework::SingleGradOpMaker<T> { class BottomPoolGradDescMaker : public framework::SingleGradOpMaker<T> {
public: public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker; using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected: protected:
void Apply(GradOpPtr<T> op) const override { void Apply(GradOpPtr<T> op) const override {
op->SetType("bottom_pool_grad"); op->SetType("bottom_pool_grad");
op->SetInput("X", this->Input("X")); op->SetInput("X", this->Input("X"));
......
...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#include <vector> #include "paddle/fluid/platform/cuda_primitives.h"
#include "util.cu.h" #include "util.cu.h"
namespace paddle { namespace paddle {
...@@ -32,14 +32,14 @@ static inline int NumBlocks(const int N) { ...@@ -32,14 +32,14 @@ static inline int NumBlocks(const int N) {
template <typename T> template <typename T>
class BottomPoolOpCUDAKernel : public framework::OpKernel<T> { class BottomPoolOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"This kernel only runs on GPU device."); "This kernel only runs on GPU device.");
auto *x = ctx.Input<Tensor>("X"); auto* x = ctx.Input<Tensor>("X");
auto *max_map = ctx.Output<Tensor>("MaxMap"); auto* max_map = ctx.Output<Tensor>("MaxMap");
auto *output = ctx.Output<Tensor>("Output"); auto* output = ctx.Output<Tensor>("Output");
auto *x_data = x->data<T>(); auto* x_data = x->data<T>();
auto x_dims = x->dims(); auto x_dims = x->dims();
int NC_num = x_dims[0] * x_dims[1]; int NC_num = x_dims[0] * x_dims[1];
int height = x_dims[2]; int height = x_dims[2];
...@@ -47,8 +47,8 @@ public: ...@@ -47,8 +47,8 @@ public:
int num = x->numel(); int num = x->numel();
auto& dev_ctx = ctx.cuda_device_context(); auto& dev_ctx = ctx.cuda_device_context();
int *max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace()); int* max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace());
T *output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace()); T* output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace());
auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace()); auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
int threads = kNumCUDAThreads; int threads = kNumCUDAThreads;
...@@ -59,10 +59,19 @@ public: ...@@ -59,10 +59,19 @@ public:
auto max_ind_ptr = memory::Alloc(gpu_place, num / height * sizeof(int)); auto max_ind_ptr = memory::Alloc(gpu_place, num / height * sizeof(int));
int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr()); int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr());
GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), NC_num, height, width, 2, false, max_val_data, max_ind_data, max_map_data); GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(),
NC_num,
height,
width,
2,
false,
max_val_data,
max_ind_data,
max_map_data);
blocks = NumBlocks(num); blocks = NumBlocks(num);
ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), max_map_data, NC_num, height, width, 2, output_data); ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
x->data<T>(), max_map_data, NC_num, height, width, 2, output_data);
} }
}; };
...@@ -86,9 +95,17 @@ class BottomPoolGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -86,9 +95,17 @@ class BottomPoolGradOpCUDAKernel : public framework::OpKernel<T> {
int width = x_dims[3]; int width = x_dims[3];
int grad_num = in_grad->numel(); int grad_num = in_grad->numel();
int blocks = NumBlocks(grad_num); int blocks = NumBlocks(grad_num);
FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(in_grad_data, 0, grad_num); FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
in_grad_data, 0, grad_num);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(out_grad->data<T>(), max_map->data<int>(), NC_num, height, width, 2, in_grad_data);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
out_grad->data<T>(),
max_map->data<int>(),
NC_num,
height,
width,
2,
in_grad_data);
} }
}; };
......
...@@ -18,7 +18,7 @@ namespace operators { ...@@ -18,7 +18,7 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
class LeftPoolOp : public framework::OperatorWithKernel { class LeftPoolOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
...@@ -27,7 +27,7 @@ public: ...@@ -27,7 +27,7 @@ public:
ctx->ShareDim("X", /*->*/ "Output"); ctx->ShareDim("X", /*->*/ "Output");
} }
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(), return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
...@@ -36,10 +36,9 @@ protected: ...@@ -36,10 +36,9 @@ protected:
}; };
class LeftPoolOpMaker : public framework::OpProtoAndCheckerMaker { class LeftPoolOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X", "Input with shape (batch, C, H, W)");
"Input with shape (batch, C, H, W)");
AddOutput("MaxMap", "Max map with index of maximum value of input"); AddOutput("MaxMap", "Max map with index of maximum value of input");
AddOutput("Output", "output with same shape as input(X)"); AddOutput("Output", "output with same shape as input(X)");
AddComment( AddComment(
...@@ -52,10 +51,10 @@ The output has the same shape with input. ...@@ -52,10 +51,10 @@ The output has the same shape with input.
}; };
class LeftPoolOpGrad : public framework::OperatorWithKernel { class LeftPoolOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null"); PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null");
...@@ -75,10 +74,10 @@ protected: ...@@ -75,10 +74,10 @@ protected:
template <typename T> template <typename T>
class LeftPoolGradDescMaker : public framework::SingleGradOpMaker<T> { class LeftPoolGradDescMaker : public framework::SingleGradOpMaker<T> {
public: public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker; using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected: protected:
void Apply(GradOpPtr<T> op) const override { void Apply(GradOpPtr<T> op) const override {
op->SetType("left_pool_grad"); op->SetType("left_pool_grad");
op->SetInput("X", this->Input("X")); op->SetInput("X", this->Input("X"));
......
...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#include <vector> #include "paddle/fluid/platform/cuda_primitives.h"
#include "util.cu.h" #include "util.cu.h"
namespace paddle { namespace paddle {
...@@ -32,14 +32,14 @@ static inline int NumBlocks(const int N) { ...@@ -32,14 +32,14 @@ static inline int NumBlocks(const int N) {
template <typename T> template <typename T>
class LeftPoolOpCUDAKernel : public framework::OpKernel<T> { class LeftPoolOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"This kernel only runs on GPU device."); "This kernel only runs on GPU device.");
auto *x = ctx.Input<Tensor>("X"); auto* x = ctx.Input<Tensor>("X");
auto *max_map = ctx.Output<Tensor>("MaxMap"); auto* max_map = ctx.Output<Tensor>("MaxMap");
auto *output = ctx.Output<Tensor>("Output"); auto* output = ctx.Output<Tensor>("Output");
auto *x_data = x->data<T>(); auto* x_data = x->data<T>();
auto x_dims = x->dims(); auto x_dims = x->dims();
int NC_num = x_dims[0] * x_dims[1]; int NC_num = x_dims[0] * x_dims[1];
int height = x_dims[2]; int height = x_dims[2];
...@@ -47,8 +47,8 @@ public: ...@@ -47,8 +47,8 @@ public:
int num = x->numel(); int num = x->numel();
auto& dev_ctx = ctx.cuda_device_context(); auto& dev_ctx = ctx.cuda_device_context();
int *max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace()); int* max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace());
T *output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace()); T* output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace());
auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace()); auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
int threads = kNumCUDAThreads; int threads = kNumCUDAThreads;
...@@ -59,11 +59,19 @@ public: ...@@ -59,11 +59,19 @@ public:
auto max_ind_ptr = memory::Alloc(gpu_place, num / width * sizeof(int)); auto max_ind_ptr = memory::Alloc(gpu_place, num / width * sizeof(int));
int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr()); int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr());
GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), NC_num, height, width, 3, true, max_val_data, max_ind_data, max_map_data); GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(),
NC_num,
height,
width,
3,
true,
max_val_data,
max_ind_data,
max_map_data);
blocks = NumBlocks(num); blocks = NumBlocks(num);
ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), max_map_data, NC_num, height, width, 3, output_data); ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
x->data<T>(), max_map_data, NC_num, height, width, 3, output_data);
} }
}; };
...@@ -87,13 +95,20 @@ class LeftPoolGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -87,13 +95,20 @@ class LeftPoolGradOpCUDAKernel : public framework::OpKernel<T> {
int width = x_dims[3]; int width = x_dims[3];
int grad_num = in_grad->numel(); int grad_num = in_grad->numel();
int blocks = NumBlocks(grad_num); int blocks = NumBlocks(grad_num);
FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(in_grad_data, 0, grad_num); FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
in_grad_data, 0, grad_num);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(out_grad->data<T>(), max_map->data<int>(), NC_num, height, width, 3, in_grad_data);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
out_grad->data<T>(),
max_map->data<int>(),
NC_num,
height,
width,
3,
in_grad_data);
} }
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -18,7 +18,7 @@ namespace operators { ...@@ -18,7 +18,7 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
class RightPoolOp : public framework::OperatorWithKernel { class RightPoolOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
...@@ -27,7 +27,7 @@ public: ...@@ -27,7 +27,7 @@ public:
ctx->ShareDim("X", /*->*/ "Output"); ctx->ShareDim("X", /*->*/ "Output");
} }
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(), return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
...@@ -36,10 +36,9 @@ protected: ...@@ -36,10 +36,9 @@ protected:
}; };
class RightPoolOpMaker : public framework::OpProtoAndCheckerMaker { class RightPoolOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X", "Input with shape (batch, C, H, W)");
"Input with shape (batch, C, H, W)");
AddOutput("MaxMap", "Max map with index of maximum value of input"); AddOutput("MaxMap", "Max map with index of maximum value of input");
AddOutput("Output", "output with same shape as input(X)"); AddOutput("Output", "output with same shape as input(X)");
AddComment( AddComment(
...@@ -52,10 +51,10 @@ The output has the same shape with input. ...@@ -52,10 +51,10 @@ The output has the same shape with input.
}; };
class RightPoolOpGrad : public framework::OperatorWithKernel { class RightPoolOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null"); PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null");
...@@ -75,10 +74,10 @@ protected: ...@@ -75,10 +74,10 @@ protected:
template <typename T> template <typename T>
class RightPoolGradDescMaker : public framework::SingleGradOpMaker<T> { class RightPoolGradDescMaker : public framework::SingleGradOpMaker<T> {
public: public:
using framework::SingleGradOpMaker<T>::SingleGradOpMaker; using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected: protected:
void Apply(GradOpPtr<T> op) const override { void Apply(GradOpPtr<T> op) const override {
op->SetType("right_pool_grad"); op->SetType("right_pool_grad");
op->SetInput("X", this->Input("X")); op->SetInput("X", this->Input("X"));
......
...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#include <vector> #include "paddle/fluid/platform/cuda_primitives.h"
#include "util.cu.h" #include "util.cu.h"
namespace paddle { namespace paddle {
...@@ -32,14 +32,14 @@ static inline int NumBlocks(const int N) { ...@@ -32,14 +32,14 @@ static inline int NumBlocks(const int N) {
template <typename T> template <typename T>
class RightPoolOpCUDAKernel : public framework::OpKernel<T> { class RightPoolOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"This kernel only runs on GPU device."); "This kernel only runs on GPU device.");
auto *x = ctx.Input<Tensor>("X"); auto* x = ctx.Input<Tensor>("X");
auto *max_map = ctx.Output<Tensor>("MaxMap"); auto* max_map = ctx.Output<Tensor>("MaxMap");
auto *output = ctx.Output<Tensor>("Output"); auto* output = ctx.Output<Tensor>("Output");
auto *x_data = x->data<T>(); auto* x_data = x->data<T>();
auto x_dims = x->dims(); auto x_dims = x->dims();
int NC_num = x_dims[0] * x_dims[1]; int NC_num = x_dims[0] * x_dims[1];
int height = x_dims[2]; int height = x_dims[2];
...@@ -47,8 +47,8 @@ public: ...@@ -47,8 +47,8 @@ public:
int num = x->numel(); int num = x->numel();
auto& dev_ctx = ctx.cuda_device_context(); auto& dev_ctx = ctx.cuda_device_context();
int *max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace()); int* max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace());
T *output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace()); T* output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace());
auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace()); auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
int threads = kNumCUDAThreads; int threads = kNumCUDAThreads;
...@@ -59,11 +59,19 @@ public: ...@@ -59,11 +59,19 @@ public:
auto max_ind_ptr = memory::Alloc(gpu_place, num / width * sizeof(int)); auto max_ind_ptr = memory::Alloc(gpu_place, num / width * sizeof(int));
int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr()); int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr());
GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), NC_num, height, width, 3, false, max_val_data, max_ind_data, max_map_data); GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(),
NC_num,
height,
width,
3,
false,
max_val_data,
max_ind_data,
max_map_data);
blocks = NumBlocks(num); blocks = NumBlocks(num);
ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), max_map_data, NC_num, height, width, 3, output_data); ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
x->data<T>(), max_map_data, NC_num, height, width, 3, output_data);
} }
}; };
...@@ -87,9 +95,17 @@ class RightPoolGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -87,9 +95,17 @@ class RightPoolGradOpCUDAKernel : public framework::OpKernel<T> {
int width = x_dims[3]; int width = x_dims[3];
int grad_num = in_grad->numel(); int grad_num = in_grad->numel();
int blocks = NumBlocks(grad_num); int blocks = NumBlocks(grad_num);
FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(in_grad_data, 0, grad_num); FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
in_grad_data, 0, grad_num);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(out_grad->data<T>(), max_map->data<int>(), NC_num, height, width, 3, in_grad_data);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
out_grad->data<T>(),
max_map->data<int>(),
NC_num,
height,
width,
3,
in_grad_data);
} }
}; };
......
...@@ -18,7 +18,7 @@ namespace operators { ...@@ -18,7 +18,7 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
class TopPoolOp : public framework::OperatorWithKernel { class TopPoolOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
...@@ -27,7 +27,7 @@ public: ...@@ -27,7 +27,7 @@ public:
ctx->ShareDim("X", /*->*/ "Output"); ctx->ShareDim("X", /*->*/ "Output");
} }
protected: protected:
framework::OpKernelType GetExpectedKernelType( framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override { const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("X")->type(), return framework::OpKernelType(ctx.Input<Tensor>("X")->type(),
...@@ -36,10 +36,9 @@ protected: ...@@ -36,10 +36,9 @@ protected:
}; };
class TopPoolOpMaker : public framework::OpProtoAndCheckerMaker { class TopPoolOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X", "Input with shape (batch, C, H, W)");
"Input with shape (batch, C, H, W)");
AddOutput("MaxMap", "Max map with index of maximum value of input"); AddOutput("MaxMap", "Max map with index of maximum value of input");
AddOutput("Output", "Output with same shape as input(X)"); AddOutput("Output", "Output with same shape as input(X)");
AddComment( AddComment(
...@@ -52,10 +51,10 @@ The output has the same shape with input. ...@@ -52,10 +51,10 @@ The output has the same shape with input.
}; };
class TopPoolOpGrad : public framework::OperatorWithKernel { class TopPoolOpGrad : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
protected: protected:
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null"); PADDLE_ENFORCE(ctx->HasInput("MaxMap"), "Input(MaxMap) should not be null");
......
...@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#include <vector> #include "paddle/fluid/platform/cuda_primitives.h"
#include "util.cu.h" #include "util.cu.h"
namespace paddle { namespace paddle {
...@@ -33,14 +33,14 @@ static inline int NumBlocks(const int N) { ...@@ -33,14 +33,14 @@ static inline int NumBlocks(const int N) {
template <typename T> template <typename T>
class TopPoolOpCUDAKernel : public framework::OpKernel<T> { class TopPoolOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"This kernel only runs on GPU device."); "This kernel only runs on GPU device.");
auto *x = ctx.Input<Tensor>("X"); auto* x = ctx.Input<Tensor>("X");
auto *max_map = ctx.Output<Tensor>("MaxMap"); auto* max_map = ctx.Output<Tensor>("MaxMap");
auto *output = ctx.Output<Tensor>("Output"); auto* output = ctx.Output<Tensor>("Output");
auto *x_data = x->data<T>(); auto* x_data = x->data<T>();
auto x_dims = x->dims(); auto x_dims = x->dims();
int NC_num = x_dims[0] * x_dims[1]; int NC_num = x_dims[0] * x_dims[1];
int height = x_dims[2]; int height = x_dims[2];
...@@ -48,8 +48,8 @@ public: ...@@ -48,8 +48,8 @@ public:
int num = x->numel(); int num = x->numel();
auto& dev_ctx = ctx.cuda_device_context(); auto& dev_ctx = ctx.cuda_device_context();
int *max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace()); int* max_map_data = max_map->mutable_data<int>(x_dims, dev_ctx.GetPlace());
T *output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace()); T* output_data = output->mutable_data<T>(x_dims, dev_ctx.GetPlace());
auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace()); auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
int threads = kNumCUDAThreads; int threads = kNumCUDAThreads;
...@@ -60,10 +60,19 @@ public: ...@@ -60,10 +60,19 @@ public:
auto max_ind_ptr = memory::Alloc(gpu_place, num / height * sizeof(int)); auto max_ind_ptr = memory::Alloc(gpu_place, num / height * sizeof(int));
int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr()); int* max_ind_data = reinterpret_cast<int*>(max_ind_ptr->ptr());
GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), NC_num, height, width, 2, true, max_val_data, max_ind_data, max_map_data); GetMaxInfo<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(),
NC_num,
height,
width,
2,
true,
max_val_data,
max_ind_data,
max_map_data);
blocks = NumBlocks(num); blocks = NumBlocks(num);
ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(x->data<T>(), max_map_data, NC_num, height, width, 2, output_data); ScatterAddFw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
x->data<T>(), max_map_data, NC_num, height, width, 2, output_data);
} }
}; };
...@@ -86,9 +95,17 @@ class TopPoolGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -86,9 +95,17 @@ class TopPoolGradOpCUDAKernel : public framework::OpKernel<T> {
int width = x_dims[3]; int width = x_dims[3];
int grad_num = in_grad->numel(); int grad_num = in_grad->numel();
int blocks = NumBlocks(grad_num); int blocks = NumBlocks(grad_num);
FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(in_grad_data, 0, grad_num); FillConstant<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
in_grad_data, 0, grad_num);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(out_grad->data<T>(), max_map->data<int>(), NC_num, height, width, 2, in_grad_data);
ScatterAddBw<T><<<blocks, threads, 0, dev_ctx.stream()>>>(
out_grad->data<T>(),
max_map->data<int>(),
NC_num,
height,
width,
2,
in_grad_data);
} }
}; };
......
...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -11,10 +11,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#include <vector> #include "paddle/fluid/platform/cuda_primitives.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -27,14 +27,17 @@ using framework::Tensor; ...@@ -27,14 +27,17 @@ using framework::Tensor;
template <typename T> template <typename T>
__global__ void FillConstant(T* x, int num, int fill_num) { __global__ void FillConstant(T* x, int num, int fill_num) {
CUDA_1D_KERNEL_LOOP(i, fill_num) { CUDA_1D_KERNEL_LOOP(i, fill_num) { x[i] = static_cast<T>(num); }
x[i] = static_cast<T>(num);
}
} }
template <typename T> template <typename T>
__global__ void SliceOnAxis(const T* x, const int NC_num, const int H, const int W, __global__ void SliceOnAxis(const T* x,
const int axis, const int start, const int end, const int NC_num,
const int H,
const int W,
const int axis,
const int start,
const int end,
T* output) { T* output) {
int HW_num = H * W; int HW_num = H * W;
int length = axis == 2 ? W : H; int length = axis == 2 ? W : H;
...@@ -44,7 +47,7 @@ __global__ void SliceOnAxis(const T* x, const int NC_num, const int H, const int ...@@ -44,7 +47,7 @@ __global__ void SliceOnAxis(const T* x, const int NC_num, const int H, const int
CUDA_1D_KERNEL_LOOP(i, NC_num * cur_HW_num) { CUDA_1D_KERNEL_LOOP(i, NC_num * cur_HW_num) {
int NC_id = i / cur_HW_num; int NC_id = i / cur_HW_num;
int HW_id = i % cur_HW_num; int HW_id = i % cur_HW_num;
if (axis == 2){ if (axis == 2) {
output[i] = x[NC_id * HW_num + start * W + HW_id]; output[i] = x[NC_id * HW_num + start * W + HW_id];
} else if (axis == 3) { } else if (axis == 3) {
int col = HW_id % sliced_len; int col = HW_id % sliced_len;
...@@ -55,9 +58,15 @@ __global__ void SliceOnAxis(const T* x, const int NC_num, const int H, const int ...@@ -55,9 +58,15 @@ __global__ void SliceOnAxis(const T* x, const int NC_num, const int H, const int
} }
template <typename T> template <typename T>
__global__ void MaxOut(const T* input, const int next_ind, const int NC_num, __global__ void MaxOut(const T* input,
const int H, const int W, const int axis, const int next_ind,
const int start, const int end, T* output) { const int NC_num,
const int H,
const int W,
const int axis,
const int start,
const int end,
T* output) {
int HW_num = H * W; int HW_num = H * W;
int length = axis == 2 ? W : H; int length = axis == 2 ? W : H;
T cur = static_cast<T>(0.); T cur = static_cast<T>(0.);
...@@ -70,7 +79,7 @@ __global__ void MaxOut(const T* input, const int next_ind, const int NC_num, ...@@ -70,7 +79,7 @@ __global__ void MaxOut(const T* input, const int next_ind, const int NC_num,
int NC_id = i / cur_HW_num; int NC_id = i / cur_HW_num;
int HW_id = i % cur_HW_num; int HW_id = i % cur_HW_num;
if (axis == 2){ if (axis == 2) {
cur = input[NC_id * HW_num + start * W + HW_id]; cur = input[NC_id * HW_num + start * W + HW_id];
next = input[NC_id * HW_num + next_ind * W + HW_id]; next = input[NC_id * HW_num + next_ind * W + HW_id];
max_v = cur > next ? cur : next; max_v = cur > next ? cur : next;
...@@ -88,9 +97,14 @@ __global__ void MaxOut(const T* input, const int next_ind, const int NC_num, ...@@ -88,9 +97,14 @@ __global__ void MaxOut(const T* input, const int next_ind, const int NC_num,
} }
template <typename T> template <typename T>
__global__ void UpdateMaxInfo(const T* input, const int NC_num, __global__ void UpdateMaxInfo(const T* input,
const int H, const int W, const int axis, const int NC_num,
const int index, T* max_val, int* max_ind) { const int H,
const int W,
const int axis,
const int index,
T* max_val,
int* max_ind) {
int length = axis == 2 ? W : H; int length = axis == 2 ? W : H;
int HW_num = H * W; int HW_num = H * W;
T val = static_cast<T>(0.); T val = static_cast<T>(0.);
...@@ -111,7 +125,14 @@ __global__ void UpdateMaxInfo(const T* input, const int NC_num, ...@@ -111,7 +125,14 @@ __global__ void UpdateMaxInfo(const T* input, const int NC_num,
} }
template <typename T> template <typename T>
__global__ void ScatterAddOnAxis(const T* input, const int start, const int* max_ind, const int NC_num, const int H, const int W, const int axis, T* output) { __global__ void ScatterAddOnAxis(const T* input,
const int start,
const int* max_ind,
const int NC_num,
const int H,
const int W,
const int axis,
T* output) {
int length = axis == 2 ? W : H; int length = axis == 2 ? W : H;
int HW_num = H * W; int HW_num = H * W;
CUDA_1D_KERNEL_LOOP(i, NC_num * length) { CUDA_1D_KERNEL_LOOP(i, NC_num * length) {
...@@ -119,37 +140,46 @@ __global__ void ScatterAddOnAxis(const T* input, const int start, const int* ma ...@@ -119,37 +140,46 @@ __global__ void ScatterAddOnAxis(const T* input, const int start, const int* ma
int length_id = i % length; int length_id = i % length;
int id_ = max_ind[i]; int id_ = max_ind[i];
if (axis == 2) { if (axis == 2) {
platform::CudaAtomicAdd(output + NC_id * HW_num + id_ * W + length_id, input[NC_id * HW_num + start * W + length_id]); platform::CudaAtomicAdd(output + NC_id * HW_num + id_ * W + length_id,
//output[NC_id * HW_num + id_ * W + length_id] += input[NC_id * HW_num + start * W + length_id]; input[NC_id * HW_num + start * W + length_id]);
// output[NC_id * HW_num + id_ * W + length_id] += input[NC_id * HW_num +
// start * W + length_id];
} else if (axis == 3) { } else if (axis == 3) {
platform::CudaAtomicAdd(output + NC_id * HW_num + length_id * W + id_, input[NC_id * HW_num + length_id * W + start]); platform::CudaAtomicAdd(output + NC_id * HW_num + length_id * W + id_,
//output[NC_id * HW_num + length_id * W + id_] += input[NC_id * HW_num + length_id * W + start]; input[NC_id * HW_num + length_id * W + start]);
// output[NC_id * HW_num + length_id * W + id_] += input[NC_id * HW_num +
// length_id * W + start];
} }
__syncthreads(); __syncthreads();
} }
} }
template <typename T> template <typename T>
__global__ void GetMaxInfo(const T* input, const int NC_num, __global__ void GetMaxInfo(const T* input,
const int H, const int W, const int axis, const int NC_num,
const bool reverse, T* max_val, int* max_ind, const int H,
const int W,
const int axis,
const bool reverse,
T* max_val,
int* max_ind,
int* max_map) { int* max_map) {
int start = 0; int start = 0;
int end = axis == 2 ? H: W; int end = axis == 2 ? H : W;
int s = reverse ? end-1 : start; int s = reverse ? end - 1 : start;
int e = reverse ? start-1 : end; int e = reverse ? start - 1 : end;
int step = reverse ? -1 : 1; int step = reverse ? -1 : 1;
int len = axis == 2 ? W : H; int len = axis == 2 ? W : H;
int loc = 0; int loc = 0;
T val = static_cast<T>(0.); T val = static_cast<T>(0.);
for (int i = s; ; ) { for (int i = s;;) {
if (i == s) { if (i == s) {
CUDA_1D_KERNEL_LOOP(j, NC_num * len) { CUDA_1D_KERNEL_LOOP(j, NC_num * len) {
int NC_id = j / len; int NC_id = j / len;
int len_id = j % len; int len_id = j % len;
if (axis == 2) { if (axis == 2) {
loc = NC_id * H * W + i * W + len_id; loc = NC_id * H * W + i * W + len_id;
} else if (axis == 3){ } else if (axis == 3) {
loc = NC_id * H * W + len_id * W + i; loc = NC_id * H * W + len_id * W + i;
} }
max_ind[j] = i; max_ind[j] = i;
...@@ -164,7 +194,7 @@ __global__ void GetMaxInfo(const T* input, const int NC_num, ...@@ -164,7 +194,7 @@ __global__ void GetMaxInfo(const T* input, const int NC_num,
if (axis == 2) { if (axis == 2) {
loc = NC_id * H * W + i * W + len_id; loc = NC_id * H * W + i * W + len_id;
} else if (axis == 3){ } else if (axis == 3) {
loc = NC_id * H * W + len_id * W + i; loc = NC_id * H * W + len_id * W + i;
} }
val = input[loc]; val = input[loc];
...@@ -186,7 +216,13 @@ __global__ void GetMaxInfo(const T* input, const int NC_num, ...@@ -186,7 +216,13 @@ __global__ void GetMaxInfo(const T* input, const int NC_num,
} }
template <typename T> template <typename T>
__global__ void ScatterAddFw(const T* input, const int* max_map, const int NC_num, const int H, const int W, const int axis, T* output){ __global__ void ScatterAddFw(const T* input,
const int* max_map,
const int NC_num,
const int H,
const int W,
const int axis,
T* output) {
CUDA_1D_KERNEL_LOOP(i, NC_num * H * W) { CUDA_1D_KERNEL_LOOP(i, NC_num * H * W) {
int loc = max_map[i]; int loc = max_map[i];
int NC_id = i / (H * W); int NC_id = i / (H * W);
...@@ -202,7 +238,13 @@ __global__ void ScatterAddFw(const T* input, const int* max_map, const int NC_nu ...@@ -202,7 +238,13 @@ __global__ void ScatterAddFw(const T* input, const int* max_map, const int NC_nu
} }
template <typename T> template <typename T>
__global__ void ScatterAddBw(const T* input, const int* max_map, const int NC_num, const int H, const int W, const int axis, T* output){ __global__ void ScatterAddBw(const T* input,
const int* max_map,
const int NC_num,
const int H,
const int W,
const int axis,
T* output) {
CUDA_1D_KERNEL_LOOP(i, NC_num * H * W) { CUDA_1D_KERNEL_LOOP(i, NC_num * H * W) {
int loc = max_map[i]; int loc = max_map[i];
int NC_id = i / (H * W); int NC_id = i / (H * W);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册