Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Pinoxchio
apollo
提交
87b045f3
A
apollo
项目概览
Pinoxchio
/
apollo
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
apollo
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
87b045f3
编写于
4月 10, 2018
作者:
G
ghdawn
提交者:
Jiangtao Hu
4月 10, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use caffe::SyncedMemory
上级
9330295a
变更
7
显示空白变更内容
内联
并排
Showing
7 changed files
with
25 additions
and
207 deletions
+25
-207
modules/perception/cuda_util/region_output.cu
modules/perception/cuda_util/region_output.cu
+1
-1
modules/perception/cuda_util/region_output.h
modules/perception/cuda_util/region_output.h
+2
-2
modules/perception/cuda_util/util.cc
modules/perception/cuda_util/util.cc
+0
-125
modules/perception/cuda_util/util.cu
modules/perception/cuda_util/util.cu
+4
-4
modules/perception/cuda_util/util.h
modules/perception/cuda_util/util.h
+2
-59
modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.cc
...era/detector/yolo_camera_detector/yolo_camera_detector.cc
+10
-10
modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.h
...mera/detector/yolo_camera_detector/yolo_camera_detector.h
+6
-6
未找到文件。
modules/perception/cuda_util/region_output.cu
浏览文件 @
87b045f3
...
...
@@ -100,7 +100,7 @@ void compute_overlapped_by_idx_gpu(const int nthreads,
void
apply_nms_gpu
(
const
float
*
bbox_data
,
const
float
*
conf_data
,
const
int
num_bboxes
,
const
float
confidence_threshold
,
const
int
top_k
,
const
float
nms_threshold
,
std
::
vector
<
int
>
*
indices
,
std
::
shared_ptr
<
SyncedMemory
>
overlapped
,
std
::
shared_ptr
<
SyncedMemory
>
idx_sm
)
{
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
overlapped
,
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
idx_sm
)
{
// Keep part of detections whose scores are higher than confidence threshold.
cudaDeviceSynchronize
();
std
::
vector
<
int
>
idx
;
...
...
modules/perception/cuda_util/region_output.h
浏览文件 @
87b045f3
...
...
@@ -112,8 +112,8 @@ void apply_nms_gpu(const float *bbox_data, const float *conf_data,
const
int
num_bboxes
,
const
float
confidence_threshold
,
const
int
top_k
,
const
float
nms_threshold
,
std
::
vector
<
int
>
*
indices
,
std
::
shared_ptr
<
SyncedMemory
>
overlappe
,
std
::
shared_ptr
<
SyncedMemory
>
idx_sm
);
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
overlappe
,
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
idx_sm
);
void
compute_overlapped_by_idx_gpu
(
const
int
nthreads
,
const
float
*
bbox_data
,
const
float
overlap_threshold
,
const
int
*
idx
,
const
int
num_idx
,
...
...
modules/perception/cuda_util/util.cc
浏览文件 @
87b045f3
...
...
@@ -19,131 +19,6 @@
namespace
apollo
{
namespace
perception
{
// Destructor: releases the host buffer and/or the device buffer, but only the
// ones flagged as owned (own_cpu_data_ / own_gpu_data_). Non-owned buffers are
// left untouched.
SyncedMemory::~SyncedMemory() {
  const bool release_host = (cpu_ptr_ != nullptr) && own_cpu_data_;
  if (release_host) {
    PerceptionFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
  }

  const bool release_device = (gpu_ptr_ != nullptr) && own_gpu_data_;
  if (!release_device) {
    return;
  }

  // Remember the device that is current on entry so it can be restored after
  // the free. The status returned by this query is not checked.
  int previous_device = -1;
  cudaGetDevice(&previous_device);

  // gpu_device_ == -1 means no device was recorded for the buffer; in that
  // case the free is issued on whatever device is current.
  if (gpu_device_ != -1) {
    CUDA_CHECK(cudaSetDevice(gpu_device_));
  }
  CUDA_CHECK(cudaFree(gpu_ptr_));

  // Restore the caller's device (status not checked).
  cudaSetDevice(previous_device);
}
// Makes the host copy usable, driven by the current head_ state:
//   UNINITIALIZED        -> allocate a host buffer, run perception_memset on
//                           it with value 0, head_ becomes HEAD_AT_CPU
//   HEAD_AT_GPU          -> allocate a host buffer if there is none, run
//                           gpu_memcpy from gpu_ptr_ to cpu_ptr_, head_
//                           becomes SYNCED
//   HEAD_AT_CPU / SYNCED -> nothing to do
// Any host buffer allocated here is marked as owned (own_cpu_data_ = true).
inline void SyncedMemory::to_cpu() {
  if (head_ == UNINITIALIZED) {
    PerceptionMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
    perception_memset(size_, 0, cpu_ptr_);
    head_ = HEAD_AT_CPU;
    own_cpu_data_ = true;
    return;
  }

  if (head_ == HEAD_AT_GPU) {
    if (cpu_ptr_ == nullptr) {
      PerceptionMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_);
      own_cpu_data_ = true;
    }
    // gpu_ptr_ is passed as the source argument and cpu_ptr_ as the
    // destination argument, i.e. presumably a device -> host copy.
    gpu_memcpy(size_, gpu_ptr_, cpu_ptr_);
    head_ = SYNCED;
    return;
  }

  // HEAD_AT_CPU or SYNCED: the host copy is already current.
}
// Makes the device copy usable, driven by the current head_ state:
//   UNINITIALIZED        -> record the current device, cudaMalloc size_ bytes,
//                           run perception_gpu_memset on it with value 0,
//                           head_ becomes HEAD_AT_GPU
//   HEAD_AT_CPU          -> allocate a device buffer if there is none, run
//                           gpu_memcpy from cpu_ptr_ to gpu_ptr_, head_
//                           becomes SYNCED
//   HEAD_AT_GPU / SYNCED -> nothing to do
// Any device buffer allocated here is marked as owned (own_gpu_data_ = true).
inline void SyncedMemory::to_gpu() {
  if (head_ == UNINITIALIZED) {
    CUDA_CHECK(cudaGetDevice(&gpu_device_));
    CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
    perception_gpu_memset(size_, 0, gpu_ptr_);
    head_ = HEAD_AT_GPU;
    own_gpu_data_ = true;
    return;
  }

  if (head_ == HEAD_AT_CPU) {
    if (gpu_ptr_ == nullptr) {
      CUDA_CHECK(cudaGetDevice(&gpu_device_));
      CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_));
      own_gpu_data_ = true;
    }
    // cpu_ptr_ is passed as the source argument and gpu_ptr_ as the
    // destination argument, i.e. presumably a host -> device copy.
    gpu_memcpy(size_, cpu_ptr_, gpu_ptr_);
    head_ = SYNCED;
    return;
  }

  // HEAD_AT_GPU or SYNCED: the device copy is already current.
}
// Returns a read-only pointer to the host buffer. to_cpu() runs first, so the
// host buffer is allocated and/or refreshed according to head_ before the
// pointer is handed out.
const void *SyncedMemory::cpu_data() {
  to_cpu();
  const void *host_view = cpu_ptr_;
  return host_view;
}
// Points this object at a caller-provided host buffer.
//
// A null `data` is ignored and leaves the object unchanged. Otherwise, if the
// current host buffer is owned it is freed first; then `data` becomes the
// host pointer, head_ is set to HEAD_AT_CPU and own_cpu_data_ is cleared so
// the buffer is treated as not owned by this object.
void SyncedMemory::set_cpu_data(void *data) {
  if (data != nullptr) {
    if (own_cpu_data_) {
      PerceptionFreeHost(cpu_ptr_, cpu_malloc_use_cuda_);
    }
    cpu_ptr_ = data;
    own_cpu_data_ = false;
    head_ = HEAD_AT_CPU;
  }
}
// Returns a read-only pointer to the device buffer. to_gpu() runs first, so
// the device buffer is allocated and/or refreshed according to head_ before
// the pointer is handed out.
const void *SyncedMemory::gpu_data() {
  to_gpu();
  const void *device_view = gpu_ptr_;
  return device_view;
}
// Points this object at a caller-provided device buffer.
//
// A null `data` is ignored and leaves the object unchanged. Otherwise, if the
// current device buffer is owned it is freed first (on gpu_device_ when one
// was recorded, restoring the previously current device afterwards); then
// `data` becomes the device pointer, head_ is set to HEAD_AT_GPU and
// own_gpu_data_ is cleared so the buffer is treated as not owned by this
// object. gpu_device_ is not updated here.
void SyncedMemory::set_gpu_data(void *data) {
  if (data != nullptr) {
    if (own_gpu_data_) {
      // Remember the device that is current on entry; the status of this
      // query and of the restoring cudaSetDevice below is not checked.
      int previous_device = -1;
      cudaGetDevice(&previous_device);
      if (gpu_device_ != -1) {
        CUDA_CHECK(cudaSetDevice(gpu_device_));
      }
      CUDA_CHECK(cudaFree(gpu_ptr_));
      cudaSetDevice(previous_device);
    }
    gpu_ptr_ = data;
    own_gpu_data_ = false;
    head_ = HEAD_AT_GPU;
  }
}
// Returns a writable pointer to the host buffer. to_cpu() runs first, and
// head_ is then forced to HEAD_AT_CPU so that the host copy is recorded as
// the most recent one.
void *SyncedMemory::mutable_cpu_data() {
  to_cpu();
  head_ = HEAD_AT_CPU;
  void *host_buffer = cpu_ptr_;
  return host_buffer;
}
// Returns a writable pointer to the device buffer. to_gpu() runs first, and
// head_ is then forced to HEAD_AT_GPU so that the device copy is recorded as
// the most recent one.
void *SyncedMemory::mutable_gpu_data() {
  to_gpu();
  head_ = HEAD_AT_GPU;
  void *device_buffer = gpu_ptr_;
  return device_buffer;
}
void
SyncedMemory
::
async_gpu_push
(
const
cudaStream_t
&
stream
)
{
CHECK
(
head_
==
HEAD_AT_CPU
);
if
(
gpu_ptr_
==
NULL
)
{
CUDA_CHECK
(
cudaGetDevice
(
&
gpu_device_
));
CUDA_CHECK
(
cudaMalloc
(
&
gpu_ptr_
,
size_
));
own_gpu_data_
=
true
;
}
const
cudaMemcpyKind
put
=
cudaMemcpyHostToDevice
;
CUDA_CHECK
(
cudaMemcpyAsync
(
gpu_ptr_
,
cpu_ptr_
,
size_
,
put
,
stream
));
// Assume caller will synchronize on the stream before use
head_
=
SYNCED
;
}
}
// namespace perception
}
// namespace apollo
modules/perception/cuda_util/util.cu
浏览文件 @
87b045f3
...
...
@@ -126,7 +126,7 @@ void gpu_memcpy(const size_t N, const void *X, void *Y) {
}
}
void
resize
(
cv
::
Mat
frame
,
caffe
::
Blob
<
float
>
*
dst
,
std
::
shared_ptr
<
SyncedMemory
>
src_gpu
,
void
resize
(
cv
::
Mat
frame
,
caffe
::
Blob
<
float
>
*
dst
,
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
src_gpu
,
int
start_axis
)
{
int
origin_width
=
frame
.
cols
;
int
origin_height
=
frame
.
rows
;
...
...
@@ -140,7 +140,7 @@ void resize(cv::Mat frame, caffe::Blob<float> *dst, std::shared_ptr <SyncedMemor
const
dim3
grid
(
divup
(
width
,
block
.
x
),
divup
(
height
,
block
.
y
));
if
(
src_gpu
==
nullptr
)
{
src_gpu
.
reset
(
new
SyncedMemory
(
origin_width
*
origin_height
*
channel
*
sizeof
(
unsigned
char
)));
new
caffe
::
SyncedMemory
(
origin_width
*
origin_height
*
channel
*
sizeof
(
unsigned
char
)));
}
src_gpu
->
set_cpu_data
(
frame
.
data
);
resize_linear_kernel
<<
<
grid
,
block
>>
>
((
const
unsigned
char
*
)
src_gpu
->
gpu_data
(),
dst
...
...
@@ -148,7 +148,7 @@ void resize(cv::Mat frame, caffe::Blob<float> *dst, std::shared_ptr <SyncedMemor
}
void
resize
(
cv
::
Mat
frame
,
caffe
::
Blob
<
float
>
*
dst
,
std
::
shared_ptr
<
SyncedMemory
>
src_gpu
,
void
resize
(
cv
::
Mat
frame
,
caffe
::
Blob
<
float
>
*
dst
,
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
src_gpu
,
int
start_axis
,
const
float
mean_b
,
const
float
mean_g
,
const
float
mean_r
,
const
float
scale
)
{
int
origin_width
=
frame
.
cols
;
...
...
@@ -163,7 +163,7 @@ void resize(cv::Mat frame, caffe::Blob<float> *dst, std::shared_ptr <SyncedMemor
const
dim3
grid
(
divup
(
width
,
block
.
x
),
divup
(
height
,
block
.
y
));
if
(
src_gpu
==
nullptr
)
{
src_gpu
.
reset
(
new
SyncedMemory
(
origin_width
*
origin_height
*
channel
*
sizeof
(
unsigned
char
)));
new
caffe
::
SyncedMemory
(
origin_width
*
origin_height
*
channel
*
sizeof
(
unsigned
char
)));
}
src_gpu
->
set_cpu_data
(
frame
.
data
);
resize_linear_with_mean_scale_kernel
<<
<
grid
,
block
>>
>
((
const
unsigned
char
*
)
src_gpu
...
...
modules/perception/cuda_util/util.h
浏览文件 @
87b045f3
...
...
@@ -55,71 +55,14 @@ inline void PerceptionFreeHost(void *ptr, bool use_cuda) {
return
;
}
// Pairs a host pointer with a device pointer for a buffer of size_ bytes and
// tracks, through head_, which side holds the most recent copy. Accessors
// are declared here; their definitions live outside this class body.
class SyncedMemory {
 public:
  // Which side currently holds the most recent copy of the data.
  enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };

  // Creates a zero-sized, unallocated buffer.
  SyncedMemory() : SyncedMemory(static_cast<size_t>(0)) {}

  // Records the byte size only; both pointers start out null, nothing is
  // owned, and no device is recorded (gpu_device_ == -1).
  explicit SyncedMemory(size_t size)
      : cpu_ptr_(nullptr),
        gpu_ptr_(nullptr),
        size_(size),
        head_(UNINITIALIZED),
        own_cpu_data_(false),
        cpu_malloc_use_cuda_(false),
        own_gpu_data_(false),
        gpu_device_(-1) {}

  ~SyncedMemory();

  // Read-only access to the host / device side.
  const void *cpu_data();
  const void *gpu_data();

  // Install a caller-provided host / device buffer.
  void set_cpu_data(void *data);
  void set_gpu_data(void *data);

  // Writable access to the host / device side.
  void *mutable_cpu_data();
  void *mutable_gpu_data();

  // Current synchronisation state.
  SyncedHead head() { return head_; }

  // Buffer size as passed to the constructor (0 for the default constructor).
  size_t size() { return size_; }

  // Host-to-device push on the given CUDA stream.
  void async_gpu_push(const cudaStream_t &stream);

 private:
  void to_cpu();
  void to_gpu();

  void *cpu_ptr_;             // host buffer, null until set or allocated
  void *gpu_ptr_;             // device buffer, null until set or allocated
  size_t size_;               // buffer size
  SyncedHead head_;           // which side is current
  bool own_cpu_data_;         // true when cpu_ptr_ is owned by this object
  bool cpu_malloc_use_cuda_;  // flag passed to the host malloc/free helpers
  bool own_gpu_data_;         // true when gpu_ptr_ is owned by this object
  int gpu_device_;            // device recorded for gpu_ptr_, -1 if none

  DISABLE_COPY_AND_ASSIGN(SyncedMemory);
};  // class SyncedMemory
int
divup
(
int
a
,
int
b
);
void
resize
(
cv
::
Mat
frame
,
caffe
::
Blob
<
float
>
*
dst
,
std
::
shared_ptr
<
SyncedMemory
>
src_gpu
,
int
start_axis
);
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
src_gpu
,
int
start_axis
);
// resize with mean and scale
void
resize
(
cv
::
Mat
frame
,
caffe
::
Blob
<
float
>
*
dst
,
std
::
shared_ptr
<
SyncedMemory
>
src_gpu
,
int
start_axis
,
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
src_gpu
,
int
start_axis
,
const
float
mean_b
,
const
float
mean_g
,
const
float
mean_r
,
const
float
scale
);
}
// namespace perception
...
...
modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.cc
浏览文件 @
87b045f3
...
...
@@ -66,7 +66,7 @@ void YoloCameraDetector::init_anchor(const string &yolo_root) {
yolo
::
load_anchors
(
anchors_file
,
&
anchors
);
num_anchors_
=
anchors
.
size
()
/
2
;
obj_size_
=
output_height_
*
output_width_
*
anchors
.
size
()
/
2
;
anchor_
.
reset
(
new
SyncedMemory
(
anchors
.
size
()
*
sizeof
(
float
)));
anchor_
.
reset
(
new
caffe
::
SyncedMemory
(
anchors
.
size
()
*
sizeof
(
float
)));
auto
anchor_cpu_data
=
anchor_
->
mutable_cpu_data
();
memcpy
(
anchor_cpu_data
,
anchors
.
data
(),
anchors
.
size
()
*
sizeof
(
float
));
...
...
@@ -77,20 +77,20 @@ void YoloCameraDetector::init_anchor(const string &yolo_root) {
yolo
::
load_types
(
types_file
,
&
types_
);
res_box_tensor_
.
reset
(
new
SyncedMemory
(
obj_size_
*
s_box_block_size
*
sizeof
(
float
)));
new
caffe
::
SyncedMemory
(
obj_size_
*
s_box_block_size
*
sizeof
(
float
)));
res_box_tensor_
->
cpu_data
();
res_box_tensor_
->
gpu_data
();
res_cls_tensor_
.
reset
(
new
SyncedMemory
(
types_
.
size
()
*
obj_size_
*
sizeof
(
float
)));
new
caffe
::
SyncedMemory
(
types_
.
size
()
*
obj_size_
*
sizeof
(
float
)));
res_cls_tensor_
->
cpu_data
();
res_cls_tensor_
->
gpu_data
();
overlapped_
.
reset
(
new
SyncedMemory
(
top_k_
*
top_k_
*
sizeof
(
bool
)));
overlapped_
.
reset
(
new
caffe
::
SyncedMemory
(
top_k_
*
top_k_
*
sizeof
(
bool
)));
overlapped_
->
cpu_data
();
overlapped_
->
gpu_data
();
idx_sm_
.
reset
(
new
SyncedMemory
(
top_k_
*
sizeof
(
int
)));
idx_sm_
.
reset
(
new
caffe
::
SyncedMemory
(
top_k_
*
sizeof
(
int
)));
idx_sm_
->
cpu_data
();
idx_sm_
->
gpu_data
();
}
...
...
@@ -148,7 +148,7 @@ void YoloCameraDetector::load_intrinsic(
int
channel
=
3
;
image_data_
.
reset
(
new
SyncedMemory
(
roi_w
*
roi_h
*
channel
*
sizeof
(
unsigned
char
)));
new
caffe
::
SyncedMemory
(
roi_w
*
roi_h
*
channel
*
sizeof
(
unsigned
char
)));
}
bool
YoloCameraDetector
::
init_cnn
(
const
string
&
yolo_root
)
{
...
...
@@ -310,13 +310,13 @@ bool YoloCameraDetector::Detect(const cv::Mat &frame,
resize
(
frame
(
roi
),
input_blob
.
get
(),
image_data_
,
0
);
}
pre_time
.
Stop
();
A
DEBUG
<<
"Pre-processing: "
<<
pre_time
.
MilliSeconds
()
<<
" ms"
;
A
INFO
<<
"Pre-processing: "
<<
pre_time
.
MilliSeconds
()
<<
" ms"
;
/////////////////////////// detection part ///////////////////////////
caffe
::
Timer
det_time
;
det_time
.
Start
();
cnnadapter_
->
forward
();
A
DEBUG
<<
"Running detection: "
<<
det_time
.
MilliSeconds
()
<<
" ms"
;
A
INFO
<<
"Running detection: "
<<
det_time
.
MilliSeconds
()
<<
" ms"
;
caffe
::
Timer
post_time
;
post_time
.
Start
();
...
...
@@ -363,8 +363,8 @@ bool YoloCameraDetector::Detect(const cv::Mat &frame,
temp_objects
[
i
].
reset
();
}
temp_objects
.
clear
();
A
DEBUG
<<
"Post-processing: "
<<
post_time
.
MilliSeconds
()
<<
" ms"
;
A
DEBUG
<<
"Number of detected obstacles: "
<<
objects
->
size
();
A
INFO
<<
"Post-processing: "
<<
post_time
.
MilliSeconds
()
<<
" ms"
;
A
INFO
<<
"Number of detected obstacles: "
<<
objects
->
size
();
Extract
(
objects
);
yolo
::
recover_bbox
(
roi_w
,
roi_h
,
offset_y_
,
objects
);
...
...
modules/perception/obstacle/camera/detector/yolo_camera_detector/yolo_camera_detector.h
浏览文件 @
87b045f3
...
...
@@ -86,13 +86,13 @@ class YoloCameraDetector : public BaseCameraDetector {
private:
std
::
shared_ptr
<
CNNAdapter
>
cnnadapter_
;
std
::
shared_ptr
<
SyncedMemory
>
res_cls_tensor_
=
nullptr
;
std
::
shared_ptr
<
SyncedMemory
>
res_box_tensor_
=
nullptr
;
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
res_cls_tensor_
=
nullptr
;
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
res_box_tensor_
=
nullptr
;
std
::
shared_ptr
<
SyncedMemory
>
image_data_
=
nullptr
;
std
::
shared_ptr
<
SyncedMemory
>
overlapped_
=
nullptr
;
std
::
shared_ptr
<
SyncedMemory
>
idx_sm_
=
nullptr
;
std
::
shared_ptr
<
SyncedMemory
>
anchor_
=
nullptr
;
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
image_data_
=
nullptr
;
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
overlapped_
=
nullptr
;
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
idx_sm_
=
nullptr
;
std
::
shared_ptr
<
caffe
::
SyncedMemory
>
anchor_
=
nullptr
;
int
height_
=
0
;
int
width_
=
0
;
float
min_2d_height_
=
0.0
f
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录