Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
cbdaa93e
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
cbdaa93e
编写于
7月 05, 2020
作者:
Y
YashasSamaga
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
reduce slice, concat to copy; enable more concat fusions
上级
657c8d1c
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
55 additions
and
15 deletions
+55
-15
modules/dnn/src/cuda/concat.cu
modules/dnn/src/cuda/concat.cu
+16
-0
modules/dnn/src/cuda/slice.cu
modules/dnn/src/cuda/slice.cu
+32
-0
modules/dnn/src/cuda4dnn/primitives/slice.hpp
modules/dnn/src/cuda4dnn/primitives/slice.hpp
+0
-14
modules/dnn/src/dnn.cpp
modules/dnn/src/dnn.cpp
+7
-1
未找到文件。
modules/dnn/src/cuda/concat.cu
浏览文件 @
cbdaa93e
...
...
@@ -16,6 +16,8 @@
#include "../cuda4dnn/csl/tensor.hpp"
#include "../cuda4dnn/csl/span.hpp"
#include "../cuda4dnn/kernels/fill_copy.hpp"
#include <cstddef>
#include <vector>
...
...
@@ -95,6 +97,20 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
TensorSpan
<
T
>
output
,
std
::
size_t
output_axis_offset
,
TensorView
<
T
>
input
,
std
::
size_t
axis
)
{
CV_Assert
(
output
.
rank
()
==
input
.
rank
());
CV_Assert
(
output_axis_offset
<
output
.
get_axis_size
(
axis
));
/* if axes preceding the concat axis are all singleton, the concat blocks are contiguous
* in the output and we can copy each block directly
*/
if
(
output
.
size_range
(
0
,
axis
)
==
1
)
{
auto
stride
=
output
.
size_range
(
axis
+
1
,
output
.
rank
());
auto
sliced_output
=
Span
<
T
>
(
output
.
get
()
+
output_axis_offset
*
stride
,
input
.
size
());
kernels
::
copy
<
T
>
(
stream
,
sliced_output
,
input
);
return
;
}
/* let's call the axis of interest as the channel axis for the purpose of the following discussion
* even though it can be any axis
*
...
...
modules/dnn/src/cuda/slice.cu
浏览文件 @
cbdaa93e
...
...
@@ -15,11 +15,14 @@
#include "../cuda4dnn/csl/tensor.hpp"
#include "../cuda4dnn/csl/span.hpp"
#include "../cuda4dnn/kernels/fill_copy.hpp"
#include <opencv2/core.hpp>
#include <cstddef>
#include <vector>
#include <iostream>
#include <algorithm>
using
namespace
cv
::
dnn
::
cuda4dnn
::
csl
;
using
namespace
cv
::
dnn
::
cuda4dnn
::
csl
::
device
;
...
...
@@ -79,6 +82,14 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
CV_Assert
(
output
.
rank
()
==
input
.
rank
());
CV_Assert
(
output
.
rank
()
==
offsets
.
size
());
/* copy directly if no slicing is required */
if
(
is_shape_same
(
output
,
input
))
{
CV_Assert
(
std
::
all_of
(
std
::
begin
(
offsets
),
std
::
end
(
offsets
),
[]
(
std
::
size_t
x
)
{
return
x
==
0
;
}));
kernels
::
copy
<
T
>
(
stream
,
output
,
input
);
return
;
}
/* squeezable axes at the beginning of both tensors can be eliminated
*
* Reasoning:
...
...
@@ -146,6 +157,27 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
auto
rank
=
inShape
.
size
();
/* We can do a copy if the reduced rank is two and only the first axis is sliced.
* The general requirement is that only one axis is sliced and all the axes that
* precede the sliced axis are singleton. However, the reductions above will remove
* all the leading singleton axes and merge the trailing unsliced axes into one, or
* zero if there are no trailing unsliced axes. The latter is handled separately.
*/
if
(
rank
==
2
&&
offsets
[
0
]
!=
0
&&
offsets
[
1
]
==
0
)
{
auto
stride
=
inShape
[
1
];
auto
sliced_input
=
View
<
T
>
(
input
.
get
()
+
offsets
[
0
]
*
stride
,
output
.
size
());
kernels
::
copy
<
T
>
(
stream
,
output
,
sliced_input
);
return
;
}
if
(
rank
==
1
)
{
auto
sliced_input
=
View
<
T
>
(
input
.
get
()
+
offsets
[
0
],
output
.
size
());
kernels
::
copy
<
T
>
(
stream
,
output
,
sliced_input
);
return
;
}
std
::
vector
<
std
::
size_t
>
inStride
(
rank
),
outStride
(
rank
);
inStride
.
back
()
=
1
;
outStride
.
back
()
=
1
;
...
...
modules/dnn/src/cuda4dnn/primitives/slice.hpp
浏览文件 @
cbdaa93e
...
...
@@ -47,20 +47,6 @@ namespace cv { namespace dnn { namespace cuda4dnn {
CV_Assert
(
offsets
.
size
()
==
outputs
.
size
());
/* one output with the same shape as the input => direct copy */
if
(
outputs
.
size
()
==
1
)
{
auto
output_wrapper
=
outputs
[
0
].
dynamicCast
<
wrapper_type
>
();
auto
output
=
output_wrapper
->
getSpan
();
if
(
is_shape_same
(
output
,
input
))
{
CV_Assert
(
std
::
all_of
(
std
::
begin
(
offsets
[
0
]),
std
::
end
(
offsets
[
0
]),
[]
(
std
::
size_t
x
)
{
return
x
==
0
;
}));
kernels
::
copy
<
T
>
(
stream
,
output
,
input
);
return
;
}
}
for
(
int
i
=
0
;
i
<
outputs
.
size
();
++
i
)
{
auto
output_wrapper
=
outputs
[
i
].
dynamicCast
<
wrapper_type
>
();
...
...
modules/dnn/src/dnn.cpp
浏览文件 @
cbdaa93e
...
...
@@ -2788,7 +2788,13 @@ struct Net::Impl : public detail::NetImplBase
if
(
preferableBackend
==
DNN_BACKEND_CUDA
&&
(
inp_i_data
->
layerInstance
->
supportBackend
(
DNN_BACKEND_CUDA
)
==
false
||
(
inp_i_data
->
layerInstance
->
type
!=
"Convolution"
&&
inp_i_data
->
layerInstance
->
type
!=
"Pooling"
)))
inp_i_data
->
layerInstance
->
type
!=
"Pooling"
&&
inp_i_data
->
layerInstance
->
type
!=
"Resize"
&&
inp_i_data
->
layerInstance
->
type
!=
"Flatten"
&&
inp_i_data
->
layerInstance
->
type
!=
"Permute"
&&
inp_i_data
->
layerInstance
->
type
!=
"Reorg"
&&
inp_i_data
->
layerInstance
->
type
!=
"Eltwise"
&&
inp_i_data
->
layerInstance
.
dynamicCast
<
ActivationLayer
>
().
empty
())))
{
break
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录