Commit 20dac7ea (unverified)
Authored on Feb 17, 2023 by Zihao Mu; committed via GitHub on Feb 17, 2023
Parent: 923dbcc5

Merge pull request #23255 from zihaomu:fused_cuda_naryeltwise

DNN: fuse conv+naryEltwise on CUDA backend.
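In practical terms the fusion is applied transparently when a network runs on the CUDA backend. Below is a minimal user-side sketch that exercises the new path; the model file name is a placeholder, and any graph with a Convolution feeding an element-wise Add (imported as NaryEltwise, e.g. a residual connection) qualifies.

    #include <opencv2/dnn.hpp>

    int main()
    {
        // Hypothetical ONNX model containing Conv -> Add (NaryEltwise) chains,
        // e.g. residual blocks; the path is a placeholder.
        cv::dnn::Net net = cv::dnn::readNetFromONNX("model_with_residuals.onnx");
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);

        cv::Mat image(224, 224, CV_32FC3, cv::Scalar::all(0.5));
        net.setInput(cv::dnn::blobFromImage(image));
        cv::Mat out = net.forward();  // fuseLayers runs during net setup, fusing conv+naryEltwise
        return 0;
    }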
Showing 3 changed files with 66 additions and 22 deletions:

    modules/dnn/src/layers/convolution_layer.cpp    +2   -1
    modules/dnn/src/layers/nary_eltwise_layers.cpp  +22  -11
    modules/dnn/src/net_impl_fuse.cpp               +42  -10
modules/dnn/src/layers/convolution_layer.cpp

@@ -606,7 +606,8 @@ public:
         if (IS_DNN_CUDA_TARGET(preferableTarget))
         {
             Ptr<EltwiseLayer> eltwise = top.dynamicCast<EltwiseLayer>();
-            if (!eltwise.empty()) // && eltwise->op == EltwiseLayer::SUM && eltwise->coeffs.empty())
+            Ptr<NaryEltwiseLayer> naryEltwise = top.dynamicCast<NaryEltwiseLayer>();
+            if (!eltwise.empty() || !naryEltwise.empty())
             {
                 /* we also need to check that the eltwise input does not require shortcut mechanism
                  * it's difficult to verify it here but we hope that `fuseLayers` has done the check already
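The gate above hinges on a cv::Ptr idiom: dynamicCast yields an empty Ptr when the pointee is not of the requested type, so emptiness doubles as a runtime type test. A standalone sketch with toy types (not the real cv::dnn layer classes):

    #include <opencv2/core.hpp>

    struct ToyLayer { virtual ~ToyLayer() {} };
    struct ToyEltwise : ToyLayer {};
    struct ToyNaryEltwise : ToyLayer {};

    static bool canFuseWith(const cv::Ptr<ToyLayer>& top)
    {
        // Each cast is empty unless `top` actually holds that type.
        cv::Ptr<ToyEltwise> eltwise = top.dynamicCast<ToyEltwise>();
        cv::Ptr<ToyNaryEltwise> naryEltwise = top.dynamicCast<ToyNaryEltwise>();
        return !eltwise.empty() || !naryEltwise.empty();  // either variant is fusable
    }

    int main()
    {
        cv::Ptr<ToyLayer> a = cv::makePtr<ToyNaryEltwise>();
        cv::Ptr<ToyLayer> b = cv::makePtr<ToyLayer>();
        return (canFuseWith(a) && !canFuseWith(b)) ? 0 : 1;  // a fuses, b does not
    }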
modules/dnn/src/layers/nary_eltwise_layers.cpp

@@ -681,17 +681,28 @@ public:
             return Ptr<BackendNode>();
         }

-        auto op_ = [this] {
-            switch (op) {
-                case OPERATION::MAX: return cuda4dnn::EltwiseOpType::MAX;
-                case OPERATION::MIN: return cuda4dnn::EltwiseOpType::MIN;
-                case OPERATION::SUM: return cuda4dnn::EltwiseOpType::SUM;
-                case OPERATION::PROD: return cuda4dnn::EltwiseOpType::PRODUCT;
-                case OPERATION::DIV: return cuda4dnn::EltwiseOpType::DIV;
-                case OPERATION::ADD: return cuda4dnn::EltwiseOpType::SUM;
-                default: CV_Error(Error::StsNotImplemented, "Other operators except MAX, MIN, SUM, PRODUCT and DIV are not supported with cuda.");
-            }
-        }();
+        cuda4dnn::EltwiseOpType op_ = cuda4dnn::EltwiseOpType::SUM;
+        switch (op) {
+            case OPERATION::MAX:
+                op_ = cuda4dnn::EltwiseOpType::MAX;
+                break;
+            case OPERATION::MIN:
+                op_ = cuda4dnn::EltwiseOpType::MIN;
+                break;
+            case OPERATION::SUM:
+                op_ = cuda4dnn::EltwiseOpType::SUM;
+                break;
+            case OPERATION::PROD:
+                op_ = cuda4dnn::EltwiseOpType::PRODUCT;
+                break;
+            case OPERATION::DIV:
+                op_ = cuda4dnn::EltwiseOpType::DIV;
+                break;
+            case OPERATION::ADD:
+                op_ = cuda4dnn::EltwiseOpType::SUM;
+                break;
+            default: return Ptr<BackendNode>(); // return empty cuda_node if the EltwiseOpType is unsupported type.
+        };

         return make_cuda_node<cuda4dnn::EltwiseOp>(preferableTarget, std::move(context->stream), op_, std::vector<float>());
     }
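The behavioural point of this rewrite: the old lambda threw CV_Error on an unsupported operation, which would abort the fusion pass when it probes initCUDA; the new switch instead returns an empty Ptr<BackendNode>, which callers can treat as "unsupported, skip". A minimal sketch of the pattern with toy types and a hypothetical op enum:

    #include <opencv2/core.hpp>
    #include <cstdio>

    struct ToyBackendNode { virtual ~ToyBackendNode() {} };
    enum class ToyOp { SUM, PROD, WHERE /* unsupported on the toy backend */ };

    static cv::Ptr<ToyBackendNode> tryInitCuda(ToyOp op)
    {
        switch (op)
        {
            case ToyOp::SUM:
            case ToyOp::PROD:
                return cv::makePtr<ToyBackendNode>();
            default:
                return cv::Ptr<ToyBackendNode>();  // empty handle: unsupported, no exception
        }
    }

    int main()
    {
        if (tryInitCuda(ToyOp::WHERE).empty())
            std::printf("unsupported op: backend/fusion path skipped gracefully\n");
        return 0;
    }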
modules/dnn/src/net_impl_fuse.cpp

@@ -82,10 +82,11 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                     break;
                 }
 #endif
-                /* we use `tryFuse` member of convolution layer to fuse eltwise later
+                /* we use `tryFuse` member of convolution layer to fuse eltwise/naryEltwise later
                  * it's not intended to be fused here; hence, we stop when we encounter eltwise
                  */
-                if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" && nextData->type == "Eltwise")
+                if (preferableBackend == DNN_BACKEND_CUDA && ld.type == "Convolution" &&
+                    (nextData->type == "Eltwise" || nextData->type == "NaryEltwise"))
                     break;
                 Ptr<Layer> nextLayer = nextData->layerInstance;
                 if (currLayer->tryFuse(nextLayer))
@@ -335,22 +336,31 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                 }

                 // OpenCL: fuse convolution layer followed by eltwise + relu
-                // CUDA: fuse convolution layer followed by eltwise (and optional activation)
+                // CUDA: fuse convolution layer followed by eltwise/naryEltwise (and optional activation)
                 while (nextData &&
                     (IS_DNN_OPENCL_TARGET(preferableTarget) || IS_DNN_CUDA_TARGET(preferableTarget)) &&
                     ld.layerInstance->type == "Convolution"
                 )  // semantic of 'if'
                 {
                     Ptr<EltwiseLayer> nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
-                    if (nextEltwiseLayer.empty())
+                    Ptr<NaryEltwiseLayer> nextNaryEltwiseLayer = nextData->layerInstance.dynamicCast<NaryEltwiseLayer>();
+                    if (nextEltwiseLayer.empty() && nextNaryEltwiseLayer.empty())
                         break;

+                    // TODO: fused the Conv+NaryEltwise on OpenCL backend. At present, we can only support it at CUDA backend.
+                    if (IS_DNN_OPENCL_TARGET(preferableTarget) && nextNaryEltwiseLayer)
+                        break;
+
 #ifdef HAVE_CUDA
                     // CUDA backend supports fusion with eltwise sum (without variable channels)
-                    if (IS_DNN_CUDA_TARGET(preferableTarget) && !nextEltwiseLayer.empty())
+                    if (IS_DNN_CUDA_TARGET(preferableTarget) && (!nextEltwiseLayer.empty() || !nextNaryEltwiseLayer.empty()))
                     {
                         // we create a temporary backend node for eltwise layer to obtain the eltwise configuration
                         cuda4dnn::csl::CSLContext context; // assume that initCUDA and EltwiseOp do not use the context during init

+                        if (!nextData->layerInstance->supportBackend(DNN_BACKEND_CUDA))
+                            break;
+
                         const auto node = nextData->layerInstance->initCUDA(&context, nextData->inputBlobsWrappers, nextData->outputBlobsWrappers);
                         auto eltwiseNode = node.dynamicCast<cuda4dnn::EltwiseOpBase>();
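Two details of this hunk are worth spelling out: the pass builds a throwaway backend node purely to read the eltwise configuration, and the new supportBackend check ensures layers that cannot run on CUDA never reach initCUDA. A sketch of that probing pattern with toy interfaces (the real ones are cv::dnn internals):

    #include <opencv2/core.hpp>

    struct ToyNode { virtual ~ToyNode() {} };
    struct ToyEltwiseNode : ToyNode { float coeff = 1.f; };

    struct ToyLayer
    {
        bool cudaSupported = true;
        bool supportBackend(bool wantCuda) const { return !wantCuda || cudaSupported; }
        cv::Ptr<ToyNode> initCUDA() const { return cv::makePtr<ToyEltwiseNode>(); }
    };

    // Returns the eltwise node config, or an empty Ptr if any probe step fails.
    static cv::Ptr<ToyEltwiseNode> probeEltwise(const ToyLayer& layer)
    {
        if (!layer.supportBackend(/*wantCuda=*/true))
            return cv::Ptr<ToyEltwiseNode>();       // mirrors the new early `break`
        cv::Ptr<ToyNode> node = layer.initCUDA();   // temporary node, discarded after probing
        return node.dynamicCast<ToyEltwiseNode>();  // empty if not an eltwise op
    }

    int main() { return probeEltwise(ToyLayer{}).empty() ? 1 : 0; }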
@@ -408,7 +418,7 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                         {
                             LayerData *eltwiseData = nextData;

-                            // Eltwise layer has two inputs. We need to determine which
+                            // Eltwise/NaryEltwise layer has two inputs. We need to determine which
                             // is a base convolution layer and which could be used as it's bias.
                             LayerData* biasLayerData = 0;
                             for (int i = 0; i < 2; ++i)
@@ -483,7 +493,14 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                                          * => activation(convolution + eltwise)
                                          *    > fuse eltwise and then activation
                                          */
-                                        auto layer = nextEltwiseLayer.staticCast<Layer>();
+                                        Ptr<Layer> layer = nullptr;
+                                        if (nextNaryEltwiseLayer)
+                                            layer = nextNaryEltwiseLayer.staticCast<Layer>();
+                                        else if (nextEltwiseLayer)
+                                            layer = nextEltwiseLayer.staticCast<Layer>();
+                                        else
+                                            CV_Error(Error::StsError, "Both nextNaryEltwiseLayer and nextEltwiseLayer are empty!");
+
                                         if (currLayer->tryFuse(layer))
                                         {
                                             fuse_eltwise = true;  /* eltwise was successfully fused */
@@ -511,7 +528,14 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                                     CV_Assert(nextData);
                                     CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
                                     ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
-                                    printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                                    if (nextEltwiseLayer)
+                                        printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                                    else if (nextNaryEltwiseLayer)
+                                        printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                                    else
+                                        CV_Error(Error::StsError, "Both nextNaryEltwiseLayer and nextEltwiseLayer are empty!");
+
                                     printf_(("\tfused with %s\n", nextFusabeleActivLayer->name.c_str()));
                                     eltwiseData->skip = true;
                                     nextData->skip = true;
@@ -554,12 +578,19 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                                 }
                             }
                         }
-                        else if (fuse_eltwise) // conv + eltwise (note: conv could have fused activations before eltwise)
+                        else if (fuse_eltwise) // conv + eltwise/naryEltwise (note: conv could have fused activations before eltwise)
                         {
                             CV_Assert(IS_DNN_CUDA_TARGET(preferableTarget));
                             CV_Assert_N(biasLayerData->outputBlobsWrappers.size() == 1, ld.inputBlobsWrappers.size() == 1);
                             ld.inputBlobsWrappers.push_back(biasLayerData->outputBlobsWrappers[0]);
-                            printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                            if (nextEltwiseLayer)
+                                printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                            else if (nextNaryEltwiseLayer)
+                                printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                            else
+                                CV_Error(Error::StsError, "Both nextNaryEltwiseLayer and nextEltwiseLayer are empty!");
+
                             eltwiseData->skip = true;
                             // This optimization is for cases like
                             //
                             // some_layer   conv (maybe fused with activ)
@@ -682,6 +713,7 @@ void Net::Impl::fuseLayers(const std::vector<LayerPin>& blobsToKeep_)
                      inp_i_data->layerInstance->type != "Permute" &&
                      inp_i_data->layerInstance->type != "Reorg" &&
                      inp_i_data->layerInstance->type != "Eltwise" &&
+                     inp_i_data->layerInstance->type != "NaryEltwise" &&
                      inp_i_data->layerInstance.dynamicCast<ActivationLayer>().empty())))
                 {
                     break;
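Taken together, the net_impl_fuse.cpp changes rewire the graph the same way the existing conv+Eltwise fusion does: the residual input of the (Nary)Eltwise layer is appended to the convolution's inputs and the element-wise node is marked skipped, so the CUDA convolution kernel performs the addition itself. A minimal sketch of that rewrite with hypothetical data structures:

    #include <string>
    #include <vector>

    struct Node
    {
        std::string type;
        std::vector<Node*> inputs;
        bool skip = false;
    };

    // Before: conv -> eltwise(conv_out, residual). After: conv(x, residual),
    // with the eltwise node left in place but skipped during execution.
    void fuseConvEltwise(Node& conv, Node& eltwise, Node& residual)
    {
        conv.inputs.push_back(&residual);  // conv consumes the residual directly
        eltwise.skip = true;               // eltwise becomes a no-op in the graph
    }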