Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
a91eca6e
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a91eca6e
编写于
12月 06, 2019
作者:
Y
YashasSamaga
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add DIV support to EltwiseOp
上级
4b0132ed
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
58 additions
and
2 deletions
+58
-2
modules/dnn/src/cuda/eltwise_ops.cu
modules/dnn/src/cuda/eltwise_ops.cu
+48
-0
modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
+3
-0
modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
+4
-1
modules/dnn/src/layers/eltwise_layer.cpp
modules/dnn/src/layers/eltwise_layer.cpp
+2
-1
modules/dnn/test/test_onnx_importer.cpp
modules/dnn/test/test_onnx_importer.cpp
+1
-0
未找到文件。
modules/dnn/src/cuda/eltwise_ops.cu
浏览文件 @
a91eca6e
...
...
@@ -102,6 +102,26 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
v_store
(
output_vPtr
[
i
],
vec_x
);
}
}
/* Element-wise division kernel operating on packed vectors of T.
 *
 * `output`, `x` and `y` are viewed through get_vector_type_t<T, N>::get_pointer,
 * and each loop iteration loads one vector from `x` and one from `y`, divides
 * them lane by lane and stores the quotient vector into `output`.
 *
 * The loop bound is output.size() / vec_t::size(), so only whole vectors are
 * visited by this kernel. Divisors are not checked against zero.
 */
template <class T, std::size_t N>
__global__ void eltwise_div_2_vec(Span<T> output, View<T> x, View<T> y)
{
    using vec_t = get_vector_type_t<T, N>;

    auto quotient_vPtr = vec_t::get_pointer(output.data());
    auto dividend_vPtr = vec_t::get_pointer(x.data());
    auto divisor_vPtr = vec_t::get_pointer(y.data());

    for (auto idx : grid_stride_range(output.size() / vec_t::size()))
    {
        vec_t dividend, divisor;
        v_load(dividend, dividend_vPtr[idx]);
        v_load(divisor, divisor_vPtr[idx]);

        // the quotient is accumulated in place in `dividend` before being stored
        for (int lane = 0; lane < vec_t::size(); lane++)
            dividend.data[lane] = dividend.data[lane] / divisor.data[lane];

        v_store(quotient_vPtr[idx], dividend);
    }
}
}
template
<
class
T
,
std
::
size_t
N
>
...
...
@@ -221,4 +241,32 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
/* Explicit instantiation of eltwise_prod_2 for __half buffers. */
template
void
eltwise_prod_2
(
const
Stream
&
stream
,
Span
<
__half
>
output
,
View
<
__half
>
x
,
View
<
__half
>
y
);
/* Explicit instantiation of eltwise_prod_2 for float buffers. */
template
void
eltwise_prod_2
(
const
Stream
&
stream
,
Span
<
float
>
output
,
View
<
float
>
x
,
View
<
float
>
y
);
/* Launches raw::eltwise_div_2_vec<T, N> on `stream`.
 *
 * All three buffers must satisfy is_fully_aligned<T>(buffer, N); each one is
 * asserted separately so a failure names the offending buffer. The launch
 * policy is built for output.size() / N work items (one per N-wide vector).
 */
template <class T, std::size_t N>
void launch_vectorized_eltwise_div_2(const Stream& stream, Span<T> output, View<T> x, View<T> y)
{
    CV_Assert(is_fully_aligned<T>(output, N));
    CV_Assert(is_fully_aligned<T>(x, N));
    CV_Assert(is_fully_aligned<T>(y, N));

    auto div_kernel = raw::eltwise_div_2_vec<T, N>;
    auto launch_policy = make_policy(div_kernel, output.size() / N, 0, stream);
    launch_kernel(div_kernel, launch_policy, output, x, y);
}
/* Computes output = x / y element-wise on `stream`.
 *
 * `x`, `y` and `output` must all have the same size (asserted). The widest
 * vector width among 4, 2 and 1 for which every buffer passes
 * is_fully_aligned<T> is selected, and the matching vectorized launcher is
 * invoked; width 1 is the unconditional fallback.
 */
template <class T>
void eltwise_div_2(const Stream& stream, Span<T> output, View<T> x, View<T> y)
{
    CV_Assert(x.size() == y.size());
    CV_Assert(x.size() == output.size());

    // checks output, then x, then y (short-circuiting) for the given vector width
    const auto all_aligned_to = [&](std::size_t width) {
        return is_fully_aligned<T>(output, width) &&
               is_fully_aligned<T>(x, width) &&
               is_fully_aligned<T>(y, width);
    };

    if (all_aligned_to(4))
    {
        launch_vectorized_eltwise_div_2<T, 4>(stream, output, x, y);
        return;
    }

    if (all_aligned_to(2))
    {
        launch_vectorized_eltwise_div_2<T, 2>(stream, output, x, y);
        return;
    }

    launch_vectorized_eltwise_div_2<T, 1>(stream, output, x, y);
}
/* Explicit instantiation of eltwise_div_2 for __half buffers. */
template
void
eltwise_div_2
(
const
Stream
&
stream
,
Span
<
__half
>
output
,
View
<
__half
>
x
,
View
<
__half
>
y
);
/* Explicit instantiation of eltwise_div_2 for float buffers. */
template
void
eltwise_div_2
(
const
Stream
&
stream
,
Span
<
float
>
output
,
View
<
float
>
x
,
View
<
float
>
y
);
}}}}
/* namespace cv::dnn::cuda4dnn::kernels */
modules/dnn/src/cuda4dnn/kernels/eltwise_ops.hpp
浏览文件 @
a91eca6e
...
...
@@ -24,6 +24,9 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace kernels {
/* Declaration of eltwise_prod_2: takes a stream, an output span and two input views of T.
 * NOTE(review): presumably writes the element-wise product of `x` and `y` into `output`;
 * the definition is not visible here - confirm against eltwise_ops.cu.
 */
template
<
class
T
>
void
eltwise_prod_2
(
const
csl
::
Stream
&
stream
,
csl
::
Span
<
T
>
output
,
csl
::
View
<
T
>
x
,
csl
::
View
<
T
>
y
);
/* Declaration of eltwise_div_2: element-wise division of `x` by `y` written into `output`,
 * issued on `stream`. The definition in eltwise_ops.cu asserts that `x`, `y` and `output`
 * have equal sizes and provides explicit instantiations for __half and float.
 */
template
<
class
T
>
void
eltwise_div_2
(
const
csl
::
Stream
&
stream
,
csl
::
Span
<
T
>
output
,
csl
::
View
<
T
>
x
,
csl
::
View
<
T
>
y
);
}}}}
/* namespace cv::dnn::cuda4dnn::kernels */
#endif
/* OPENCV_DNN_SRC_CUDA4DNN_KERNELS_ELTWISE_OPS_HPP */
modules/dnn/src/cuda4dnn/primitives/eltwise.hpp
浏览文件 @
a91eca6e
...
...
@@ -24,7 +24,8 @@ namespace cv { namespace dnn { namespace cuda4dnn {
enum
class
EltwiseOpType
{
MAX
,
SUM
,
PRODUCT
PRODUCT
,
DIV
};
template
<
class
T
>
...
...
@@ -64,6 +65,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
{
case
EltwiseOpType
::
MAX
:
kernels
::
eltwise_max_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
break
;
case
EltwiseOpType
::
PRODUCT
:
kernels
::
eltwise_prod_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
break
;
case
EltwiseOpType
::
DIV
:
kernels
::
eltwise_div_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
break
;
case
EltwiseOpType
::
SUM
:
if
(
coeffs
.
empty
()
||
(
coeffs
[
0
]
==
1
&&
coeffs
[
1
]
==
1
))
kernels
::
eltwise_sum_2
<
T
>
(
stream
,
output
,
input_x
,
input_y
);
...
...
@@ -89,6 +91,7 @@ namespace cv { namespace dnn { namespace cuda4dnn {
{
case
EltwiseOpType
::
MAX
:
kernels
::
eltwise_max_2
<
T
>
(
stream
,
output
,
output
,
input
);
break
;
case
EltwiseOpType
::
PRODUCT
:
kernels
::
eltwise_prod_2
<
T
>
(
stream
,
output
,
output
,
input
);
break
;
case
EltwiseOpType
::
DIV
:
kernels
::
eltwise_div_2
<
T
>
(
stream
,
output
,
output
,
input
);
break
;
case
EltwiseOpType
::
SUM
:
if
(
coeffs
.
empty
()
||
coeffs
[
i
]
==
1
)
kernels
::
eltwise_sum_2
<
T
>
(
stream
,
output
,
output
,
input
);
...
...
modules/dnn/src/layers/eltwise_layer.cpp
浏览文件 @
a91eca6e
...
...
@@ -108,7 +108,7 @@ public:
virtual
bool
supportBackend
(
int
backendId
)
CV_OVERRIDE
{
return
backendId
==
DNN_BACKEND_OPENCV
||
(
backendId
==
DNN_BACKEND_CUDA
&&
op
!=
DIV
)
||
// TODO: not implemented, see PR #15811
backendId
==
DNN_BACKEND_CUDA
||
(
backendId
==
DNN_BACKEND_HALIDE
&&
op
!=
DIV
)
||
// TODO: not implemented, see PR #15811
((((
backendId
==
DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019
&&
(
preferableTarget
!=
DNN_TARGET_OPENCL
||
coeffs
.
empty
()))
||
backendId
==
DNN_BACKEND_INFERENCE_ENGINE_NGRAPH
)
&&
!
variableChannels
));
...
...
@@ -471,6 +471,7 @@ public:
case
MAX
:
return
cuda4dnn
::
EltwiseOpType
::
MAX
;
case
SUM
:
return
cuda4dnn
::
EltwiseOpType
::
SUM
;
case
PROD
:
return
cuda4dnn
::
EltwiseOpType
::
PRODUCT
;
case
DIV
:
return
cuda4dnn
::
EltwiseOpType
::
DIV
;
}
return
cuda4dnn
::
EltwiseOpType
::
SUM
;
}();
...
...
modules/dnn/test/test_onnx_importer.cpp
浏览文件 @
a91eca6e
...
...
@@ -380,6 +380,7 @@ TEST_P(Test_ONNX_layers, Div)
normAssert
(
ref
,
out
,
""
,
default_l1
,
default_lInf
);
expectNoFallbacksFromIE
(
net
);
expectNoFallbacksFromCUDA
(
net
);
}
TEST_P
(
Test_ONNX_layers
,
DynamicReshape
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录