Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
eab6744a
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
eab6744a
编写于
2月 05, 2019
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
dnn(ocl): use compile-time LOCAL_SIZE parameter
instead of get_local_size(0) and dynamic local memory allocation
上级
f46cd9db
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
32 addition
and
19 deletion
+32
-19
modules/dnn/src/layers/mvn_layer.cpp
modules/dnn/src/layers/mvn_layer.cpp
+10
-9
modules/dnn/src/opencl/mvn.cl
modules/dnn/src/opencl/mvn.cl
+22
-10
未找到文件。
modules/dnn/src/layers/mvn_layer.cpp
浏览文件 @
eab6744a
...
...
@@ -138,9 +138,12 @@ public:
UMat
&
bnorm_weight
=
umat_scale
;
UMat
&
bnorm_bias
=
umat_shift
;
const
unsigned
LOCAL_SIZE
=
128
;
bool
use_half
=
(
inputs
[
0
].
depth
()
==
CV_16S
);
String
opts
=
format
(
" -DT=%s -DT4=%s -Dconvert_T=%s"
,
use_half
?
"half"
:
"float"
,
use_half
?
"half4"
:
"float4"
,
use_half
?
"convert_half4"
:
"convert_float4"
);
String
opts
=
format
(
" -DT=%s -DT4=%s -Dconvert_T=%s -DLOCAL_SIZE=%u"
,
use_half
?
"half"
:
"float"
,
use_half
?
"half4"
:
"float4"
,
use_half
?
"convert_half4"
:
"convert_float4"
,
LOCAL_SIZE
);
int
splitDim
=
(
acrossChannels
)
?
1
:
2
;
for
(
size_t
inpIdx
=
0
;
inpIdx
<
inputs
.
size
();
inpIdx
++
)
...
...
@@ -155,8 +158,8 @@ public:
float
alpha
=
1.0
f
/
s
[
1
];
String
buildopt
=
"-DNUM=4"
+
opts
;
ocl
::
Kernel
k
(
"mean_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
size_t
localsize
[]
=
{
128
};
ocl
::
Kernel
k
(
"mean_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
+
" -DKERNEL_MEAN_FUSE"
);
size_t
localsize
[]
=
{
LOCAL_SIZE
};
size_t
globalsize
[]
=
{
(
size_t
)
s
[
0
]
/
4
*
localsize
[
0
]
};
int
argId
=
0
;
...
...
@@ -165,7 +168,6 @@ public:
k
.
set
(
argId
++
,
alpha
);
k
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
meanMat
));
k
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
tmpMat
));
k
.
set
(
argId
++
,
NULL
,
localsize
[
0
]
*
sizeof
(
cl_float4
));
bool
ret
=
k
.
run
(
1
,
globalsize
,
localsize
,
false
);
if
(
!
ret
)
return
false
;
...
...
@@ -173,7 +175,7 @@ public:
buildopt
+=
format
(
" %s %s"
,
(
fuse_batch_norm
)
?
"-DFUSE_BATCH_NORM"
:
""
,
(
fuse_relu
)
?
"-DFUSE_RELU"
:
""
);
ocl
::
Kernel
k1
(
"mvn_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
ocl
::
Kernel
k1
(
"mvn_fuse4"
,
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
+
" -DKERNEL_MVN_FUSE"
);
argId
=
0
;
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
tmpMat
));
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
inpMat
));
...
...
@@ -185,7 +187,6 @@ public:
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bnorm_weight
));
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrReadOnly
(
bnorm_bias
));
k1
.
set
(
argId
++
,
ocl
::
KernelArg
::
PtrWriteOnly
(
outMat
));
k1
.
set
(
argId
++
,
NULL
,
localsize
[
0
]
*
sizeof
(
cl_float4
));
ret
=
k1
.
run
(
1
,
globalsize
,
localsize
,
false
);
if
(
!
ret
)
return
false
;
...
...
@@ -243,7 +244,7 @@ public:
if
(
normVariance
)
{
String
kname
=
format
(
"calc_mean%d"
,
number
);
ocl
::
Kernel
kernel
(
kname
.
c_str
(),
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
ocl
::
Kernel
kernel
(
kname
.
c_str
(),
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
+
" -DKERNEL_MEAN"
);
if
(
kernel
.
empty
())
return
false
;
...
...
@@ -263,7 +264,7 @@ public:
}
String
kname
=
format
(
"mvn%d"
,
number
);
buildopt
+=
format
(
"%s%s%s"
,
(
normVariance
)
?
" -DNORM_VARIANCE"
:
""
,
buildopt
+=
format
(
"%s%s%s
-DKERNEL_MVN
"
,
(
normVariance
)
?
" -DNORM_VARIANCE"
:
""
,
(
fuse_batch_norm
)
?
" -DFUSE_BATCH_NORM"
:
""
,
(
fuse_relu
)
?
" -DFUSE_RELU"
:
""
);
ocl
::
Kernel
kernel1
(
kname
.
c_str
(),
ocl
::
dnn
::
mvn_oclsrc
,
buildopt
);
...
...
modules/dnn/src/opencl/mvn.cl
浏览文件 @
eab6744a
...
...
@@ -74,6 +74,8 @@
#
define
MVN_FUSE
mvn_fuse1
#
endif
#
ifdef
KERNEL_MEAN
__kernel
void
CALC_MEAN
(
__global
const
Dtype*
src,
const
int
rows,
const
int
cols,
...
...
@@ -94,6 +96,8 @@ __kernel void CALC_MEAN(__global const Dtype* src,
store(dst_vec, dst, index);
}
#elif defined KERNEL_MVN
__kernel void MVN(__global const Dtype* src,
const int rows,
const int cols,
...
...
@@ -140,12 +144,13 @@ __kernel void MVN(__global const Dtype* src,
store
(
dst_vec,
dst,
index
)
;
}
#
elif
defined
KERNEL_MEAN_FUSE
__kernel
void
MEAN_FUSE
(
__global
const
T
*
A,
unsigned
int
A_col_size,
float
alpha,
__global
T4
*
mean,
__global
Dtype
*
tmp,
__local
Dtype4
*
work
)
__global
Dtype
*
tmp
)
{
unsigned
int
row_gid
=
get_group_id
(
0
)
;
unsigned
int
lid
=
get_local_id
(
0
)
;
...
...
@@ -168,15 +173,16 @@ __kernel void MEAN_FUSE(__global const T * A,
dot2
+=
convert_float4
(
a2
)
;
dot3
+=
convert_float4
(
a3
)
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
__local
Dtype4
work[LOCAL_SIZE]
;
work[lid].s0
=
dot
(
dot0,
b0
)
;
work[lid].s1
=
dot
(
dot1,
b0
)
;
work[lid].s2
=
dot
(
dot2,
b0
)
;
work[lid].s3
=
dot
(
dot3,
b0
)
;
for
(
unsigned
int
stride=
get_local_size
(
0
)
/2
; stride>0 ; stride>>=1)
for
(
unsigned
int
stride=
LOCAL_SIZE
/2
; stride>0 ; stride>>=1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
stride
)
...
...
@@ -212,10 +218,12 @@ __kernel void MEAN_FUSE(__global const T * A,
vstore4
(
dot2,
i,
dst0_read
+
2
*
A_col_size
)
;
vstore4
(
dot3,
i,
dst0_read
+
3
*
A_col_size
)
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
}
#
elif
defined
KERNEL_MVN_FUSE
__kernel
void
MVN_FUSE
(
__global
const
Dtype
*
tmp,
__global
const
T
*
A,
__global
const
T4
*
mean,
...
...
@@ -225,8 +233,7 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
const
float
relu_slope,
__global
const
Dtype4
*
bnorm_weight,
__global
const
Dtype4
*
bnorm_bias,
__global
T
*
B,
__local
Dtype4
*
work
)
__global
T
*
B
)
{
unsigned
int
row_gid
=
get_group_id
(
0
)
;
unsigned
int
lid
=
get_local_id
(
0
)
;
...
...
@@ -250,15 +257,16 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
dot2
+=
a2
;
dot3
+=
a3
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
__local
Dtype4
work[LOCAL_SIZE]
;
work[lid].s0
=
dot
(
dot0,
b0
)
;
work[lid].s1
=
dot
(
dot1,
b0
)
;
work[lid].s2
=
dot
(
dot2,
b0
)
;
work[lid].s3
=
dot
(
dot3,
b0
)
;
for
(
unsigned
int
stride=
get_local_size
(
0
)
/2
; stride>0 ; stride>>=1)
for
(
unsigned
int
stride=
LOCAL_SIZE
/2
; stride>0 ; stride>>=1)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
stride
)
...
...
@@ -314,6 +322,10 @@ __kernel void MVN_FUSE(__global const Dtype * tmp,
vstore4
(
convert_T
(
dot2
)
,
i,
dst0_read
+
2
*
A_col_size
)
;
vstore4
(
convert_T
(
dot3
)
,
i,
dst0_read
+
3
*
A_col_size
)
;
i
+=
get_local_size
(
0
)
;
i
+=
LOCAL_SIZE
;
}
}
#
else
#
error
"Configuration error!"
#
endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录