Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
6982006f
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6982006f
编写于
3月 28, 2018
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add compatible method to space2depth and depth2space ops
上级
96452de2
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
68 addition
and
21 deletion
+68
-21
mace/kernels/opencl/cl/depth_to_space.cl
mace/kernels/opencl/cl/depth_to_space.cl
+35
-0
mace/kernels/opencl/depth_to_space_opencl.cc
mace/kernels/opencl/depth_to_space_opencl.cc
+33
-21
未找到文件。
mace/kernels/opencl/cl/depth_to_space.cl
浏览文件 @
6982006f
...
@@ -3,11 +3,28 @@
...
@@ -3,11 +3,28 @@
__kernel
void
depth_to_space
(
__read_only
image2d_t
input,
__kernel
void
depth_to_space
(
__read_only
image2d_t
input,
__private
const
int
block_size,
__private
const
int
block_size,
__private
const
int
output_depth,
__private
const
int
output_depth,
#
ifndef
USE_QUALCOMM_OPENCL_2_0
__write_only
image2d_t
output,
__private
const
int
global_size_dim0,
__private
const
int
global_size_dim1,
__private
const
int
global_size_dim2
)
{
#
else
__write_only
image2d_t
output
)
{
__write_only
image2d_t
output
)
{
#
endif
const
int
out_d
=
get_global_id
(
0
)
;
const
int
out_d
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_h
=
get_global_id
(
2
)
;
const
int
out_h
=
get_global_id
(
2
)
;
#
ifndef
USE_QUALCOMM_OPENCL_2_0
if
(
out_d
>=
global_size_dim0
|
| out_w >= global_size_dim1
|| out_h >= global_size_dim2) {
return;
}
const int output_width = global_size_dim1;
#else
const int output_width = get_global_size(1);
const int output_width = get_global_size(1);
#endif
const int out_pos = mad24(out_d, output_width, out_w);
const int out_pos = mad24(out_d, output_width, out_w);
...
@@ -30,11 +47,29 @@ __kernel void depth_to_space(__read_only image2d_t input,
...
@@ -30,11 +47,29 @@ __kernel void depth_to_space(__read_only image2d_t input,
__kernel void space_to_depth(__read_only image2d_t input,
__kernel void space_to_depth(__read_only image2d_t input,
__private const int block_size,
__private const int block_size,
__private const int input_depth,
__private const int input_depth,
#ifndef USE_QUALCOMM_OPENCL_2_0
__write_only image2d_t output,
__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2) {
#else
__write_only image2d_t output) {
__write_only image2d_t output) {
#endif
const int d = get_global_id(0);
const int d = get_global_id(0);
const int w = get_global_id(1);
const int w = get_global_id(1);
const int h = get_global_id(2);
const int h = get_global_id(2);
#ifndef USE_QUALCOMM_OPENCL_2_0
if (d >= global_size_dim0 || w >= global_size_dim1
|
|
h
>=
global_size_dim2
)
{
return
;
}
const
int
input_width
=
global_size_dim1
;
#
else
const
int
input_width
=
get_global_size
(
1
)
;
const
int
input_width
=
get_global_size
(
1
)
;
#
endif
const
int
in_pos
=
mad24
(
d,
input_width,
w
)
;
const
int
in_pos
=
mad24
(
d,
input_width,
w
)
;
const
int
output_width
=
input_width
/
block_size
;
const
int
output_width
=
input_width
/
block_size
;
...
...
mace/kernels/opencl/depth_to_space_opencl.cc
浏览文件 @
6982006f
...
@@ -45,8 +45,11 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -45,8 +45,11 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
image_shape
);
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
&
image_shape
);
output
->
ResizeImage
(
output_shape
,
image_shape
);
output
->
ResizeImage
(
output_shape
,
image_shape
);
if
(
kernel_
.
get
()
==
nullptr
)
{
auto
runtime
=
OpenCLRuntime
::
Global
();
auto
runtime
=
OpenCLRuntime
::
Global
();
const
bool
is_qualcomm_opencl200
=
IsQualcommOpenCL200
();
if
(
kernel_
.
get
()
==
nullptr
)
{
std
::
set
<
std
::
string
>
built_options
;
std
::
set
<
std
::
string
>
built_options
;
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
std
::
stringstream
kernel_name_ss
;
std
::
stringstream
kernel_name_ss
;
...
@@ -55,38 +58,47 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
...
@@ -55,38 +58,47 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
if
(
is_qualcomm_opencl200
)
{
built_options
.
emplace
(
"-DUSE_QUALCOMM_OPENCL_2_0"
);
}
kernel_
=
kernel_
=
runtime
->
BuildKernel
(
"depth_to_space"
,
kernel_name
,
built_options
);
runtime
->
BuildKernel
(
"depth_to_space"
,
kernel_name
,
built_options
);
}
}
uint32_t
gws
[
3
];
std
::
stringstream
ss
;
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
block_size_
);
kernel_
.
setArg
(
idx
++
,
block_size_
);
kernel_
.
setArg
(
idx
++
,
depth_blocks
);
kernel_
.
setArg
(
idx
++
,
depth_blocks
);
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
}
if
(
d2s_
)
{
if
(
d2s_
)
{
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
depth_blocks
),
gws
[
0
]
=
static_cast
<
uint32_t
>
(
depth_blocks
);
static_cast
<
uint32_t
>
(
output_width
),
gws
[
1
]
=
static_cast
<
uint32_t
>
(
output_width
);
static_cast
<
uint32_t
>
(
output_height
*
batch
)};
gws
[
2
]
=
static_cast
<
uint32_t
>
(
output_height
*
batch
);
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"depth_to_space_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
ss
<<
"depth_to_space_opencl_kernel_"
<<
output
->
dim
(
0
)
<<
"_"
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
<<
output
->
dim
(
1
)
<<
"_"
<<
output
->
dim
(
2
)
<<
"_"
<<
output
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
else
{
}
else
{
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
depth_blocks
),
gws
[
0
]
=
static_cast
<
uint32_t
>
(
depth_blocks
);
static_cast
<
uint32_t
>
(
input_width
),
gws
[
1
]
=
static_cast
<
uint32_t
>
(
input_width
);
static_cast
<
uint32_t
>
(
input_height
*
batch
)};
gws
[
2
]
=
static_cast
<
uint32_t
>
(
input_height
*
batch
);
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
16
,
8
,
1
};
std
::
stringstream
ss
;
ss
<<
"space_to_depth_opencl_kernel_"
<<
input
->
dim
(
0
)
<<
"_"
ss
<<
"space_to_depth_opencl_kernel_"
<<
input
->
dim
(
0
)
<<
"_"
<<
input
->
dim
(
1
)
<<
"_"
<<
input
->
dim
(
2
)
<<
"_"
<<
input
->
dim
(
3
);
<<
input
->
dim
(
1
)
<<
"_"
<<
input
->
dim
(
2
)
<<
"_"
<<
input
->
dim
(
3
);
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
}
kernel_
.
setArg
(
idx
++
,
gws
[
0
]);
kernel_
.
setArg
(
idx
++
,
gws
[
1
]);
kernel_
.
setArg
(
idx
++
,
gws
[
2
]);
input_shape_
=
input
->
shape
();
}
const
uint32_t
kwg_size
=
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
const
std
::
vector
<
uint32_t
>
lws
=
{
8
,
kwg_size
/
64
,
8
,
1
};
TuningOrRun3DKernel
(
kernel_
,
ss
.
str
(),
gws
,
lws
,
future
);
}
}
template
struct
DepthToSpaceOpFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
DepthToSpaceOpFunctor
<
DeviceType
::
OPENCL
,
float
>;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录