Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
894096bd
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
894096bd
编写于
5月 15, 2019
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Feature: S2D and D2S support depth < 4.
上级
52a379a2
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
562 addition
and
125 deletion
+562
-125
mace/core/net_def_adapter.cc
mace/core/net_def_adapter.cc
+1
-0
mace/ops/opencl/cl/depth_to_space.cl
mace/ops/opencl/cl/depth_to_space.cl
+147
-22
mace/ops/opencl/cl/space_to_depth.cl
mace/ops/opencl/cl/space_to_depth.cl
+113
-22
mace/ops/opencl/image/depth_to_space.h
mace/ops/opencl/image/depth_to_space.h
+24
-20
mace/ops/opencl/image/space_to_depth.h
mace/ops/opencl/image/space_to_depth.h
+12
-10
mace/tools/benchmark/BUILD.bazel
mace/tools/benchmark/BUILD.bazel
+2
-8
test/ccbenchmark/mace/ops/depth_to_space_benchmark.cc
test/ccbenchmark/mace/ops/depth_to_space_benchmark.cc
+8
-2
test/ccbenchmark/mace/ops/space_to_depth_benchmark.cc
test/ccbenchmark/mace/ops/space_to_depth_benchmark.cc
+5
-1
test/ccunit/mace/ops/depth_to_space_test.cc
test/ccunit/mace/ops/depth_to_space_test.cc
+129
-23
test/ccunit/mace/ops/space_to_depth_test.cc
test/ccunit/mace/ops/space_to_depth_test.cc
+121
-17
未找到文件。
mace/core/net_def_adapter.cc
浏览文件 @
894096bd
...
@@ -164,6 +164,7 @@ MaceStatus NetDefAdapter::AdaptNetDef(
...
@@ -164,6 +164,7 @@ MaceStatus NetDefAdapter::AdaptNetDef(
input_info
->
set_dims
(
j
,
input_shape
[
j
]);
input_info
->
set_dims
(
j
,
input_shape
[
j
]);
}
}
}
}
tensor_shape_map
.
emplace
(
input_info
->
name
(),
input_shape
);
output_map
.
emplace
(
input_info
->
name
(),
InternalOutputInfo
(
output_map
.
emplace
(
input_info
->
name
(),
InternalOutputInfo
(
mem_type
,
input_info
->
data_type
(),
mem_type
,
input_info
->
data_type
(),
input_data_format
,
input_shape
,
-
1
));
input_data_format
,
input_shape
,
-
1
));
...
...
mace/ops/opencl/cl/depth_to_space.cl
浏览文件 @
894096bd
#
include
<common.h>
#
include
<common.h>
__kernel
void
depth_to_space
(
OUT_OF_RANGE_PARAMS
__kernel
void
depth_to_space
_d1_d2
(
OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only
image2d_t
input,
__read_only
image2d_t
input,
__private
const
int
block_size,
__private
const
int
input_height,
__private
const
int
input_hb,
__private
const
int
input_width,
__private
const
int
input_width,
__private
const
int
input_depth_blocks,
__private
const
int
block_size,
__private
const
int
output_height,
__private
const
int
output_width,
__private
const
int
output_width,
__private
const
int
output_depth_blocks
,
__private
const
int
output_depth
,
__write_only
image2d_t
output
)
{
__write_only
image2d_t
output
)
{
const
int
out_d
=
get_global_id
(
0
)
;
const
int
in_depth_blk_idx
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
in_width_idx
=
get_global_id
(
1
)
;
const
int
out_hb
=
get_global_id
(
2
)
;
const
int
in_hb_idx
=
get_global_id
(
2
)
;
#
ifndef
NON_UNIFORM_WORK_GROUP
#
ifndef
NON_UNIFORM_WORK_GROUP
if
(
out_d
>=
global_size_dim0
|
| out_w
>= global_size_dim1
if
(
in_depth_blk_idx
>=
global_size_dim0
|
| in_width_idx
>= global_size_dim1
||
out_hb
>= global_size_dim2) {
||
in_hb_idx
>= global_size_dim2) {
return;
return;
}
}
#endif
#endif
const int batch_idx = in_hb_idx / input_height;
const int in_height_idx = in_hb_idx - mul24(batch_idx, input_height);
int in_depth_idx = in_depth_blk_idx << 2;
int hw_block_size = in_depth_idx / output_depth;
int out_depth_idx = in_depth_idx - mul24(hw_block_size, output_depth);
int bottom_width_idx = mul24(in_width_idx, block_size);
int out_width_idx = bottom_width_idx + (hw_block_size % block_size);
int out_height_idx = mad24(in_height_idx, block_size,
hw_block_size / block_size);
const int out_pos = mad24(out_d, output_width, out_w);
const int in_x = mad24(in_depth_blk_idx, input_width, in_width_idx);
DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_x, in_hb_idx));
const int in_hb = out_hb / block_size;
int out_x_base = mul24((out_depth_idx >> 2), output_width);
const int offset_h = out_hb - mul24(in_hb, block_size);
int out_x = out_x_base + out_width_idx;
const int in_w = out_w / block_size;
int out_y = mad24(batch_idx, output_height, out_height_idx);
const int offset_w = out_w - mul24(in_w, block_size);
#if defined(DEPTH1)
const int offset_d = mul24(mad24(offset_h, block_size, offset_w), output_depth_blocks);
int top_width = mul24(in_width_idx + 1, block_size);
const int in_d = out_d + offset_d;
int top_height = mul24(in_height_idx + 1, block_size);
DATA_TYPE4 t_out_data = 0;
int t_out_x = out_x;
int t_width_idx = out_width_idx;
int t_height_idx = out_height_idx;
DATA_TYPE *in_data_ptr = (DATA_TYPE*)(&in_data);
for (int i = 0; i < 4; ++i) {
t_out_data.x = in_data_ptr[i];
WRITE_IMAGET(output, (int2)(t_out_x, out_y), t_out_data);
if (t_width_idx + 1 >= top_width) {
if (t_height_idx + 1 >= top_height) {
break;
}
t_width_idx = bottom_width_idx;
t_out_x = out_x_base + t_width_idx;
t_height_idx += 1;
out_y += 1;
} else {
t_width_idx += 1;
t_out_x += 1;
}
}
#elif defined(DEPTH2)
int top_width = mul24(in_width_idx + 1, block_size);
int top_height = mul24(in_height_idx + 1, block_size);
DATA_TYPE4 t_out_data = 0;
t_out_data.x = in_data.x;
t_out_data.y = in_data.y;
WRITE_IMAGET(output, (int2)(out_x, out_y), t_out_data);
t_out_data.x = in_data.z;
t_out_data.y = in_data.w;
if (out_width_idx + 1 >= top_width) {
if (out_height_idx + 1 < top_height) {
int t_out_x = out_x_base + bottom_width_idx;
WRITE_IMAGET(output, (int2)(t_out_x, out_y + 1), t_out_data);
}
} else {
WRITE_IMAGET(output, (int2)(out_x + 1, out_y), t_out_data);
}
#endif
}
if (in_hb >= input_hb || in_w >= input_width |
|
in_d
>=
input_depth_blocks
)
{
__kernel void depth_to_space(OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only image2d_t input,
__private const int input_height,
__private const int input_width,
__private const int block_size,
__private const int output_height,
__private const int output_width,
__private const int output_depth,
__write_only image2d_t output) {
const int out_depth_blk_idx = get_global_id(0);
const int out_width_idx = get_global_id(1);
const int out_hb_idx = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (out_depth_blk_idx >= global_size_dim0 || out_width_idx >= global_size_dim1
|
|
out_hb_idx
>=
global_size_dim2
)
{
return
;
return
;
}
}
#
endif
const
int
batch_idx
=
out_hb_idx
/
output_height
;
const
int
out_height_idx
=
out_hb_idx
-
mul24
(
batch_idx,
output_height
)
;
const
int
in_pos
=
mad24
(
in_d,
input_width,
in_w
)
;
int
in_height_idx
=
out_height_idx
/
block_size
;
DATA_TYPE4
in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_pos,
in_hb
))
;
int
height_idx_in_blk
=
out_height_idx
-
mul24
(
in_height_idx,
block_size
)
;
int
in_width_idx
=
out_width_idx
/
block_size
;
int
width_idx_in_blk
=
out_width_idx
-
mul24
(
in_width_idx,
block_size
)
;
int
in_depth_idx
=
mad24
(
mad24
(
height_idx_in_blk,
block_size,
width_idx_in_blk
)
,
output_depth,
out_depth_blk_idx
<<
2
)
;
int
in_depth_blk_idx
=
in_depth_idx
>>
2
;
int
in_x
=
mad24
(
in_depth_blk_idx,
input_width,
in_width_idx
)
;
int
in_y
=
mad24
(
batch_idx,
input_height,
in_height_idx
)
;
DATA_TYPE4
out_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x,
in_y
))
;
#
ifdef
DEPTH3
DATA_TYPE4
t_out_data
=
out_data
;
int
left_part_size
=
4
-
(
in_depth_idx
&
0x3
)
;
switch
(
left_part_size
)
{
case
1:
out_data.x
=
t_out_data.w
;
break
;
case
2:
out_data.x
=
t_out_data.z
;
out_data.y
=
t_out_data.w
;
break
;
case
3:
out_data.x
=
t_out_data.y
;
out_data.y
=
t_out_data.z
;
out_data.z
=
t_out_data.w
;
break
;
case
4:
out_data.x
=
t_out_data.x
;
out_data.y
=
t_out_data.y
;
out_data.z
=
t_out_data.z
;
break
;
default:
out_data
=
0
;
}
int
right_part_size
=
3
-
left_part_size
;
if
(
right_part_size
>
0
)
{
int
in_depth_blks
=
mul24
(
mul24
(
block_size,
block_size
)
,
3
)
>>
2
;
in_x
=
select
(
-1
,
in_x
+
input_width,
in_depth_blk_idx
+
1
<
in_depth_blks
)
;
t_out_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x,
in_y
))
;
switch
(
right_part_size
)
{
case
2:
out_data.y
=
t_out_data.x
;
out_data.z
=
t_out_data.y
;
break
;
case
1:
out_data.z
=
t_out_data.x
;
break
;
default:
out_data
=
0
;
}
}
out_data.w
=
0
;
#
endif
WRITE_IMAGET
(
output,
(
int2
)(
out_pos,
out_hb
)
,
in_data
)
;
int
out_x
=
mad24
(
out_depth_blk_idx,
output_width,
out_width_idx
)
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x,
out_hb_idx
)
,
out_data
)
;
}
}
mace/ops/opencl/cl/space_to_depth.cl
浏览文件 @
894096bd
...
@@ -3,39 +3,130 @@
...
@@ -3,39 +3,130 @@
__kernel
void
space_to_depth
(
OUT_OF_RANGE_PARAMS
__kernel
void
space_to_depth
(
OUT_OF_RANGE_PARAMS
GLOBAL_WORK_GROUP_SIZE_DIM3
GLOBAL_WORK_GROUP_SIZE_DIM3
__read_only
image2d_t
input,
__read_only
image2d_t
input,
__private
const
int
block_size
,
__private
const
int
input_height
,
__private
const
int
input_width,
__private
const
int
input_width,
__private
const
int
input_depth_blocks,
__private
const
int
input_depth,
__private
const
int
output_hb,
__private
const
int
block_size,
__private
const
int
output_height,
__private
const
int
output_width,
__private
const
int
output_width,
__private
const
int
output_depth_blocks,
__write_only
image2d_t
output
)
{
__write_only
image2d_t
output
)
{
const
int
d
=
get_global_id
(
0
)
;
const
int
out_depth_blk_idx
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
out_width_idx
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
out_hb_idx
=
get_global_id
(
2
)
;
#
ifndef
NON_UNIFORM_WORK_GROUP
#
ifndef
NON_UNIFORM_WORK_GROUP
if
(
d
>=
global_size_dim0
|
| w
>= global_size_dim1
if
(
out_depth_blk_idx
>=
global_size_dim0
|
| out_width_idx
>= global_size_dim1
||
hb
>= global_size_dim2) {
|
|
out_hb_idx
>=
global_size_dim2
)
{
return
;
return
;
}
}
#
endif
#
endif
const int in_pos = mad24(d, input_width, w);
const
int
batch_idx
=
out_hb_idx
/
output_height
;
const
int
out_height_idx
=
out_hb_idx
-
mul24
(
batch_idx,
output_height
)
;
const int out_hb = hb / block_size;
int
out_depth_idx
=
out_depth_blk_idx
<<
2
;
const int offset_h = hb - mul24(out_hb, block_size);
int
in_depth_idx
=
out_depth_idx
%
input_depth
;
const int out_w = w / block_size;
int
hw_block_size
=
out_depth_idx
/
input_depth
;
const int offset_w = w - mul24(out_w, block_size);
int
bottom_width_idx
=
mul24
(
out_width_idx,
block_size
)
;
const int offset_d = mul24(input_depth_blocks, mad24(offset_h, block_size, offset_w));
int
in_width_idx
=
bottom_width_idx
+
(
hw_block_size
%
block_size
)
;
const int out_d = d + offset_d;
int
in_height_idx
=
mad24
(
out_height_idx,
block_size,
hw_block_size
/
block_size
)
;
if (out_d >= output_depth_blocks || out_hb >= output_hb |
|
out_w
>=
output_width
)
{
DATA_TYPE4
in_data
=
0
;
return
;
int
in_x
=
mad24
((
in_depth_idx
>>
2
)
,
input_width,
in_width_idx
)
;
int
in_y
=
mad24
(
batch_idx,
input_height,
in_height_idx
)
;
#
if
defined
(
DEPTH1
)
int
top_width
=
mul24
(
out_width_idx
+
1
,
block_size
)
;
int
top_height
=
mul24
(
out_height_idx
+
1
,
block_size
)
;
DATA_TYPE4
t_in_data
=
0
;
int
t_in_x
=
in_x
;
int
t_width_idx
=
in_width_idx
;
int
t_height_idx
=
in_height_idx
;
DATA_TYPE
*in_data_ptr
=
(
DATA_TYPE*
)(
&in_data
)
;
for
(
int
i
=
0
; i < 4; ++i) {
t_in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
t_in_x,
in_y
))
;
in_data_ptr[i]
=
t_in_data.x
;
if
(
t_width_idx
+
1
>=
top_width
)
{
if
(
t_height_idx
+
1
>=
top_height
)
{
break
;
}
}
t_width_idx
=
bottom_width_idx
;
t_in_x
=
mad24
((
in_depth_idx
>>
2
)
,
input_width,
t_width_idx
)
;
t_height_idx
+=
1
;
in_y
+=
1
;
}
else
{
t_width_idx
+=
1
;
t_in_x
+=
1
;
}
}
#
elif
defined
(
DEPTH2
)
int
top_width
=
mul24
(
out_width_idx
+
1
,
block_size
)
;
int
top_height
=
mul24
(
out_height_idx
+
1
,
block_size
)
;
DATA_TYPE4
t_in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x,
in_y
))
;
in_data.x
=
t_in_data.x
;
in_data.y
=
t_in_data.y
;
t_in_data
=
0
;
if
(
in_width_idx
+
1
>=
top_width
)
{
if
(
in_height_idx
+
1
<
top_height
)
{
int
t_in_x
=
mad24
((
in_depth_idx
>>
2
)
,
input_width,
bottom_width_idx
)
;
t_in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
t_in_x,
in_y
+
1
))
;
}
}
else
{
t_in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x
+
1
,
in_y
))
;
}
in_data.z
=
t_in_data.x
;
in_data.w
=
t_in_data.y
;
#
elif
defined
(
DEPTH3
)
int
top_width
=
mul24
(
out_width_idx
+
1
,
block_size
)
;
int
top_height
=
mul24
(
out_height_idx
+
1
,
block_size
)
;
DATA_TYPE4
in_data0
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x,
in_y
))
;
DATA_TYPE4
in_data1
=
0
;
if
(
in_width_idx
+
1
>=
top_width
)
{
if
(
in_height_idx
+
1
<
top_height
)
{
int
t_in_x
=
mad24
((
in_depth_idx
>>
2
)
,
input_width,
bottom_width_idx
)
;
in_data1
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
t_in_x,
in_y
+
1
))
;
}
}
else
{
in_data1
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x
+
1
,
in_y
))
;
}
int
left_part_size
=
3
-
in_depth_idx
;
int
right_part_size
=
4
-
left_part_size
;
switch
(
left_part_size
)
{
case
3:
in_data.z
=
in_data0.z
;
in_data.y
=
in_data0.y
;
in_data.x
=
in_data0.x
;
break
;
case
2:
in_data.y
=
in_data0.z
;
in_data.x
=
in_data0.y
;
break
;
case
1:
in_data.x
=
in_data0.z
;
break
;
default:
in_data
=
0
;
}
switch
(
right_part_size
)
{
case
3:
in_data.y
=
in_data1.x
;
in_data.z
=
in_data1.y
;
in_data.w
=
in_data1.z
;
break
;
case
2:
in_data.z
=
in_data1.x
;
in_data.w
=
in_data1.y
;
break
;
case
1:
in_data.w
=
in_data1.x
;
break
;
default:
in_data
=
0
;
}
#
else
in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x,
in_y
))
;
#
endif
const
int
out_pos
=
mad24
(
out_d,
output_width,
out_w
)
;
const
int
out_x
=
mad24
(
out_depth_blk_idx,
output_width,
out_width_idx
)
;
DATA_TYPE4
in_data
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_pos,
hb
))
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x,
out_hb_idx
)
,
in_data
)
;
WRITE_IMAGET
(
output,
(
int2
)(
out_pos,
out_hb
)
,
in_data
)
;
}
}
mace/ops/opencl/image/depth_to_space.h
浏览文件 @
894096bd
...
@@ -58,19 +58,14 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
...
@@ -58,19 +58,14 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
const
index_t
input_depth
=
input
->
dim
(
3
);
const
index_t
input_depth
=
input
->
dim
(
3
);
MACE_CHECK
(
input_depth
%
(
block_size_
*
block_size_
)
==
0
,
MACE_CHECK
(
input_depth
%
(
block_size_
*
block_size_
)
==
0
,
"input depth should be dividable by block_size * block_size"
,
"input depth should be dividable by block_size * block_size
"
,
input_depth
);
input_depth
);
MACE_CHECK
((
input_depth
%
4
)
==
0
,
"input channel should be dividable by 4"
);
const
index_t
output_height
=
input_height
*
block_size_
;
const
index_t
output_height
=
input_height
*
block_size_
;
const
index_t
output_width
=
input_width
*
block_size_
;
const
index_t
output_width
=
input_width
*
block_size_
;
const
index_t
output_depth
=
input_depth
/
(
block_size_
*
block_size_
);
const
index_t
output_depth
=
input_depth
/
(
block_size_
*
block_size_
);
MACE_CHECK
(
output_depth
%
4
==
0
,
"output channel not support:"
)
MACE_CHECK
(
output_depth
%
4
==
0
||
output_depth
<
4
,
<<
output_depth
;
"output channel not support:"
)
<<
output_depth
;
const
index_t
input_depth_blocks
=
RoundUpDiv4
(
input_depth
);
const
index_t
output_depth_blocks
=
RoundUpDiv4
(
output_depth
);
std
::
vector
<
index_t
>
output_shape
=
{
batch
,
std
::
vector
<
index_t
>
output_shape
=
{
batch
,
output_height
,
output_height
,
...
@@ -82,11 +77,16 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
...
@@ -82,11 +77,16 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
&
image_shape
);
&
image_shape
);
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
image_shape
));
MACE_RETURN_IF_ERROR
(
output
->
ResizeImage
(
output_shape
,
image_shape
));
const
uint32_t
gws
[
3
]
=
{
uint32_t
gws
[
3
];
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
output_depth
)),
if
(
output_depth
<
3
)
{
static_cast
<
uint32_t
>
(
output_width
),
gws
[
0
]
=
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
input_depth
));
static_cast
<
uint32_t
>
(
output_height
*
batch
)
gws
[
1
]
=
static_cast
<
uint32_t
>
(
input_width
);
};
gws
[
2
]
=
static_cast
<
uint32_t
>
(
input_height
*
batch
);
}
else
{
gws
[
0
]
=
static_cast
<
uint32_t
>
(
RoundUpDiv4
(
output_depth
));
gws
[
1
]
=
static_cast
<
uint32_t
>
(
output_width
);
gws
[
2
]
=
static_cast
<
uint32_t
>
(
output_height
*
batch
);
}
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
auto
runtime
=
context
->
device
()
->
gpu_runtime
()
->
opencl_runtime
();
MACE_OUT_OF_RANGE_DEFINITION
;
MACE_OUT_OF_RANGE_DEFINITION
;
...
@@ -95,6 +95,10 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
...
@@ -95,6 +95,10 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
MACE_OUT_OF_RANGE_CONFIG
;
MACE_OUT_OF_RANGE_CONFIG
;
MACE_NON_UNIFORM_WG_CONFIG
;
MACE_NON_UNIFORM_WG_CONFIG
;
const
char
*
kernel_name
=
"depth_to_space"
;
const
char
*
kernel_name
=
"depth_to_space"
;
if
(
output_depth
<
4
)
{
built_options
.
emplace
(
MakeString
(
"-DDEPTH"
,
output_depth
));
if
(
output_depth
!=
3
)
kernel_name
=
"depth_to_space_d1_d2"
;
}
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
std
::
stringstream
kernel_name_ss
;
std
::
stringstream
kernel_name_ss
;
kernel_name_ss
<<
"-D"
<<
kernel_name
<<
"="
<<
obfuscated_kernel_name
;
kernel_name_ss
<<
"-D"
<<
kernel_name
<<
"="
<<
obfuscated_kernel_name
;
...
@@ -116,18 +120,18 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
...
@@ -116,18 +120,18 @@ MaceStatus DepthToSpaceKernel<T>::Compute(
MACE_OUT_OF_RANGE_SET_ARGS
(
kernel_
);
MACE_OUT_OF_RANGE_SET_ARGS
(
kernel_
);
MACE_SET_3D_GWS_ARGS
(
kernel_
,
gws
);
MACE_SET_3D_GWS_ARGS
(
kernel_
,
gws
);
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
block_size_
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_height
*
batch
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_depth_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
block_size_
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_depth
_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_depth
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
input_shape_
=
input
->
shape
();
}
}
std
::
string
tuning_key
=
Concat
(
"depth_to_space_opencl_kernel
"
,
std
::
string
tuning_key
=
Concat
(
"depth_to_space
"
,
batch
,
output_height
,
batch
,
output_height
,
output_width
,
output_depth
);
output_width
,
output_depth
);
const
std
::
vector
<
uint32_t
>
lws
=
Default3DLocalWS
(
runtime
,
gws
,
kwg_size_
);
const
std
::
vector
<
uint32_t
>
lws
=
Default3DLocalWS
(
runtime
,
gws
,
kwg_size_
);
...
...
mace/ops/opencl/image/space_to_depth.h
浏览文件 @
894096bd
...
@@ -57,7 +57,7 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
...
@@ -57,7 +57,7 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
const
index_t
input_width
=
input
->
dim
(
2
);
const
index_t
input_width
=
input
->
dim
(
2
);
const
index_t
input_depth
=
input
->
dim
(
3
);
const
index_t
input_depth
=
input
->
dim
(
3
);
MACE_CHECK
((
input_depth
%
4
)
==
0
,
MACE_CHECK
(
input_depth
<
4
||
(
input_depth
%
4
)
==
0
,
"input channel should be dividable by 4"
);
"input channel should be dividable by 4"
);
MACE_CHECK
(
MACE_CHECK
(
(
input_width
%
block_size_
==
0
)
&&
(
input_height
%
block_size_
==
0
),
(
input_width
%
block_size_
==
0
)
&&
(
input_height
%
block_size_
==
0
),
...
@@ -67,7 +67,6 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
...
@@ -67,7 +67,6 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
const
index_t
output_width
=
input_width
/
block_size_
;
const
index_t
output_width
=
input_width
/
block_size_
;
const
index_t
output_depth
=
input_depth
*
block_size_
*
block_size_
;
const
index_t
output_depth
=
input_depth
*
block_size_
*
block_size_
;
const
index_t
input_depth_blocks
=
RoundUpDiv4
(
input_depth
);
const
index_t
output_depth_blocks
=
RoundUpDiv4
(
output_depth
);
const
index_t
output_depth_blocks
=
RoundUpDiv4
(
output_depth
);
std
::
vector
<
index_t
>
output_shape
=
{
batch
,
output_height
,
output_width
,
std
::
vector
<
index_t
>
output_shape
=
{
batch
,
output_height
,
output_width
,
...
@@ -90,6 +89,9 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
...
@@ -90,6 +89,9 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
std
::
string
obfuscated_kernel_name
=
MACE_OBFUSCATE_SYMBOL
(
kernel_name
);
std
::
stringstream
kernel_name_ss
;
std
::
stringstream
kernel_name_ss
;
kernel_name_ss
<<
"-D"
<<
kernel_name
<<
"="
<<
obfuscated_kernel_name
;
kernel_name_ss
<<
"-D"
<<
kernel_name
<<
"="
<<
obfuscated_kernel_name
;
if
(
input_depth
<
4
)
{
built_options
.
emplace
(
MakeString
(
"-DDEPTH"
,
input_depth
));
}
built_options
.
emplace
(
kernel_name_ss
.
str
());
built_options
.
emplace
(
kernel_name_ss
.
str
());
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToCLDt
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToCLDt
(
dt
));
...
@@ -102,28 +104,28 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
...
@@ -102,28 +104,28 @@ MaceStatus SpaceToDepthKernel<T>::Compute(
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
static_cast
<
uint32_t
>
(
runtime
->
GetKernelMaxWorkGroupSize
(
kernel_
));
}
}
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
in
put_depth_blocks
),
const
uint32_t
gws
[
3
]
=
{
static_cast
<
uint32_t
>
(
out
put_depth_blocks
),
static_cast
<
uint32_t
>
(
in
put_width
),
static_cast
<
uint32_t
>
(
out
put_width
),
static_cast
<
uint32_t
>
(
in
put_height
*
batch
)};
static_cast
<
uint32_t
>
(
out
put_height
*
batch
)};
MACE_OUT_OF_RANGE_INIT
(
kernel_
);
MACE_OUT_OF_RANGE_INIT
(
kernel_
);
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
if
(
!
IsVecEqual
(
input_shape_
,
input
->
shape
()))
{
uint32_t
idx
=
0
;
uint32_t
idx
=
0
;
MACE_OUT_OF_RANGE_SET_ARGS
(
kernel_
);
MACE_OUT_OF_RANGE_SET_ARGS
(
kernel_
);
MACE_SET_3D_GWS_ARGS
(
kernel_
,
gws
);
MACE_SET_3D_GWS_ARGS
(
kernel_
,
gws
);
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
input
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
block_size_
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_depth_blocks
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
input_depth
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_height
*
batch
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
block_size_
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_height
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_width
));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
output_depth_blocks
));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
kernel_
.
setArg
(
idx
++
,
*
(
output
->
opencl_image
()));
input_shape_
=
input
->
shape
();
input_shape_
=
input
->
shape
();
}
}
const
std
::
vector
<
uint32_t
>
lws
=
Default3DLocalWS
(
runtime
,
gws
,
kwg_size_
);
const
std
::
vector
<
uint32_t
>
lws
=
Default3DLocalWS
(
runtime
,
gws
,
kwg_size_
);
std
::
string
tuning_key
=
Concat
(
"space_to_depth
_opencl_kernel
"
,
input
->
dim
(
0
),
std
::
string
tuning_key
=
Concat
(
"space_to_depth"
,
input
->
dim
(
0
),
input
->
dim
(
1
),
input
->
dim
(
2
),
input
->
dim
(
3
));
input
->
dim
(
1
),
input
->
dim
(
2
),
input
->
dim
(
3
));
MACE_RETURN_IF_ERROR
(
TuningOrRun3DKernel
(
runtime
,
kernel_
,
tuning_key
,
MACE_RETURN_IF_ERROR
(
TuningOrRun3DKernel
(
runtime
,
kernel_
,
tuning_key
,
gws
,
lws
,
context
->
future
()));
gws
,
lws
,
context
->
future
()));
...
...
mace/tools/benchmark/BUILD.bazel
浏览文件 @
894096bd
...
@@ -28,10 +28,7 @@ cc_binary(
...
@@ -28,10 +28,7 @@ cc_binary(
"//mace/codegen:generated_models"
,
"//mace/codegen:generated_models"
,
"//mace/libmace"
,
"//mace/libmace"
,
"//mace/utils"
,
"//mace/utils"
,
]
+
if_opencl_enabled
([
],
"//mace/codegen:generated_opencl_binary"
,
"//mace/codegen:generated_opencl_parameter"
,
]),
)
)
cc_binary
(
cc_binary
(
...
@@ -60,10 +57,7 @@ cc_binary(
...
@@ -60,10 +57,7 @@ cc_binary(
"//mace/codegen:generated_mace_engine_factory"
,
"//mace/codegen:generated_mace_engine_factory"
,
"//mace/codegen:generated_models"
,
"//mace/codegen:generated_models"
,
"//mace/libmace:libmace_dynamic"
,
"//mace/libmace:libmace_dynamic"
,
]
+
if_opencl_enabled
([
],
"//mace/codegen:generated_opencl_binary"
,
"//mace/codegen:generated_opencl_parameter"
,
]),
)
)
cc_library
(
cc_library
(
...
...
test/ccbenchmark/mace/ops/depth_to_space_benchmark.cc
浏览文件 @
894096bd
...
@@ -78,9 +78,15 @@ void DepthToSpace(
...
@@ -78,9 +78,15 @@ void DepthToSpace(
MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU)
MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU)
#endif
#endif
MACE_BM_DEPTH_TO_SPACE
(
1
,
4
,
512
,
512
,
2
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
8
,
256
,
256
,
2
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
12
,
512
,
512
,
2
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
9
,
512
,
512
,
3
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
18
,
256
,
256
,
3
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
27
,
512
,
512
,
3
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
64
,
64
,
64
,
4
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
64
,
64
,
64
,
4
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
64
,
128
,
128
,
4
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
128
,
128
,
128
,
4
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
64
,
256
,
256
,
4
);
MACE_BM_DEPTH_TO_SPACE
(
1
,
256
,
256
,
256
,
4
);
}
// namespace test
}
// namespace test
}
// namespace ops
}
// namespace ops
...
...
test/ccbenchmark/mace/ops/space_to_depth_benchmark.cc
浏览文件 @
894096bd
...
@@ -29,7 +29,7 @@ void SpaceToDepth(
...
@@ -29,7 +29,7 @@ void SpaceToDepth(
// Add input data
// Add input data
if
(
D
==
DeviceType
::
CPU
)
{
if
(
D
==
DeviceType
::
CPU
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
channels
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
}
else
if
(
D
==
DeviceType
::
GPU
)
{
}
else
if
(
D
==
DeviceType
::
GPU
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
}
else
{
}
else
{
...
@@ -78,6 +78,10 @@ void SpaceToDepth(
...
@@ -78,6 +78,10 @@ void SpaceToDepth(
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU)
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU)
#endif
#endif
MACE_BM_SPACE_TO_DEPTH
(
1
,
1
,
513
,
513
,
3
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
2
,
256
,
256
,
2
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
3
,
512
,
512
,
2
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
3
,
513
,
513
,
3
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
64
,
64
,
64
,
4
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
64
,
64
,
64
,
4
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
64
,
128
,
128
,
4
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
64
,
128
,
128
,
4
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
64
,
256
,
256
,
4
);
MACE_BM_SPACE_TO_DEPTH
(
1
,
64
,
256
,
256
,
4
);
...
...
test/ccunit/mace/ops/depth_to_space_test.cc
浏览文件 @
894096bd
...
@@ -61,7 +61,45 @@ void RunDepthToSpace(const std::vector<index_t> &input_shape,
...
@@ -61,7 +61,45 @@ void RunDepthToSpace(const std::vector<index_t> &input_shape,
class
DepthToSpaceOpTest
:
public
OpsTestBase
{};
class
DepthToSpaceOpTest
:
public
OpsTestBase
{};
TEST_F
(
DepthToSpaceOpTest
,
Input1x2x16_B2_CPU
)
{
TEST_F
(
DepthToSpaceOpTest
,
CPUInputDepthLess4
)
{
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
2
,
9
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
},
3
,
{
1
,
3
,
6
,
1
},
{
0
,
1
,
2
,
9
,
10
,
11
,
3
,
4
,
5
,
12
,
13
,
14
,
6
,
7
,
8
,
15
,
16
,
17
});
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
2
,
18
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
},
3
,
{
1
,
3
,
6
,
2
},
{
0
,
1
,
2
,
3
,
4
,
5
,
18
,
19
,
20
,
21
,
22
,
23
,
6
,
7
,
8
,
9
,
10
,
11
,
24
,
25
,
26
,
27
,
28
,
29
,
12
,
13
,
14
,
15
,
16
,
17
,
30
,
31
,
32
,
33
,
34
,
35
});
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
2
,
12
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
},
2
,
{
1
,
2
,
4
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
12
,
13
,
14
,
15
,
16
,
17
,
6
,
7
,
8
,
9
,
10
,
11
,
18
,
19
,
20
,
21
,
22
,
23
});
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
1
,
27
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
},
3
,
{
1
,
3
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
}
TEST_F
(
DepthToSpaceOpTest
,
CPUInputDepth4
)
{
RunDepthToSpace
<
DeviceType
::
CPU
>
(
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
2
,
16
},
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
...
@@ -69,9 +107,57 @@ TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_CPU) {
...
@@ -69,9 +107,57 @@ TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_CPU) {
2
,
{
1
,
2
,
4
,
4
},
2
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
1
,
16
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
2
,
2
,
4
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLInputDepth1
)
{
RunDepthToSpace
<
DeviceType
::
GPU
>
(
{
1
,
1
,
2
,
9
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
},
3
,
{
1
,
3
,
6
,
1
},
{
0
,
1
,
2
,
9
,
10
,
11
,
3
,
4
,
5
,
12
,
13
,
14
,
6
,
7
,
8
,
15
,
16
,
17
});
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLInputDepth2
)
{
RunDepthToSpace
<
DeviceType
::
GPU
>
(
{
1
,
1
,
2
,
18
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
},
3
,
{
1
,
3
,
6
,
2
},
{
0
,
1
,
2
,
3
,
4
,
5
,
18
,
19
,
20
,
21
,
22
,
23
,
6
,
7
,
8
,
9
,
10
,
11
,
24
,
25
,
26
,
27
,
28
,
29
,
12
,
13
,
14
,
15
,
16
,
17
,
30
,
31
,
32
,
33
,
34
,
35
});
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLInputDepth3
)
{
RunDepthToSpace
<
DeviceType
::
GPU
>
(
{
1
,
1
,
2
,
12
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
},
2
,
{
1
,
2
,
4
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
12
,
13
,
14
,
15
,
16
,
17
,
6
,
7
,
8
,
9
,
10
,
11
,
18
,
19
,
20
,
21
,
22
,
23
});
RunDepthToSpace
<
DeviceType
::
GPU
>
(
{
1
,
1
,
1
,
27
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
},
3
,
{
1
,
3
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
}
}
TEST_F
(
DepthToSpaceOpTest
,
Input1x2x16_B2_OPENCL
)
{
TEST_F
(
DepthToSpaceOpTest
,
OPENCLInputDepth4
)
{
RunDepthToSpace
<
DeviceType
::
GPU
>
(
RunDepthToSpace
<
DeviceType
::
GPU
>
(
{
1
,
1
,
2
,
16
},
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
...
@@ -79,29 +165,12 @@ TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_OPENCL) {
...
@@ -79,29 +165,12 @@ TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_OPENCL) {
2
,
{
1
,
2
,
4
,
4
},
2
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
}
TEST_F
(
DepthToSpaceOpTest
,
Input1x1x16_B2_CPU
)
{
RunDepthToSpace
<
DeviceType
::
CPU
>
(
{
1
,
1
,
1
,
16
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
2
,
2
,
4
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
}
TEST_F
(
DepthToSpaceOpTest
,
Input1x1x16_B2_OPENCL
)
{
RunDepthToSpace
<
DeviceType
::
GPU
>
(
RunDepthToSpace
<
DeviceType
::
GPU
>
(
{
1
,
1
,
1
,
16
},
{
1
,
1
,
1
,
16
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
2
,
2
,
4
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
2
,
2
,
4
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
}
}
TEST_F
(
DepthToSpaceOpTest
,
InputLarger_B2_OPENCL
)
{
const
std
::
vector
<
float
>
in
=
std
::
vector
<
float
>
(
192
*
192
*
128
,
1.0
);
RunDepthToSpace
<
DeviceType
::
GPU
>
({
1
,
192
,
192
,
128
},
in
,
2
,
{
1
,
384
,
384
,
32
},
in
);
}
namespace
{
namespace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
RandomTest
(
const
int
block_size
,
void
RandomTest
(
const
int
block_size
,
...
@@ -149,14 +218,51 @@ void RandomTest(const int block_size,
...
@@ -149,14 +218,51 @@ void RandomTest(const int block_size,
}
}
}
// namespace
}
// namespace
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomFloatDepth1
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
192
,
192
,
4
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
111
,
111
,
9
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
20
,
20
,
25
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
14
,
14
,
49
});
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomFloatDepth2
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
192
,
192
,
8
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
111
,
111
,
18
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
20
,
20
,
50
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
14
,
14
,
98
});
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomFloatDepth3
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
192
,
192
,
12
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
111
,
111
,
27
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
20
,
20
,
75
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
14
,
14
,
147
});
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomFloat
)
{
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomFloat
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
192
,
192
,
128
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
192
,
192
,
16
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
222
,
222
,
144
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
100
,
100
,
200
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
98
,
98
,
196
});
}
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomHalf
)
{
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomHalf
)
{
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
1
,
192
,
192
,
128
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
1
,
192
,
192
,
4
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
3
,
{
1
,
111
,
111
,
18
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
5
,
{
1
,
20
,
20
,
75
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
7
,
{
1
,
14
,
14
,
147
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
1
,
384
,
384
,
8
});
}
}
TEST_F
(
DepthToSpaceOpTest
,
OPENCLRandomBatchHalf
)
{
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
2
,
192
,
192
,
4
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
3
,
{
3
,
111
,
111
,
18
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
5
,
{
2
,
20
,
20
,
75
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
7
,
{
3
,
14
,
14
,
147
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
2
,
384
,
384
,
8
});
}
}
// namespace test
}
// namespace test
}
// namespace ops
}
// namespace ops
}
// namespace mace
}
// namespace mace
test/ccunit/mace/ops/space_to_depth_test.cc
浏览文件 @
894096bd
...
@@ -61,38 +61,106 @@ void RunSpaceToDepth(const std::vector<index_t> &input_shape,
...
@@ -61,38 +61,106 @@ void RunSpaceToDepth(const std::vector<index_t> &input_shape,
class
SpaceToDepthOpTest
:
public
OpsTestBase
{};
class
SpaceToDepthOpTest
:
public
OpsTestBase
{};
TEST_F
(
SpaceToDepthOpTest
,
Input2x4x4_B2_CPU
)
{
TEST_F
(
SpaceToDepthOpTest
,
CPUInputDepthLess4
)
{
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
{
1
,
2
,
4
,
4
},
{
1
,
3
,
6
,
1
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
{
0
,
1
,
2
,
9
,
10
,
11
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
},
3
,
4
,
5
,
12
,
13
,
14
,
2
,
{
1
,
1
,
2
,
16
},
6
,
7
,
8
,
15
,
16
,
17
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
3
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
{
1
,
1
,
2
,
9
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
});
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
{
1
,
3
,
6
,
2
},
{
0
,
1
,
2
,
3
,
4
,
5
,
18
,
19
,
20
,
21
,
22
,
23
,
6
,
7
,
8
,
9
,
10
,
11
,
24
,
25
,
26
,
27
,
28
,
29
,
12
,
13
,
14
,
15
,
16
,
17
,
30
,
31
,
32
,
33
,
34
,
35
},
3
,
{
1
,
1
,
2
,
18
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
});
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
{
1
,
2
,
4
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
12
,
13
,
14
,
15
,
16
,
17
,
6
,
7
,
8
,
9
,
10
,
11
,
18
,
19
,
20
,
21
,
22
,
23
},
2
,
{
1
,
1
,
2
,
12
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
});
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
{
1
,
3
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
},
3
,
{
1
,
1
,
1
,
27
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
}
}
TEST_F
(
SpaceToDepthOpTest
,
Input2x4x4_B2_OPENCL
)
{
TEST_F
(
SpaceToDepthOpTest
,
CPUInputDepth4
)
{
RunSpaceToDepth
<
DeviceType
::
G
PU
>
(
RunSpaceToDepth
<
DeviceType
::
C
PU
>
(
{
1
,
2
,
4
,
4
},
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
},
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
},
2
,
{
1
,
1
,
2
,
16
},
2
,
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
}
TEST_F
(
SpaceToDepthOpTest
,
Input2x2x4_B2_CPU
)
{
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
RunSpaceToDepth
<
DeviceType
::
CPU
>
(
{
1
,
2
,
2
,
4
},
{
1
,
2
,
2
,
4
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
1
,
1
,
16
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
1
,
1
,
16
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
}
}
TEST_F
(
SpaceToDepthOpTest
,
Input4x4x1_B2_OPENCL
)
{
TEST_F
(
SpaceToDepthOpTest
,
OPENCLInputDepth1
)
{
RunSpaceToDepth
<
DeviceType
::
GPU
>
(
RunSpaceToDepth
<
DeviceType
::
GPU
>
(
{
1
,
2
,
2
,
4
},
{
1
,
3
,
6
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
2
,
{
1
,
1
,
1
,
16
},
{
0
,
1
,
2
,
9
,
10
,
11
,
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
3
,
4
,
5
,
12
,
13
,
14
,
6
,
7
,
8
,
15
,
16
,
17
},
3
,
{
1
,
1
,
2
,
9
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
});
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLInputDepth2
)
{
RunSpaceToDepth
<
DeviceType
::
GPU
>
(
{
1
,
3
,
6
,
2
},
{
0
,
1
,
2
,
3
,
4
,
5
,
18
,
19
,
20
,
21
,
22
,
23
,
6
,
7
,
8
,
9
,
10
,
11
,
24
,
25
,
26
,
27
,
28
,
29
,
12
,
13
,
14
,
15
,
16
,
17
,
30
,
31
,
32
,
33
,
34
,
35
},
3
,
{
1
,
1
,
2
,
18
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
,
33
,
34
,
35
});
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLInputDepth3
)
{
RunSpaceToDepth
<
DeviceType
::
GPU
>
(
{
1
,
2
,
4
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
12
,
13
,
14
,
15
,
16
,
17
,
6
,
7
,
8
,
9
,
10
,
11
,
18
,
19
,
20
,
21
,
22
,
23
},
2
,
{
1
,
1
,
2
,
12
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
});
RunSpaceToDepth
<
DeviceType
::
GPU
>
(
{
1
,
3
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
},
3
,
{
1
,
1
,
1
,
27
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLInputDepth4
)
{
RunSpaceToDepth
<
DeviceType
::
GPU
>
(
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
},
2
,
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
}
}
namespace
{
namespace
{
...
@@ -141,14 +209,50 @@ void RandomTest(const int block_size,
...
@@ -141,14 +209,50 @@ void RandomTest(const int block_size,
}
}
}
// namespace
}
// namespace
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomFloatDepth1
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
384
,
384
,
1
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
333
,
333
,
1
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
100
,
100
,
1
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
98
,
98
,
1
});
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomFloatDepth2
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
384
,
384
,
2
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
333
,
333
,
2
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
100
,
100
,
2
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
98
,
98
,
2
});
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomFloatDepth3
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
384
,
384
,
3
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
333
,
333
,
3
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
100
,
100
,
3
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
98
,
98
,
3
});
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomFloat
)
{
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomFloat
)
{
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
384
,
384
,
32
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
2
,
{
1
,
384
,
384
,
4
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
3
,
{
1
,
333
,
333
,
16
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
5
,
{
1
,
100
,
100
,
32
});
RandomTest
<
DeviceType
::
GPU
,
float
>
(
7
,
{
1
,
98
,
98
,
64
});
}
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomHalf
)
{
TEST_F
(
SpaceToDepthOpTest
,
OPENCLRandomHalf
)
{
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
1
,
384
,
384
,
1
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
3
,
{
1
,
333
,
333
,
2
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
5
,
{
1
,
100
,
100
,
3
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
7
,
{
1
,
98
,
98
,
4
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
1
,
384
,
384
,
32
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
1
,
384
,
384
,
32
});
}
}
TEST_F
(
SpaceToDepthOpTest
,
OPENCLBatchRandomHalf
)
{
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
2
,
384
,
384
,
1
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
3
,
{
3
,
333
,
333
,
2
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
5
,
{
2
,
100
,
100
,
3
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
7
,
{
3
,
98
,
98
,
4
});
RandomTest
<
DeviceType
::
GPU
,
half
>
(
2
,
{
2
,
384
,
384
,
32
});
}
}
// namespace test
}
// namespace test
}
// namespace ops
}
// namespace ops
}
// namespace mace
}
// namespace mace
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录