Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
8c838251
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
8c838251
编写于
3月 21, 2018
作者:
U
Unknown
提交者:
liutuo
3月 23, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add depth to space cpu
上级
a065adb8
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
59 addition
and
37 deletion
+59
-37
mace/kernels/depth_to_space.h
mace/kernels/depth_to_space.h
+23
-6
mace/kernels/opencl/cl/depth_to_space.cl
mace/kernels/opencl/cl/depth_to_space.cl
+18
-15
mace/ops/depth_to_space.cc
mace/ops/depth_to_space.cc
+2
-1
mace/ops/depth_to_space.h
mace/ops/depth_to_space.h
+9
-8
mace/ops/depth_to_space_benchmark.cc
mace/ops/depth_to_space_benchmark.cc
+1
-1
mace/ops/depth_to_space_test.cc
mace/ops/depth_to_space_test.cc
+6
-6
未找到文件。
mace/kernels/depth_to_space.h
浏览文件 @
8c838251
...
...
@@ -13,7 +13,7 @@ namespace kernels {
template
<
DeviceType
D
,
typename
T
>
struct
DepthToSpaceOpFunctor
{
DepthToSpaceOpFunctor
(
const
int
block_size
)
:
block_size_
(
block_size
)
{}
explicit
DepthToSpaceOpFunctor
(
const
int
block_size
)
:
block_size_
(
block_size
)
{}
void
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
...
...
@@ -22,6 +22,13 @@ struct DepthToSpaceOpFunctor {
const
int
input_height
=
input
->
dim
(
1
);
const
int
input_width
=
input
->
dim
(
2
);
const
int
input_depth
=
input
->
dim
(
3
);
std
::
cout
<<
"input shape: {"
<<
batch_size
<<
", "
;
std
::
cout
<<
input_height
<<
", "
;
std
::
cout
<<
input_width
<<
", "
;
std
::
cout
<<
input_depth
<<
", "
;
std
::
cout
<<
"block size= "
<<
block_size_
<<
std
::
endl
;
const
int
block_size_sq
=
block_size_
*
block_size_
;
...
...
@@ -32,11 +39,16 @@ struct DepthToSpaceOpFunctor {
output_shape
[
1
]
=
output_height
;
output_shape
[
2
]
=
output_width
;
output_shape
[
3
]
=
output_depth
;
std
::
cout
<<
"output shape: {"
<<
batch_size
<<
", "
;
std
::
cout
<<
output_height
<<
", "
;
std
::
cout
<<
output_width
<<
", "
;
std
::
cout
<<
output_depth
<<
", "
<<
std
::
endl
;
output
->
Resize
(
output_shape
);
Tensor
::
MappingGuard
logits_guard
(
input
);
Tensor
::
MappingGuard
output_guard
(
output
);
//
Tensor::MappingGuard logits_guard(input);
//
Tensor::MappingGuard output_guard(output);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
...
...
@@ -52,8 +64,8 @@ struct DepthToSpaceOpFunctor {
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
const
int
in_d
=
d
+
offset_d
;
const
int
o_index
=
((
b
*
output_height
+
h
)
*
output_width
+
w
)
*
output_depth
+
d
;
const
int
i_index
=
((
b
*
input_height
+
in_h
)
*
input_width
+
in_w
)
*
input_depth
+
in_d
;
output_ptr
[
o_index
]
=
input
[
i_index
];
const
int
i_index
=
((
b
*
input_height
+
in_h
)
*
input_width
+
in_w
)
*
input_depth
+
in_d
;
output_ptr
[
o_index
]
=
input
_ptr
[
i_index
];
}
}
}
...
...
@@ -62,7 +74,12 @@ struct DepthToSpaceOpFunctor {
}
const
int
block_size_
;
};
/*
template <>
void DepthToSpaceOpFunctor<DeviceType::NEON, float>::operator()(const Tensor *input,
Tensor *output,
StatsFuture *future);
*/
template
<
typename
T
>
struct
DepthToSpaceOpFunctor
<
DeviceType
::
OPENCL
,
T
>
{
...
...
mace/kernels/opencl/cl/depth_to_space.cl
浏览文件 @
8c838251
#
include
<common.h>
//
assume
channes_per_group
mod
4
=
0
&&
groups
mod
4
==
0
__kernel
void
channel_shuffle
(
__read_only
image2d_t
input,
__private
const
int
groups,
__private
const
int
channels_per_group,
__kernel
void
depth_to-space
(
__read_only
image2d_t
input,
__private
const
int
block_size,
__private
const
int
batch_size,
__private
const
int
input_height,
__private
const
int
input_width,
__private
const
int
input_depth,
__private
const
int
output_height,
__private
const
int
output_width,
__private
const
int
output_depth,
__write_only
image2d_t
output
)
{
const
int
group_chan_blk_idx
=
get_global_id
(
0
)
;
const
int
width_idx
=
get_global_id
(
1
)
;
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
width
=
get_global_size
(
1
)
;
const
int
hb_idx
=
get_global_id
(
2
)
;
const
int
group_blks
=
groups
/
4
;
const
int
groups_blks_width
=
group_blks
*
width
;
const
int
channels_per_group_blks
=
channels_per_group
/
4
;
const
int
channels_per_group_blks_width
=
channels_per_group_blks
*
width
;
DATA_TYPE4
in_chan_data0,
in_chan_data1,
in_chan_data2,
in_chan_data3
;
DATA_TYPE4
out_chan_data0,
out_chan_data1,
out_chan_data2,
out_chan_data3
;
int
in_x
=
mad24
(
group_chan_blk_idx,
width,
width_idx
)
;
const
int
out_idx
=
mad24
(
ch_blk,
width,
w
)
;
const
int
d
=
out_idx
%
output_depth
;
const
int
out_idx2
=
out_idx
/
output_depth
;
const
int
w
=
out_idx2
%
output_width
for
(
short
g_blk
=
0
; g_blk < group_blks; ++g_blk) {
//
fetch
4
groups,
for
each
group
fetch
4
channels
in_chan_data0
=
READ_IMAGET
(
input,
SAMPLER,
(
int2
)(
in_x,
hb_idx
))
;
...
...
mace/ops/depth_to_space.cc
浏览文件 @
8c838251
...
...
@@ -13,7 +13,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthToSpaceOp
<
DeviceType
::
CPU
,
float
>
);
/*
REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthToSpace")
.Device(DeviceType::OPENCL)
.TypeConstraint<float>("T")
...
...
@@ -25,6 +25,7 @@ void Register_DepthToSpace(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
DepthToSpaceOp<DeviceType::OPENCL, half>);
*/
}
}
// namespace ops
...
...
mace/ops/depth_to_space.h
浏览文件 @
8c838251
...
...
@@ -19,30 +19,31 @@ class DepthToSpaceOp : public Operator<D, T> {
public:
DepthToSpaceOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
block_size_
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"block_size"
,
1
)),
functor_
(
this
->
block_size_
)
{}
functor_
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"block_size"
,
1
))
{}
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input dim should be 4"
);
const
int
block_size
=
OperatorBase
::
GetSingleArgument
<
int
>
(
"block_size"
,
1
);
int
input_depth
=
input
->
dim
(
3
);
MACE_CHECK
(
input_depth
%
(
block_size
_
*
block_size_
)
==
0
,
MACE_CHECK
(
input_depth
%
(
block_size
*
block_size
)
==
0
,
"input depth should be dividable by block_size * block_size"
,
input
->
dim
(
3
));
std
::
cout
<<
"arg block_size: "
<<
block_size
<<
std
::
endl
;
functor_
(
input
,
output
,
future
);
return
true
;
}
private:
kernels
::
DepthToSpaceOpFunctor
<
D
,
T
>
functor_
;
protected:
const
int
block_size_
;
OP_INPUT_TAGS
(
INPUT
);
OP_OUTPUT_TAGS
(
OUTPUT
);
private:
kernels
::
DepthToSpaceOpFunctor
<
D
,
T
>
functor_
;
};
...
...
mace/ops/depth_to_space_benchmark.cc
浏览文件 @
8c838251
...
...
@@ -21,7 +21,7 @@ static void DepthToSpace(
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"DepthToSpace"
,
"DepthToSpaceBM"
)
...
...
mace/ops/depth_to_space_test.cc
浏览文件 @
8c838251
...
...
@@ -17,20 +17,20 @@ TEST_F(DepthToSpaceOpTest, C8G4_CPU) {
OpDefBuilder
(
"DepthToSpace"
,
"DepthToSpaceTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"block_size"
,
1
)
.
AddIntArg
(
"block_size"
,
2
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
1
,
2
,
8
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
"Input"
,
{
1
,
2
,
2
,
4
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
1
,
2
,
8
},
{
0
,
2
,
4
,
6
,
1
,
3
,
5
,
7
,
8
,
10
,
12
,
14
,
9
,
11
,
13
,
15
});
{
1
,
4
,
4
,
1
},
{
1
,
2
,
5
,
6
,
3
,
4
,
7
,
8
,
9
,
10
,
13
,
14
,
11
,
12
,
15
,
16
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -44,7 +44,7 @@ TEST_F(DepthToSpaceOpTest, C16G4_OPENCL) {
"Input"
,
{
1
,
1
,
2
,
16
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"DepthToSpace"
,
"DepthToSpaceTest"
)
...
...
@@ -57,7 +57,7 @@ TEST_F(DepthToSpaceOpTest, C16G4_OPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
// Transfer output
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
// Check
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录