Commit 9dc60eb1
Authored May 29, 2019 by liyin
Add quantized space to depth and depth to space
Parent: 2b820d8b

Showing 5 changed files with 298 additions and 7 deletions (+298 -7)
mace/ops/depth_to_space.cc                         +78 -3
mace/ops/space_to_depth.cc                         +76 -3
mace/python/tools/converter_tool/transformer.py    +3  -1
test/ccunit/mace/ops/depth_to_space_test.cc        +70 -0
test/ccunit/mace/ops/space_to_depth_test.cc        +71 -0

mace/ops/depth_to_space.cc

@@ -25,7 +25,10 @@ namespace mace {
 namespace ops {
 
 template <DeviceType D, class T>
-class DepthToSpaceOp : public Operation {
+class DepthToSpaceOp;
+
+template<>
+class DepthToSpaceOp<CPU, float> : public Operation {
  public:
   explicit DepthToSpaceOp(OpConstructContext *context)
       : Operation(context),
@@ -55,8 +58,8 @@ class DepthToSpaceOp : public Operation {
     Tensor::MappingGuard logits_guard(input);
     Tensor::MappingGuard output_guard(output);
-    const T *input_ptr = input->data<T>();
-    T *output_ptr = output->mutable_data<T>();
+    const float *input_ptr = input->data<float>();
+    float *output_ptr = output->mutable_data<float>();
 
     for (index_t b = 0; b < batch_size; ++b) {
       for (index_t d = 0; d < output_depth; ++d) {
@@ -89,6 +92,73 @@ class DepthToSpaceOp : public Operation {
   const int block_size_;
 };
 
+#ifdef MACE_ENABLE_QUANTIZE
+template<>
+class DepthToSpaceOp<CPU, uint8_t> : public Operation {
+ public:
+  explicit DepthToSpaceOp(OpConstructContext *context)
+      : Operation(context),
+        block_size_(Operation::GetOptionalArg<int>("block_size", 1)) {}
+
+  MaceStatus Run(OpContext *context) override {
+    MACE_UNUSED(context);
+    const Tensor *input = this->Input(0);
+    Tensor *output = this->Output(0);
+    MACE_CHECK(input->dim_size() == 4, "input dim should be 4");
+
+    const index_t batch_size = input->dim(0);
+    const index_t input_depth = input->dim(3);
+    const index_t input_height = input->dim(1);
+    const index_t input_width = input->dim(2);
+
+    MACE_CHECK(input_depth % (block_size_ * block_size_) == 0,
+               "input depth should be dividable by block_size * block_size",
+               input_depth);
+
+    const index_t output_depth = input_depth / (block_size_ * block_size_);
+    const index_t output_width = input_width * block_size_;
+    const index_t output_height = input_height * block_size_;
+
+    std::vector<index_t> output_shape = {batch_size, output_height,
+                                         output_width, output_depth};
+    MACE_RETURN_IF_ERROR(output->Resize(output_shape));
+
+    Tensor::MappingGuard logits_guard(input);
+    Tensor::MappingGuard output_guard(output);
+    const uint8_t *input_ptr = input->data<uint8_t>();
+    uint8_t *output_ptr = output->mutable_data<uint8_t>();
+
+    for (index_t b = 0; b < batch_size; ++b) {
+      for (index_t h = 0; h < output_height; ++h) {
+        const index_t in_h = h / block_size_;
+        const index_t offset_h = (h % block_size_);
+        for (int w = 0; w < output_width; ++w) {
+          const index_t in_w = w / block_size_;
+          const index_t offset_w = w % block_size_;
+          const index_t offset_d =
+              (offset_h * block_size_ + offset_w) * output_depth;
+          for (index_t d = 0; d < output_depth; ++d) {
+            const index_t in_d = d + offset_d;
+            const index_t o_index =
+                ((b * output_height + h) * output_width + w) * output_depth + d;
+            const index_t i_index =
+                ((b * input_height + in_h) * input_width + in_w) * input_depth
+                    + in_d;
+            output_ptr[o_index] = input_ptr[i_index];
+          }
+        }
+      }
+    }
+
+    return MaceStatus::MACE_SUCCESS;
+  }
+
+ private:
+  const int block_size_;
+};
+#endif  // MACE_ENABLE_QUANTIZE
+
 #ifdef MACE_ENABLE_OPENCL
 template<>
 class DepthToSpaceOp<DeviceType::GPU, float> : public Operation {
@@ -118,6 +188,11 @@ void RegisterDepthToSpace(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "DepthToSpace",
                    DepthToSpaceOp, DeviceType::CPU, float);
 
+#ifdef MACE_ENABLE_QUANTIZE
+  MACE_REGISTER_OP(op_registry, "DepthToSpace", DepthToSpaceOp,
+                   DeviceType::CPU, uint8_t);
+#endif  // MACE_ENABLE_QUANTIZE
+
   MACE_REGISTER_GPU_OP(op_registry, "DepthToSpace", DepthToSpaceOp);
 }
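
Note: the new uint8_t specialization performs exactly the same index permutation as the float path; because it only moves values, no requantization is involved and the output can keep the input's quantization parameters. For reference, a minimal NumPy sketch of the NHWC depth-to-space mapping implemented by the nested loops above (the function name and the NumPy dependency are illustrative, not part of the commit):

import numpy as np

def depth_to_space_nhwc(x, block_size):
    # Same mapping as the loops in DepthToSpaceOp<CPU, uint8_t>:
    # out[b, h, w, d] = in[b, h / bs, w / bs, ((h % bs) * bs + w % bs) * out_c + d]
    n, h, w, c = x.shape
    assert c % (block_size * block_size) == 0
    out_c = c // (block_size * block_size)
    x = x.reshape(n, h, w, block_size, block_size, out_c)
    x = x.transpose(0, 1, 3, 2, 4, 5)      # n, h, block_h, w, block_w, out_c
    return x.reshape(n, h * block_size, w * block_size, out_c)

q = np.random.randint(0, 256, size=(1, 4, 4, 8), dtype=np.uint8)
print(depth_to_space_nhwc(q, 2).shape)     # (1, 8, 8, 2)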

mace/ops/space_to_depth.cc

@@ -25,7 +25,10 @@ namespace mace {
 namespace ops {
 
 template <DeviceType D, class T>
-class SpaceToDepthOp : public Operation {
+class SpaceToDepthOp;
+
+template<>
+class SpaceToDepthOp<CPU, float> : public Operation {
  public:
   explicit SpaceToDepthOp(OpConstructContext *context)
       : Operation(context),
@@ -55,8 +58,8 @@ class SpaceToDepthOp : public Operation {
     Tensor::MappingGuard logits_guard(input);
     Tensor::MappingGuard output_guard(output);
-    const T *input_ptr = input->data<T>();
-    T *output_ptr = output->mutable_data<T>();
+    const float *input_ptr = input->data<float>();
+    float *output_ptr = output->mutable_data<float>();
 
     for (index_t b = 0; b < batch_size; ++b) {
       for (index_t d = 0; d < input_depth; ++d) {
@@ -87,6 +90,71 @@ class SpaceToDepthOp : public Operation {
   const int block_size_;
 };
 
+#ifdef MACE_ENABLE_QUANTIZE
+template<>
+class SpaceToDepthOp<CPU, uint8_t> : public Operation {
+ public:
+  explicit SpaceToDepthOp(OpConstructContext *context)
+      : Operation(context),
+        block_size_(Operation::GetOptionalArg<int>("block_size", 1)) {}
+
+  MaceStatus Run(OpContext *context) override {
+    MACE_UNUSED(context);
+    const Tensor *input = this->Input(0);
+    Tensor *output = this->Output(0);
+    MACE_CHECK(input->dim_size() == 4, "input dim should be 4");
+
+    const index_t batch_size = input->dim(0);
+    const index_t input_depth = input->dim(3);
+    const index_t input_height = input->dim(1);
+    const index_t input_width = input->dim(2);
+
+    MACE_CHECK((input_width % block_size_ == 0) &&
+                   (input_height % block_size_ == 0),
+               "input width and height should be dividable by block_size");
+
+    const index_t output_depth = input_depth * block_size_ * block_size_;
+    const index_t output_width = input_width / block_size_;
+    const index_t output_height = input_height / block_size_;
+
+    std::vector<index_t> output_shape = {batch_size, output_height,
+                                         output_width, output_depth};
+    MACE_RETURN_IF_ERROR(output->Resize(output_shape));
+
+    Tensor::MappingGuard logits_guard(input);
+    Tensor::MappingGuard output_guard(output);
+    const uint8_t *input_ptr = input->data<uint8_t>();
+    uint8_t *output_ptr = output->mutable_data<uint8_t>();
+
+    for (index_t b = 0; b < batch_size; ++b) {
+      for (index_t h = 0; h < input_height; ++h) {
+        const index_t out_h = h / block_size_;
+        const index_t offset_h = (h % block_size_);
+        for (index_t w = 0; w < input_width; ++w) {
+          const index_t out_w = w / block_size_;
+          const index_t offset_w = (w % block_size_);
+          const index_t offset_d =
+              (offset_h * block_size_ + offset_w) * input_depth;
+          for (index_t d = 0; d < input_depth; ++d) {
+            const index_t out_d = d + offset_d;
+            const index_t o_index =
+                ((b * output_height + out_h) * output_width + out_w)
+                    * output_depth + out_d;
+            const index_t i_index =
+                ((b * input_height + h) * input_width + w) * input_depth + d;
+            output_ptr[o_index] = input_ptr[i_index];
+          }
+        }
+      }
+    }
+
+    return MaceStatus::MACE_SUCCESS;
+  }
+
+ private:
+  const int block_size_;
+};
+#endif  // MACE_ENABLE_QUANTIZE
+
 #ifdef MACE_ENABLE_OPENCL
 template<>
 class SpaceToDepthOp<DeviceType::GPU, float> : public Operation {
@@ -116,6 +184,11 @@ void RegisterSpaceToDepth(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "SpaceToDepth",
                    SpaceToDepthOp, DeviceType::CPU, float);
 
+#ifdef MACE_ENABLE_QUANTIZE
+  MACE_REGISTER_OP(op_registry, "SpaceToDepth", SpaceToDepthOp,
+                   DeviceType::CPU, uint8_t);
+#endif  // MACE_ENABLE_QUANTIZE
+
   MACE_REGISTER_GPU_OP(op_registry, "SpaceToDepth", SpaceToDepthOp);
 }
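
Note: the uint8_t SpaceToDepthOp is the inverse permutation of the DepthToSpace kernel above, with a width/height divisibility check in place of the depth check. A companion NumPy sketch under the same assumptions as the earlier one:

import numpy as np

def space_to_depth_nhwc(x, block_size):
    # Same mapping as the loops in SpaceToDepthOp<CPU, uint8_t>:
    # out[b, h / bs, w / bs, ((h % bs) * bs + w % bs) * c + d] = in[b, h, w, d]
    n, h, w, c = x.shape
    assert h % block_size == 0 and w % block_size == 0
    x = x.reshape(n, h // block_size, block_size, w // block_size, block_size, c)
    x = x.transpose(0, 1, 3, 2, 4, 5)      # n, out_h, out_w, block_h, block_w, c
    return x.reshape(n, h // block_size, w // block_size,
                     c * block_size * block_size)

x = np.random.randint(0, 256, size=(1, 6, 6, 3), dtype=np.uint8)
print(space_to_depth_nhwc(x, 3).shape)     # (1, 2, 2, 27)
# Applying the depth_to_space sketch above with the same block size restores x.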

mace/python/tools/converter_tool/transformer.py

@@ -1819,7 +1819,9 @@ class Transformer(base_converter.ConverterInterface):
                             MaceOp.Reshape.name,
                             MaceOp.ResizeBilinear.name,
                             MaceOp.BatchToSpaceND.name,
-                            MaceOp.SpaceToBatchND.name]:
+                            MaceOp.SpaceToBatchND.name,
+                            MaceOp.SpaceToDepth.name,
+                            MaceOp.DepthToSpace.name]:
                 del op.quantize_info[:]
                 producer_op = self._producer[op.input[0]]
                 if producer_op.output[0] in self._option.input_nodes:
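
This change puts SpaceToDepth and DepthToSpace in the same group as Reshape and the batch/space ops: the converter discards any quantize_info of their own, so the output simply inherits its producer's quantization parameters. For SpaceToDepth and DepthToSpace this is safe because they only permute values, so dequantization commutes with the op. A small self-contained check of that property (the helper names and the affine uint8 scheme real = scale * (q - zero_point) are assumptions for illustration):

import numpy as np

scale, zero_point = 0.02, 128
q = np.random.randint(0, 256, size=(1, 4, 4, 8), dtype=np.uint8)

def dequantize(t):
    return scale * (t.astype(np.float32) - zero_point)

def permute(t):
    # Stand-in for any pure data-movement op (DepthToSpace with block_size 2 here).
    n, h, w, c = t.shape
    t = t.reshape(n, h, w, 2, 2, c // 4).transpose(0, 1, 3, 2, 4, 5)
    return t.reshape(n, 2 * h, 2 * w, c // 4)

# Dequantizing after the op, using the producer's scale/zero_point,
# gives the same floats as running the op on dequantized data.
assert np.allclose(dequantize(permute(q)), permute(dequantize(q)))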

test/ccunit/mace/ops/depth_to_space_test.cc

@@ -262,6 +262,76 @@ TEST_F(DepthToSpaceOpTest, OPENCLRandomBatchHalf) {
   RandomTest<DeviceType::GPU, half>(2, {2, 384, 384, 8});
 }
 
+namespace {
+
+void TestDepthToSpaceQuantize(const int block_size,
+                              const std::vector<index_t> &shape) {
+  OpsTestNet net;
+  net.AddRandomInput<CPU, float>("Input", shape, false, false, true,
+                                 -1.f, 1.f);
+
+  // run cpu
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
+  OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
+      .Input("InputNCHW")
+      .AddIntArg("block_size", block_size)
+      .Output("OutputNCHW")
+      .Finalize(net.NewOperatorDef());
+  net.RunOp(CPU);
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "OutputNCHW", DataFormat::NCHW, "OutputCPU", DataFormat::NHWC);
+
+  // run quantize
+  OpDefBuilder("Quantize", "QuantizeInput")
+      .Input("Input")
+      .Output("QuantizedInput")
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  OpDefBuilder("DepthToSpace", "DepthToSpaceTest")
+      .Input("QuantizedInput")
+      .Output("QuantizedOutput")
+      .AddIntArg("block_size", block_size)
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  Tensor *eq_output = net.GetTensor("QuantizedInput");
+  Tensor *q_output = net.GetTensor("QuantizedOutput");
+  q_output->SetScale(eq_output->scale());
+  q_output->SetZeroPoint(eq_output->zero_point());
+  OpDefBuilder("Dequantize", "DeQuantizeTest")
+      .Input("QuantizedOutput")
+      .Output("DequantizedOutput")
+      .OutputType({DT_FLOAT})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  // Check
+  ExpectTensorSimilar<float>(*net.GetOutput("OutputCPU"),
+                             *net.GetTensor("DequantizedOutput"), 0.01);
+}
+
+}  // namespace
+
+TEST_F(DepthToSpaceOpTest, Quantize) {
+  TestDepthToSpaceQuantize(2, {1, 192, 192, 4});
+  TestDepthToSpaceQuantize(3, {1, 111, 111, 9});
+  TestDepthToSpaceQuantize(5, {1, 20, 20, 25});
+  TestDepthToSpaceQuantize(7, {1, 14, 14, 49});
+}
+
 }  // namespace test
 }  // namespace ops
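
Both new Quantize tests follow the same flow: run the float CPU op as the reference, quantize the input to uint8, run the uint8 op, copy the input tensor's scale and zero_point onto the quantized output (the permutation does not change them), dequantize, and require the result to match the float reference within a 0.01 similarity tolerance. A rough NumPy model of the quantize/dequantize round trip behind that check (the min/max-based scheme below is an assumption; MACE's exact rounding may differ):

import numpy as np

def quantize_uint8(x):
    # Asymmetric uint8 quantization over the tensor's value range (assumed).
    lo, hi = float(x.min()), float(x.max())
    scale = (hi - lo) / 255.0
    zero_point = int(round(-lo / scale))
    q = np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)
    return q, scale, zero_point

def dequantize_uint8(q, scale, zero_point):
    return scale * (q.astype(np.float32) - zero_point)

x = np.random.uniform(-1.0, 1.0, size=(1, 20, 20, 25)).astype(np.float32)
q, scale, zp = quantize_uint8(x)

# Since the uint8 op only rearranges q, the end-to-end error of
# quantize -> op -> dequantize equals this plain round-trip error.
roundtrip = dequantize_uint8(q, scale, zp)
assert np.abs(roundtrip - x).max() <= 2 * scale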

test/ccunit/mace/ops/space_to_depth_test.cc

@@ -253,6 +253,77 @@ TEST_F(SpaceToDepthOpTest, OPENCLBatchRandomHalf) {
   RandomTest<DeviceType::GPU, half>(2, {2, 384, 384, 32});
 }
 
+namespace {
+
+void TestSpaceToDepthQuantize(int block_size,
+                              const std::vector<index_t> &shape) {
+  OpsTestNet net;
+  net.AddRandomInput<CPU, float>("Input", shape, false, false, true,
+                                 -1.f, 1.f);
+
+  // run cpu
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
+  OpDefBuilder("SpaceToDepth", "SpaceToDepthTest")
+      .Input("InputNCHW")
+      .AddIntArg("block_size", block_size)
+      .Output("OutputNCHW")
+      .Finalize(net.NewOperatorDef());
+  net.RunOp(CPU);
+  net.TransformDataFormat<DeviceType::CPU, float>(
+      "OutputNCHW", DataFormat::NCHW, "OutputCPU", DataFormat::NHWC);
+
+  // run quantize
+  OpDefBuilder("Quantize", "QuantizeInput")
+      .Input("Input")
+      .Output("QuantizedInput")
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  OpDefBuilder("SpaceToDepth", "SpaceToDepthTest")
+      .Input("QuantizedInput")
+      .Output("QuantizedOutput")
+      .AddIntArg("block_size", block_size)
+      .OutputType({DT_UINT8})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  Tensor *eq_output = net.GetTensor("QuantizedInput");
+  Tensor *q_output = net.GetTensor("QuantizedOutput");
+  q_output->SetScale(eq_output->scale());
+  q_output->SetZeroPoint(eq_output->zero_point());
+  OpDefBuilder("Dequantize", "DeQuantizeTest")
+      .Input("QuantizedOutput")
+      .Output("DequantizedOutput")
+      .OutputType({DT_FLOAT})
+      .AddIntArg("T", DT_UINT8)
+      .Finalize(net.NewOperatorDef());
+  net.RunOp();
+
+  // Check
+  ExpectTensorSimilar<float>(*net.GetOutput("OutputCPU"),
+                             *net.GetTensor("DequantizedOutput"), 0.01);
+}
+
+TEST_F(SpaceToDepthOpTest, Quantize) {
+  TestSpaceToDepthQuantize(2, {1, 384, 384, 1});
+  TestSpaceToDepthQuantize(3, {1, 333, 333, 1});
+  TestSpaceToDepthQuantize(5, {1, 100, 100, 1});
+  TestSpaceToDepthQuantize(7, {1, 98, 98, 1});
+}
+
+}  // namespace
+
 }  // namespace test
 }  // namespace ops
 }  // namespace mace