Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
项目经理老王
Mace
提交
21b43e2b
Mace
项目概览
项目经理老王
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
21b43e2b
编写于
5月 24, 2019
作者:
李
李寅
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'compress' into 'master'
Dequantize weights to half See merge request !1115
上级
23d985f7
23bd28c8
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
105 addition
and
67 deletion
+105
-67
mace/core/quantize.cc
mace/core/quantize.cc
+11
-11
mace/core/quantize.h
mace/core/quantize.h
+35
-35
mace/core/runtime/apu/apu_wrapper.h
mace/core/runtime/apu/apu_wrapper.h
+1
-1
mace/core/runtime/hexagon/hexagon_hta_wrapper.h
mace/core/runtime/hexagon/hexagon_hta_wrapper.h
+1
-1
mace/core/types.h
mace/core/types.h
+10
-0
mace/core/workspace.cc
mace/core/workspace.cc
+37
-14
mace/ops/arm/q8/quantize.cc
mace/ops/arm/q8/quantize.cc
+2
-2
mace/proto/mace.proto
mace/proto/mace.proto
+1
-0
mace/python/tools/model_saver.py
mace/python/tools/model_saver.py
+1
-0
test/ccunit/mace/ops/conv_2d_test.cc
test/ccunit/mace/ops/conv_2d_test.cc
+2
-1
test/ccunit/mace/ops/depthwise_conv2d_test.cc
test/ccunit/mace/ops/depthwise_conv2d_test.cc
+2
-1
test/ccunit/mace/ops/fully_connected_test.cc
test/ccunit/mace/ops/fully_connected_test.cc
+2
-1
未找到文件。
mace/core/quantize.cc
浏览文件 @
21b43e2b
...
...
@@ -23,7 +23,7 @@ namespace mace {
#ifdef MACE_ENABLE_NEON
template
<
>
void
QuantizeUtil
<
uint8_t
>::
QuantizeWithScaleAndZeropoint
(
void
QuantizeUtil
<
float
,
uint8_t
>::
QuantizeWithScaleAndZeropoint
(
const
float
*
input
,
const
index_t
size
,
float
scale
,
...
...
@@ -65,11 +65,11 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
}
template
<
>
void
QuantizeUtil
<
uint8_t
>::
Dequantize
(
const
uint8_t
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
)
{
void
QuantizeUtil
<
float
,
uint8_t
>::
Dequantize
(
const
uint8_t
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
)
{
const
index_t
block_count
=
size
/
16
;
const
int32x4_t
vzero
=
vdupq_n_s32
(
zero_point
);
const
float32x4_t
vscale
=
vdupq_n_f32
(
scale
);
...
...
@@ -104,11 +104,11 @@ void QuantizeUtil<uint8_t>::Dequantize(const uint8_t *input,
}
template
<
>
void
QuantizeUtil
<
int32_t
>::
Dequantize
(
const
int
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
)
{
void
QuantizeUtil
<
float
,
int32_t
>::
Dequantize
(
const
int
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
)
{
const
index_t
block_count
=
size
/
4
;
const
int32x4_t
vzero
=
vdupq_n_s32
(
zero_point
);
const
float32x4_t
vscale
=
vdupq_n_f32
(
scale
);
...
...
mace/core/quantize.h
浏览文件 @
21b43e2b
...
...
@@ -25,7 +25,7 @@
namespace
mace
{
template
<
typename
T
>
template
<
typename
Q
>
inline
void
AdjustRange
(
const
float
in_min_data
,
const
float
in_max_data
,
const
bool
non_zero
,
...
...
@@ -33,8 +33,8 @@ inline void AdjustRange(const float in_min_data,
int32_t
*
zero_point
)
{
// re-range to make range include zero float and
// make zero float as integer u8
const
T
quantized_min
=
std
::
numeric_limits
<
T
>::
lowest
();
const
T
quantized_max
=
std
::
numeric_limits
<
T
>::
max
();
const
Q
quantized_min
=
std
::
numeric_limits
<
Q
>::
lowest
();
const
Q
quantized_max
=
std
::
numeric_limits
<
Q
>::
max
();
if
(
quantized_min
<
0
)
{
MACE_ASSERT
(
!
non_zero
,
"Cannot nudge to non_zero quantize value."
);
}
...
...
@@ -65,15 +65,15 @@ inline void AdjustRange(const float in_min_data,
}
}
template
<
typename
T
>
inline
T
Saturate
(
float
value
)
{
template
<
typename
Q
>
inline
Q
Saturate
(
float
value
)
{
int
rounded_value
=
static_cast
<
int
>
(
value
);
if
(
rounded_value
<=
std
::
numeric_limits
<
T
>::
lowest
())
{
return
std
::
numeric_limits
<
T
>::
lowest
();
}
else
if
(
rounded_value
>=
std
::
numeric_limits
<
T
>::
max
())
{
return
std
::
numeric_limits
<
T
>::
max
();
if
(
rounded_value
<=
std
::
numeric_limits
<
Q
>::
lowest
())
{
return
std
::
numeric_limits
<
Q
>::
lowest
();
}
else
if
(
rounded_value
>=
std
::
numeric_limits
<
Q
>::
max
())
{
return
std
::
numeric_limits
<
Q
>::
max
();
}
else
{
return
static_cast
<
T
>
(
rounded_value
);
return
static_cast
<
Q
>
(
rounded_value
);
}
}
...
...
@@ -115,7 +115,7 @@ inline void GetOutputMultiplierAndShift(
MACE_CHECK
(
*
right_shift
>=
0
);
}
template
<
typename
T
>
template
<
typename
F
,
typename
Q
>
class
QuantizeUtil
{
public:
explicit
QuantizeUtil
(
utils
::
ThreadPool
*
thread_pool
)
...
...
@@ -125,11 +125,11 @@ class QuantizeUtil {
const
index_t
size
,
float
scale
,
int32_t
zero_point
,
T
*
output
)
{
Q
*
output
)
{
float
recip_scale
=
1
/
scale
;
thread_pool_
->
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
for
(
index_t
i
=
start
;
i
<
end
;
i
+=
step
)
{
output
[
i
]
=
Saturate
<
T
>
(
roundf
(
zero_point
+
recip_scale
*
input
[
i
]));
output
[
i
]
=
Saturate
<
Q
>
(
roundf
(
zero_point
+
recip_scale
*
input
[
i
]));
}
},
0
,
size
,
1
);
}
...
...
@@ -137,14 +137,14 @@ class QuantizeUtil {
void
Quantize
(
const
float
*
input
,
const
index_t
size
,
bool
non_zero
,
T
*
output
,
Q
*
output
,
float
*
scale
,
int32_t
*
zero_point
)
{
float
in_min_data
;
float
in_max_data
;
FindMinMax
(
input
,
size
,
&
in_min_data
,
&
in_max_data
);
AdjustRange
<
T
>
(
in_min_data
,
in_max_data
,
non_zero
,
AdjustRange
<
Q
>
(
in_min_data
,
in_max_data
,
non_zero
,
scale
,
zero_point
);
QuantizeWithScaleAndZeropoint
(
input
,
size
,
*
scale
,
*
zero_point
,
output
);
...
...
@@ -158,24 +158,24 @@ class QuantizeUtil {
Tensor
::
MappingGuard
input_guard
(
&
input
);
Tensor
::
MappingGuard
output_guard
(
output
);
auto
*
input_data
=
input
.
data
<
float
>
();
auto
*
output_data
=
output
->
mutable_data
<
T
>
();
auto
*
output_data
=
output
->
mutable_data
<
Q
>
();
float
scale
;
int32_t
zero_point
;
Quantize
(
input_data
,
input
.
size
(),
false
,
output_data
,
&
scale
,
&
zero_point
);
*
min_out
=
scale
*
(
std
::
numeric_limits
<
T
>::
lowest
()
-
zero_point
);
*
max_out
=
scale
*
(
std
::
numeric_limits
<
T
>::
max
()
-
zero_point
);
*
min_out
=
scale
*
(
std
::
numeric_limits
<
Q
>::
lowest
()
-
zero_point
);
*
max_out
=
scale
*
(
std
::
numeric_limits
<
Q
>::
max
()
-
zero_point
);
}
void
Dequantize
(
const
T
*
input
,
void
Dequantize
(
const
Q
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
)
{
F
*
output
)
{
thread_pool_
->
Compute1D
([
=
](
index_t
start
,
index_t
end
,
index_t
step
)
{
for
(
index_t
i
=
start
;
i
<
end
;
i
+=
step
)
{
output
[
i
]
=
scale
*
(
input
[
i
]
-
zero_point
);
output
[
i
]
=
FloatCast
<
F
>
(
scale
*
(
input
[
i
]
-
zero_point
)
);
}
},
0
,
size
,
1
);
}
...
...
@@ -187,12 +187,12 @@ class QuantizeUtil {
MACE_CHECK
(
input
.
size
()
!=
0
);
Tensor
::
MappingGuard
input_guard
(
&
input
);
Tensor
::
MappingGuard
output_guard
(
output
);
auto
*
input_data
=
input
.
data
<
T
>
();
auto
*
output_data
=
output
->
mutable_data
<
float
>
();
auto
*
input_data
=
input
.
data
<
Q
>
();
auto
*
output_data
=
output
->
mutable_data
<
F
>
();
float
scale
;
int32_t
zero_point
;
AdjustRange
<
T
>
(
min_in
,
max_in
,
false
,
&
scale
,
&
zero_point
);
AdjustRange
<
Q
>
(
min_in
,
max_in
,
false
,
&
scale
,
&
zero_point
);
Dequantize
(
input_data
,
input
.
size
(),
scale
,
zero_point
,
output_data
);
}
...
...
@@ -204,7 +204,7 @@ class QuantizeUtil {
#ifdef MACE_ENABLE_NEON
template
<
>
void
QuantizeUtil
<
uint8_t
>::
QuantizeWithScaleAndZeropoint
(
void
QuantizeUtil
<
float
,
uint8_t
>::
QuantizeWithScaleAndZeropoint
(
const
float
*
input
,
const
index_t
size
,
float
scale
,
...
...
@@ -212,18 +212,18 @@ void QuantizeUtil<uint8_t>::QuantizeWithScaleAndZeropoint(
uint8_t
*
output
);
template
<
>
void
QuantizeUtil
<
uint8_t
>::
Dequantize
(
const
uint8_t
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
);
void
QuantizeUtil
<
float
,
uint8_t
>::
Dequantize
(
const
uint8_t
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
);
template
<
>
void
QuantizeUtil
<
int32_t
>::
Dequantize
(
const
int
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
);
void
QuantizeUtil
<
float
,
int32_t
>::
Dequantize
(
const
int
*
input
,
const
index_t
size
,
const
float
scale
,
const
int32_t
zero_point
,
float
*
output
);
#endif
...
...
mace/core/runtime/apu/apu_wrapper.h
浏览文件 @
21b43e2b
...
...
@@ -55,7 +55,7 @@ struct tensor_info {
ApuFrontend
*
frontend
;
std
::
vector
<
tensor_info
>
input_infos
;
std
::
vector
<
tensor_info
>
output_infos
;
QuantizeUtil
<
uint8_t
>
quantize_util_
;
QuantizeUtil
<
float
,
uint8_t
>
quantize_util_
;
};
}
// namespace mace
...
...
mace/core/runtime/hexagon/hexagon_hta_wrapper.h
浏览文件 @
21b43e2b
...
...
@@ -50,7 +50,7 @@ class HexagonHTAWrapper : public HexagonControlWrapper {
void
SetDebugLevel
(
int
level
)
override
;
private:
QuantizeUtil
<
uint8_t
>
quantize_util_
;
QuantizeUtil
<
float
,
uint8_t
>
quantize_util_
;
MACE_DISABLE_COPY_AND_ASSIGN
(
HexagonHTAWrapper
);
};
}
// namespace mace
...
...
mace/core/types.h
浏览文件 @
21b43e2b
...
...
@@ -66,6 +66,16 @@ enum FrameworkType {
CAFFE
=
1
,
};
template
<
typename
T
>
inline
T
FloatCast
(
float
data
)
{
return
data
;
}
template
<
>
inline
half
FloatCast
(
float
data
)
{
return
half_float
::
half_cast
<
half
>
(
data
);
}
}
// namespace mace
#endif // MACE_CORE_TYPES_H_
mace/core/workspace.cc
浏览文件 @
21b43e2b
...
...
@@ -46,6 +46,24 @@ bool HasHalfTensor(const NetDef &net_def) {
return
false
;
}
template
<
typename
T
>
void
DequantizeTensor
(
Device
*
device
,
const
unsigned
char
*
model_data
,
const
ConstTensor
&
const_tensor
,
Tensor
*
output_tensor
)
{
Tensor
::
MappingGuard
guard
(
output_tensor
);
auto
quantized_data
=
reinterpret_cast
<
const
uint8_t
*>
(
model_data
+
const_tensor
.
offset
());
auto
dequantized_data
=
output_tensor
->
mutable_data
<
T
>
();
QuantizeUtil
<
T
,
uint8_t
>
quantize_util
(
&
device
->
cpu_runtime
()
->
thread_pool
());
quantize_util
.
Dequantize
(
quantized_data
,
output_tensor
->
size
(),
const_tensor
.
scale
(),
const_tensor
.
zero_point
(),
dequantized_data
);
}
}
// namespace
Workspace
::
Workspace
()
=
default
;
...
...
@@ -125,10 +143,15 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
}
DataType
dst_data_type
=
const_tensor
.
data_type
();
if
((
device_type
==
DeviceType
::
CPU
&&
const_tensor
.
data_type
()
==
DataType
::
DT_HALF
)
||
(
!
is_quantize_model
&&
const_tensor
.
quantized
()))
{
if
(
device_type
==
DeviceType
::
CPU
&&
const_tensor
.
data_type
()
==
DataType
::
DT_HALF
)
{
dst_data_type
=
DataType
::
DT_FLOAT
;
}
else
if
(
!
is_quantize_model
&&
const_tensor
.
quantized
())
{
if
(
device_type
==
GPU
&&
net_def
.
data_type
()
!=
DataType
::
DT_FLOAT
)
{
dst_data_type
=
DataType
::
DT_HALF
;
}
else
{
dst_data_type
=
DataType
::
DT_FLOAT
;
}
}
std
::
unique_ptr
<
Tensor
>
tensor
(
...
...
@@ -159,17 +182,17 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def,
}
}
else
if
(
!
is_quantize_model
&&
const_tensor
.
quantized
())
{
// uncompress the weights of uint8
Tensor
::
MappingGuard
guard
(
tensor
.
get
());
auto
quantized_data
=
reinterpret_cast
<
const
uint8_t
*>
(
model_data
+
const_tensor
.
offset
());
auto
dequantized_data
=
tensor
->
mutable_data
<
float
>
();
QuantizeUtil
<
uint8_t
>
quantize_util
(
&
device
->
cpu_runtime
()
->
thread_pool
());
quantize_util
.
Dequantize
(
quantized_data
,
tensor
->
size
()
,
const_tensor
.
scale
()
,
const_tensor
.
zero_point
(),
dequantized_data
);
if
(
dst_data_type
!=
DT_FLOAT
)
{
DequantizeTensor
<
half
>
(
device
,
model_data
,
const_tensor
,
tensor
.
get
());
}
else
{
DequantizeTensor
<
float
>
(
device
,
model_data
,
const_tensor
,
tensor
.
get
());
}
}
else
{
tensor
->
CopyBytes
(
model_data
+
const_tensor
.
offset
(),
const_tensor
.
data_size
()
*
...
...
mace/ops/arm/q8/quantize.cc
浏览文件 @
21b43e2b
...
...
@@ -72,7 +72,7 @@ class QuantizeOp<DeviceType::CPU, uint8_t> : public Operation {
private:
bool
non_zero_
;
bool
find_range_every_time_
;
QuantizeUtil
<
uint8_t
>
quantize_util_
;
QuantizeUtil
<
float
,
uint8_t
>
quantize_util_
;
};
template
<
DeviceType
D
,
class
T
>
...
...
@@ -103,7 +103,7 @@ class DequantizeOp<DeviceType::CPU, T> : public Operation {
}
private:
QuantizeUtil
<
T
>
quantize_util_
;
QuantizeUtil
<
float
,
T
>
quantize_util_
;
};
void
RegisterQuantize
(
OpRegistryBase
*
op_registry
)
{
...
...
mace/proto/mace.proto
浏览文件 @
21b43e2b
...
...
@@ -101,6 +101,7 @@ message NetDef {
repeated
OperatorDef
op
=
1
;
repeated
Argument
arg
=
2
;
repeated
ConstTensor
tensors
=
3
;
optional
DataType
data_type
=
4
[
default
=
DT_FLOAT
];
repeated
InputOutputInfo
input_info
=
100
;
repeated
InputOutputInfo
output_info
=
101
;
...
...
mace/python/tools/model_saver.py
浏览文件 @
21b43e2b
...
...
@@ -281,6 +281,7 @@ def save_model(option, net_def, model_checksum, weight_checksum, template_dir,
obfuscate_name
(
option
,
net_def
)
output_dir
=
output_dir
+
'/'
net_def
.
data_type
=
option
.
data_type
# update tensor type
update_tensor_infos
(
net_def
,
option
.
data_type
)
...
...
test/ccunit/mace/ops/conv_2d_test.cc
浏览文件 @
21b43e2b
...
...
@@ -1172,7 +1172,8 @@ void TestQuant(const index_t batch,
auto
bias_data
=
bias
->
data
<
float
>
();
float
bias_scale
=
q_input
->
scale
()
*
q_filter
->
scale
();
std
::
vector
<
int32_t
>
q_bias
(
bias
->
size
());
QuantizeUtil
<
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
QuantizeUtil
<
float
,
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
quantize_util
.
QuantizeWithScaleAndZeropoint
(
bias_data
,
bias
->
size
(),
bias_scale
,
0
,
q_bias
.
data
());
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int32_t
>
(
...
...
test/ccunit/mace/ops/depthwise_conv2d_test.cc
浏览文件 @
21b43e2b
...
...
@@ -440,7 +440,8 @@ void TestQuant(const index_t batch,
auto
bias_data
=
bias
->
data
<
float
>
();
float
bias_scale
=
q_input
->
scale
()
*
q_filter
->
scale
();
std
::
vector
<
int32_t
>
q_bias
(
bias
->
size
());
QuantizeUtil
<
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
QuantizeUtil
<
float
,
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
quantize_util
.
QuantizeWithScaleAndZeropoint
(
bias_data
,
bias
->
size
(),
bias_scale
,
0
,
q_bias
.
data
());
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int32_t
>
(
...
...
test/ccunit/mace/ops/fully_connected_test.cc
浏览文件 @
21b43e2b
...
...
@@ -267,7 +267,8 @@ void QuantRandom(const index_t batch,
float
bias_scale
=
q_input
->
scale
()
*
q_weight
->
scale
();
std
::
vector
<
int32_t
>
q_bias
(
bias
->
size
());
QuantizeUtil
<
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
QuantizeUtil
<
float
,
int32_t
>
quantize_util
(
OpTestContext
::
Get
()
->
thread_pool
());
quantize_util
.
QuantizeWithScaleAndZeropoint
(
bias_data
,
bias
->
size
(),
bias_scale
,
0
,
q_bias
.
data
());
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int32_t
>
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录