Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
3d6439b7
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3d6439b7
编写于
8月 18, 2020
作者:
M
mindspore-ci-bot
提交者:
Gitee
8月 18, 2020
浏览文件
操作
浏览文件
下载
差异文件
!4668 Not Use ParamValueLite
Merge pull request !4668 from xutianchun/quant_ch
上级
82e8884e
09084d4b
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
231 addition
and
421 deletion
+231
-421
mindspore/lite/src/ir/primitive_t_value.h
mindspore/lite/src/ir/primitive_t_value.h
+4
-1
mindspore/lite/src/param_value_lite.h
mindspore/lite/src/param_value_lite.h
+0
-15
mindspore/lite/tools/anf_exporter/anf_exporter.cc
mindspore/lite/tools/anf_exporter/anf_exporter.cc
+0
-22
mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc
mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc
+11
-11
mindspore/lite/tools/converter/converter.cc
mindspore/lite/tools/converter/converter.cc
+0
-1
mindspore/lite/tools/converter/quantizer/CMakeLists.txt
mindspore/lite/tools/converter/quantizer/CMakeLists.txt
+0
-1
mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
...lite/tools/converter/quantizer/post_training_quantizer.cc
+59
-59
mindspore/lite/tools/converter/quantizer/post_training_quantizer.h
.../lite/tools/converter/quantizer/post_training_quantizer.h
+4
-4
mindspore/lite/tools/converter/quantizer/quantize_util.cc
mindspore/lite/tools/converter/quantizer/quantize_util.cc
+144
-97
mindspore/lite/tools/converter/quantizer/quantize_util.h
mindspore/lite/tools/converter/quantizer/quantize_util.h
+9
-7
mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
+0
-150
mindspore/lite/tools/converter/quantizer/weight_quantizer.h
mindspore/lite/tools/converter/quantizer/weight_quantizer.h
+0
-53
未找到文件。
mindspore/lite/src/ir/primitive_t_value.h
浏览文件 @
3d6439b7
...
@@ -47,7 +47,6 @@ class PrimitiveTValue : public Value {
...
@@ -47,7 +47,6 @@ class PrimitiveTValue : public Value {
}
}
}
}
void
SetInputQuantParam
(
const
std
::
vector
<
std
::
vector
<
schema
::
QuantParamT
>>
&
input_quant_param
)
{
void
SetInputQuantParam
(
const
std
::
vector
<
std
::
vector
<
schema
::
QuantParamT
>>
&
input_quant_param
)
{
this
->
input_quant_param_
=
input_quant_param
;
this
->
input_quant_param_
=
input_quant_param
;
}
}
...
@@ -56,6 +55,10 @@ class PrimitiveTValue : public Value {
...
@@ -56,6 +55,10 @@ class PrimitiveTValue : public Value {
this
->
output_quant_param_
=
output_quant_param
;
this
->
output_quant_param_
=
output_quant_param
;
}
}
void
ClearInputOutputQuantParam
()
{
input_quant_param_
.
clear
();
output_quant_param_
.
clear
();
}
void
AddInputQuantParam
(
std
::
vector
<
schema
::
QuantParamT
>
quant_param
)
{
void
AddInputQuantParam
(
std
::
vector
<
schema
::
QuantParamT
>
quant_param
)
{
this
->
input_quant_param_
.
emplace_back
(
quant_param
);
this
->
input_quant_param_
.
emplace_back
(
quant_param
);
...
...
mindspore/lite/src/param_value_lite.h
浏览文件 @
3d6439b7
...
@@ -25,16 +25,6 @@
...
@@ -25,16 +25,6 @@
#include "ir/dtype/type_id.h"
#include "ir/dtype/type_id.h"
namespace
mindspore
{
namespace
mindspore
{
struct
AnfQuantParam
{
double
scale
;
int32_t
zeroPoint
;
double
min
;
double
max
;
bool
narrowRange
;
bool
inited
;
int32_t
numBits
;
AnfQuantParam
()
:
scale
(
1.0
),
zeroPoint
(
0
),
min
(
0.0
),
max
(
0.0
),
narrowRange
(
false
),
numBits
(
8
),
inited
(
false
)
{}
};
class
ParamValueLite
:
public
Value
{
class
ParamValueLite
:
public
Value
{
public:
public:
ParamValueLite
()
:
tensor_addr_
(
nullptr
),
tensor_size_
(
0
)
{}
ParamValueLite
()
:
tensor_addr_
(
nullptr
),
tensor_size_
(
0
)
{}
...
@@ -59,10 +49,6 @@ class ParamValueLite : public Value {
...
@@ -59,10 +49,6 @@ class ParamValueLite : public Value {
}
}
return
size
;
return
size
;
}
}
std
::
vector
<
std
::
unique_ptr
<
AnfQuantParam
>>
&
quant_param
()
{
return
quant_params_
;
}
void
set_quant_param
(
std
::
unique_ptr
<
AnfQuantParam
>
&
quant_param
)
{
quant_params_
.
emplace_back
(
std
::
move
(
quant_param
));
}
bool
operator
==
(
const
Value
&
other
)
const
override
{
bool
operator
==
(
const
Value
&
other
)
const
override
{
return
this
==
&
other
;
return
this
==
&
other
;
...
@@ -73,7 +59,6 @@ class ParamValueLite : public Value {
...
@@ -73,7 +59,6 @@ class ParamValueLite : public Value {
size_t
tensor_size_
;
size_t
tensor_size_
;
std
::
vector
<
int
>
tensor_shape_
;
std
::
vector
<
int
>
tensor_shape_
;
TypeId
type_id_
;
TypeId
type_id_
;
std
::
vector
<
std
::
unique_ptr
<
AnfQuantParam
>>
quant_params_
;
};
};
using
ParamValueLitePtr
=
std
::
shared_ptr
<
ParamValueLite
>
;
using
ParamValueLitePtr
=
std
::
shared_ptr
<
ParamValueLite
>
;
...
...
mindspore/lite/tools/anf_exporter/anf_exporter.cc
浏览文件 @
3d6439b7
...
@@ -159,16 +159,6 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &me
...
@@ -159,16 +159,6 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &me
primitive
->
GetPrimitiveT
()
->
value
.
AsQuantDTypeCast
()
->
dstT
==
kNumberTypeFloat32
))
{
primitive
->
GetPrimitiveT
()
->
value
.
AsQuantDTypeCast
()
->
dstT
==
kNumberTypeFloat32
))
{
tensor_output
->
dataType
=
kNumberTypeInt8
;
tensor_output
->
dataType
=
kNumberTypeInt8
;
}
}
// // TensorType
// valuePtr = primitive->GetAttr(kInputTensorDataType);
// if (valuePtr != nullptr) {
// MS_LOG(INFO) << "node: " << node->name << " input tensor data
// type: " << GetValue<int>(valuePtr); for (auto input :
// node->inputIndex) {
// auto tensor = subGraph->allTensors[input].get();
// tensor->dataType = kNumberTypeUInt8;
// }
// }
}
}
return
RET_OK
;
return
RET_OK
;
}
}
...
@@ -295,18 +285,6 @@ int AnfExporter::ConvertInputParameter(const std::shared_ptr<AnfNode> input_anod
...
@@ -295,18 +285,6 @@ int AnfExporter::ConvertInputParameter(const std::shared_ptr<AnfNode> input_anod
paramTensor
->
nodeType
=
schema
::
NodeType_ValueNode
;
paramTensor
->
nodeType
=
schema
::
NodeType_ValueNode
;
paramTensor
->
data
.
resize
(
paramValue
->
tensor_size
());
paramTensor
->
data
.
resize
(
paramValue
->
tensor_size
());
memcpy
(
paramTensor
->
data
.
data
(),
paramValue
->
tensor_addr
(),
paramValue
->
tensor_size
());
memcpy
(
paramTensor
->
data
.
data
(),
paramValue
->
tensor_addr
(),
paramValue
->
tensor_size
());
for
(
auto
&
ite
:
paramValue
->
quant_param
())
{
auto
quantPar
=
std
::
make_unique
<
schema
::
QuantParamT
>
();
quantPar
->
scale
=
ite
->
scale
;
quantPar
->
zeroPoint
=
ite
->
zeroPoint
;
quantPar
->
min
=
ite
->
zeroPoint
;
quantPar
->
max
=
ite
->
max
;
quantPar
->
narrowRange
=
ite
->
narrowRange
;
quantPar
->
inited
=
ite
->
inited
;
quantPar
->
numBits
=
ite
->
numBits
;
paramTensor
->
quantParams
.
emplace_back
(
std
::
move
(
quantPar
));
paramTensor
->
dataType
=
paramValue
->
tensor_type
();
}
}
}
node_id_map_
[
paramNode
->
fullname_with_scope
()]
=
meta_graphT
->
allTensors
.
size
();
node_id_map_
[
paramNode
->
fullname_with_scope
()]
=
meta_graphT
->
allTensors
.
size
();
output_cnode
->
inputIndex
.
emplace_back
(
meta_graphT
->
allTensors
.
size
());
output_cnode
->
inputIndex
.
emplace_back
(
meta_graphT
->
allTensors
.
size
());
...
...
mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc
浏览文件 @
3d6439b7
...
@@ -61,17 +61,17 @@ int AnfImporterFromMetaGraphT::ConverterConstTensor() {
...
@@ -61,17 +61,17 @@ int AnfImporterFromMetaGraphT::ConverterConstTensor() {
param_value
->
set_tensor_addr
(
tensor_data
);
param_value
->
set_tensor_addr
(
tensor_data
);
param_value
->
set_tensor_size
(
size
);
param_value
->
set_tensor_size
(
size
);
}
}
if
(
!
tensor
->
quantParams
.
empty
())
{
//
if (!tensor->quantParams.empty()) {
std
::
unique_ptr
<
AnfQuantParam
>
quantParam
=
std
::
make_unique
<
AnfQuantParam
>
();
//
std::unique_ptr<AnfQuantParam> quantParam = std::make_unique<AnfQuantParam>();
quantParam
->
scale
=
tensor
->
quantParams
[
0
]
->
scale
;
//
quantParam->scale = tensor->quantParams[0]->scale;
quantParam
->
zeroPoint
=
tensor
->
quantParams
[
0
]
->
zeroPoint
;
//
quantParam->zeroPoint = tensor->quantParams[0]->zeroPoint;
quantParam
->
min
=
tensor
->
quantParams
[
0
]
->
min
;
//
quantParam->min = tensor->quantParams[0]->min;
quantParam
->
max
=
tensor
->
quantParams
[
0
]
->
max
;
//
quantParam->max = tensor->quantParams[0]->max;
quantParam
->
narrowRange
=
tensor
->
quantParams
[
0
]
->
narrowRange
;
//
quantParam->narrowRange = tensor->quantParams[0]->narrowRange;
quantParam
->
numBits
=
tensor
->
quantParams
[
0
]
->
numBits
;
//
quantParam->numBits = tensor->quantParams[0]->numBits;
quantParam
->
inited
=
tensor
->
quantParams
[
0
]
->
inited
;
//
quantParam->inited = tensor->quantParams[0]->inited;
param_value
->
set_quant_param
(
quantParam
);
//
param_value->set_quant_param(quantParam);
}
//
}
parameter
->
set_default_param
(
param_value
);
parameter
->
set_default_param
(
param_value
);
AddNode
(
i
,
parameter
);
AddNode
(
i
,
parameter
);
}
}
...
...
mindspore/lite/tools/converter/converter.cc
浏览文件 @
3d6439b7
...
@@ -31,7 +31,6 @@
...
@@ -31,7 +31,6 @@
#include "tools/anf_exporter/anf_exporter.h"
#include "tools/anf_exporter/anf_exporter.h"
#include "tools/anf_importer/import_from_protobuf.h"
#include "tools/anf_importer/import_from_protobuf.h"
#include "tools/converter/parser/onnx/onnx.pb.h"
#include "tools/converter/parser/onnx/onnx.pb.h"
#include "tools/converter/quantizer/weight_quantizer.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/quant_cast.h"
#include "tools/converter/quantizer/quant_cast.h"
...
...
mindspore/lite/tools/converter/quantizer/CMakeLists.txt
浏览文件 @
3d6439b7
...
@@ -7,7 +7,6 @@ add_library(quantizer_mid OBJECT
...
@@ -7,7 +7,6 @@ add_library(quantizer_mid OBJECT
${
CMAKE_CURRENT_SOURCE_DIR
}
/calc_quant_param.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/calc_quant_param.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/quantizer.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/quantizer.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/aware_quantizer.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/aware_quantizer.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/weight_quantizer.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/quantize_util.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/quantize_util.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/general_bitpacking.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/general_bitpacking.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/post_training_quantizer.cc
${
CMAKE_CURRENT_SOURCE_DIR
}
/post_training_quantizer.cc
...
...
mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
浏览文件 @
3d6439b7
...
@@ -510,7 +510,6 @@ STATUS PostTrainingQuantizer::DoQuantInput(double scale, int zeropoint, struct M
...
@@ -510,7 +510,6 @@ STATUS PostTrainingQuantizer::DoQuantInput(double scale, int zeropoint, struct M
quant_param
.
narrowRange
=
false
;
quant_param
.
narrowRange
=
false
;
std
::
vector
<
schema
::
QuantParamT
>
quant_params
=
{
quant_param
};
std
::
vector
<
schema
::
QuantParamT
>
quant_params
=
{
quant_param
};
lite_primitive
->
AddInputQuantParam
(
quant_params
);
lite_primitive
->
AddInputQuantParam
(
quant_params
);
// p->AddAttr("quant_input_dataType", MakeValue((int)DataType_DT_FLOAT));
return
RET_OK
;
return
RET_OK
;
}
}
...
@@ -528,51 +527,67 @@ STATUS PostTrainingQuantizer::DoQuantOutput(double scale, int zeropoint, struct
...
@@ -528,51 +527,67 @@ STATUS PostTrainingQuantizer::DoQuantOutput(double scale, int zeropoint, struct
quant_param
.
narrowRange
=
false
;
quant_param
.
narrowRange
=
false
;
std
::
vector
<
schema
::
QuantParamT
>
quant_params
=
{
quant_param
};
std
::
vector
<
schema
::
QuantParamT
>
quant_params
=
{
quant_param
};
lite_primitive
->
AddOutputQuantParam
(
quant_params
);
lite_primitive
->
AddOutputQuantParam
(
quant_params
);
// p->AddAttr("quant_output_dataType", MakeValue((int)DataType_DT_FLOAT));
return
RET_OK
;
return
RET_OK
;
}
}
STATUS
PostTrainingQuantizer
::
DoWeightQuant
(
AnfNodePtr
node
)
{
STATUS
PostTrainingQuantizer
::
DoWeightQuant
(
AnfNodePtr
weight
,
std
::
shared_ptr
<
PrimitiveTValue
>
primitiveT_value
,
bool
depthwise
)
{
// const vector<int> dims = filter->dims;
// const vector<int> dims = filter->dims;
// perlayer
// perlayer
if
(
!
node
->
isa
<
Parameter
>
())
{
if
(
!
weight
->
isa
<
Parameter
>
())
{
MS_LOG
(
ERROR
)
<<
"not a parameter"
;
MS_LOG
(
ERROR
)
<<
"not a parameter"
;
return
RET_PARAM_INVALID
;
return
RET_PARAM_INVALID
;
}
}
auto
parameter
=
std
::
dynamic_pointer_cast
<
Parameter
>
(
node
);
auto
parameter
=
std
::
dynamic_pointer_cast
<
Parameter
>
(
weight
);
ParamValueLitePtr
paramValue
=
std
::
dynamic_pointer_cast
<
ParamValueLite
>
(
parameter
->
default_param
());
ParamValueLitePtr
paramValue
=
std
::
dynamic_pointer_cast
<
ParamValueLite
>
(
parameter
->
default_param
());
auto
status
=
QuantFilter
(
paramValue
,
QuantType_PostTraining
,
quant_max
,
quant_min
,
bit_num
,
per_channel_
);
auto
status
=
QuantFilter
(
paramValue
,
primitiveT_value
,
QuantType_PostTraining
,
quant_max
,
quant_min
,
bit_num
,
per_channel_
,
depthwise
);
if
(
status
!=
RET_OK
)
{
if
(
status
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"QuantFilter failed: "
<<
status
;
MS_LOG
(
ERROR
)
<<
"QuantFilter failed: "
<<
status
;
return
status
;
return
status
;
}
}
// set dtype
auto
abstractBase
=
parameter
->
abstract
();
if
(
abstractBase
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Abstract of parameter is nullptr, "
<<
parameter
->
name
();
return
RET_ERROR
;
}
if
(
!
utils
::
isa
<
abstract
::
AbstractTensorPtr
>
(
abstractBase
))
{
MS_LOG
(
ERROR
)
<<
"Abstract of parameter should be anstract tensor, "
<<
parameter
->
name
();
return
RET_ERROR
;
}
auto
abstractTensor
=
utils
::
cast
<
abstract
::
AbstractTensorPtr
>
(
abstractBase
);
abstractTensor
->
element
()
->
set_type
(
TypeIdToType
(
kNumberTypeInt8
));
return
RET_OK
;
return
RET_OK
;
}
}
STATUS
PostTrainingQuantizer
::
DoBiasQuant
(
std
::
shared_ptr
<
PrimitiveTValue
>
input
,
AnfNodePtr
weight
,
AnfNodePtr
bias
)
{
STATUS
PostTrainingQuantizer
::
DoBiasQuant
(
AnfNodePtr
bias
,
std
::
shared_ptr
<
PrimitiveTValue
>
primitiveT_value
)
{
if
(
input
==
nullptr
||
weight
==
nullptr
||
bias
==
nullptr
)
{
if
(
primitiveT_value
==
nullptr
||
bias
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"null pointer!"
;
MS_LOG
(
ERROR
)
<<
"null pointer!"
;
return
RET_NULL_PTR
;
return
RET_NULL_PTR
;
}
}
ParameterPtr
weightParameterPtr
=
std
::
dynamic_pointer_cast
<
Parameter
>
(
weight
);
auto
bias_parameter_ptr
=
std
::
dynamic_pointer_cast
<
Parameter
>
(
bias
);
auto
default_param
=
weightParameterPtr
->
default_param
();
auto
bias_default_param
=
bias_parameter_ptr
->
default_param
();
auto
weight_param
=
std
::
dynamic_pointer_cast
<
ParamValueLite
>
(
default_param
);
// std::vector<std::unique_ptr<mindspore::QuantParamT>> weight_quant_params = weight_param->get_quant_params();
ParameterPtr
biasParameterPtr
=
std
::
dynamic_pointer_cast
<
Parameter
>
(
bias
);
auto
bias_default_param
=
biasParameterPtr
->
default_param
();
auto
bias_param
=
std
::
dynamic_pointer_cast
<
ParamValueLite
>
(
bias_default_param
);
auto
bias_param
=
std
::
dynamic_pointer_cast
<
ParamValueLite
>
(
bias_default_param
);
auto
active_weight_quant_params
=
primitiveT_value
->
GetInputQuantParams
();
if
(
active_weight_quant_params
.
size
()
!=
2
)
{
MS_LOG
(
ERROR
)
<<
"unexpected active_weight_quant_params size: "
<<
active_weight_quant_params
.
size
();
return
RET_ERROR
;
}
auto
active_params
=
active_weight_quant_params
[
0
];
auto
weight_params
=
active_weight_quant_params
[
1
];
vector
<
double
>
input_scales
;
vector
<
double
>
input_scales
;
vector
<
double
>
filter_scales
;
vector
<
double
>
filter_scales
;
vector
<
double
>
bias_scales
;
vector
<
double
>
bias_scales
;
auto
quant_params
=
input
->
GetInputQuantParams
();
size_t
sizeX
=
active_params
.
size
();
size_t
sizeX
=
quant_params
.
size
();
for
(
size_t
i
=
0
;
i
<
sizeX
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
sizeX
;
i
++
)
{
input_scales
.
emplace_back
(
quant_params
[
i
].
front
()
.
scale
);
input_scales
.
emplace_back
(
active_params
[
i
]
.
scale
);
}
}
size_t
sizeY
=
weight_param
->
quant_param
()
.
size
();
size_t
sizeY
=
weight_param
s
.
size
();
if
(
sizeX
!=
sizeY
)
{
if
(
sizeX
!=
sizeY
)
{
if
(
sizeX
>
1
&&
sizeY
>
1
)
{
if
(
sizeX
>
1
&&
sizeY
>
1
)
{
MS_LOG
(
ERROR
)
<<
"input and filter's scale count cannot match!"
;
MS_LOG
(
ERROR
)
<<
"input and filter's scale count cannot match!"
;
...
@@ -580,8 +595,7 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr<PrimitiveTValue> input
...
@@ -580,8 +595,7 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr<PrimitiveTValue> input
}
}
}
}
for
(
size_t
i
=
0
;
i
<
sizeY
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
sizeY
;
i
++
)
{
auto
scale
=
weight_param
->
quant_param
()[
i
]
->
scale
;
filter_scales
.
emplace_back
(
weight_params
[
i
].
scale
);
filter_scales
.
push_back
(
scale
);
}
}
size_t
size
=
std
::
max
(
sizeX
,
sizeY
);
size_t
size
=
std
::
max
(
sizeX
,
sizeY
);
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
...
@@ -593,20 +607,22 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr<PrimitiveTValue> input
...
@@ -593,20 +607,22 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr<PrimitiveTValue> input
size_t
shape_size
=
bias_param
->
tensor_shape_size
();
size_t
shape_size
=
bias_param
->
tensor_shape_size
();
// set bias quant param
// set bias quant param
bias_param
->
quant_param
().
clear
()
;
vector
<
schema
::
QuantParamT
>
quant_params
;
for
(
size_t
i
=
0
;
i
<
bias_scales
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
bias_scales
.
size
();
i
++
)
{
std
::
unique_ptr
<
AnfQuantParam
>
param
(
new
(
std
::
nothrow
)
AnfQuantParam
());
schema
::
QuantParamT
quant_param
;
param
->
scale
=
bias_scales
[
i
];
quant_param
.
scale
=
bias_scales
[
i
];
param
->
zeroPoint
=
0
;
quant_param
.
zeroPoint
=
0
;
bias_param
->
quant_param
().
emplace_back
(
std
::
move
(
param
));
quant_param
.
inited
=
true
;
quant_params
.
emplace_back
(
quant_param
);
}
}
primitiveT_value
->
AddInputQuantParam
(
quant_params
);
// quant bias data
// quant bias data
int32_t
*
quant_datas
=
new
(
std
::
nothrow
)
int32_t
[
shape_size
];
int32_t
*
quant_datas
=
new
(
std
::
nothrow
)
int32_t
[
shape_size
];
if
(
quant_datas
==
nullptr
)
{
if
(
quant_datas
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"null pointer dereferencing."
;
MS_LOG
(
ERROR
)
<<
"null pointer dereferencing."
;
return
RET_NULL_PTR
;
return
RET_NULL_PTR
;
}
}
float
*
raw_datas
=
reinterpret
_cast
<
float
*>
(
bias_param
->
tensor_addr
());
float
*
raw_datas
=
static
_cast
<
float
*>
(
bias_param
->
tensor_addr
());
double
bias_scale_tmp
;
double
bias_scale_tmp
;
for
(
size_t
i
=
0
;
i
<
shape_size
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
shape_size
;
i
++
)
{
if
(
bias_scales
.
size
()
==
1
)
{
if
(
bias_scales
.
size
()
==
1
)
{
...
@@ -625,38 +641,21 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr<PrimitiveTValue> input
...
@@ -625,38 +641,21 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr<PrimitiveTValue> input
return
RET_ERROR
;
return
RET_ERROR
;
}
}
delete
[]
quant_datas
;
delete
[]
quant_datas
;
bias_param
->
set_tensor_type
(
kNumberTypeInt32
);
// set dtype
auto
abstractBase
=
bias_parameter_ptr
->
abstract
();
if
(
abstractBase
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"Abstract of parameter is nullptr, "
<<
bias_parameter_ptr
->
name
();
return
RET_ERROR
;
}
if
(
!
utils
::
isa
<
abstract
::
AbstractTensorPtr
>
(
abstractBase
))
{
MS_LOG
(
ERROR
)
<<
"Abstract of parameter should be anstract tensor, "
<<
bias_parameter_ptr
->
name
();
return
RET_ERROR
;
}
auto
abstractTensor
=
utils
::
cast
<
abstract
::
AbstractTensorPtr
>
(
abstractBase
);
abstractTensor
->
element
()
->
set_type
(
TypeIdToType
(
kNumberTypeInt32
));
return
RET_OK
;
return
RET_OK
;
}
}
// STATUS PostTrainingQuantizer::reformatConvWeight(GraphDefT *graph) {
// for (auto &subGraph : graphDefT->subgraphs) {
// for (auto iter = subGraph->nodes.begin(); iter != subGraph->nodes.end(); iter++) {
// OpDefT *node = (*iter).get();
// bool isConv = false;
// kTransFilterType tansType;
// if ((*node).attr.type == OpT_Conv2D) {
// tansType = kKCHW2HWCK;
// isConv = true;
// }
// else if ((*node).attr.type == OpT_DepthwiseConv2D) {
// tansType = kCKHW2HWCK;
// isConv = true;
// }
// if (isConv) {
// auto status = TransFilterFormat<uint8_t>(&(*subGraph.get()->allTensors.at(node->inputIndex[1])),
// tansType);
// if (status != RET_OK) {
// return status;
// }
// TensorDefT *weight = subGraph->allTensors.at(node->inputIndex[1]).get();
// weight->format = Format_HWCK;
// PostBitPack(weight, bitNum);
// }
// }
// }
//}
STATUS
PostTrainingQuantizer
::
QuantNode
()
{
STATUS
PostTrainingQuantizer
::
QuantNode
()
{
auto
input_min_max
=
this
->
calibrator_
->
GetMinMax
(
this
->
calibrator_
->
GetInputDivergInfo
());
auto
input_min_max
=
this
->
calibrator_
->
GetMinMax
(
this
->
calibrator_
->
GetInputDivergInfo
());
auto
input_scale
=
this
->
calibrator_
->
GetResult
(
this
->
calibrator_
->
GetInputDivergInfo
());
auto
input_scale
=
this
->
calibrator_
->
GetResult
(
this
->
calibrator_
->
GetInputDivergInfo
());
...
@@ -682,7 +681,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
...
@@ -682,7 +681,7 @@ STATUS PostTrainingQuantizer::QuantNode() {
primitiveT_value
->
SetQuantType
(
schema
::
QuantType_QUANT_NONE
);
primitiveT_value
->
SetQuantType
(
schema
::
QuantType_QUANT_NONE
);
continue
;
continue
;
}
}
auto
input_vec
=
cnode
->
inputs
();
primitiveT_value
->
ClearInputOutputQuantParam
();
auto
op_name
=
cnode
->
fullname_with_scope
();
auto
op_name
=
cnode
->
fullname_with_scope
();
auto
op_type
=
primitiveT_value
->
GetPrimitiveT
()
->
value
.
type
;
auto
op_type
=
primitiveT_value
->
GetPrimitiveT
()
->
value
.
type
;
MS_LOG
(
INFO
)
<<
"OpName: "
<<
op_name
;
MS_LOG
(
INFO
)
<<
"OpName: "
<<
op_name
;
...
@@ -711,11 +710,12 @@ STATUS PostTrainingQuantizer::QuantNode() {
...
@@ -711,11 +710,12 @@ STATUS PostTrainingQuantizer::QuantNode() {
DoQuantInput
(
scale
,
convInputzeropoint
,
&
input_min_max
[
cnode
],
primitiveT_value
);
DoQuantInput
(
scale
,
convInputzeropoint
,
&
input_min_max
[
cnode
],
primitiveT_value
);
// do weight quant
// do weight quant
auto
weight
=
cnode
->
input
(
2
);
auto
weight
=
cnode
->
input
(
2
);
DoWeightQuant
(
weight
);
bool
depthwise
=
op_type
==
PrimitiveType_DeDepthwiseConv2D
;
DoWeightQuant
(
weight
,
primitiveT_value
,
depthwise
);
// do bias quant
// do bias quant
if
(
cnode
->
inputs
().
size
()
==
4
)
{
if
(
cnode
->
inputs
().
size
()
==
4
)
{
auto
bias
=
cnode
->
input
(
3
);
auto
bias
=
cnode
->
input
(
3
);
DoBiasQuant
(
primitiveT_value
,
weight
,
bias
);
DoBiasQuant
(
bias
,
primitiveT_value
);
}
}
}
}
// do output quant
// do output quant
...
...
mindspore/lite/tools/converter/quantizer/post_training_quantizer.h
浏览文件 @
3d6439b7
...
@@ -65,8 +65,8 @@ class PostTrainingQuantizer : public Quantizer {
...
@@ -65,8 +65,8 @@ class PostTrainingQuantizer : public Quantizer {
STATUS
DoQuantize
(
FuncGraphPtr
funcGraph
)
override
;
STATUS
DoQuantize
(
FuncGraphPtr
funcGraph
)
override
;
size_t
bit_num
;
size_t
bit_num
;
int
quant_max
{
127
};
int
quant_max
{
INT8_MAX
};
int
quant_min
{
-
128
};
int
quant_min
{
INT8_MIN
};
private:
private:
bool
per_channel_
;
bool
per_channel_
;
...
@@ -96,9 +96,9 @@ class PostTrainingQuantizer : public Quantizer {
...
@@ -96,9 +96,9 @@ class PostTrainingQuantizer : public Quantizer {
STATUS
DoQuantInput
(
double
scale
,
int32_t
zeropoint
,
struct
MaxMin
*
max_min
,
std
::
shared_ptr
<
PrimitiveTValue
>
);
STATUS
DoQuantInput
(
double
scale
,
int32_t
zeropoint
,
struct
MaxMin
*
max_min
,
std
::
shared_ptr
<
PrimitiveTValue
>
);
STATUS
DoQuantOutput
(
double
scale
,
int32_t
zeropoint
,
struct
MaxMin
*
max_min
,
std
::
shared_ptr
<
PrimitiveTValue
>
);
STATUS
DoQuantOutput
(
double
scale
,
int32_t
zeropoint
,
struct
MaxMin
*
max_min
,
std
::
shared_ptr
<
PrimitiveTValue
>
);
STATUS
DoWeightQuant
(
AnfNodePtr
nod
e
);
STATUS
DoWeightQuant
(
AnfNodePtr
weight
,
std
::
shared_ptr
<
PrimitiveTValue
>
primitiveT_value
,
bool
depthwis
e
);
STATUS
DoBiasQuant
(
std
::
shared_ptr
<
PrimitiveTValue
>
input
,
AnfNodePtr
weight
,
AnfNodePtr
bias
);
STATUS
DoBiasQuant
(
AnfNodePtr
bias
,
std
::
shared_ptr
<
PrimitiveTValue
>
primitiveT_value
);
};
};
struct
DivergInfo
;
struct
DivergInfo
;
...
...
mindspore/lite/tools/converter/quantizer/quantize_util.cc
浏览文件 @
3d6439b7
...
@@ -99,7 +99,9 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
...
@@ -99,7 +99,9 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const {
schema
::
PrimitiveType_Nchw2Nhwc
,
schema
::
PrimitiveType_Nhwc2Nchw
,
schema
::
PrimitiveType_Nchw2Nhwc
,
schema
::
PrimitiveType_Nhwc2Nchw
,
schema
::
PrimitiveType_Conv2D
,
schema
::
PrimitiveType_DepthwiseConv2D
,
schema
::
PrimitiveType_Conv2D
,
schema
::
PrimitiveType_DepthwiseConv2D
,
schema
::
PrimitiveType_Add
,
schema
::
PrimitiveType_Pooling
,
schema
::
PrimitiveType_Add
,
schema
::
PrimitiveType_Pooling
,
schema
::
PrimitiveType_Concat
,
/*schema::PrimitiveType_SoftMax,*/
schema
::
PrimitiveType_Reshape
,
schema
::
PrimitiveType_Concat
,
/*schema::PrimitiveType_SoftMax,*/
schema
::
PrimitiveType_Reshape
,
/*schema::PrimitiveType_FullConnection,*/
schema
::
PrimitiveType_MatMul
,
schema
::
PrimitiveType_Activation
};
schema
::
PrimitiveType_Activation
};
return
IsContain
(
uint8OpList
,
type
);
return
IsContain
(
uint8OpList
,
type
);
}
}
...
@@ -191,7 +193,7 @@ void CalFakeNode(const AnfNodePtr &inTensor) {
...
@@ -191,7 +193,7 @@ void CalFakeNode(const AnfNodePtr &inTensor) {
// }
// }
}
}
STATUS
CalQuantizationParams
(
s
td
::
unique_ptr
<
AnfQuantParam
>
&
quantParam
,
double
mMin
,
double
mMax
,
bool
narrowRange
,
STATUS
CalQuantizationParams
(
s
chema
::
QuantParamT
*
quantParam
,
double
mMin
,
double
mMax
,
bool
narrowRange
,
int
quant_max
,
int
quant_min
,
int
num_bits
)
{
int
quant_max
,
int
quant_min
,
int
num_bits
)
{
MS_ASSERT
(
quantParam
!=
nullptr
);
MS_ASSERT
(
quantParam
!=
nullptr
);
if
(
mMin
>
0.0
f
)
{
if
(
mMin
>
0.0
f
)
{
...
@@ -306,133 +308,178 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl
...
@@ -306,133 +308,178 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl
return
RET_OK
;
return
RET_OK
;
}
}
STATUS
QuantFilter
(
ParamValueLitePtr
&
weightPtr
,
QuantType
quantType
,
int
quant_max
,
int
quant_min
,
size_t
bitNum
,
STATUS
QuantFilter
(
ParamValueLitePtr
weight
,
std
::
shared_ptr
<
PrimitiveTValue
>
primitiveT_value
,
QuantType
quantType
,
bool
per_channel
)
{
int
quant_max
,
int
quant_min
,
size_t
bitNum
,
bool
per_channel
,
bool
depth_wise
)
{
auto
dims
=
weightPtr
->
tensor_shape
();
auto
dims
=
weight
->
tensor_shape
();
if
(
dims
.
size
()
!=
4
)
{
if
(
per_channel
)
{
MS_LOG
(
ERROR
)
<<
"weight dims size error: "
<<
dims
.
size
()
<<
" Back to per layer."
;
if
(
dims
.
size
()
!=
4
)
{
per_channel
=
false
;
MS_LOG
(
ERROR
)
<<
"weight dims size error: "
<<
dims
.
size
()
<<
" Back to per layer."
;
}
else
{
per_channel
=
false
;
uint32_t
channels
=
dims
[
0
];
}
else
{
if
(
channels
==
0
)
{
uint32_t
channels
=
dims
[
0
];
MS_LOG
(
ERROR
)
<<
"channels is 0"
;
if
(
channels
==
0
)
{
return
RET_ERROR
;
MS_LOG
(
ERROR
)
<<
"channels is 0"
;
return
RET_ERROR
;
}
}
}
}
}
vector
<
schema
::
QuantParamT
>
quant_params
;
size_t
elem_count
=
weight
->
tensor_shape_size
();
auto
*
raw_datas
=
static_cast
<
float
*>
(
weight
->
tensor_addr
());
if
(
raw_datas
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"rawDatas is nullptr"
;
return
RET_ERROR
;
}
vector
<
int8_t
>
quant_datas
(
elem_count
);
if
(
per_channel
)
{
if
(
per_channel
)
{
// notice:
// notice:
// at now for tflite model, Conv2D's weight format is KHWC, so is DepthwiseConv2D
// at now for tflite model, Conv2D's weight format is KHWC, so is DepthwiseConv2D
// if TransWeightFormat is done before PostTraingingQuantization, the DepthwiseCon2D's weight is CHWK
// if TransWeightFormat is done before PostTraingingQuantization, the DepthwiseCon2D's weight is CHWK
size_t
shapeSize
=
weightPtr
->
tensor_shape_size
();
if
(
depth_wise
)
{
auto
channels
=
dims
[
0
];
// channel at last
size_t
oneFilterSize
=
shapeSize
/
channels
;
auto
channels
=
dims
[
3
];
auto
*
rawDatas
=
reinterpret_cast
<
const
float
*>
(
weightPtr
->
tensor_addr
());
if
(
channels
==
0
)
{
if
(
rawDatas
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"channels is zero"
;
MS_LOG
(
ERROR
)
<<
"rawDatas is nullptr"
;
return
RET_ERROR
;
return
RET_ERROR
;
}
}
size_t
one_filter_size
=
elem_count
/
channels
;
float
min
=
FLT_MAX
;
for
(
uint32_t
i
=
0
;
i
<
channels
;
i
++
)
{
float
max
=
-
FLT_MAX
;
float
min
=
FLT_MAX
;
weightPtr
->
quant_param
().
clear
();
float
max
=
-
FLT_MAX
;
vector
<
int8_t
>
qDatas
(
shapeSize
);
// find min and max
for
(
uint32_t
j
=
0
;
j
<
one_filter_size
;
j
++
)
{
for
(
uint32_t
i
=
0
;
i
<
channels
;
i
++
)
{
auto
index
=
i
+
j
*
channels
;
// find min and max
if
(
index
>=
elem_count
)
{
for
(
uint32_t
j
=
0
;
j
<
oneFilterSize
;
j
++
)
{
MS_LOG
(
ERROR
)
<<
"over flow!"
;
auto
index
=
j
+
i
*
channels
;
return
RET_ERROR
;
if
(
index
>=
shapeSize
)
{
}
MS_LOG
(
ERROR
)
<<
"over flow!"
;
min
=
std
::
min
(
min
,
raw_datas
[
index
]);
return
RET_ERROR
;
max
=
std
::
max
(
max
,
raw_datas
[
index
]);
}
schema
::
QuantParamT
quant_param
;
STATUS
status
=
CalQuantizationParams
(
&
quant_param
,
min
,
max
,
false
,
quant_max
,
quant_min
,
bitNum
);
if
(
status
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"CalQuantizationParams failed"
<<
status
;
return
status
;
}
}
min
=
std
::
min
(
min
,
rawDatas
[
index
]);
quant_params
.
emplace_back
(
quant_param
);
max
=
std
::
max
(
max
,
rawDatas
[
index
]);
// do quantization
for
(
uint32_t
j
=
0
;
j
<
one_filter_size
;
j
++
)
{
auto
index
=
i
+
j
*
channels
;
if
(
index
>=
elem_count
)
{
MS_LOG
(
ERROR
)
<<
"over flow!"
;
return
RET_ERROR
;
}
float
raw_data
=
raw_datas
[
index
];
auto
quant_data
=
QuantizeData
<
int8_t
>
(
raw_data
,
quant_param
,
quant_max
,
quant_min
);
quant_datas
[
index
]
=
quant_data
;
}
}
auto
ret
=
memcpy_s
(
const_cast
<
float
*>
(
raw_datas
),
weight
->
tensor_size
(),
quant_datas
.
data
(),
elem_count
*
sizeof
(
int8_t
));
if
(
ret
!=
EOK
)
{
MS_LOG
(
ERROR
)
<<
"memcpy error: "
<<
ret
;
return
RET_ERROR
;
}
}
std
::
unique_ptr
<
AnfQuantParam
>
quantParam
=
std
::
unique_ptr
<
AnfQuantParam
>
(
new
AnfQuantParam
);
if
(
quantType
==
QuantType_WeightQuant
)
{
STATUS
status
=
CalQuantizationParams
(
quantParam
,
min
,
max
,
false
,
quant_max
,
quant_min
,
bitNum
);
PostBitPack
(
const_cast
<
float
*>
(
raw_datas
),
elem_count
,
bitNum
);
if
(
status
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"CalQuantizationParams failed"
<<
status
;
return
status
;
}
}
// do quantization
for
(
uint32_t
j
=
0
;
j
<
oneFilterSize
;
j
++
)
{
weight
->
set_tensor_size
(
elem_count
*
sizeof
(
int8_t
));
auto
index
=
j
+
i
*
channels
;
}
else
{
if
(
index
>=
shapeSize
)
{
// channel at first
MS_LOG
(
ERROR
)
<<
"over flow!"
;
auto
channels
=
dims
[
0
];
return
RET_ERROR
;
if
(
channels
==
0
)
{
MS_LOG
(
ERROR
)
<<
"channels is zero"
;
return
RET_ERROR
;
}
size_t
one_filter_size
=
elem_count
/
channels
;
for
(
uint32_t
i
=
0
;
i
<
channels
;
i
++
)
{
float
min
=
FLT_MAX
;
float
max
=
-
FLT_MAX
;
// find min and max
for
(
uint32_t
j
=
0
;
j
<
one_filter_size
;
j
++
)
{
auto
index
=
j
+
i
*
one_filter_size
;
if
(
index
>=
elem_count
)
{
MS_LOG
(
ERROR
)
<<
"over flow!"
;
return
RET_ERROR
;
}
min
=
std
::
min
(
min
,
raw_datas
[
index
]);
max
=
std
::
max
(
max
,
raw_datas
[
index
]);
}
schema
::
QuantParamT
quant_param
;
STATUS
status
=
CalQuantizationParams
(
&
quant_param
,
min
,
max
,
false
,
quant_max
,
quant_min
,
bitNum
);
if
(
status
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"CalQuantizationParams failed"
<<
status
;
return
status
;
}
quant_params
.
emplace_back
(
quant_param
);
// do quantization
for
(
uint32_t
j
=
0
;
j
<
one_filter_size
;
j
++
)
{
auto
index
=
j
+
i
*
one_filter_size
;
if
(
index
>=
elem_count
)
{
MS_LOG
(
ERROR
)
<<
"over flow!"
;
return
RET_ERROR
;
}
float
raw_data
=
raw_datas
[
index
];
auto
quant_data
=
QuantizeData
<
int8_t
>
(
raw_data
,
quant_param
,
quant_max
,
quant_min
);
quant_datas
[
index
]
=
quant_data
;
}
}
float
rawData
=
rawDatas
[
index
];
auto
qData
=
QuantizeData
<
int8_t
>
(
rawData
,
quantParam
.
get
(),
quant_max
,
quant_min
);
qDatas
[
index
]
=
qData
;
}
}
weightPtr
->
set_quant_param
(
quantParam
);
auto
ret
=
}
memcpy_s
(
raw_datas
,
weight
->
tensor_size
(),
quant_datas
.
data
(),
elem_count
*
sizeof
(
int8_t
));
auto
ret
=
if
(
ret
!=
EOK
)
{
memcpy_s
(
const_cast
<
float
*>
(
rawDatas
),
weightPtr
->
tensor_size
(),
qDatas
.
data
(),
shapeSize
*
sizeof
(
int8_t
))
;
MS_LOG
(
ERROR
)
<<
"memcpy error: "
<<
ret
;
if
(
ret
!=
EOK
)
{
return
RET_ERROR
;
MS_LOG
(
ERROR
)
<<
"memcpy error: "
<<
ret
;
}
return
RET_ERROR
;
if
(
quantType
==
QuantType_WeightQuant
)
{
}
PostBitPack
(
const_cast
<
float
*>
(
raw_datas
),
elem_count
,
bitNum
);
if
(
quantType
==
QuantType_WeightQuant
)
{
}
PostBitPack
(
const_cast
<
float
*>
(
rawDatas
),
shapeSize
,
bitNum
);
weight
->
set_tensor_size
(
elem_count
*
sizeof
(
int8_t
)
);
}
}
weightPtr
->
set_tensor_type
(
kNumberTypeInt8
);
weightPtr
->
set_tensor_size
(
shapeSize
*
sizeof
(
int8_t
));
}
else
{
}
else
{
// per layer
// per layer
size_t
shapeSize
=
weightPtr
->
tensor_shape_size
();
float
min
=
FLT_MAX
;
auto
*
rawDatas
=
static_cast
<
float
*>
(
weightPtr
->
tensor_addr
());
float
max
=
-
FLT_MIN
;
if
(
rawDatas
==
nullptr
)
{
for
(
uint32_t
i
=
0
;
i
<
elem_count
;
i
++
)
{
MS_LOG
(
ERROR
)
<<
"rawDatas is nullptr"
;
return
RET_ERROR
;
}
weightPtr
->
quant_param
().
clear
();
vector
<
int8_t
>
qDatas
(
shapeSize
);
float
min
=
0
;
float
max
=
0
;
for
(
uint32_t
i
=
0
;
i
<
shapeSize
;
i
++
)
{
// find max min
// find max min
min
=
std
::
min
(
min
,
raw
D
atas
[
i
]);
min
=
std
::
min
(
min
,
raw
_d
atas
[
i
]);
max
=
std
::
max
(
max
,
raw
D
atas
[
i
]);
max
=
std
::
max
(
max
,
raw
_d
atas
[
i
]);
}
}
s
td
::
unique_ptr
<
AnfQuantParam
>
quantParam
=
std
::
unique_ptr
<
AnfQuantParam
>
(
new
AnfQuantParam
)
;
s
chema
::
QuantParamT
quant_param
;
STATUS
status
=
CalQuantizationParams
(
quantP
aram
,
min
,
max
,
false
,
quant_max
,
quant_min
,
bitNum
);
STATUS
status
=
CalQuantizationParams
(
&
quant_p
aram
,
min
,
max
,
false
,
quant_max
,
quant_min
,
bitNum
);
if
(
status
!=
RET_OK
)
{
if
(
status
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"CalQuantizationParams failed"
<<
status
;
MS_LOG
(
ERROR
)
<<
"CalQuantizationParams failed"
<<
status
;
return
status
;
return
status
;
}
}
quant_params
.
emplace_back
(
quant_param
);
// update data and datatype
// update data and datatype
for
(
uint32_t
i
=
0
;
i
<
shapeSize
;
i
++
)
{
for
(
uint32_t
i
=
0
;
i
<
elem_count
;
i
++
)
{
float
rawData
=
rawDatas
[
i
];
float
raw_data
=
raw_datas
[
i
];
auto
quant_data
=
std
::
round
(
rawData
/
quantParam
->
scale
+
quantParam
->
zeroPoint
);
auto
quant_data
=
QuantizeData
<
int8_t
>
(
raw_data
,
quant_param
,
quant_max
,
quant_min
);
if
(
quant_data
>
quant_max
)
{
quant_datas
[
i
]
=
quant_data
;
qDatas
[
i
]
=
quant_max
;
}
else
if
(
quant_data
<
quant_min
)
{
qDatas
[
i
]
=
quant_min
;
}
else
{
qDatas
[
i
]
=
static_cast
<
int8_t
>
(
quant_data
);
}
}
}
auto
ret
=
memcpy_s
(
raw_datas
,
weight
->
tensor_size
(),
quant_datas
.
data
(),
elem_count
*
sizeof
(
int8_t
));
weightPtr
->
set_quant_param
(
quantParam
);
auto
ret
=
memcpy_s
(
rawDatas
,
weightPtr
->
tensor_size
(),
qDatas
.
data
(),
shapeSize
*
sizeof
(
int8_t
));
if
(
ret
!=
EOK
)
{
if
(
ret
!=
EOK
)
{
MS_LOG
(
ERROR
)
<<
"memcpy error: "
<<
ret
;
MS_LOG
(
ERROR
)
<<
"memcpy error: "
<<
ret
;
return
RET_ERROR
;
return
RET_ERROR
;
}
}
if
(
quantType
==
QuantType_WeightQuant
)
{
if
(
quantType
==
QuantType_WeightQuant
)
{
PostBitPack
(
raw
Datas
,
shapeSize
,
bitNum
);
PostBitPack
(
raw
_datas
,
elem_count
,
bitNum
);
}
}
weight
->
set_tensor_size
(
elem_count
*
sizeof
(
int8_t
));
weightPtr
->
set_tensor_type
(
kNumberTypeInt8
);
weightPtr
->
set_tensor_size
(
shapeSize
*
sizeof
(
int8_t
));
}
}
if
(
quant_params
.
empty
())
{
MS_LOG
(
ERROR
)
<<
"quant_params empty"
;
return
RET_ERROR
;
}
primitiveT_value
->
AddInputQuantParam
(
quant_params
);
return
RET_OK
;
return
RET_OK
;
}
}
...
...
mindspore/lite/tools/converter/quantizer/quantize_util.h
浏览文件 @
3d6439b7
...
@@ -29,6 +29,7 @@
...
@@ -29,6 +29,7 @@
#include "ir/primitive.h"
#include "ir/primitive.h"
#include "abstract/dshape.h"
#include "abstract/dshape.h"
#include "mindspore/lite/tools/converter/quantizer/quantizer.h"
#include "mindspore/lite/tools/converter/quantizer/quantizer.h"
#include "mindspore/lite/src/ir/primitive_t_value.h"
namespace
mindspore
{
namespace
mindspore
{
namespace
lite
{
namespace
lite
{
...
@@ -58,7 +59,7 @@ class QuantStrategy {
...
@@ -58,7 +59,7 @@ class QuantStrategy {
static
const
std
::
array
<
std
::
string
,
4
>
mMulTypes
;
static
const
std
::
array
<
std
::
string
,
4
>
mMulTypes
;
};
};
STATUS
CalQuantizationParams
(
s
td
::
unique_ptr
<
AnfQuantParam
>
&
quantParam
,
double
mMin
,
double
mMax
,
STATUS
CalQuantizationParams
(
s
chema
::
QuantParamT
*
quantParam
,
double
mMin
,
double
mMax
,
bool
narrowRange
,
int
quant_max
,
int
quant_min
,
int
num_bits
);
bool
narrowRange
,
int
quant_max
,
int
quant_min
,
int
num_bits
);
STATUS
CalQuantizationParams
(
schema
::
QuantParamT
*
quantParam
,
double
mMin
,
double
mMax
,
STATUS
CalQuantizationParams
(
schema
::
QuantParamT
*
quantParam
,
double
mMin
,
double
mMax
,
...
@@ -97,12 +98,12 @@ T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
...
@@ -97,12 +98,12 @@ T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
}
}
template
<
typename
T
>
template
<
typename
T
>
T
QuantizeData
(
float
originData
,
const
AnfQuantParam
*
quantParam
,
int
quant_max
,
int
quant_min
)
{
T
QuantizeData
(
float
originData
,
const
schema
::
QuantParamT
&
quantParam
,
int
quant_max
,
int
quant_min
)
{
MS_ASSERT
(
quantParam
!=
nullptr
);
MS_ASSERT
(
quantParam
!=
nullptr
);
MS_ASSERT
(
quantParam
->
inited
);
MS_ASSERT
(
quantParam
->
inited
);
const
auto
scale
=
quantParam
->
scale
;
const
auto
scale
=
quantParam
.
scale
;
const
int
zeroPoint
=
quantParam
->
zeroPoint
;
const
int
zeroPoint
=
quantParam
.
zeroPoint
;
const
auto
narrowRange
=
quantParam
->
narrowRange
;
const
auto
narrowRange
=
quantParam
.
narrowRange
;
const
int
maxLimit
=
quant_max
;
const
int
maxLimit
=
quant_max
;
const
int
minLimit
=
quant_min
;
const
int
minLimit
=
quant_min
;
...
@@ -119,8 +120,9 @@ T QuantizeData(float originData, const AnfQuantParam *quantParam, int quant_max,
...
@@ -119,8 +120,9 @@ T QuantizeData(float originData, const AnfQuantParam *quantParam, int quant_max,
void
CalFakeNode
(
const
AnfNodePtr
&
inTensor
);
void
CalFakeNode
(
const
AnfNodePtr
&
inTensor
);
STATUS
QuantFilter
(
ParamValueLitePtr
&
weightPtr
,
QuantType
quantType
,
int
quant_max
,
int
quant_min
,
STATUS
QuantFilter
(
ParamValueLitePtr
weight
,
std
::
shared_ptr
<
PrimitiveTValue
>
primitiveT_value
,
QuantType
quantType
,
size_t
bitNum
=
UINT8_QUANTIZATION
,
bool
per_channel
=
false
);
int
quant_max
,
int
quant_min
,
size_t
bitNum
=
UINT8_QUANTIZATION
,
bool
per_channel
=
false
,
bool
depth_wise
=
false
);
STATUS
PostBitPack
(
float
*
weights
,
size_t
shapeSize
,
size_t
bitNum
=
UINT8_QUANTIZATION
);
STATUS
PostBitPack
(
float
*
weights
,
size_t
shapeSize
,
size_t
bitNum
=
UINT8_QUANTIZATION
);
}
// namespace quant
}
// namespace quant
...
...
mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
已删除
100644 → 0
浏览文件 @
82e8884e
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "tools/converter/quantizer/weight_quantizer.h"
#include <list>
#include <string>
#include "src/common/common.h"
#include "ir/dtype/type_id.h"
using
std
::
string
;
using
std
::
vector
;
namespace
mindspore
{
namespace
lite
{
namespace
quant
{
// Builds a weight-only quantizer from the converter's string-encoded flags.
// weightSize, convWeightChannelThreshold and bitNum arrive as decimal strings
// from the command-line parser and are converted to size_t here; the first two
// parameterize the QuantStrategy that decides which ops are quantizable.
WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const string &weightSize,
                                 const std::string &convWeightChannelThreshold, const std::string &bitNum)
    : Quantizer(graph) {
  const auto weight_size_threshold = static_cast<size_t>(std::stoull(weightSize));
  const auto channel_threshold = static_cast<size_t>(std::stoull(convWeightChannelThreshold));
  this->bitNum = static_cast<size_t>(std::stoull(bitNum));
  // TODO(...): update stractory
  mStrategy = std::make_unique<QuantStrategy>(weight_size_threshold, channel_threshold);
}
// uint32_t GetConvChannel(TensorDefT *weight) {
// uint32_t channel = 0;
// const vector<int> dims = weight->dims;
// switch (weight->format) {
// case Format_NCHW:
// case Format_KCHW:
// case Format_NC4HW4:
// channel = static_cast<uint32_t>(dims[NCHW_N]);
// break;
// case Format_NHWC:
// case Format_HWKC:
// channel = static_cast<uint32_t>(dims[NHWC_N]);
// break;
// case Format_HWCK:
// channel = static_cast<uint32_t>(dims[HWCK_K]);
// break;
// case Format_CKHW:
// channel = static_cast<uint32_t>(dims[CKHW_K]);
// break;
// default:
// MS_LOGE("Unsupported format: %d", weight->format);
// return 0;
// }
// return channel;
// }
// Quantizes the weight tensor of every conv node in `nodes` that the strategy
// accepts. Returns RET_ERROR as soon as an accepted node's weight input is not
// a Parameter with a default value; otherwise propagates QuantFilter's status.
STATUS WeightQuantizer::DoConvQuantize(const std::list<CNodePtr> &nodes) {
  for (auto &cnode : nodes) {
    if (!mStrategy->CanConvOpQuantized(cnode)) {
      continue;
    }
    // The conv weight is the node's second input.
    auto weight_input = cnode->input(2);
    if (!weight_input->isa<Parameter>()) {
      return RET_ERROR;
    }
    auto param_node = weight_input->cast<ParameterPtr>();
    if (!param_node->has_default()) {
      return RET_ERROR;
    }
    auto param_value = std::static_pointer_cast<ParamValueLite>(param_node->default_param());
    // 127 / -128: int8 quantization range.
    auto status = QuantFilter(param_value, QuantType_WeightQuant, 127, -128, bitNum);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "QuantFilter failed : " << status;
      return status;
    }
  }
  return RET_OK;
}
STATUS
WeightQuantizer
::
DoMulQuantize
(
const
std
::
list
<
CNodePtr
>
&
nodes
)
{
for
(
auto
&
node
:
nodes
)
{
if
(
!
mStrategy
->
CanMulOpQuantized
(
node
))
{
continue
;
}
ParamValueLitePtr
paramValue
=
nullptr
;
for
(
size_t
i
=
1
;
i
<
node
->
size
();
i
++
)
{
auto
inputNode
=
node
->
input
(
i
);
if
(
inputNode
->
isa
<
Parameter
>
()
==
true
)
{
auto
paramNode
=
inputNode
->
cast
<
ParameterPtr
>
();
if
((
paramNode
!=
nullptr
)
&&
(
paramNode
->
has_default
()
==
true
))
{
paramValue
=
std
::
static_pointer_cast
<
ParamValueLite
>
(
paramNode
->
default_param
());
if
((
paramValue
==
nullptr
)
||
(
paramValue
->
tensor_size
()
==
0
)
||
(
paramValue
->
tensor_shape
().
size
()
!=
4
)
||
(
paramValue
->
tensor_addr
()
==
nullptr
)
||
(
paramValue
->
tensor_type
()
!=
mindspore
::
kNumberTypeFloat32
))
{
paramValue
=
nullptr
;
continue
;
}
else
{
break
;
}
}
}
}
if
(
paramValue
==
nullptr
)
{
MS_LOG
(
ERROR
)
<<
"No valid input param node !"
;
continue
;
}
auto
status
=
QuantFilter
(
paramValue
,
QuantType_WeightQuant
,
127
,
-
128
,
bitNum
);
if
(
status
!=
RET_OK
)
{
MS_LOG
(
ERROR
)
<<
"QunatFilter failed"
<<
status
;
return
RET_ERROR
;
}
}
return
RET_OK
;
}
// Entry point: runs conv-weight quantization first, then mul-input
// quantization, over the graph's ordered cnodes. Stops and returns the first
// non-RET_OK status; RET_OK when both passes succeed.
STATUS WeightQuantizer::DoQuantize(FuncGraphPtr funcGraph) {
  const auto cnodes = funcGraph->GetOrderedCnodes();
  auto status = DoConvQuantize(cnodes);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "DoConvQuantize failed :" << status;
    return status;
  }
  status = DoMulQuantize(cnodes);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "DoMulQuantize failed :" << status;
    return status;
  }
  return RET_OK;
}
}
// namespace quant
}
// namespace lite
}
// namespace mindspore
mindspore/lite/tools/converter/quantizer/weight_quantizer.h
已删除
100644 → 0
浏览文件 @
82e8884e
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef WEIGHT_QUANTIZER_H
#define WEIGHT_QUANTIZER_H
#include <memory>
#include <list>
#include <string>
#include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "ir/func_graph.h"
#include "ir/anf.h"
#include "include/model.h"
#include "base/base.h"
#include "abstract/dshape.h"
namespace
mindspore
{
namespace
lite
{
namespace
quant
{
// Weight-only post-training quantizer: walks the func graph and quantizes
// eligible conv weights and mul constant inputs via QuantFilter (int8 range
// 127/-128 in the .cc implementation).
class WeightQuantizer : public Quantizer {
 public:
  // All numeric arguments arrive as decimal strings (converter flags) and are
  // parsed with std::stoull in the .cc.
  //   weightSize: threshold handed to QuantStrategy — presumably a minimum
  //     weight tensor size; verify against QuantStrategy's definition.
  //   covWeightChannelThreshold: channel threshold for conv weights.
  //     NOTE(review): spelled convWeightChannelThreshold in the .cc
  //     definition — harmless (declaration-only name) but worth unifying.
  //   bitNum: quantization bit width, stored in bitNum below.
  WeightQuantizer(FuncGraphPtr graph, const std::string &weightSize,
                  const std::string &covWeightChannelThreshold, const std::string &bitNum);

  ~WeightQuantizer() = default;

  // Runs DoConvQuantize then DoMulQuantize; returns the first failure status.
  STATUS DoQuantize(FuncGraphPtr funcGraph) override;
  // Quantizes the second input (weight) of strategy-approved conv cnodes.
  STATUS DoConvQuantize(const std::list<CNodePtr> &nodes);
  // Quantizes the first eligible 4-D float32 param input of approved mul cnodes.
  STATUS DoMulQuantize(const std::list<CNodePtr> &nodes);

 private:
  std::unique_ptr<QuantStrategy> mStrategy;  // decides which ops get quantized
  size_t bitNum;                             // quantization bit width
};
}
// namespace quant
}
// namespace lite
}
// namespace mindspore
#endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录