Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
698c7e76
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
698c7e76
编写于
7月 03, 2020
作者:
H
hong19860320
提交者:
GitHub
7月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[APU] Adapting to the changing of the quantization parameters (#3863)
上级
cb138726
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
105 additions
and
202 deletions
+105
-202
lite/core/mir/quantized_op_attributes_inference_pass.cc
lite/core/mir/quantized_op_attributes_inference_pass.cc
+14
-15
lite/core/mir/subgraph/subgraph_detector.cc
lite/core/mir/subgraph/subgraph_detector.cc
+23
-33
lite/core/mir/type_precision_cast_pass.cc
lite/core/mir/type_precision_cast_pass.cc
+12
-47
lite/kernels/apu/bridges/conv_op.cc
lite/kernels/apu/bridges/conv_op.cc
+21
-38
lite/kernels/apu/bridges/fc_op.cc
lite/kernels/apu/bridges/fc_op.cc
+10
-20
lite/kernels/apu/bridges/pool_op.cc
lite/kernels/apu/bridges/pool_op.cc
+7
-18
lite/kernels/apu/bridges/softmax_op.cc
lite/kernels/apu/bridges/softmax_op.cc
+9
-20
lite/kernels/apu/subgraph_compute.cc
lite/kernels/apu/subgraph_compute.cc
+9
-11
未找到文件。
lite/core/mir/quantized_op_attributes_inference_pass.cc
浏览文件 @
698c7e76
...
...
@@ -38,12 +38,12 @@ void QuantizedOpAttributesInferencePass::Apply(
auto
op_info
=
inst
.
op_info
();
auto
op_type
=
op_info
->
Type
();
// Check
only if all
of the inputs of the op have scale value
bool
has_input_scale
=
tru
e
;
// Check
if any
of the inputs of the op have scale value
bool
has_input_scale
=
fals
e
;
for
(
auto
in_var_node
:
op_node
->
inlinks
)
{
CHECK
(
in_var_node
->
IsArg
());
auto
in_var_node_name
=
in_var_node
->
arg
()
->
name
;
has_input_scale
&
=
op_info
->
HasInputScale
(
in_var_node_name
);
has_input_scale
|
=
op_info
->
HasInputScale
(
in_var_node_name
);
}
if
(
!
has_input_scale
)
continue
;
...
...
@@ -52,32 +52,31 @@ void QuantizedOpAttributesInferencePass::Apply(
bool
is_quantized
=
true
;
for
(
auto
out_var_node
:
op_node
->
outlinks
)
{
CHECK
(
out_var_node
->
IsArg
());
bool
found
=
fals
e
;
float
output_scal
e
;
std
::
vector
<
float
>
output_scal
e
;
bool
has_output_scale
=
fals
e
;
auto
out_var_node_name
=
out_var_node
->
arg
()
->
name
;
for
(
auto
out_op_node
:
out_var_node
->
outlinks
)
{
CHECK
(
out_op_node
->
IsStmt
());
auto
&
out_inst
=
out_op_node
->
AsStmt
();
auto
out_op_info
=
out_inst
.
op_info
();
if
(
!
out_op_info
->
HasInputScale
(
out_var_node_name
))
continue
;
auto
input_scale
=
out_op_info
->
GetInputScale
(
out_var_node_name
)[
0
];
if
(
!
found
)
{
found
=
true
;
auto
input_scale
=
out_op_info
->
GetInputScale
(
out_var_node_name
);
if
(
!
has_output_scale
)
{
output_scale
=
input_scale
;
has_output_scale
=
true
;
}
else
{
CHECK_EQ
(
output_scale
,
input_scale
);
CHECK_EQ
(
output_scale
.
size
(),
input_scale
.
size
()
);
}
}
if
(
found
)
{
inst
.
mutable_op_info
()
->
SetOutputScale
(
out_var_node_name
,
{
output_scale
});
if
(
has_output_scale
)
{
inst
.
mutable_op_info
()
->
SetOutputScale
(
out_var_node_name
,
output_scale
);
}
else
if
(
op_info
->
HasAttr
(
"out_threshold"
))
{
// Only consider one output, there are only one out_threshold
int
bit_length
=
op_info
->
GetAttr
<
int
>
(
"bit_length"
);
int
range
=
(
1
<<
(
bit_length
-
1
))
-
1
;
output_scale
=
op_info
->
GetAttr
<
float
>
(
"out_threshold"
);
inst
.
mutable_op_info
()
->
SetOutputScale
(
out_var_node_name
,
{
output_scale
/
range
}
);
output_scale
=
std
::
vector
<
float
>
{
op_info
->
GetAttr
<
float
>
(
"out_threshold"
)
/
range
};
inst
.
mutable_op_info
()
->
SetOutputScale
(
out_var_node_name
,
output_scale
);
}
else
{
is_quantized
=
false
;
}
...
...
lite/core/mir/subgraph/subgraph_detector.cc
浏览文件 @
698c7e76
...
...
@@ -452,39 +452,6 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"output_data_names"
,
output_var_names
);
// Set input/output scale values of input/output var nodes for
// type_precision_cast_pass.
std
::
vector
<
float
>
input_data_scales
;
std
::
vector
<
float
>
output_data_scales
;
for
(
auto
&
var_node
:
input_var_nodes
)
{
auto
var_node_name
=
var_node
->
arg
()
->
name
;
auto
any_op_node
=
var_node
->
outlinks
.
front
();
CHECK
(
any_op_node
->
IsStmt
());
auto
&
any_inst
=
any_op_node
->
AsStmt
();
if
(
any_inst
.
op_info
()
->
HasInputScale
(
var_node_name
))
{
input_data_scales
.
push_back
(
any_inst
.
op_info
()
->
GetInputScale
(
var_node_name
)[
0
]);
}
}
for
(
auto
&
var_node
:
output_var_nodes
)
{
auto
var_node_name
=
var_node
->
arg
()
->
name
;
auto
any_op_node
=
var_node
->
inlinks
.
front
();
CHECK
(
any_op_node
->
IsStmt
());
auto
&
any_inst
=
any_op_node
->
AsStmt
();
if
(
any_inst
.
op_info
()
->
HasOutputScale
(
var_node_name
))
{
output_data_scales
.
push_back
(
any_inst
.
op_info
()
->
GetOutputScale
(
var_node_name
)[
0
]);
}
}
if
(
input_data_scales
.
size
()
>
0
)
{
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
float
>>
(
"input_data_scales"
,
input_data_scales
);
}
if
(
output_data_scales
.
size
()
>
0
)
{
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
float
>>
(
"output_data_scales"
,
output_data_scales
);
}
// Set all of the inputs and outputs to the target subgraph op
// To prevent vars are removed in RuntimeProgram::UpdateVarsOfProgram()
for
(
auto
&
var_node
:
weight_var_nodes
)
{
...
...
@@ -504,6 +471,29 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
auto
any_op
=
(
*
subgraph_nodes
.
begin
())
->
AsStmt
().
op
();
subgraph_op
->
Attach
(
subgraph_op_desc
,
any_op
->
scope
());
// Export the scale values of the input/output var nodes of the inner op nodes
// only for type_precision_cast_pass.
for
(
auto
&
var_node
:
input_var_nodes
)
{
auto
var_node_name
=
var_node
->
arg
()
->
name
;
auto
any_op_node
=
var_node
->
outlinks
.
front
();
CHECK
(
any_op_node
->
IsStmt
());
auto
&
any_inst
=
any_op_node
->
AsStmt
();
if
(
any_inst
.
op_info
()
->
HasInputScale
(
var_node_name
))
{
subgraph_op
->
mutable_op_info
()
->
SetInputScale
(
var_node_name
,
any_inst
.
op_info
()
->
GetInputScale
(
var_node_name
));
}
}
for
(
auto
&
var_node
:
output_var_nodes
)
{
auto
var_node_name
=
var_node
->
arg
()
->
name
;
auto
any_op_node
=
var_node
->
inlinks
.
front
();
CHECK
(
any_op_node
->
IsStmt
());
auto
&
any_inst
=
any_op_node
->
AsStmt
();
if
(
any_inst
.
op_info
()
->
HasOutputScale
(
var_node_name
))
{
subgraph_op
->
mutable_op_info
()
->
SetOutputScale
(
var_node_name
,
any_inst
.
op_info
()
->
GetOutputScale
(
var_node_name
));
}
}
// Create and add a new subgraph node into the graph
auto
subgraph_op_node
=
graph
->
GraphCreateInstructNode
(
subgraph_op
,
any_op
->
valid_places
());
...
...
lite/core/mir/type_precision_cast_pass.cc
浏览文件 @
698c7e76
...
...
@@ -66,65 +66,30 @@ void UpdateInputs(OpLite* op, const std::string& from, const std::string& to) {
}
}
// Infer the scale value for the new calib op from the subgraph op
static
bool
InferScaleFromSubgraph
(
std
::
string
var_name
,
const
OpInfo
*
op_info
,
float
*
scale
,
bool
reverse
=
false
)
{
std
::
string
attr_name
=
reverse
?
"output_data_names"
:
"input_data_names"
;
if
(
!
op_info
->
HasAttr
(
attr_name
))
return
false
;
auto
input_or_output_names
=
op_info
->
GetAttr
<
std
::
vector
<
std
::
string
>>
(
attr_name
);
attr_name
=
reverse
?
"output_data_scales"
:
"input_data_scales"
;
if
(
!
op_info
->
HasAttr
(
attr_name
))
return
false
;
auto
input_or_output_scales
=
op_info
->
GetAttr
<
std
::
vector
<
float
>>
(
attr_name
);
auto
size
=
input_or_output_names
.
size
();
CHECK
(
size
==
input_or_output_scales
.
size
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
if
(
input_or_output_names
[
i
]
==
var_name
)
{
*
scale
=
input_or_output_scales
[
i
];
return
true
;
}
}
return
false
;
}
// Infer the scale value for the new calib op from the input_scale of the
// current op and output_scale of the previous op.
// case 1: prev_op->var_node->op_node(int8->any op, with input_scale).
// case 2: prev_op->var_node->op_node(subgraph op, int8->any, with
// input_data_scales).
// case 3: prev_op(any->int8, with output_scale)->var_node->op_node(fp32->any,
// case 2: prev_op(any->int8, with output_scale)->var_node->op_node(fp32->any,
// without input_scale).
// case 4: prev_op(any->int8, subgraph_op, with
// output_data_scales)->var_node->op_node(fp32->any, without input_scale).
static
bool
InferScale
(
Node
*
var_node
,
Node
*
op_node
,
float
*
scale
)
{
bool
found
=
false
;
auto
&
inst
=
op_node
->
AsStmt
();
auto
op_info
=
inst
.
op_info
();
auto
op_type
=
op_info
->
Type
();
auto
var_name
=
var_node
->
AsArg
().
name
;
if
(
op_type
==
"subgraph"
)
{
found
=
InferScaleFromSubgraph
(
var_name
,
op_info
,
scale
,
false
);
if
(
op_info
->
HasInputScale
(
var_name
))
{
*
scale
=
op_info
->
GetInputScale
(
var_name
)[
0
];
found
=
true
;
}
else
{
if
(
op_info
->
HasInputScale
(
var_name
))
{
*
scale
=
op_info
->
GetInputScale
(
var_name
)[
0
];
// Obtain the output_scale from one of its previous Ops
auto
prev_op_node
=
var_node
->
inlinks
.
front
();
CHECK
(
prev_op_node
->
IsStmt
());
auto
&
prev_inst
=
prev_op_node
->
AsStmt
();
auto
prev_op_info
=
prev_inst
.
op_info
();
auto
prev_op_type
=
prev_op_info
->
Type
();
if
(
prev_op_info
->
HasOutputScale
(
var_name
))
{
*
scale
=
prev_op_info
->
GetOutputScale
(
var_name
)[
0
];
found
=
true
;
}
else
{
// Obtain the output_scale from one of its previous Ops
auto
prev_op_node
=
var_node
->
inlinks
.
front
();
CHECK
(
prev_op_node
->
IsStmt
());
auto
&
prev_inst
=
prev_op_node
->
AsStmt
();
auto
prev_op_info
=
prev_inst
.
op_info
();
auto
prev_op_type
=
prev_op_info
->
Type
();
if
(
prev_op_type
==
"subgraph"
)
{
found
=
InferScaleFromSubgraph
(
var_name
,
prev_op_info
,
scale
,
true
);
}
else
{
if
(
prev_op_info
->
HasOutputScale
(
var_name
))
{
*
scale
=
prev_op_info
->
GetOutputScale
(
var_name
)[
0
];
found
=
true
;
}
}
}
}
return
found
;
...
...
lite/kernels/apu/bridges/conv_op.cc
浏览文件 @
698c7e76
...
...
@@ -35,6 +35,9 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int
neuron_errCode
;
VLOG
(
3
)
<<
"[APU] Converting ["
<<
op_type
<<
"]"
;
CHECK
(
op_info
->
HasAttr
(
"enable_int8"
)
&&
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
));
// Get input and output vars and op attributes
auto
input_name
=
op_info
->
Input
(
"Input"
).
front
();
auto
input
=
scope
->
FindMutableTensor
(
input_name
);
...
...
@@ -94,34 +97,18 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
input_dims
,
filter_dims
);
float
input_scale
;
float
output_scale
;
std
::
vector
<
float
>
weight_scale
;
if
(
op_info
->
HasAttr
(
"enable_int8"
))
{
if
(
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
))
{
auto
input_name
=
op_info
->
Input
(
"Input"
).
front
();
auto
filter_name
=
op_info
->
Input
(
"Filter"
).
front
();
auto
output_name
=
op_info
->
Output
(
"Output"
).
front
();
if
(
op_info
->
HasInputScale
(
input_name
))
input_scale
=
op_info
->
GetInputScale
(
input_name
)[
0
];
if
(
op_info
->
HasInputScale
(
filter_name
))
weight_scale
=
op_info
->
GetInputScale
(
filter_name
);
if
(
op_info
->
HasOutputScale
(
output_name
))
{
output_scale
=
op_info
->
GetOutputScale
(
output_name
)[
0
];
}
VLOG
(
3
)
<<
"has output scale:"
<<
output_scale
;
}
else
{
return
FAILED
;
}
}
else
{
return
FAILED
;
}
CHECK
(
op_info
->
HasInputScale
(
input_name
));
auto
input_scale
=
op_info
->
GetInputScale
(
input_name
)[
0
];
CHECK
(
op_info
->
HasInputScale
(
filter_name
));
auto
filter_scale
=
op_info
->
GetInputScale
(
filter_name
);
CHECK
(
op_info
->
HasOutputScale
(
output_name
));
auto
output_scale
=
op_info
->
GetOutputScale
(
output_name
)[
0
];
VLOG
(
3
)
<<
"strides.size(): "
<<
strides
.
size
()
<<
" ,groups: "
<<
groups
<<
" ,dilations: "
<<
dilations
[
0
]
<<
":"
<<
dilations
[
1
];
VLOG
(
3
)
<<
"with_act: "
<<
with_act
<<
" ,act_type:"
<<
act_type
;
VLOG
(
3
)
<<
"input_dims: "
<<
input_dims
<<
" ,output_dims: "
<<
output_dims
<<
" ,
weight_scale size: "
<<
weight
_scale
.
size
();
<<
" ,
filter_scale size: "
<<
filter
_scale
.
size
();
VLOG
(
3
)
<<
"filter_dims: "
<<
filter_dims
<<
" ,memory_size: "
<<
filter
->
memory_size
()
<<
" ,data_size: "
<<
filter
->
data_size
();
...
...
@@ -220,10 +207,10 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronOperandType
filterType
;
NeuronOperandType
channelFilterType
;
NeuronSymmPerChannelQuantParams
symmPerChannelQuantParams
;
if
(
1
==
weight
_scale
.
size
())
{
if
(
1
==
filter
_scale
.
size
())
{
// Per layer type
filterType
.
type
=
NEURON_TENSOR_QUANT8_ASYMM
;
filterType
.
scale
=
weight
_scale
[
0
];
filterType
.
scale
=
filter
_scale
[
0
];
filterType
.
zeroPoint
=
128
;
filterType
.
dimensionCount
=
filter_dims
.
size
();
filterType
.
dimensions
=
&
dims_filter
[
0
];
...
...
@@ -241,17 +228,17 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
symmPerChannelQuantParams
.
channelDim
=
3
;
else
symmPerChannelQuantParams
.
channelDim
=
0
;
symmPerChannelQuantParams
.
scaleCount
=
weight
_scale
.
size
();
symmPerChannelQuantParams
.
scales
=
weight
_scale
.
data
();
symmPerChannelQuantParams
.
scaleCount
=
filter
_scale
.
size
();
symmPerChannelQuantParams
.
scales
=
filter
_scale
.
data
();
biasType
.
scale
=
0
;
}
std
::
shared_ptr
<
Node
>
filter_node
=
nullptr
;
if
(
1
==
weight
_scale
.
size
())
{
if
(
1
==
filter
_scale
.
size
())
{
NeuronModel_addOperand
(
model
,
&
filterType
);
// 1: filter
filter_node
=
graph
->
Add
(
filter_name
,
dims_filter
);
VLOG
(
3
)
<<
"filter node idx: "
<<
filter_node
->
index
()
<<
"
w
_scale[0]"
<<
weight
_scale
[
0
]
<<
": filterType: "
<<
filterType
.
dimensions
[
0
]
VLOG
(
3
)
<<
"filter node idx: "
<<
filter_node
->
index
()
<<
"
filter
_scale[0]"
<<
filter
_scale
[
0
]
<<
": filterType: "
<<
filterType
.
dimensions
[
0
]
<<
":"
<<
filterType
.
dimensions
[
1
]
<<
":"
<<
filterType
.
dimensions
[
2
]
<<
":"
<<
filterType
.
dimensions
[
3
];
memcpy
(
filter
->
mutable_data
<
int8_t
>
(),
...
...
@@ -267,8 +254,8 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
NeuronModel_addOperand
(
model
,
&
channelFilterType
);
// 1: filter
filter_node
=
graph
->
Add
(
filter_name
,
dims_filter
);
VLOG
(
3
)
<<
"chennel filter node idx: "
<<
filter_node
->
index
()
<<
" ,scale_count:"
<<
weight
_scale
.
size
()
<<
"
weight_scale[0]:"
<<
weight
_scale
.
data
()[
0
]
<<
" ,scale_count:"
<<
filter
_scale
.
size
()
<<
"
filter_scale[0]:"
<<
filter
_scale
.
data
()[
0
]
<<
" ,channelFilterType: "
<<
channelFilterType
.
dimensions
[
0
]
<<
":"
<<
channelFilterType
.
dimensions
[
1
]
<<
":"
<<
channelFilterType
.
dimensions
[
2
]
<<
":"
...
...
@@ -302,7 +289,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
std
::
shared_ptr
<
Node
>
bias_node
=
nullptr
;
if
(
HasInputArg
(
op_info
,
scope
,
"Bias"
))
{
auto
bias_name
=
op_info
->
Input
(
"Bias"
).
front
();
auto
bias_type
=
kernel
->
GetInputDeclType
(
"Bias"
);
auto
bias
=
scope
->
FindMutableTensor
(
bias_name
);
auto
bias_dims
=
bias
->
dims
();
...
...
@@ -368,10 +354,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add output tensor type
NeuronOperandType
outType
;
outType
.
type
=
NEURON_TENSOR_QUANT8_ASYMM
;
if
(
graph
->
IsOutput
(
output_name
))
outType
.
scale
=
output_scale
/
127
;
else
outType
.
scale
=
output_scale
;
outType
.
scale
=
output_scale
;
outType
.
zeroPoint
=
128
;
outType
.
dimensionCount
=
output_dims
.
size
();
std
::
vector
<
uint32_t
>
dims_out
=
{(
uint32_t
)
output_dims
[
0
],
...
...
@@ -405,7 +388,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int32_t
*
int32_bias_data
=
reinterpret_cast
<
int32_t
*>
(
bias
->
mutable_data
<
float
>
());
float2int32
(
bias
->
data
<
float
>
(),
input_scale
,
weight
_scale
,
int32_bias_data
);
bias
->
data
<
float
>
(),
input_scale
,
filter
_scale
,
int32_bias_data
);
VLOG
(
3
)
<<
"int32_bias_data: "
<<
int32_bias_data
[
0
]
<<
" : "
<<
int32_bias_data
[
1
]
<<
" : "
<<
int32_bias_data
[
2
]
<<
" : "
...
...
lite/kernels/apu/bridges/fc_op.cc
浏览文件 @
698c7e76
...
...
@@ -31,6 +31,10 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
scope
=
op
->
scope
();
VLOG
(
3
)
<<
"[APU] Converting ["
+
op_type
+
"]"
;
CHECK
(
op_info
->
HasAttr
(
"enable_int8"
)
&&
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
));
// Get input and output vars and op attributes
auto
input_name
=
op_info
->
Input
(
"Input"
).
front
();
auto
input
=
scope
->
FindMutableTensor
(
input_name
);
auto
input_dims
=
input
->
dims
();
...
...
@@ -52,26 +56,12 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
<<
" out_dims: "
<<
out_dims
<<
" m: "
<<
m
<<
" k: "
<<
k
<<
" n: "
<<
n
;
float
input_scale
=
1.0
f
;
float
out_scale
=
1.0
f
;
std
::
vector
<
float
>
w_scale
;
if
(
op_info
->
HasAttr
(
"enable_int8"
))
{
if
(
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
))
{
auto
input_name
=
op_info
->
Input
(
"Input"
).
front
();
auto
weight_name
=
op_info
->
Input
(
"W"
).
front
();
auto
out_name
=
op_info
->
Output
(
"Out"
).
front
();
if
(
op_info
->
HasInputScale
(
input_name
))
input_scale
=
op_info
->
GetInputScale
(
input_name
)[
0
];
if
(
op_info
->
HasInputScale
(
weight_name
))
w_scale
=
op_info
->
GetInputScale
(
weight_name
);
if
(
op_info
->
HasOutputScale
(
out_name
))
out_scale
=
op_info
->
GetOutputScale
(
out_name
)[
0
];
}
else
{
return
FAILED
;
}
}
else
{
return
FAILED
;
}
CHECK
(
op_info
->
HasInputScale
(
input_name
));
auto
input_scale
=
op_info
->
GetInputScale
(
input_name
)[
0
];
CHECK
(
op_info
->
HasInputScale
(
w_name
));
auto
w_scale
=
op_info
->
GetInputScale
(
w_name
);
CHECK
(
op_info
->
HasOutputScale
(
out_name
));
auto
out_scale
=
op_info
->
GetOutputScale
(
out_name
)[
0
];
// Add input tensor type
NeuronOperandType
inType
;
...
...
lite/kernels/apu/bridges/pool_op.cc
浏览文件 @
698c7e76
...
...
@@ -32,6 +32,9 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
scope
=
op
->
scope
();
VLOG
(
3
)
<<
"[APU] Converting ["
+
op_type
+
"] "
;
CHECK
(
op_info
->
HasAttr
(
"enable_int8"
)
&&
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
));
// Get input and output vars and op attributes
auto
x_name
=
op_info
->
Input
(
"X"
).
front
();
auto
x
=
scope
->
FindMutableTensor
(
x_name
);
...
...
@@ -87,24 +90,10 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
ksize
);
// Add x tensor type
float
x_scale
=
1.0
f
;
float
out_scale
=
1.0
f
;
if
(
op_info
->
HasAttr
(
"enable_int8"
))
{
if
(
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
))
{
auto
x_name
=
op_info
->
Input
(
"X"
).
front
();
auto
out_name
=
op_info
->
Output
(
"Out"
).
front
();
if
(
op_info
->
HasInputScale
(
x_name
))
x_scale
=
op_info
->
GetInputScale
(
x_name
)[
0
];
if
(
op_info
->
HasOutputScale
(
out_name
))
out_scale
=
op_info
->
GetOutputScale
(
out_name
)[
0
];
}
else
{
LOG
(
WARNING
)
<<
"Do not enable_int8"
;
return
FAILED
;
}
}
else
{
LOG
(
WARNING
)
<<
"Do not enable_int8"
;
return
FAILED
;
}
CHECK
(
op_info
->
HasInputScale
(
x_name
));
auto
x_scale
=
op_info
->
GetInputScale
(
x_name
)[
0
];
CHECK
(
op_info
->
HasOutputScale
(
out_name
));
auto
out_scale
=
op_info
->
GetOutputScale
(
out_name
)[
0
];
NeuronOperandType
xType
;
xType
.
type
=
NEURON_TENSOR_QUANT8_ASYMM
;
...
...
lite/kernels/apu/bridges/softmax_op.cc
浏览文件 @
698c7e76
...
...
@@ -31,6 +31,9 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
scope
=
op
->
scope
();
VLOG
(
3
)
<<
"[APU] Converting ["
+
op_type
+
"]"
;
CHECK
(
op_info
->
HasAttr
(
"enable_int8"
)
&&
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
));
// Get input and output vars and op attributes
auto
x_name
=
op_info
->
Input
(
"X"
).
front
();
auto
x
=
scope
->
FindMutableTensor
(
x_name
);
...
...
@@ -45,24 +48,10 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
axis
+=
x_rank
;
}
float
input_scale
=
1.0
f
;
float
out_scale
=
1.0
f
;
if
(
op_info
->
HasAttr
(
"enable_int8"
))
{
if
(
op_info
->
GetAttr
<
bool
>
(
"enable_int8"
))
{
auto
x_name
=
op_info
->
Input
(
"X"
).
front
();
auto
out_name
=
op_info
->
Output
(
"Out"
).
front
();
if
(
op_info
->
HasInputScale
(
x_name
))
input_scale
=
op_info
->
GetInputScale
(
x_name
)[
0
];
if
(
op_info
->
HasOutputScale
(
out_name
))
out_scale
=
op_info
->
GetOutputScale
(
out_name
)[
0
];
}
else
{
LOG
(
WARNING
)
<<
"Do not enable_int8"
;
return
FAILED
;
}
}
else
{
LOG
(
WARNING
)
<<
"Do not enable_int8"
;
return
FAILED
;
}
CHECK
(
op_info
->
HasInputScale
(
x_name
));
auto
input_scale
=
op_info
->
GetInputScale
(
x_name
)[
0
];
CHECK
(
op_info
->
HasOutputScale
(
out_name
));
auto
out_scale
=
op_info
->
GetOutputScale
(
out_name
)[
0
];
// Check output scale
NeuronOperandType
xType
;
...
...
@@ -106,14 +95,14 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
// Add out operand
NeuronOperandType
outType
;
outType
.
type
=
NEURON_TENSOR_QUANT8_ASYMM
;
outType
.
scale
=
out_scale
/
127
;
outType
.
scale
=
out_scale
;
outType
.
zeroPoint
=
128
;
outType
.
dimensionCount
=
x_dims
.
size
();
outType
.
dimensions
=
&
dims_x
[
0
];
NeuronModel_addOperand
(
model
,
&
outType
);
// 3: output
std
::
shared_ptr
<
Node
>
out_node
=
nullptr
;
out_node
=
graph
->
Add
(
out_name
,
dims_x
);
VLOG
(
3
)
<<
"out
put
_scale: "
<<
out_scale
;
VLOG
(
3
)
<<
"out_scale: "
<<
out_scale
;
float
beta_val
[]
=
{
1.0
f
};
NeuronModel_setOperandValue
(
...
...
lite/kernels/apu/subgraph_compute.cc
浏览文件 @
698c7e76
...
...
@@ -153,18 +153,15 @@ int SubgraphEngine::LaunchDeviceProgram() {
}
// Set input buffer
Tensor
input_temp
;
for
(
size_t
i
=
0
;
i
<
origin_itensors_
.
size
();
i
++
)
{
input_temp
.
Resize
({
origin_idims_
[
i
]});
uint8_t
*
input_data
=
input_temp
.
mutable_data
<
uint8_t
>
();
memcpy
(
input_data
,
origin_itensors_
[
i
]
->
raw_data
(),
origin_itensors_
[
i
]
->
memory_size
());
auto
origin_data
=
origin_itensors_
[
i
]
->
mutable_data
<
int8_t
>
();
auto
converted_data
=
reinterpret_cast
<
uint8_t
*>
(
origin_data
);
for
(
int
j
=
0
;
j
<
origin_itensors_
[
i
]
->
data_size
();
j
++
)
{
input_data
[
j
]
+=
(
uint8_t
)
128
;
converted_data
[
j
]
=
static_cast
<
uint8_t
>
(
static_cast
<
int16_t
>
(
origin_data
[
j
])
+
128
);
}
NeuronExecution_setInput
(
run
,
i
,
NULL
,
input
_data
,
origin_itensors_
[
i
]
->
memory_size
());
run
,
i
,
NULL
,
converted
_data
,
origin_itensors_
[
i
]
->
memory_size
());
}
// Set output buffer
...
...
@@ -184,10 +181,11 @@ int SubgraphEngine::LaunchDeviceProgram() {
}
for
(
size_t
i
=
0
;
i
<
origin_otensors_
.
size
();
i
++
)
{
int8_t
*
output
_data
=
origin_otensors_
[
i
]
->
mutable_data
<
int8_t
>
();
VLOG
(
3
)
<<
"output size:"
<<
origin_otensors_
[
i
]
->
memory_size
(
);
auto
converted
_data
=
origin_otensors_
[
i
]
->
mutable_data
<
int8_t
>
();
auto
origin_data
=
reinterpret_cast
<
uint8_t
*>
(
converted_data
);
for
(
int
j
=
0
;
j
<
origin_otensors_
[
i
]
->
data_size
();
j
++
)
{
output_data
[
j
]
-=
(
int8_t
)
128
;
converted_data
[
j
]
=
static_cast
<
int8_t
>
(
static_cast
<
int16_t
>
(
origin_data
[
j
])
-
128
);
}
}
NeuronExecution_free
(
run
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录