PaddlePaddle / Paddle-Lite
Commit 9e361a4d (unverified)
Authored Jun 03, 2020 by yiicy; committed by GitHub on Jun 03, 2020
[ARM] int8 direct_conv, dw_conv add relu6 and leaky relu fusion, test=develop (#3737)
int8 direct_conv, dw_conv add relu6 and leaky relu fusion
Parent: cba42f0d
Showing 13 changed files with 636 additions and 547 deletions.
lite/backends/arm/math/conv3x3s1_depthwise_int8.cc   +8    -4
lite/backends/arm/math/conv3x3s1_direct_int8.cc      +25   -2
lite/backends/arm/math/conv3x3s2_depthwise_int8.cc   +8    -4
lite/backends/arm/math/conv3x3s2_direct_int8.cc      +50   -4
lite/backends/arm/math/conv5x5s1_depthwise_int8.cc   +8    -4
lite/backends/arm/math/conv5x5s2_depthwise_int8.cc   +8    -4
lite/backends/arm/math/conv_block_utils.h            +361  -476
lite/backends/arm/math/conv_depthwise.h              +8    -4
lite/backends/arm/math/conv_impl.cc                  +108  -12
lite/backends/arm/math/gemm_prepacked_int8.cc        +12   -12
lite/kernels/arm/conv_depthwise.cc                   +6    -0
lite/kernels/arm/conv_direct.h                       +19   -13
lite/tests/math/conv_int8_compute_test.cc            +15   -8
lite/backends/arm/math/conv3x3s1_depthwise_int8.cc

@@ -36,7 +36,8 @@ void conv_depthwise_3x3s1_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -434,7 +435,8 @@ void conv_depthwise_3x3s1_int8(Dtype* dout,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
@@ -450,7 +452,8 @@ template void conv_depthwise_3x3s1_int8<int8_t>(int8_t* dout,
                                                 const float* scale,
                                                 const float* bias,
                                                 bool flag_bias,
-                                                bool flag_relu,
+                                                int flag_act,
+                                                float* alpha,
                                                 int num,
                                                 int chin,
                                                 int hin,
@@ -467,7 +470,8 @@ template void conv_depthwise_3x3s1_int8<float>(float* dout,
                                                const float* scale,
                                                const float* bias,
                                                bool flag_bias,
-                                               bool flag_relu,
+                                               int flag_act,
+                                               float* alpha,
                                                int num,
                                                int chin,
                                                int hin,
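This same parameter swap, bool flag_relu replaced by int flag_act plus float* alpha, repeats across every kernel touched by this commit. Per the mapping set up in the direct-conv files below, flag_act encodes 0: none, 1: relu, 2: relu6 (alpha holds the clip threshold), 3: leaky relu (alpha holds the negative slope). A scalar reference of those semantics, as a sketch (apply_act is an illustrative name, not a Paddle-Lite function):

#include <algorithm>

// Scalar reference of the fused-activation contract that the new
// (flag_act, alpha) parameters encode. flag_act: 0 = none, 1 = relu,
// 2 = relu6 (alpha[0] is the clip threshold), 3 = leaky relu
// (alpha[0] is the negative-side slope).
inline float apply_act(float v, int flag_act, const float* alpha) {
  switch (flag_act) {
    case 1: return std::max(v, 0.f);                      // relu
    case 2: return std::min(std::max(v, 0.f), alpha[0]);  // relu6
    case 3: return v >= 0.f ? v : v * alpha[0];           // leaky relu
    default: return v;                                    // no activation
  }
}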
lite/backends/arm/math/conv3x3s1_direct_int8.cc

@@ -42,8 +42,30 @@ void conv_3x3s1_direct_int8(const int8_t* din,
                             Context<TARGET(kARM)>* ctx,
                             const float* scale) {
   auto paddings = *param.paddings;
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   int pad_h = paddings[0];
   int pad_w = paddings[2];
@@ -442,7 +464,8 @@ void conv_3x3s1_direct_int8(const int8_t* din,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
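Note why alpha is a four-element array holding one repeated constant: the NEON write-out can load it as a single 128-bit q-register and clamp four accumulator lanes per instruction. A sketch of the vector epilogue under that assumption (act_epilogue is an illustrative name, not the library's code; requires an ARM target):

#include <arm_neon.h>

// Vectorized counterpart of the scalar apply_act sketch above: one
// vld1q_f32(alpha) load serves all four lanes because every slot of
// alpha[4] holds the same constant.
static inline float32x4_t act_epilogue(float32x4_t v, int flag_act,
                                       const float* alpha) {
  const float32x4_t vzero = vdupq_n_f32(0.f);
  if (flag_act == 1) {                        // relu
    v = vmaxq_f32(v, vzero);
  } else if (flag_act == 2) {                 // relu6: clamp to [0, alpha]
    v = vmaxq_f32(v, vzero);
    v = vminq_f32(v, vld1q_f32(alpha));       // one load, four lanes
  } else if (flag_act == 3) {                 // leaky relu
    uint32x4_t mask = vcgeq_f32(v, vzero);    // lanes where v >= 0
    float32x4_t scaled = vmulq_f32(v, vld1q_f32(alpha));
    v = vbslq_f32(mask, v, scaled);           // select v or alpha*v
  }
  return v;
}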
lite/backends/arm/math/conv3x3s2_depthwise_int8.cc

@@ -36,7 +36,8 @@ void conv_depthwise_3x3s2_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -447,7 +448,8 @@ void conv_depthwise_3x3s2_int8(Dtype* dout,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
@@ -463,7 +465,8 @@ template void conv_depthwise_3x3s2_int8<int8_t>(int8_t* dout,
                                                 const float* scale,
                                                 const float* bias,
                                                 bool flag_bias,
-                                                bool flag_relu,
+                                                int flag_act,
+                                                float* alpha,
                                                 int num,
                                                 int chin,
                                                 int hin,
@@ -480,7 +483,8 @@ template void conv_depthwise_3x3s2_int8<float>(float* dout,
                                                const float* scale,
                                                const float* bias,
                                                bool flag_bias,
-                                               bool flag_relu,
+                                               int flag_act,
+                                               float* alpha,
                                                int num,
                                                int chin,
                                                int hin,
lite/backends/arm/math/conv3x3s2_direct_int8.cc

@@ -47,8 +47,30 @@ void conv_3x3s2_direct_int8(const int8_t* din,
   //! prepack input to tmp buffer
   //! write output to tmp buffer
   auto paddings = *param.paddings;
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   int pad_h = paddings[0];
   int pad_w = paddings[2];
@@ -442,7 +464,8 @@ void conv_3x3s2_direct_int8(const int8_t* din,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
@@ -474,8 +497,30 @@ void conv_3x3s2_direct_int8(const int8_t* din,
   //! prepack input to tmp buffer
   //! write output to tmp buffer
   auto paddings = *param.paddings;
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   int pad_h = paddings[0];
   int pad_w = paddings[2];
   const int threads = ctx->threads();
@@ -698,7 +743,8 @@ void conv_3x3s2_direct_int8(const int8_t* din,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
lite/backends/arm/math/conv5x5s1_depthwise_int8.cc

@@ -36,7 +36,8 @@ void conv_depthwise_5x5s1_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -726,7 +727,8 @@ void conv_depthwise_5x5s1_int8(Dtype* dout,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
@@ -742,7 +744,8 @@ template void conv_depthwise_5x5s1_int8<int8_t>(int8_t* dout,
                                                 const float* scale,
                                                 const float* bias,
                                                 bool flag_bias,
-                                                bool flag_relu,
+                                                int flag_act,
+                                                float* alpha,
                                                 int num,
                                                 int chin,
                                                 int hin,
@@ -759,7 +762,8 @@ template void conv_depthwise_5x5s1_int8<float>(float* dout,
                                                const float* scale,
                                                const float* bias,
                                                bool flag_bias,
-                                               bool flag_relu,
+                                               int flag_act,
+                                               float* alpha,
                                                int num,
                                                int chin,
                                                int hin,
lite/backends/arm/math/conv5x5s2_depthwise_int8.cc

@@ -36,7 +36,8 @@ void conv_depthwise_5x5s2_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -746,7 +747,8 @@ void conv_depthwise_5x5s2_int8(Dtype* dout,
                       chout,
                       hout,
                       wout,
-                      flag_relu,
+                      flag_act,
+                      alpha,
                       bias_local,
                       flag_bias,
                       ptr_write,
@@ -762,7 +764,8 @@ template void conv_depthwise_5x5s2_int8<int8_t>(int8_t* dout,
                                                 const float* scale,
                                                 const float* bias,
                                                 bool flag_bias,
-                                                bool flag_relu,
+                                                int flag_act,
+                                                float* alpha,
                                                 int num,
                                                 int chin,
                                                 int hin,
@@ -779,7 +782,8 @@ template void conv_depthwise_5x5s2_int8<float>(float* dout,
                                                const float* scale,
                                                const float* bias,
                                                bool flag_bias,
-                                               bool flag_relu,
+                                               int flag_act,
+                                               float* alpha,
                                                int num,
                                                int chin,
                                                int hin,
lite/backends/arm/math/conv_block_utils.h

This diff is collapsed in the source view; per the summary above it accounts for +361 -476 lines.
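The collapsed diff is where most of this commit's churn lives: the write-out helpers in conv_block_utils.h take the bias_local/ptr_write arguments seen at every call site above, and evidently switch from a flag_relu epilogue to the (flag_act, alpha) one. A hedged scalar sketch of that write-out ordering, dequantize, bias, activate, then round and saturate, under the assumption that the activation is applied in the float domain (write_int8 is an illustrative name):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Sketch of an int8 write-out stage: the int32 accumulator is scaled
// back to float, biased, passed through the fused activation, then
// rounded and saturated to the symmetric int8 range.
inline int8_t write_int8(int32_t acc, float scale, float bias,
                         int flag_act, const float* alpha) {
  float v = acc * scale + bias;
  if (flag_act == 1) v = std::max(v, 0.f);                      // relu
  if (flag_act == 2) v = std::min(std::max(v, 0.f), alpha[0]);  // relu6
  if (flag_act == 3 && v < 0.f) v *= alpha[0];                  // leaky relu
  float r = std::round(v);
  r = std::min(std::max(r, -127.f), 127.f);  // saturate to int8
  return static_cast<int8_t>(r);
}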
lite/backends/arm/math/conv_depthwise.h

@@ -94,7 +94,8 @@ void conv_depthwise_3x3s1_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -112,7 +113,8 @@ void conv_depthwise_3x3s2_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -178,7 +180,8 @@ void conv_depthwise_5x5s1_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
@@ -196,7 +199,8 @@ void conv_depthwise_5x5s2_int8(Dtype* dout,
                                const float* scale,
                                const float* bias,
                                bool flag_bias,
-                               bool flag_relu,
+                               int flag_act,
+                               float* alpha,
                                int num,
                                int chin,
                                int hin,
lite/backends/arm/math/conv_impl.cc

@@ -790,8 +790,30 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
   int pad_h = paddings[0];
   int pad_w = paddings[2];
   int stride = param.strides[1];
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   if (stride == 1) {
     conv_depthwise_3x3s1_int8(reinterpret_cast<float*>(dout),
                               reinterpret_cast<const int8_t*>(din),
@@ -799,7 +821,8 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -816,7 +839,8 @@ void conv_depthwise_3x3_int8_fp32(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -849,8 +873,30 @@ void conv_depthwise_3x3_int8_int8(const void* din,
   int pad_h = paddings[0];
   int pad_w = paddings[2];
   int stride = param.strides[1];
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   if (stride == 1) {
     conv_depthwise_3x3s1_int8(reinterpret_cast<int8_t*>(dout),
                               reinterpret_cast<const int8_t*>(din),
@@ -858,7 +904,8 @@ void conv_depthwise_3x3_int8_int8(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -875,7 +922,8 @@ void conv_depthwise_3x3_int8_int8(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -908,8 +956,30 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
   int pad_h = paddings[0];
   int pad_w = paddings[2];
   int stride = param.strides[1];
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   if (stride == 1) {
     conv_depthwise_5x5s1_int8(reinterpret_cast<float*>(dout),
                               reinterpret_cast<const int8_t*>(din),
@@ -917,7 +987,8 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -934,7 +1005,8 @@ void conv_depthwise_5x5_int8_fp32(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -967,8 +1039,30 @@ void conv_depthwise_5x5_int8_int8(const void* din,
   int pad_h = paddings[0];
   int pad_w = paddings[2];
   int stride = param.strides[1];
-  bool flag_relu = param.fuse_relu;
   bool flag_bias = param.bias != nullptr;
+  auto act_param = param.activation_param;
+  auto act_type = act_param.active_type;
+  int flag_act = 0;  // relu: 1, relu6: 2, leakey: 3
+  float alpha[4] = {0.f, 0.f, 0.f, 0.f};
+  if (act_param.has_active) {
+    if (act_type == lite_api::ActivationType::kRelu) {
+      flag_act = 1;
+    } else if (act_type == lite_api::ActivationType::kRelu6) {
+      flag_act = 2;
+      float local_alpha = act_param.Relu_clipped_coef;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    } else if (act_type == lite_api::ActivationType::kLeakyRelu) {
+      flag_act = 3;
+      float local_alpha = act_param.Leaky_relu_alpha;
+      alpha[0] = local_alpha;
+      alpha[1] = local_alpha;
+      alpha[2] = local_alpha;
+      alpha[3] = local_alpha;
+    }
+  }
   if (stride == 1) {
     conv_depthwise_5x5s1_int8(reinterpret_cast<int8_t*>(dout),
                               reinterpret_cast<const int8_t*>(din),
@@ -976,7 +1070,8 @@ void conv_depthwise_5x5_int8_int8(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
@@ -993,7 +1088,8 @@ void conv_depthwise_5x5_int8_int8(const void* din,
                               scale,
                               bias,
                               flag_bias,
-                              flag_relu,
+                              flag_act,
+                              alpha,
                               num,
                               ch_in,
                               h_in,
lite/backends/arm/math/gemm_prepacked_int8.cc

@@ -534,18 +534,18 @@ inline void gemm_int8_kernel(const int8_t* a_ptr,
   "fmin v17.4s, v17.4s, v1.4s\n" /* relu6 */ \
   "fmin v18.4s, v18.4s, v1.4s\n" /* relu6 */ \
   "fmin v19.4s, v19.4s, v1.4s\n" /* relu6 */ \
-  "fmin v20.4s, v20.4s, v0.4s\n" /* relu6 */ \
-  "fmin v21.4s, v21.4s, v0.4s\n" /* relu6 */ \
-  "fmin v22.4s, v22.4s, v0.4s\n" /* relu6 */ \
-  "fmin v23.4s, v23.4s, v0.4s\n" /* relu6 */ \
-  "fmin v24.4s, v24.4s, v0.4s\n" /* relu6 */ \
-  "fmin v25.4s, v25.4s, v0.4s\n" /* relu6 */ \
-  "fmin v26.4s, v26.4s, v0.4s\n" /* relu6 */ \
-  "fmin v27.4s, v27.4s, v0.4s\n" /* relu6 */ \
-  "fmin v28.4s, v28.4s, v0.4s\n" /* relu6 */ \
-  "fmin v29.4s, v29.4s, v0.4s\n" /* relu6 */ \
-  "fmin v30.4s, v30.4s, v0.4s\n" /* relu6 */ \
-  "fmin v31.4s, v31.4s, v0.4s\n" /* relu6 */ \
+  "fmin v20.4s, v20.4s, v1.4s\n" /* relu6 */ \
+  "fmin v21.4s, v21.4s, v1.4s\n" /* relu6 */ \
+  "fmin v22.4s, v22.4s, v1.4s\n" /* relu6 */ \
+  "fmin v23.4s, v23.4s, v1.4s\n" /* relu6 */ \
+  "fmin v24.4s, v24.4s, v1.4s\n" /* relu6 */ \
+  "fmin v25.4s, v25.4s, v1.4s\n" /* relu6 */ \
+  "fmin v26.4s, v26.4s, v1.4s\n" /* relu6 */ \
+  "fmin v27.4s, v27.4s, v1.4s\n" /* relu6 */ \
+  "fmin v28.4s, v28.4s, v1.4s\n" /* relu6 */ \
+  "fmin v29.4s, v29.4s, v1.4s\n" /* relu6 */ \
+  "fmin v30.4s, v30.4s, v1.4s\n" /* relu6 */ \
+  "fmin v31.4s, v31.4s, v1.4s\n" /* relu6 */ \
   "b 9f \n" /* relu end */
 #define GEMM_INT8_LEAKY_RELU \
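This hunk is a register fix in the relu6 epilogue: the unchanged v17-v19 lines show that v1 holds the relu6 threshold, yet v20-v31 were being clamped against v0, presumably the zero vector used by the preceding relu step, which zeroes every positive accumulator in those registers. A scalar model of the bug (a sketch, not the asm):

#include <algorithm>
#include <cstdio>

// After the relu step x = max(x, 0), the relu6 step must clamp against
// the threshold in v1, not the zero in v0: min(x, 0) wipes out every
// positive result.
int main() {
  const float v0 = 0.f;  // zero-vector lane (assumed)
  const float v1 = 6.f;  // relu6-threshold lane
  float x = std::max(3.7f, 0.f);                  // post-relu value
  std::printf("buggy: %.1f\n", std::min(x, v0));  // 0.0, wrong
  std::printf("fixed: %.1f\n", std::min(x, v1));  // 3.7, right
}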
lite/kernels/arm/conv_depthwise.cc

@@ -169,6 +169,12 @@ void DepthwiseConv<PRECISION(kInt8), PRECISION(kInt8)>::PrepareForRun() {
     }
     flag_trans_bias_ = true;
   }
+  //! update relu6 parameter
+  if (param.activation_param.has_active &&
+      param.activation_param.active_type == lite_api::ActivationType::kRelu6) {
+    param.activation_param.Relu_clipped_coef =
+        param.activation_param.Relu_clipped_coef / param.output_scale;
+  }
   /// select dw conv kernel
   if (kw == 3) {
     // trans weights
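The division above is the quantization bookkeeping for relu6 with an int8 output: the kernel clamps values that already live in the quantized domain q = x / output_scale, so the float threshold must be rescaled by 1 / output_scale once, at PrepareForRun time. A worked check, with an assumed output_scale of 0.05:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Clipping in the float domain and then quantizing must agree with
// quantizing first and clipping against the rescaled threshold.
int main() {
  const float six = 6.f, output_scale = 0.05f;
  const float x = 7.3f;  // real-valued conv output
  const float ref = std::round(std::min(x, six) / output_scale);  // 120
  const float q = x / output_scale;                               // 146
  const float got = std::round(std::min(q, six / output_scale));  // 120
  std::printf("ref=%.0f got=%.0f\n", ref, got);
}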
lite/kernels/arm/conv_direct.h

@@ -39,7 +39,8 @@ inline bool direct_conv_trans_weights(
     const std::vector<float>& w_scale,
     float in_scale,
     float out_scale,
-    std::vector<float>& merge_scale) {  // NOLINT
+    std::vector<float>& merge_scale,  // NOLINT
+    float* relu_clipped_coef) {
   constexpr int cblock = 4;
   int oc = win->dims()[0];
   int ic = win->dims()[1];
@@ -64,7 +65,8 @@ inline bool direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kFloat)>(
     const std::vector<float>& w_scale,
     float in_scale,
     float out_scale,
-    std::vector<float>& merge_scale) {  // NOLINT
+    std::vector<float>& merge_scale,  // NOLINT
+    float* relu_clipped_coef) {
   int cblock = 4;
   if (stride == 2) {
     cblock = lite::arm::math::conv_3x3s2_direct_int8_c_num();
@@ -103,7 +105,8 @@ inline bool direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kInt8)>(
     const std::vector<float>& w_scale,
     float in_scale,
     float out_scale,
-    std::vector<float>& merge_scale) {  // NOLINT
+    std::vector<float>& merge_scale,  // NOLINT
+    float* relu_clipped_coef) {
   int cblock = 4;
   if (stride == 2) {
     cblock = lite::arm::math::conv_3x3s2_direct_int8_c_num();
@@ -130,6 +133,8 @@ inline bool direct_conv_trans_weights<PRECISION(kInt8), PRECISION(kInt8)>(
       merge_scale[i] = w_scale[i] * scale;
     }
   }
+  /// update relu_clipped_coef
+  *relu_clipped_coef /= out_scale;
   /// update bias
   if (bin) {
     bout->Resize(bin->dims());
@@ -167,16 +172,17 @@ class DirectConv : public KernelLite<TARGET(kARM), Ptype> {
         << "direct conv only support conv3x3s1 and conv3x3s2";
     CHECK(kw == 3 && kh == 3)
         << "direct conv only support conv3x3s1 and conv3x3s2";
-    flag_trans_bias_ = direct_conv_trans_weights<Ptype, OutType>(param.filter,
-                                                                 &weights_,
-                                                                 param.bias,
-                                                                 &bias_,
-                                                                 sw,
-                                                                 param.weight_scale,
-                                                                 param.input_scale,
-                                                                 param.output_scale,
-                                                                 w_scale_);
+    flag_trans_bias_ = direct_conv_trans_weights<Ptype, OutType>(param.filter,
+        &weights_,
+        param.bias,
+        &bias_,
+        sw,
+        param.weight_scale,
+        param.input_scale,
+        param.output_scale,
+        w_scale_,
+        &param.activation_param.Relu_clipped_coef);
   }
   virtual void Run();
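The design choice here mirrors the depthwise kernel: direct_conv_trans_weights now receives a pointer into the activation params so the relu6 threshold can be folded into the quantized output domain once during weight translation, rather than on every Run(). A minimal sketch of that in-place hook, with illustrative names rather than the Paddle-Lite API:

#include <cstdio>

struct ActParam {
  float Relu_clipped_coef{6.f};  // hypothetical stand-in for the real field
};

// Mirrors the "*relu_clipped_coef /= out_scale;" update in conv_direct.h.
bool trans_weights_sketch(float out_scale, float* relu_clipped_coef) {
  *relu_clipped_coef /= out_scale;
  return true;
}

int main() {
  ActParam act;
  trans_weights_sketch(0.05f, &act.Relu_clipped_coef);
  std::printf("%.0f\n", act.Relu_clipped_coef);  // 120, ready for the kernel
}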
lite/tests/math/conv_int8_compute_test.cc

@@ -56,7 +56,7 @@ DEFINE_int32(dila_w, 1, "dilation width");
 DEFINE_bool(flag_act, true, "do act");
 DEFINE_bool(flag_bias, true, "with bias");
 DEFINE_double(clipped_coef, 1.0, "clipped relu coef");
-DEFINE_double(leakey_relu_alpha, 8.88, "leakey relu alpha");
+DEFINE_double(leakey_relu_alpha, 2.22, "leakey relu alpha");
 typedef paddle::lite::DDim DDim;
 typedef paddle::lite::Tensor Tensor;
@@ -188,7 +188,14 @@ void test_conv_int8(const std::vector<DDim>& input_dims,
   }
   std::vector<float> scale_in{1.f / 127};
-  std::vector<float> scale_out{weight_dim.count(1, 4) / 127.f};
+  std::vector<float> scale_out(1, weight_dim.count(1, 4) / 127.f);
+  if (flag_act == 2) {
+    scale_out[0] = six / 127.f;
+  } else if (flag_act == 4) {
+    if (std::abs(alpha) > 1) {
+      scale_out[0] *= std::abs(alpha);
+    }
+  }
   std::vector<float> scale_w(weight_dim[0], 1.f / 127);
   param_int8_out.input_scale = scale_in[0];
@@ -484,7 +491,7 @@ TEST(TestConv3x3DWInt8, test_conv3x3_depthwise) {
     for (auto& stride : {1, 2}) {
       for (auto& pad : {0, 1}) {
         for (auto& flag_bias : {false, true}) {
-          for (auto& flag_act : {0, 1}) {
+          for (auto& flag_act : {0, 1, 2, 4}) {
             for (auto& c : {1, 3, 5, 8, 16, 32}) {
               std::vector<DDim> dims;
               DDim weights_dim({c, 1, 3, 3});
@@ -520,7 +527,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
     for (auto& stride : {1, 2}) {
       for (auto& pad : {0, 1, 2, 3, 4}) {
         for (auto& flag_bias : {false, true}) {
-          for (auto& flag_act : {0, 1}) {
+          for (auto& flag_act : {0, 1, 2, 4}) {
            for (auto& c : {1, 5, 15, 33}) {
              std::vector<DDim> dims;
              DDim weights_dim({c, 1, 5, 5});
@@ -553,7 +560,7 @@ TEST(TestConv5x5DWInt8, test_conv5x5_depthwise) {
 #if 1 /// conv1x1s1
 TEST(TestConv1x1s1Int8, test_conv1x1s1) {
   if (FLAGS_basic_test) {
-    for (auto& cin : {1, 3, 8, 32}) {
+    for (auto& cin : {1, 3, 8, 33}) {
       for (auto& cout : {1, 5, 17}) {
         for (auto& g : {1, 2}) {
           for (auto& flag_bias : {false, true}) {
@@ -599,7 +606,7 @@ TEST(TestConv3x3s1Int8, test_conv_3x3s1) {
     for (auto& pad_left : {1, 2}) {
       for (auto& pad_right : {1, 2}) {
         for (auto& flag_bias : {false, true}) {
-          for (auto& flag_act : {0, 1}) {
+          for (auto& flag_act : {0, 1, 2, 4}) {
             std::vector<DDim> dims;
             DDim weights_dim({cout, cin, 3, 3});
             for (auto& batch : {1, 2}) {
@@ -641,7 +648,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
     for (auto& pad_left : {1, 2}) {
       for (auto& pad_right : {1, 2}) {
         for (auto& flag_bias : {false, true}) {
-          for (auto& flag_act : {0, 1}) {
+          for (auto& flag_act : {0, 1, 2, 4}) {
             std::vector<DDim> dims;
             DDim weights_dim({cout, cin, 3, 3});
             for (auto& batch : {1, 2}) {
@@ -673,7 +680,7 @@ TEST(TestConv3x3s2Int8, test_conv_3x3s2) {
 }
 #endif /// conv3x3s2
-#if 0 /// random param conv
+#if 1 /// random param conv
 TEST(TestConvRandInt8, test_conv_rand) {
   if (FLAGS_basic_test) {
     for (auto& cin : {1, 17}) {
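The test loops now sweep flag_act over {0, 1, 2, 4} (none, relu, relu6, leaky relu in the test's encoding), and the scale_out hunk keeps the int8 reference comparable: for relu6 the output range is [0, six], so scale_out becomes six / 127; for leaky relu a slope with |alpha| > 1 widens the negative range, so the scale grows by |alpha|. A sketch of that logic (choose_scale_out is an illustrative name, not a test helper):

#include <cmath>
#include <cstdio>

// Pick scale_out so the fused activation's output range still spans
// the symmetric int8 range [-127, 127].
float choose_scale_out(float base, int flag_act, float six, float alpha) {
  if (flag_act == 2) return six / 127.f;        // relu6: outputs in [0, six]
  if (flag_act == 4 && std::fabs(alpha) > 1.f)  // leaky relu amplifies
    return base * std::fabs(alpha);             // negatives by |alpha|
  return base;
}

int main() {
  std::printf("%f\n", choose_scale_out(0.5f, 2, 6.f, 2.22f));  // 0.047244
}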