Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
5ac1e63c
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5ac1e63c
编写于
10月 21, 2018
作者:
H
hjchen2
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support 1x1 and 7x7 conv, fix quant scale, we can run the googlenet with int8 now
上级
a69fc85c
变更
26
展开全部
隐藏空白更改
内联
并排
Showing
26 changed files
with
1608 additions
and
661 deletions
+1608
-661
src/framework/operator.cpp
src/framework/operator.cpp
+1
-1
src/operators/kernel/arm/dequantize_kernel.cpp
src/operators/kernel/arm/dequantize_kernel.cpp
+2
-1
src/operators/kernel/arm/quantize_kernel.cpp
src/operators/kernel/arm/quantize_kernel.cpp
+6
-5
src/operators/kernel/central-arm-func/conv_arm_func.h
src/operators/kernel/central-arm-func/conv_arm_func.h
+40
-38
src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
...erators/kernel/central-arm-func/depthwise_conv_arm_func.h
+1
-1
src/operators/kernel/central-arm-func/mul_arm_func.h
src/operators/kernel/central-arm-func/mul_arm_func.h
+11
-5
src/operators/math/conv3x3_arm_int8.cpp
src/operators/math/conv3x3_arm_int8.cpp
+19
-15
src/operators/math/conv_arm_int8.h
src/operators/math/conv_arm_int8.h
+4
-0
src/operators/math/gemm.cpp
src/operators/math/gemm.cpp
+3
-4
src/operators/math/gemm.h
src/operators/math/gemm.h
+50
-0
src/operators/math/gemm_int8.cpp
src/operators/math/gemm_int8.cpp
+652
-0
src/operators/math/im2col.cpp
src/operators/math/im2col.cpp
+406
-398
src/operators/math/math_function.cpp
src/operators/math/math_function.cpp
+4
-4
src/operators/math/math_function.h
src/operators/math/math_function.h
+2
-1
src/operators/math/math_function_int8.cpp
src/operators/math/math_function_int8.cpp
+64
-0
src/operators/math/vol2col.cpp
src/operators/math/vol2col.cpp
+2
-59
src/operators/op_param.h
src/operators/op_param.h
+2
-4
test/CMakeLists.txt
test/CMakeLists.txt
+4
-0
test/common/test_gemm_accuracy.cpp
test/common/test_gemm_accuracy.cpp
+1
-1
test/common/test_gemm_int8_accuracy.cpp
test/common/test_gemm_int8_accuracy.cpp
+131
-0
test/common/test_gemm_perf.cpp
test/common/test_gemm_perf.cpp
+43
-13
test/net/test_googlenet.cpp
test/net/test_googlenet.cpp
+11
-7
test/operators/test_dequantize_op.cpp
test/operators/test_dequantize_op.cpp
+1
-1
test/operators/test_int8_conv_op.cpp
test/operators/test_int8_conv_op.cpp
+67
-25
test/operators/test_mul_op.cpp
test/operators/test_mul_op.cpp
+67
-67
test/operators/test_quantize_op.cpp
test/operators/test_quantize_op.cpp
+14
-11
未找到文件。
src/framework/operator.cpp
浏览文件 @
5ac1e63c
...
...
@@ -32,7 +32,7 @@ template <typename Dtype>
vector
<
string
>
OperatorBase
<
Dtype
>::
GetInputKeys
()
const
{
auto
it
=
op_input_output_key
.
find
(
type_
);
if
(
it
==
op_input_output_key
.
end
())
{
DLOG
<<
type_
<<
" has no
out
puts"
;
DLOG
<<
type_
<<
" has no
in
puts"
;
return
{};
}
return
it
->
second
.
first
;
...
...
src/operators/kernel/arm/dequantize_kernel.cpp
浏览文件 @
5ac1e63c
...
...
@@ -38,7 +38,8 @@ void DequantizeKernel<CPU, float>::Compute(
const
int32_t
*
x
=
input
->
data
<
const
int32_t
>
();
float
*
y
=
output
->
mutable_data
<
float
>
();
size_t
size
=
output
->
numel
();
float
scale
=
1.
f
/
(
activation_scale
*
weight_scale
);
// float scale = 1.f / (activation_scale * weight_scale);
float
scale
=
activation_scale
/
weight_scale
;
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t
loop
=
size
>>
4
;
size_t
remain
=
size
&
0xF
;
...
...
src/operators/kernel/arm/quantize_kernel.cpp
浏览文件 @
5ac1e63c
...
...
@@ -280,17 +280,18 @@ void QuantizeKernel<CPU, float>::Compute(
}
max_abs
=
std
::
max
(
max_abs
,
1e-6
f
);
// only support int8 currently
float
online_
scale
=
127
/
max_abs
;
param
.
online_scale_
->
mutable_data
<
float
>
()[
0
]
=
online_scale
;
float
scale
=
127
/
max_abs
;
param
.
online_scale_
->
mutable_data
<
float
>
()[
0
]
=
max_abs
;
switch
(
param
.
round_type_
)
{
case
ROUND_NEAREST_TO_EVEN
:
quantize_round_to_even
(
input
,
online_
scale
,
output
);
quantize_round_to_even
(
input
,
scale
,
output
);
break
;
case
ROUND_NEAREST_TOWARDS_ZERO
:
quantize_round_to_zero
(
input
,
online_
scale
,
output
);
quantize_round_to_zero
(
input
,
scale
,
output
);
break
;
case
ROUND_NEAREST_AWAY_ZERO
:
quantize_round_to_nearest
(
input
,
online_scale
,
output
);
quantize_round_to_nearest
(
input
,
scale
,
output
);
break
;
default:
LOG
(
kLOG_ERROR
)
<<
"round type is not supported."
;
break
;
...
...
src/operators/kernel/central-arm-func/conv_arm_func.h
浏览文件 @
5ac1e63c
...
...
@@ -28,15 +28,15 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
operators
{
template
<
typename
Dtype
>
inline
void
ConvBasic
(
const
ConvParam
<
CPU
>
&
param
)
{
const
Tensor
*
input
=
param
.
Input
();
Tensor
filter
=
*
param
.
Filter
();
Tensor
*
output
=
param
.
Output
();
output
->
mutable_data
<
float
>
();
int
groups
=
param
.
Groups
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
std
::
vector
<
int
>
strides
=
param
.
Strides
();
const
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
const
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
...
...
@@ -60,7 +60,7 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
Tensor
col
;
Tensor
col_matrix
;
if
(
is_expand
)
{
col
.
mutable_data
<
float
>
(
col_shape
);
col
.
mutable_data
<
Dtype
>
(
col_shape
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
...
...
@@ -79,8 +79,8 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
math
::
Vol2ColFunctor
<
CPU
,
float
>
vol2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
CPU
,
float
>
im2col
;
math
::
Vol2ColFunctor
<
CPU
,
Dtype
>
vol2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
CPU
,
Dtype
>
im2col
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
...
...
@@ -99,6 +99,7 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
std
::
vector
<
int
>
{
paddings
[
0
],
paddings
[
1
],
paddings
[
0
],
paddings
[
1
]},
&
col
);
}
else
if
(
data_dim
==
3U
)
{
// vol2col
vol2col
(
in_slice
,
dilations
,
strides
,
paddings
,
&
col
);
...
...
@@ -107,7 +108,8 @@ inline void ConvBasic(const ConvParam<CPU> ¶m) {
// gemm
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
float
>
(
filter_slice
,
false
,
col_matrix
,
false
,
math
::
matmul
<
Dtype
>
(
filter_slice
,
false
,
col_matrix
,
false
,
static_cast
<
float
>
(
1
),
&
out_slice
,
static_cast
<
float
>
(
0
));
}
...
...
@@ -126,42 +128,41 @@ inline void ConvCompute_int8(const ConvParam<CPU> ¶m) {
const
Tensor
*
input
=
param
.
Input
();
Tensor
*
filter
=
param
.
Filter
();
Tensor
*
output
=
param
.
Output
();
output
->
mutable_data
<
int32_t
>
();
int
groups
=
param
.
Groups
();
std
::
vector
<
int
>
strides
=
param
.
Strides
();
std
::
vector
<
int
>
paddings
=
param
.
Paddings
();
std
::
vector
<
int
>
dilations
=
param
.
Dilations
();
const
std
::
vector
<
int
>
&
strides
=
param
.
Strides
();
const
std
::
vector
<
int
>
&
paddings
=
param
.
Paddings
();
const
std
::
vector
<
int
>
&
dilations
=
param
.
Dilations
();
int
kernel_h
=
filter
->
dims
()[
2
];
int
kernel_w
=
filter
->
dims
()[
3
];
const
int
batch_size
=
static_cast
<
int
>
(
input
->
dims
()[
0
]);
math
::
PadFunctor
<
CPU
,
int8_t
>
pad
;
Tensor
input_pad
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
);
if
(
paddings
[
0
]
==
0
&&
paddings
[
1
]
==
0
)
{
input_pad
=
in_batch
;
}
else
{
framework
::
DDim
pad_shape
=
in_batch
.
dims
();
pad_shape
[
2
]
+=
2
*
paddings
[
0
];
pad_shape
[
3
]
+=
2
*
paddings
[
1
];
input_pad
.
mutable_data
<
int8_t
>
(
pad_shape
);
pad
(
in_batch
,
paddings
[
0
],
paddings
[
1
],
&
input_pad
);
}
output
->
mutable_data
<
int32_t
>
();
if
(
strides
[
1
]
==
strides
[
0
]
&&
strides
[
1
]
<
6
&&
kernel_h
==
kernel_w
&&
kernel_h
<
8
&&
groups
==
1
&&
dilations
[
0
]
==
dilations
[
1
]
&&
dilations
[
1
]
==
1
)
{
ConvFunc
conv_func
=
conv_funcs_table
[
kernel_h
-
1
][
strides
[
0
]
-
1
];
if
(
conv_func
)
{
conv_func
(
input_pad
,
*
filter
,
&
out_batch
);
ConvFunc
conv_func
=
0
;
if
(
strides
[
1
]
==
strides
[
0
]
&&
strides
[
1
]
<
6
&&
kernel_h
==
kernel_w
&&
kernel_h
<
8
&&
groups
==
1
&&
dilations
[
0
]
==
dilations
[
1
]
&&
dilations
[
1
]
==
1
)
{
conv_func
=
conv_funcs_table
[
kernel_h
-
1
][
strides
[
0
]
-
1
];
}
if
(
conv_func
)
{
int
batch_size
=
input
->
dims
()[
0
];
math
::
PadFunctor
<
CPU
,
int8_t
>
pad
;
Tensor
input_pad
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
);
if
(
paddings
[
0
]
==
0
&&
paddings
[
1
]
==
0
)
{
input_pad
=
in_batch
;
}
else
{
// TODO(hjchen2)
framework
::
DDim
pad_shape
=
in_batch
.
dims
();
pad_shape
[
2
]
+=
2
*
paddings
[
0
];
pad_shape
[
3
]
+=
2
*
paddings
[
1
];
input_pad
.
mutable_data
<
int8_t
>
(
pad_shape
);
pad
(
in_batch
,
paddings
[
0
],
paddings
[
1
],
&
input_pad
);
}
}
else
{
// TODO(hjchen2)
conv_func
(
input_pad
,
*
filter
,
&
out_batch
);
}
}
else
{
ConvBasic
<
int8_t
>
(
param
);
}
}
...
...
@@ -170,6 +171,7 @@ void ConvCompute(const ConvParam<CPU> ¶m) {
if
(
param
.
Input
()
->
type
()
==
typeid
(
int8_t
))
{
ConvCompute_int8
(
param
);
}
else
{
param
.
Output
()
->
mutable_data
<
float
>
();
if
(
param
.
Groups
()
==
param
.
Input
()
->
dims
()[
1
]
&&
param
.
Input
()
->
dims
()[
1
]
==
param
.
Output
()
->
dims
()[
1
]
&&
param
.
Filter
()
->
dims
()[
2
]
==
param
.
Filter
()
->
dims
()[
3
]
&&
...
...
@@ -183,7 +185,7 @@ void ConvCompute(const ConvParam<CPU> ¶m) {
math
::
DepthwiseConv3x3
(
param
.
Input
(),
param
.
Strides
(),
param
.
Paddings
(),
param
.
Filter
(),
nullptr
,
param
.
Output
(),
false
);
}
else
{
ConvBasic
(
param
);
ConvBasic
<
float
>
(
param
);
}
}
}
...
...
src/operators/kernel/central-arm-func/depthwise_conv_arm_func.h
浏览文件 @
5ac1e63c
...
...
@@ -44,7 +44,7 @@ void DepthwiseConvCompute(const ConvParam<CPU> ¶m) {
Bias
,
false
);
}
else
{
ConvBasic
(
param
);
ConvBasic
<
float
>
(
param
);
}
}
...
...
src/operators/kernel/central-arm-func/mul_arm_func.h
浏览文件 @
5ac1e63c
...
...
@@ -58,7 +58,7 @@ void MulCompute(const MulParam<CPU> ¶m) {
const
Tensor
*
input_x
=
param
.
InputX
();
const
Tensor
*
input_y
=
param
.
InputY
();
Tensor
*
out
=
param
.
Out
();
out
->
mutable_data
<
float
>
();
const
Tensor
x_matrix
=
input_x
->
dims
().
size
()
>
2
?
framework
::
ReshapeToMatrix
(
*
input_x
,
param
.
XNumColDims
())
...
...
@@ -71,15 +71,21 @@ void MulCompute(const MulParam<CPU> ¶m) {
if
(
out_dim
.
size
()
!=
2
)
{
out
->
Resize
({
x_matrix
.
dims
()[
0
],
y_matrix
.
dims
()[
1
]});
}
math
::
matmul
<
float
>
(
x_matrix
,
false
,
y_matrix
,
false
,
static_cast
<
float
>
(
1
),
out
,
static_cast
<
float
>
(
0
));
if
(
param
.
InputX
()
->
type
()
==
typeid
(
int8_t
))
{
out
->
mutable_data
<
int32_t
>
();
math
::
matmul
<
int8_t
>
(
x_matrix
,
false
,
y_matrix
,
false
,
static_cast
<
int8_t
>
(
1
),
out
,
static_cast
<
int8_t
>
(
0
));
}
else
{
out
->
mutable_data
<
float
>
();
math
::
matmul
<
float
>
(
x_matrix
,
false
,
y_matrix
,
false
,
static_cast
<
float
>
(
1
),
out
,
static_cast
<
float
>
(
0
));
}
if
(
out_dim
.
size
()
!=
2
)
{
out
->
Resize
(
out_dim
);
}
}
template
class
MulKernel
<
CPU
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
...
...
src/operators/math/conv3x3_arm_int8.cpp
浏览文件 @
5ac1e63c
...
...
@@ -112,15 +112,15 @@ void conv3x3s1_int8(const framework::Tensor& input,
"vmull.s8 q7, d4, d7
\n
"
"vmlal.s8 q6, d5, d8
\n
"
"vaddw.s16 q12, q12, d12
\n
"
"vaddw.s16 q12, q12, d14
\n
"
"vaddw.s16 q13, q13, d13
\n
"
"vaddw.s16 q12, q12, d14
\n
"
"vaddw.s16 q13, q13, d15
\n
"
"vmull.s8 q6, d2, d9
\n
"
"vmull.s8 q7, d4, d10
\n
"
"vmlal.s8 q6, d5, d11
\n
"
"vaddw.s16 q14, q14, d12
\n
"
"vaddw.s16 q14, q14, d14
\n
"
"vaddw.s16 q15, q15, d13
\n
"
"vaddw.s16 q14, q14, d14
\n
"
"vaddw.s16 q15, q15, d15
\n
"
"vld1.8 {d2-d3}, [%[r2]]
\n
"
// r2
...
...
@@ -139,8 +139,8 @@ void conv3x3s1_int8(const framework::Tensor& input,
"vmull.s8 q7, d4, d10
\n
"
"vmlal.s8 q6, d5, d11
\n
"
"vaddw.s16 q10, q10, d12
\n
"
"vaddw.s16 q10, q10, d14
\n
"
"vaddw.s16 q11, q11, d13
\n
"
"vaddw.s16 q10, q10, d14
\n
"
"vaddw.s16 q11, q11, d15
\n
"
"vdup.s8 d6, d0[6]
\n
"
...
...
@@ -153,21 +153,23 @@ void conv3x3s1_int8(const framework::Tensor& input,
"vmull.s8 q7, d4, d7
\n
"
"vmlal.s8 q6, d5, d8
\n
"
"vaddw.s16 q12, q12, d12
\n
"
"vaddw.s16 q12, q12, d14
\n
"
"vaddw.s16 q13, q13, d13
\n
"
"vaddw.s16 q12, q12, d14
\n
"
"vaddw.s16 q13, q13, d15
\n
"
"vld1.32 {d12-d15}, [%[output0]]
\n
"
"vadd.s32 q6, q6, q12
\n
"
"vadd.s32 q7, q7, q13
\n
"
"vst1.32 {d12-d15}, [%[output0]]!
\n
"
"vmull.s8 q6, d2, d9
\n
"
"vmull.s8 q7, d4, d10
\n
"
"vmlal.s8 q6, d5, d11
\n
"
"vaddw.s16 q14, q14, d12
\n
"
"vaddw.s16 q14, q14, d14
\n
"
"vaddw.s16 q15, q15, d13
\n
"
"vaddw.s16 q14, q14, d14
\n
"
"vaddw.s16 q15, q15, d15
\n
"
"vld1.32 {d12-d15}, [%[output0]]
\n
"
"vadd.s32 q6, q6, q12
\n
"
"vadd.s32 q7, q7, q13
\n
"
"vst1.32 {d12-d15}, [%[output0]]!
\n
"
"vld1.32 {d12-d15}, [%[output1]]
\n
"
"vadd.s32 q6, q6, q14
\n
"
"vadd.s32 q7, q7, q15
\n
"
...
...
@@ -182,21 +184,23 @@ void conv3x3s1_int8(const framework::Tensor& input,
"vmull.s8 q7, d4, d7
\n
"
"vmlal.s8 q6, d5, d8
\n
"
"vaddw.s16 q8, q8, d12
\n
"
"vaddw.s16 q9, q9, d15
\n
"
"vaddw.s16 q8, q8, d14
\n
"
"vaddw.s16 q9, q9, d13
\n
"
"vaddw.s16 q9, q9, d15
\n
"
"vld1.32 {d12-d15}, [%[output0n]]
\n
"
"vadd.s32 q6, q6, q8
\n
"
"vadd.s32 q7, q7, q9
\n
"
"vst1.32 {d12-d15}, [%[output0n]]!
\n
"
"vmull.s8 q6, d2, d9
\n
"
"vmull.s8 q7, d4, d10
\n
"
"vmlal.s8 q6, d5, d11
\n
"
"vaddw.s16 q10, q10, d12
\n
"
"vaddw.s16 q11, q11, d15
\n
"
"vaddw.s16 q10, q10, d14
\n
"
"vaddw.s16 q11, q11, d13
\n
"
"vaddw.s16 q11, q11, d15
\n
"
"vld1.32 {d12-d15}, [%[output0n]]
\n
"
"vadd.s32 q6, q6, q8
\n
"
"vadd.s32 q7, q7, q9
\n
"
"vst1.32 {d12-d15}, [%[output0n]]!
\n
"
"vld1.32 {d12-d15}, [%[output1n]]
\n
"
"vadd.s32 q6, q6, q10
\n
"
"vadd.s32 q7, q7, q11
\n
"
...
...
src/operators/math/conv_arm_int8.h
浏览文件 @
5ac1e63c
...
...
@@ -24,6 +24,10 @@ namespace operators {
void
conv3x3s1_int8
(
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
output
);
void
conv3x3s1_int8_4c
(
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
output
);
void
conv5x5s1_int8
(
const
framework
::
Tensor
&
input
,
const
framework
::
Tensor
&
weight
,
framework
::
Tensor
*
output
);
...
...
src/operators/math/gemm.cpp
浏览文件 @
5ac1e63c
...
...
@@ -3662,7 +3662,7 @@ void Gemm::AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
b_ptr
=
b
;
int
kc1
=
k
/
8
;
int
kc2
=
k
%
8
;
int
step
=
4
*
ldc
;
int
step
=
sizeof
(
float
)
*
ldc
;
asm
volatile
(
"pld [%[a_ptr]]
\n\t
"
"pld [%[a_ptr], #64]
\n\t
"
...
...
@@ -3866,11 +3866,10 @@ void Gemm::AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
:
:
[
a_ptr
]
"r"
(
a_ptr
),
[
b_ptr
]
"r"
(
b_ptr
),
[
c
]
"r"
(
c
),
[
kc1
]
"r"
(
kc1
),
[
kc2
]
"r"
(
kc2
),
[
step
]
"r"
(
step
)
:
"
memory"
,
"r5"
,
"r6"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
,
"q5"
,
"q6"
,
"q7
"
,
"q8"
,
"q9"
,
"q10"
,
"q11"
,
"q12"
,
"q13"
,
"q14"
,
"q15"
);
:
"
cc"
,
"memory"
,
"r5"
,
"r6"
,
"q0"
,
"q1"
,
"q2"
,
"q3"
,
"q4"
,
"q5"
,
"q6
"
,
"q
7"
,
"q
8"
,
"q9"
,
"q10"
,
"q11"
,
"q12"
,
"q13"
,
"q14"
,
"q15"
);
#endif // __aarch64__
#else
#endif // __ARM_NEON
}
...
...
src/operators/math/gemm.h
浏览文件 @
5ac1e63c
...
...
@@ -96,6 +96,7 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
void
InnerKernelWithPRelu
(
int
mc
,
int
nc
,
const
float
*
a
,
const
float
*
b
,
float
*
c
,
float
*
C
,
int
ldc
,
float
*
p
,
std
::
string
mode
,
float
*
bias
,
float
*
bias1
);
/*
// 向量矩阵乘法 (M = 1)
void VectorKernel(int m, int n, int k, float alpha, const float *A, int lda,
...
...
@@ -139,6 +140,7 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
float
*
new_scale
,
float
*
new_bias
);
void
WriteWithBnAddRelu
(
int
mc
,
int
nc
,
float
*
c
,
float
*
C
,
int
ldc
,
float
*
new_scale
,
float
*
new_bias
,
float
*
bias1
);
/*
// 向量矩阵乘法结果回写
// C = A * B
...
...
@@ -185,15 +187,63 @@ void PackMatrixB(int k, int n, int n_tail, const float *B, int ldb,
const
float
*
B
,
int
ldb
,
float
*
C
,
int
ldc
,
float
*
p
,
std
::
string
mode
,
float
*
bias
,
float
*
bias1
);
/************************ 8 bit function cluster ************************/
// 8 bit int small block inner product
void
AddDot6x8
(
int32_t
k
,
const
int8_t
*
a
,
const
int8_t
*
b
,
int32_t
*
c
,
int32_t
ldc
);
// 8 bit int inner product
void
InnerKernelWithBias
(
int32_t
mc
,
int32_t
nc
,
int8_t
alpha
,
const
int8_t
*
a
,
const
int8_t
*
b
,
int8_t
beta
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
,
bool
relu
,
int8_t
*
bias
);
// 8 bit int pack function
void
PackMatrixA_6r
(
int32_t
m
,
int32_t
k
,
int32_t
m_tail
,
const
int8_t
*
A
,
int32_t
lda
,
int8_t
*
buffer
);
void
PackMatrixB_8c
(
int32_t
k
,
int32_t
n
,
int32_t
n_tail
,
const
int8_t
*
B
,
int32_t
ldb
,
int8_t
*
buffer
);
// 8 bit int matrix product
void
Sgemm
(
int32_t
m
,
int32_t
n
,
int32_t
k
,
int8_t
alpha
,
const
int8_t
*
A
,
int32_t
lda
,
const
int8_t
*
B
,
int32_t
ldb
,
int8_t
beta
,
int32_t
*
C
,
int32_t
ldc
,
bool
relu
,
int8_t
*
bias
);
// 8 bit int write back
// C = alpha * A * B + beta * C
void
WriteWithAlphaBeta
(
int32_t
mc
,
int32_t
nc
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
);
// C = A * B
void
WriteBasic
(
int32_t
mc
,
int32_t
nc
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
);
// C = A * B + C
void
WriteWithAdd
(
int32_t
mc
,
int32_t
nc
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
);
// C = A * B + bias
void
WriteWithAddV1
(
int32_t
mc
,
int32_t
nc
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
,
int8_t
*
bias
);
// C = A * B + C, relu(C)
void
WriteWithAddRelu
(
int32_t
mc
,
int32_t
nc
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
);
// C = A * B + bias, relu(C)
void
WriteWithAddReluV1
(
int32_t
mc
,
int32_t
nc
,
int32_t
*
c
,
int32_t
*
C
,
int32_t
ldc
,
int8_t
*
bias
);
private:
int
MC
=
0
;
int
KC
=
0
;
int
NC
=
0
;
// 32位 float
float
*
packedA
;
float
*
packedB
;
float
*
packedC
;
float
*
zero
;
// 8 bit int
int8_t
*
packedA_int8
;
int8_t
*
packedB_int8
;
int32_t
*
packedC_int8
;
int8_t
*
zero_int8
;
};
}
// namespace math
...
...
src/operators/math/gemm_int8.cpp
0 → 100644
浏览文件 @
5ac1e63c
此差异已折叠。
点击以展开。
src/operators/math/im2col.cpp
浏览文件 @
5ac1e63c
此差异已折叠。
点击以展开。
src/operators/math/math_function.cpp
浏览文件 @
5ac1e63c
...
...
@@ -135,7 +135,7 @@ template <typename T>
struct
ClearTensor
<
CPU
,
T
>
{
void
operator
()(
framework
::
Tensor
*
tensor
)
{
auto
size
=
tensor
->
numel
();
auto
*
tensor_data
=
tensor
->
data
<
float
>
();
auto
*
tensor_data
=
tensor
->
data
<
T
>
();
memset
((
void
*
)
tensor_data
,
0
,
sizeof
(
T
)
*
size
);
// NOLINT
}
};
...
...
@@ -151,9 +151,9 @@ struct RowwiseAdd<CPU, T> {
PADDLE_MOBILE_ENFORCE
((
output
->
dims
()
==
in_dims
),
"output->dims() must be equal to in_dims."
);
auto
*
input_data
=
input
.
data
<
float
>
();
auto
*
out_data
=
output
->
data
<
float
>
();
auto
*
vec_data
=
vector
.
data
<
float
>
();
auto
*
input_data
=
input
.
data
<
T
>
();
auto
*
out_data
=
output
->
data
<
T
>
();
auto
*
vec_data
=
vector
.
data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
in_dims
[
0
];
++
i
)
{
for
(
int64_t
j
=
0
;
j
<
size
;
++
j
)
{
out_data
[
i
*
size
+
j
]
=
input_data
[
i
*
size
+
j
]
+
vec_data
[
j
];
...
...
src/operators/math/math_function.h
浏览文件 @
5ac1e63c
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <cmath>
#include <string>
#include "framework/tensor.h"
namespace
paddle_mobile
{
...
...
@@ -25,7 +26,7 @@ template <typename T>
void
matmul
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
const
framework
::
Tensor
&
matrix_b
,
bool
trans_b
,
T
alpha
,
framework
::
Tensor
*
matrix_out
,
T
beta
,
bool
relu
=
false
,
float
*
bias
=
nullptr
);
T
*
bias
=
nullptr
);
template
<
typename
T
>
void
matmulWithBn
(
const
framework
::
Tensor
&
matrix_a
,
bool
trans_a
,
...
...
src/operators/math/math_function_int8.cpp
0 → 100644
浏览文件 @
5ac1e63c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cstring>
#include <string>
#include "operators/math/gemm.h"
#include "operators/math/math_function.h"
namespace
paddle_mobile
{
namespace
operators
{
namespace
math
{
// int8 specialization of matmul.
//
// Multiplies two 2-D int8 tensors through Gemm::Sgemm and writes the result
// into the int32 buffer of matrix_out.
//
//   matrix_a   : 2-D int8 tensor; when trans_a is true it is transposed into
//                a temporary buffer before the product is computed.
//   trans_a    : whether matrix_a must be transposed first.
//   matrix_b   : 2-D int8 tensor, always passed to Sgemm as stored.
//   trans_b    : accepted for signature compatibility; this specialization
//                never reads it.
//   alpha/beta : forwarded unchanged to Gemm::Sgemm.
//   matrix_out : 2-D tensor whose dims give M and N; its data is accessed as
//                int32_t.
//   relu/bias  : forwarded unchanged to Gemm::Sgemm.
template <>
void matmul<int8_t>(const framework::Tensor &matrix_a, bool trans_a,
                    const framework::Tensor &matrix_b, bool trans_b,
                    int8_t alpha, framework::Tensor *matrix_out, int8_t beta,
                    bool relu, int8_t *bias) {
  auto dim_a = matrix_a.dims();
  auto dim_b = matrix_b.dims();
  auto dim_out = matrix_out->dims();
  PADDLE_MOBILE_ENFORCE(
      dim_a.size() == 2 && dim_b.size() == 2 && dim_out.size() == 2,
      "The input and output of matmul be matrix");

  int32_t M = dim_out[0];
  int32_t N = dim_out[1];
  // The shared dimension is the column count of A, or its row count when A
  // is going to be transposed.
  int32_t K = (!trans_a) ? dim_a[1] : dim_a[0];
  Gemm gemm;

  if (trans_a) {
    int32_t numel = matrix_a.numel();
    int32_t m = matrix_a.dims()[0];
    int32_t n = matrix_a.dims()[1];
    const int8_t *src = matrix_a.data<int8_t>();
    int8_t *a = static_cast<int8_t *>(
        paddle_mobile::memory::Alloc(sizeof(int8_t) * numel));
    // Write the (m x n) source out column by column, i.e. build the
    // row-major (n x m) transpose.
    int32_t index = 0;
    for (int32_t j = 0; j < n; j++) {
      for (int32_t i = 0; i < m; i++) {
        a[index++] = src[i * n + j];
      }
    }

    gemm.Sgemm(M, N, K, alpha, a, K, matrix_b.data<int8_t>(), N, beta,
               matrix_out->data<int32_t>(), N, relu, bias);
    // The transposed copy is only needed for the duration of the product;
    // release it so that repeated calls do not leak.
    paddle_mobile::memory::Free(a);
  } else {
    gemm.Sgemm(M, N, K, alpha, matrix_a.data<int8_t>(), K,
               matrix_b.data<int8_t>(), N, beta, matrix_out->data<int32_t>(), N,
               relu, bias);
  }
}
}
// namespace math
}
// namespace operators
}
// namespace paddle_mobile
src/operators/math/vol2col.cpp
浏览文件 @
5ac1e63c
...
...
@@ -32,9 +32,6 @@ class Vol2ColFunctor<CPU, T> {
void
operator
()(
const
Tensor
&
vol
,
const
std
::
vector
<
int
>
&
dilations
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
paddings
,
Tensor
*
col
)
const
{
// PADDLE_ENFORCE(vol.dims().size() == 4);
// PADDLE_ENFORCE(col->dims().size() == 7);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
...
...
@@ -48,32 +45,6 @@ class Vol2ColFunctor<CPU, T> {
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
// PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
// ((dilations[0] * (filter_depth - 1)
// + 1))) /
// strides[0] +
// 1,
// output_depth,
// "input_depth and output_depth are "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
// ((dilations[1] * (filter_height -
// 1) + 1))) /
// strides[1] +
// 1,
// output_height,
// "input_height and output_height are
// "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
// ((dilations[2] * (filter_width - 1)
// + 1))) /
// strides[2] +
// 1,
// output_width,
// "input_width and output_width are "
// "mismatching.");
const
T
*
vol_data
=
vol
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
...
...
@@ -119,9 +90,6 @@ class Col2VolFunctor<CPU, T> {
void
operator
()(
const
Tensor
&
col
,
const
std
::
vector
<
int
>
&
dilations
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
paddings
,
Tensor
*
vol
)
const
{
// PADDLE_ENFORCE(vol->dims().size() == 4);
// PADDLE_ENFORCE(col.dims().size() == 7);
int
input_channels
=
vol
->
dims
()[
0
];
int
input_depth
=
vol
->
dims
()[
1
];
int
input_height
=
vol
->
dims
()[
2
];
...
...
@@ -135,31 +103,6 @@ class Col2VolFunctor<CPU, T> {
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
// PADDLE_ENFORCE_EQ((input_depth + 2 * paddings[0] -
// ((dilations[0] * (filter_depth - 1)
// + 1))) /
// strides[0] +
// 1,
// output_depth,
// "input_depth and output_depth are "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_height + 2 * paddings[1] -
// ((dilations[1] * (filter_height -
// 1) + 1))) /
// strides[1] +
// 1,
// output_height,
// "input_height and output_height are
// "
// "mismatching.");
// PADDLE_ENFORCE_EQ((input_width + 2 * paddings[2] -
// ((dilations[2] * (filter_width - 1)
// + 1))) /
// strides[2] +
// 1,
// output_width,
// "input_width and output_width are "
// "mismatching.");
T
*
vol_data
=
vol
->
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
...
...
@@ -195,9 +138,9 @@ class Col2VolFunctor<CPU, T> {
};
template
class
Vol2ColFunctor
<
CPU
,
float
>;
template
class
Vol2ColFunctor
<
CPU
,
double
>;
template
class
Vol2ColFunctor
<
CPU
,
int8_t
>;
template
class
Col2VolFunctor
<
CPU
,
float
>;
template
class
Col2VolFunctor
<
CPU
,
double
>;
template
class
Col2VolFunctor
<
CPU
,
int8_t
>;
}
// namespace math
}
// namespace operators
...
...
src/operators/op_param.h
浏览文件 @
5ac1e63c
...
...
@@ -2150,14 +2150,12 @@ class QuantizeParam : public OpParam {
const
AttributeMap
&
attrs
,
const
Scope
&
scope
)
{
input_
=
InputXFrom
<
GType
>
(
inputs
,
scope
);
out_
=
OutFrom
<
GType
>
(
outputs
,
scope
);
if
(
HasAttr
(
"is_static"
,
attrs
))
{
is_static_
=
GetAttr
<
bool
>
(
"is_static"
,
attrs
);
}
// online
// scale = max(abs(x))
online_scale_
=
GetVarValue
<
GType
>
(
"OutScale"
,
outputs
,
scope
);
// offline
if
(
HasAttr
(
"static_scale"
,
attrs
))
{
is_static_
=
true
;
static_scale_
=
GetAttr
<
float
>
(
"static_scale"
,
attrs
);
}
// x = round(scale * x)
...
...
@@ -2179,7 +2177,7 @@ class QuantizeParam : public OpParam {
float
static_scale_
=
1.0
f
;
// round method type
// nearest_zero and nearest_even is valid currently
RoundType
round_type_
=
ROUND_NEAREST_
TO_EVEN
;
RoundType
round_type_
=
ROUND_NEAREST_
AWAY_ZERO
;
};
template
<
typename
Dtype
>
...
...
test/CMakeLists.txt
浏览文件 @
5ac1e63c
...
...
@@ -258,6 +258,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE
(
test-gemm-accuracy common/test_gemm_accuracy.cpp
)
target_link_libraries
(
test-gemm-accuracy paddle-mobile
)
# gen test
ADD_EXECUTABLE
(
test-gemm-int8-accuracy common/test_gemm_int8_accuracy.cpp
)
target_link_libraries
(
test-gemm-int8-accuracy paddle-mobile
)
# gen test
ADD_EXECUTABLE
(
test-gemm-perf common/test_gemm_perf.cpp
)
target_link_libraries
(
test-gemm-perf paddle-mobile
)
...
...
test/common/test_gemm_accuracy.cpp
浏览文件 @
5ac1e63c
...
...
@@ -84,7 +84,7 @@ int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) {
}
paddle_mobile
::
operators
::
math
::
Gemm
gemm
;
gemm
.
SgemmWithBn
(
m
,
n
,
k
,
0.9
,
a
,
lda
,
b
,
ldb
,
0.3
,
c
,
ldc
,
relu
,
scale
,
bias
,
gemm
.
SgemmWithBn
(
m
,
n
,
k
,
1
,
a
,
lda
,
b
,
ldb
,
0.3
,
c
,
ldc
,
relu
,
scale
,
bias
,
nullptr
);
int
eq
=
0
;
int
neq
=
0
;
...
...
test/common/test_gemm_int8_accuracy.cpp
0 → 100644
浏览文件 @
5ac1e63c
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <random>
#include "../test_helper.h"
#include "common/log.h"
#include "memory/t_malloc.h"
#include "operators/math/gemm.h"
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
#define c1(i, j) c1[(i)*ldc + (j)]
using
std
::
default_random_engine
;
using
std
::
uniform_int_distribution
;
// Prints an m x n int32 matrix stored row-major with leading dimension ldc.
// Each row goes on its own line with elements separated by " | ", and a
// blank line follows the whole matrix.
void print_matirx(int m, int n, int ldc, int32_t *c) {
  for (int row = 0; row < m; ++row) {
    const int32_t *line = c + row * ldc;
    // The first element is printed on its own so that the separator only
    // appears between elements.
    std::cout << line[0];
    for (int col = 1; col < n; ++col) {
      std::cout << " | " << line[col];
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}
// Prints an m x n int8 matrix stored row-major with leading dimension ldc.
// Elements are widened to int32_t so they are streamed as numbers rather
// than characters; rows are separated by newlines, elements by " | ", and a
// blank line follows the whole matrix.
void print_matirx(int m, int n, int ldc, int8_t *c) {
  for (int row = 0; row < m; ++row) {
    const int8_t *line = c + row * ldc;
    std::cout << static_cast<int32_t>(line[0]);
    for (int col = 1; col < n; ++col) {
      std::cout << " | " << static_cast<int32_t>(line[col]);
    }
    std::cout << std::endl;
  }
  std::cout << std::endl;
}
int
do_sgemm
(
int
m
,
int
n
,
int
k
,
bool
relu
,
int
pr
)
{
int
lda
=
k
;
int
ldb
=
n
;
int
ldc
=
n
;
default_random_engine
e
;
uniform_int_distribution
<
int8_t
>
pixel
(
-
127
,
127
);
int8_t
*
a
=
static_cast
<
int8_t
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
int8_t
)
*
m
*
k
));
int8_t
*
b
=
static_cast
<
int8_t
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
int8_t
)
*
k
*
n
));
int32_t
*
c
=
static_cast
<
int32_t
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
int32_t
)
*
m
*
n
));
int32_t
*
c1
=
static_cast
<
int32_t
*>
(
paddle_mobile
::
memory
::
Alloc
(
sizeof
(
int32_t
)
*
m
*
n
));
for
(
int
i
=
0
;
i
<
m
*
k
;
++
i
)
{
a
[
i
]
=
pixel
(
e
);
}
for
(
int
i
=
0
;
i
<
k
*
n
;
++
i
)
{
b
[
i
]
=
pixel
(
e
);
}
for
(
int
i
=
0
;
i
<
m
;
++
i
)
{
for
(
int
j
=
0
;
j
<
n
;
++
j
)
{
int32_t
r
=
0
;
for
(
int
p
=
0
;
p
<
k
;
p
++
)
{
r
+=
static_cast
<
int32_t
>
(
a
(
i
,
p
))
*
static_cast
<
int32_t
>
(
b
(
p
,
j
));
}
c1
(
i
,
j
)
=
r
;
}
}
paddle_mobile
::
operators
::
math
::
Gemm
gemm
;
gemm
.
Sgemm
(
m
,
n
,
k
,
static_cast
<
int8_t
>
(
1
),
a
,
lda
,
b
,
ldb
,
static_cast
<
int8_t
>
(
0
),
c
,
ldc
,
relu
,
nullptr
);
int
eq
=
0
;
int
neq
=
0
;
for
(
int
i
=
0
;
i
<
m
*
n
;
++
i
)
{
if
(
c
[
i
]
==
c1
[
i
])
{
++
eq
;
}
else
{
++
neq
;
}
}
if
(
pr
>
0
)
{
std
::
cout
<<
"A:"
<<
std
::
endl
;
print_matirx
(
m
,
k
,
lda
,
a
);
std
::
cout
<<
"B:"
<<
std
::
endl
;
print_matirx
(
k
,
n
,
ldb
,
b
);
std
::
cout
<<
"C:"
<<
std
::
endl
;
print_matirx
(
m
,
n
,
ldc
,
c
);
std
::
cout
<<
"C1:"
<<
std
::
endl
;
print_matirx
(
m
,
n
,
ldc
,
c1
);
}
std
::
cout
<<
"mnk="
<<
m
<<
" "
<<
n
<<
" "
<<
k
<<
" relu="
<<
relu
<<
" eq="
<<
eq
<<
" neq="
<<
neq
<<
std
::
endl
;
paddle_mobile
::
memory
::
Free
(
a
);
paddle_mobile
::
memory
::
Free
(
b
);
paddle_mobile
::
memory
::
Free
(
c
);
paddle_mobile
::
memory
::
Free
(
c1
);
return
0
;
}
// Entry point: runs the int8 GEMM accuracy check over a fixed list of
// problem sizes. Only the first, small case asks for the matrices to be
// printed; relu is disabled for every case.
int main() {
  struct GemmCase {
    int m;
    int n;
    int k;
    int pr;
  };
  const GemmCase cases[] = {
      {9, 9, 9, 10},       {10, 6, 12, 0},     {512, 256, 384, 0},
      {1366, 768, 256, 0}, {1255, 755, 333, 0}, {555, 777, 999, 0},
      {1024, 1024, 1024, 0},
  };

  for (const GemmCase &tc : cases) {
    do_sgemm(tc.m, tc.n, tc.k, false, tc.pr);
  }

  return 0;
}
test/common/test_gemm_perf.cpp
浏览文件 @
5ac1e63c
...
...
@@ -28,13 +28,11 @@ limitations under the License. */
int
main
()
{
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
>
paddle_mobile
;
paddle_mobile
.
SetThreadNum
(
4
);
Tensor
aa
,
bb
,
cc
,
scale
,
bias
;
paddle_mobile
.
SetThreadNum
(
1
);
Tensor
aa
,
bb
,
cc
;
auto
aaptr
=
aa
.
mutable_data
<
float
>
({
m
,
k
});
auto
bbptr
=
bb
.
mutable_data
<
float
>
({
k
,
n
});
auto
ccptr
=
cc
.
mutable_data
<
float
>
({
m
,
n
});
auto
scaleptr
=
scale
.
mutable_data
<
float
>
({
m
});
auto
biasptr
=
bias
.
mutable_data
<
float
>
({
m
});
for
(
int
i
=
0
;
i
<
m
*
k
;
++
i
)
{
aaptr
[
i
]
=
2
;
...
...
@@ -45,23 +43,55 @@ int main() {
for
(
int
i
=
0
;
i
<
m
*
n
;
++
i
)
{
ccptr
[
i
]
=
2
;
}
for
(
int
i
=
0
;
i
<
m
;
++
i
)
{
scaleptr
[
i
]
=
1
;
biasptr
[
i
]
=
0
;
Tensor
aa_int8
,
bb_int8
,
cc_int8
;
auto
aaptr_int8
=
aa_int8
.
mutable_data
<
int8_t
>
({
m
,
k
});
auto
bbptr_int8
=
bb_int8
.
mutable_data
<
int8_t
>
({
k
,
n
});
auto
ccptr_int8
=
cc_int8
.
mutable_data
<
int32_t
>
({
m
,
n
});
for
(
int
i
=
0
;
i
<
m
*
k
;
++
i
)
{
aaptr_int8
[
i
]
=
static_cast
<
int8_t
>
(
2
);
}
for
(
int
i
=
0
;
i
<
k
*
n
;
++
i
)
{
bbptr_int8
[
i
]
=
static_cast
<
int8_t
>
(
2
);
}
for
(
int
i
=
0
;
i
<
m
*
n
;
++
i
)
{
ccptr_int8
[
i
]
=
static_cast
<
int32_t
>
(
2
);
}
auto
time1
=
time
();
// float
// warm-up 10 times
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
paddle_mobile
::
operators
::
math
::
matmul
<
float
>
(
aa
,
false
,
bb
,
false
,
static_cast
<
float
>
(
1
),
&
cc
,
static_cast
<
float
>
(
0
),
false
,
biasptr
);
false
,
nullptr
);
}
// paddle_mobile::operators::math::matmulWithBn<float>(
// aa, false, bb, false, static_cast<float>(1), &cc,
// static_cast<float>(0), true, &scale, &bias, 0);
auto
time1
=
time
();
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
paddle_mobile
::
operators
::
math
::
matmul
<
float
>
(
aa
,
false
,
bb
,
false
,
static_cast
<
float
>
(
1
),
&
cc
,
static_cast
<
float
>
(
0
),
false
,
nullptr
);
}
auto
time2
=
time
();
std
::
cout
<<
"gemm cost :"
<<
time_diff
(
time1
,
time2
)
/
10
<<
"ms
\n
"
;
std
::
cout
<<
"float gemm cost :"
<<
time_diff
(
time1
,
time2
)
/
10
<<
"ms
\n
"
;
// int8_t
// warm-up 10 times
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
paddle_mobile
::
operators
::
math
::
matmul
<
int8_t
>
(
aa_int8
,
false
,
bb_int8
,
false
,
static_cast
<
int8_t
>
(
1
),
&
cc_int8
,
static_cast
<
int8_t
>
(
0
),
false
,
nullptr
);
}
auto
time3
=
time
();
for
(
int
j
=
0
;
j
<
10
;
++
j
)
{
paddle_mobile
::
operators
::
math
::
matmul
<
int8_t
>
(
aa_int8
,
false
,
bb_int8
,
false
,
static_cast
<
int8_t
>
(
1
),
&
cc_int8
,
static_cast
<
int8_t
>
(
0
),
false
,
nullptr
);
}
auto
time4
=
time
();
std
::
cout
<<
"int8_t gemm cost :"
<<
time_diff
(
time3
,
time4
)
/
10
<<
"ms
\n
"
;
return
0
;
}
test/net/test_googlenet.cpp
浏览文件 @
5ac1e63c
...
...
@@ -25,27 +25,31 @@ int main() {
paddle_mobile
::
PaddleMobile
<
paddle_mobile
::
CPU
>
paddle_mobile
;
#endif
paddle_mobile
.
SetThreadNum
(
4
);
bool
optimize
=
tru
e
;
paddle_mobile
.
SetThreadNum
(
1
);
bool
optimize
=
fals
e
;
auto
time1
=
time
();
if
(
paddle_mobile
.
Load
(
g_googlenet
,
optimize
))
{
auto
time2
=
time
();
std
::
cout
<<
"load cost :"
<<
time_diff
(
time1
,
time2
)
<<
"ms"
<<
std
::
endl
;
std
::
vector
<
float
>
input
;
std
::
vector
<
float
>
output
;
std
::
vector
<
int64_t
>
dims
{
1
,
3
,
224
,
224
};
GetInput
<
float
>
(
g_test_image_1x3x224x224
,
&
input
,
dims
);
// 预热十次
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_resul
t
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
//
//
预热十次
//
for (int i = 0; i < 10; ++i) {
// outpu
t = paddle_mobile.Predict(input, dims);
//
}
auto
time3
=
time
();
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
auto
vec_resul
t
=
paddle_mobile
.
Predict
(
input
,
dims
);
outpu
t
=
paddle_mobile
.
Predict
(
input
,
dims
);
}
auto
time4
=
time
();
std
::
cout
<<
"predict cost :"
<<
time_diff
(
time3
,
time4
)
/
10
<<
"ms"
<<
std
::
endl
;
for
(
int
i
=
0
;
i
<
output
.
size
();
++
i
)
{
DLOG
<<
"result["
<<
i
<<
"] = "
<<
output
[
i
];
}
}
return
0
;
}
test/operators/test_dequantize_op.cpp
浏览文件 @
5ac1e63c
...
...
@@ -59,7 +59,7 @@ int TestDequqntizeOp() {
framework
::
Tensor
output_cmp
;
output_cmp
.
Resize
(
dim
);
float
dequant_scale
=
1.
f
/
(
1.27
*
1.74
)
;
float
dequant_scale
=
1.
27
/
1.74
;
dequantize
(
input
,
dequant_scale
,
&
output_cmp
);
const
float
*
output_cmp_data
=
output_cmp
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
...
...
test/operators/test_int8_conv_op.cpp
浏览文件 @
5ac1e63c
...
...
@@ -140,10 +140,10 @@ int TestConvOp() {
int
dilation_w
=
1
;
int
batch_size
=
1
;
int
input_c
=
3
;
int
input_h
=
25
;
int
input_w
=
25
;
int
output_c
=
3
;
int
input_c
=
6
3
;
int
input_h
=
51
;
int
input_w
=
51
;
int
output_c
=
125
;
framework
::
DDim
input_shape
=
framework
::
make_ddim
({
batch_size
,
input_c
,
input_h
,
input_w
});
framework
::
DDim
filter_shape
=
...
...
@@ -158,11 +158,11 @@ int TestConvOp() {
auto
input_var
=
scope
.
get
()
->
Var
(
"input"
);
auto
input
=
input_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
Itype
>
(
input
,
input_shape
,
-
127
,
127
);
SetupTensor
<
Itype
>
(
input
,
input_shape
,
-
20
,
20
);
auto
filter_var
=
scope
.
get
()
->
Var
(
"filter"
);
auto
filter
=
filter_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
Itype
>
(
filter
,
filter_shape
,
-
127
,
127
);
SetupTensor
<
Itype
>
(
filter
,
filter_shape
,
-
20
,
20
);
auto
output_var
=
scope
.
get
()
->
Var
(
"output"
);
framework
::
AttributeMap
attrs
;
...
...
@@ -174,28 +174,40 @@ int TestConvOp() {
auto
*
op
=
new
operators
::
ConvOp
<
CPU
,
float
>
(
"conv2d"
,
inputs
,
outputs
,
attrs
,
scope
);
struct
timespec
ts_begin
,
ts_end
;
op
->
InferShape
();
// warmup
op
->
Run
();
int
kernel_extent_h
=
dilation_h
*
(
kernel_h
-
1
)
+
1
;
int
kernel_extent_w
=
dilation_w
*
(
kernel_w
-
1
)
+
1
;
int
output_h
=
(
input_h
+
2
*
pad_h
-
kernel_extent_h
)
/
stride_h
+
1
;
int
output_w
=
(
input_w
+
2
*
pad_w
-
kernel_extent_w
)
/
stride_w
+
1
;
auto
output_shape
=
framework
::
make_ddim
(
std
::
vector
<
int
>
({
batch_size
,
output_c
,
output_h
,
output_w
}));
framework
::
Tensor
output_cmp
;
output_cmp
.
mutable_data
<
Otype
>
(
output_shape
);
conv2d
<
Itype
,
Otype
>
(
input
,
filter
,
attrs
,
&
output_cmp
);
// compare results
auto
output
=
output_var
->
template
Get
<
framework
::
LoDTensor
>();
const
Otype
*
output_data
=
output
->
data
<
Otype
>
();
Otype
*
output_cmp_data
=
output_cmp
.
data
<
Otype
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
output_cmp_data
[
i
],
"output[%d] = %d, output_cmp[%d] = %d"
,
i
,
output_data
[
i
],
i
,
output_cmp_data
[
i
]);
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts_begin
);
for
(
int
i
=
0
;
i
<
10
;
++
i
)
{
op
->
Run
();
}
clock_gettime
(
CLOCK_MONOTONIC
,
&
ts_end
);
uint64_t
elapsed
=
(
ts_end
.
tv_sec
-
ts_begin
.
tv_sec
)
*
1e3
+
(
ts_end
.
tv_nsec
-
ts_begin
.
tv_nsec
)
/
1e6
;
LOG
(
kLOG_INFO
)
<<
"elapsed: "
<<
elapsed
/
10.0
<<
" ms"
;
/*
int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;
int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
int output_h = (input_h + 2 * pad_h - kernel_extent_h) / stride_h + 1;
int output_w = (input_w + 2 * pad_w - kernel_extent_w) / stride_w + 1;
auto output_shape = framework::make_ddim(
std::vector<int>({batch_size, output_c, output_h, output_w}));
framework::Tensor output_cmp;
output_cmp.mutable_data<Otype>(output_shape);
conv2d<Itype, Otype>(input, filter, attrs, &output_cmp);
// compare results
auto output = output_var->template Get<framework::LoDTensor>();
const Otype *output_data = output->data<Otype>();
Otype *output_cmp_data = output_cmp.data<Otype>();
for (int i = 0; i < output->numel(); ++i) {
PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i],
"output[%d] = %d, output_cmp[%d] = %d", i,
output_data[i], i, output_cmp_data[i]);
}
*/
delete
op
;
return
0
;
}
...
...
@@ -203,12 +215,42 @@ int TestConvOp() {
}
// namespace paddle_mobile
int
main
()
{
// kernel = 7, pad = 0, stride = 2
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=0, stride=2"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
0
,
2
>
();
// kernel = 7, pad = 3, stride = 2
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=7, pad=3, stride=2"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
7
,
3
,
2
>
();
// kernel = 3, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=3, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
3
,
0
,
1
>
();
// kernel = 3, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=3, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
3
,
0
,
1
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 3, pad = 1, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=3, pad=1, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
3
,
1
,
1
>
();
// kernel = 3, pad = 1, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=3, pad=1, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
3
,
1
,
1
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 5, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=5, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
5
,
0
,
1
>
();
// kernel = 5, pad = 0, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=5, pad=0, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
5
,
0
,
1
>
();
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"
\n
"
;
// kernel = 5, pad = 2, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"int8, kernel=5, pad=2, stride=1"
;
paddle_mobile
::
TestConvOp
<
int8_t
,
int32_t
,
5
,
2
,
1
>
();
// kernel = 5, pad = 2, stride = 1
LOG
(
paddle_mobile
::
kLOG_INFO
)
<<
"float, kernel=5, pad=2, stride=1"
;
paddle_mobile
::
TestConvOp
<
float
,
float
,
5
,
2
,
1
>
();
}
test/operators/test_mul_op.cpp
浏览文件 @
5ac1e63c
...
...
@@ -12,80 +12,80 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/mul_op.h"
int
main
()
{
paddle_mobile
::
Loader
<
paddle_mobile
::
CPU
>
loader
;
auto
program
=
loader
.
Load
(
g_resnet
);
PADDLE_MOBILE_ENFORCE
(
program
.
originProgram
!=
nullptr
,
"program file read fail"
);
Executor4Test
<
paddle_mobile
::
CPU
,
paddle_mobile
::
operators
::
MulOp
<
paddle_mobile
::
CPU
,
float
>>
executor
(
program
,
"mul"
);
// 1. input_tensors;
vector
<
Tensor
>
input_tensors
;
Tensor
input1
;
auto
input1_data
=
CreateInput
<
float
>
(
&
input1
,
{
3
,
2
,
1
,
1
},
0
,
1
);
input_tensors
.
push_back
(
input1
);
Tensor
input2
;
auto
input2_data
=
CreateInput
<
float
>
(
&
input2
,
{
2
,
3
},
0
,
1
);
input_tensors
.
push_back
(
input2
);
// 2. input_names
vector
<
string
>
input_names
({
"pool2d_0.tmp_0"
,
"fc_0.w_0"
,
});
// 3. output_names
vector
<
string
>
output_names
({
"fc_0.tmp_0"
});
// 4. out_dims;
vector
<
DDim
>
out_ddims
;
auto
out_ddim
=
paddle_mobile
::
framework
::
make_ddim
({
3
,
3
});
out_ddims
.
push_back
(
out_ddim
);
auto
output
=
executor
.
Predict
<
LoDTensor
>
(
input_tensors
,
input_names
,
output_names
,
out_ddims
);
auto
output0_data
=
output
[
0
]
->
data
<
float
>
();
auto
dim_1
=
input1
.
numel
()
/
input1
.
dims
()[
0
];
DLOG
<<
" input1 : "
;
for
(
int
i
=
0
;
i
<
input1
.
dims
()[
0
];
++
i
)
{
for
(
int
j
=
0
;
j
<
dim_1
;
++
j
)
{
DLOGF
(
"%f "
,
input1_data
[
i
*
dim_1
+
j
]);
#define a(i, j) a[(i)*lda + (j)]
#define b(i, j) b[(i)*ldb + (j)]
#define c(i, j) c[(i)*ldc + (j)]
namespace
paddle_mobile
{
using
framework
::
AttributeMap
;
using
framework
::
DDim
;
using
framework
::
Scope
;
using
framework
::
make_ddim
;
template
<
typename
I
,
typename
O
>
int
TestMulOP
()
{
int32_t
m
=
1024
;
int32_t
n
=
1024
;
int32_t
k
=
1024
;
int32_t
lda
=
k
;
int32_t
ldb
=
n
;
int32_t
ldc
=
n
;
DDim
inputA_shape
=
make_ddim
({
m
,
k
});
DDim
inputB_shape
=
make_ddim
({
k
,
n
});
VariableNameMap
inputs
;
VariableNameMap
outputs
;
auto
scope
=
std
::
make_shared
<
Scope
>
();
inputs
[
"X"
]
=
std
::
vector
<
std
::
string
>
({
"inputA"
});
inputs
[
"Y"
]
=
std
::
vector
<
std
::
string
>
({
"inputB"
});
outputs
[
"Out"
]
=
std
::
vector
<
std
::
string
>
({
"output"
});
auto
inputA_var
=
scope
.
get
()
->
Var
(
"inputA"
);
auto
inputA
=
inputA_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
I
>
(
inputA
,
inputA_shape
,
-
127
,
127
);
auto
inputB_var
=
scope
.
get
()
->
Var
(
"inputB"
);
auto
inputB
=
inputB_var
->
template
GetMutable
<
framework
::
LoDTensor
>();
SetupTensor
<
I
>
(
inputB
,
inputB_shape
,
-
127
,
127
);
auto
output_var
=
scope
.
get
()
->
Var
(
"output"
);
AttributeMap
attrs
;
attrs
[
"x_num_col_dims"
].
Set
<
int
>
(
1
);
attrs
[
"y_num_col_dims"
].
Set
<
int
>
(
1
);
auto
*
op
=
new
operators
::
MulOp
<
CPU
,
float
>
(
"mul"
,
inputs
,
outputs
,
attrs
,
scope
);
op
->
InferShape
();
op
->
Run
();
auto
output
=
output_var
->
template
Get
<
framework
::
LoDTensor
>();
const
O
*
output_data
=
output
->
data
<
O
>
();
// compare
O
*
c
=
static_cast
<
O
*>
(
memory
::
Alloc
(
sizeof
(
O
)
*
m
*
n
));
I
*
a
=
inputA
->
data
<
I
>
();
I
*
b
=
inputB
->
data
<
I
>
();
for
(
int32_t
i
=
0
;
i
<
m
;
++
i
)
{
for
(
int32_t
j
=
0
;
j
<
n
;
++
j
)
{
O
r
=
0
;
for
(
int32_t
p
=
0
;
p
<
k
;
p
++
)
{
r
+=
static_cast
<
O
>
(
a
(
i
,
p
))
*
static_cast
<
O
>
(
b
(
p
,
j
));
}
c
(
i
,
j
)
=
r
;
}
DLOGF
(
"
\n
"
);
}
auto
dim_2
=
input2
.
numel
()
/
input2
.
dims
()[
0
];
DLOG
<<
" input2 : "
;
for
(
int
i
=
0
;
i
<
input2
.
dims
()[
0
];
++
i
)
{
for
(
int
j
=
0
;
j
<
dim_2
;
++
j
)
{
DLOGF
(
"%f "
,
input2_data
[
i
*
dim_2
+
j
]);
}
DLOGF
(
"
\n
"
);
}
auto
dim_output0
=
output
[
0
]
->
numel
()
/
output
[
0
]
->
dims
()[
0
];
DLOG
<<
" output : "
;
for
(
int
i
=
0
;
i
<
output
[
0
]
->
dims
()[
0
];
++
i
)
{
for
(
int
j
=
0
;
j
<
dim_output0
;
++
j
)
{
DLOGF
(
"%f "
,
output0_data
[
i
*
dim_2
+
j
]);
}
DLOGF
(
"
\n
"
);
for
(
int32_t
i
=
0
;
i
<
m
*
n
;
++
i
)
{
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
c
[
i
],
"output[%d] = %d, output_cmp[%d] = %d"
,
i
,
static_cast
<
int32_t
>
(
output_data
[
i
]),
i
,
static_cast
<
int32_t
>
(
c
[
i
]));
}
DLOG
<<
"Run MulOp successfully!"
;
delete
op
;
return
0
;
}
}
// namespace paddle_mobile
/// output (3,3)
DLOG
<<
"output memory size : "
<<
output
[
0
]
->
memory_size
();
DLOG
<<
"output numel : "
<<
output
[
0
]
->
numel
();
DLOG
<<
input1_data
[
0
]
<<
" x "
<<
input2_data
[
0
]
<<
" + "
<<
input1_data
[
1
]
<<
" x "
<<
input2_data
[
0
+
3
]
<<
" = "
<<
output0_data
[
0
];
int
main
()
{
paddle_mobile
::
TestMulOP
<
int8_t
,
int32_t
>
();
paddle_mobile
::
TestMulOP
<
float
,
float
>
();
return
0
;
}
test/operators/test_quantize_op.cpp
浏览文件 @
5ac1e63c
...
...
@@ -18,14 +18,6 @@ limitations under the License. */
namespace
paddle_mobile
{
// static float g_test_data[50] = {
// -5.55, -5.5, -5.45, -5.0, -4.55, -4.5, -4.45, -4.0, -3.55, -3.5,
// -3.45, -3.01, -2.75, -2.5, -2.501, -2.49, -2.01, -1.75, -1.5, -1.25,
// -1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 1.25,
// 1.5, 1.75, 2.01, 2.49, 2.501, 2.5, 2.75, 3.01, 3.45, 3.5,
// 3.55, 4.0, 4.45, 4.5, 4.55, 5.0, 5.45, 5.5, 5.55, 6.0,
// };
static
float
find_abs_max
(
const
Tensor
*
input
)
{
float
max_abs
=
0.
f
;
const
float
*
x
=
input
->
data
<
const
float
>
();
...
...
@@ -60,6 +52,16 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
}
}
// Reference quantizer for the test: multiplies each float element of `input`
// by `scale`, rounds it with round(), and writes the result into `output` as
// int8_t. No clamping is performed; the rounded value is converted to int8_t
// implicitly.
//   input  - tensor read as const float data; its numel() sets the count
//   scale  - multiplier applied to every element before rounding
//   output - tensor whose int8_t buffer is obtained via mutable_data<int8_t>()
static void quantize_round_to_nearest(const Tensor *input, const float scale,
                                      Tensor *output) {
  const float *src = input->data<const float>();
  int8_t *dst = output->mutable_data<int8_t>();
  // Walk both buffers in lockstep, counting down the remaining elements.
  for (size_t remaining = input->numel(); remaining > 0; --remaining) {
    *dst++ = round(*src++ * scale);
  }
}
int
TestQuqntizeOp
()
{
framework
::
DDim
dim
=
framework
::
make_ddim
({
1
,
3
,
224
,
224
});
...
...
@@ -88,15 +90,16 @@ int TestQuqntizeOp() {
auto
output_scale
=
output_scale_var
->
template
Get
<
framework
::
LoDTensor
>();
const
float
*
output_scale_data
=
output_scale
->
data
<
float
>
();
float
max_abs
=
find_abs_max
(
input
);
float
output_scale_cmp
=
127
/
max_abs
;
float
output_scale_cmp
=
find_abs_max
(
input
);
PADDLE_MOBILE_ENFORCE
(
output_scale_cmp
==
output_scale_data
[
0
],
"output_scale = %.6f, output_scale_cmp = %.6f"
,
output_scale_cmp
,
output_scale_data
[
0
]);
framework
::
Tensor
output_cmp
;
output_cmp
.
Resize
(
dim
);
quantize_round_to_even
(
input
,
output_scale_cmp
,
&
output_cmp
);
float
scale
=
127
/
output_scale_cmp
;
// quantize_round_to_even(input, scale, &output_cmp);
quantize_round_to_nearest
(
input
,
scale
,
&
output_cmp
);
int8_t
*
output_cmp_data
=
output_cmp
.
data
<
int8_t
>
();
for
(
int
i
=
0
;
i
<
output
->
numel
();
++
i
)
{
PADDLE_MOBILE_ENFORCE
(
output_data
[
i
]
==
output_cmp_data
[
i
],
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录