Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
ca9ec692
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ca9ec692
编写于
9月 21, 2020
作者:
Z
zhupengyang
提交者:
GitHub
9月 21, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[xpu] update bert, ernie unittests (#4357)
上级
1d3754aa
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
266 additions
and
140 deletions
+266
-140
lite/CMakeLists.txt
lite/CMakeLists.txt
+1
-0
lite/tests/api/CMakeLists.txt
lite/tests/api/CMakeLists.txt
+14
-7
lite/tests/api/bert_utility.h
lite/tests/api/bert_utility.h
+118
-0
lite/tests/api/test_bert_fp32_xpu.cc
lite/tests/api/test_bert_fp32_xpu.cc
+46
-51
lite/tests/api/test_ernie_fp32_xpu.cc
lite/tests/api/test_ernie_fp32_xpu.cc
+45
-39
lite/tests/kernels/fc_compute_test.cc
lite/tests/kernels/fc_compute_test.cc
+3
-3
lite/tests/kernels/prior_box_compute_test.cc
lite/tests/kernels/prior_box_compute_test.cc
+1
-1
lite/tests/math/gemm_int8_compute_test.cc
lite/tests/math/gemm_int8_compute_test.cc
+11
-12
lite/tests/math/gemv_int8_compute_test.cc
lite/tests/math/gemv_int8_compute_test.cc
+8
-8
lite/tests/math/sgemm_c4_compute_test.cc
lite/tests/math/sgemm_c4_compute_test.cc
+11
-11
lite/tests/math/sgemv_compute_test.cc
lite/tests/math/sgemv_compute_test.cc
+8
-8
未找到文件。
lite/CMakeLists.txt
浏览文件 @
ca9ec692
...
@@ -63,6 +63,7 @@ if (WITH_TESTING)
...
@@ -63,6 +63,7 @@ if (WITH_TESTING)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL_FOR_UNITTESTS
}
"VGG19.tar.gz"
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL_FOR_UNITTESTS
}
"VGG19.tar.gz"
)
# data
# data
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL_FOR_UNITTESTS
}
"ILSVRC2012_small.tar.gz"
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL_FOR_UNITTESTS
}
"ILSVRC2012_small.tar.gz"
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL_FOR_UNITTESTS
}
"bert_data.tar.gz"
)
endif
()
endif
()
endif
()
endif
()
...
...
lite/tests/api/CMakeLists.txt
浏览文件 @
ca9ec692
...
@@ -9,11 +9,18 @@ if(LITE_WITH_ARM)
...
@@ -9,11 +9,18 @@ if(LITE_WITH_ARM)
endif
()
endif
()
function
(
xpu_x86_without_xtcl_test TARGET MODEL DATA
)
function
(
xpu_x86_without_xtcl_test TARGET MODEL DATA
)
if
(
${
DATA
}
STREQUAL
""
)
lite_cc_test
(
${
TARGET
}
SRCS
${
TARGET
}
.cc
lite_cc_test
(
${
TARGET
}
SRCS
${
TARGET
}
.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${
ops
}
${
host_kernels
}
${
x86_kernels
}
${
xpu_kernels
}
${
ops
}
${
host_kernels
}
${
x86_kernels
}
${
xpu_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/
${
MODEL
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/
${
MODEL
}
)
--data_dir=
${
LITE_MODEL_DIR
}
/
${
DATA
}
)
else
()
lite_cc_test
(
${
TARGET
}
SRCS
${
TARGET
}
.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${
ops
}
${
host_kernels
}
${
x86_kernels
}
${
xpu_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/
${
MODEL
}
--data_dir=
${
LITE_MODEL_DIR
}
/
${
DATA
}
)
endif
()
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
add_dependencies
(
${
TARGET
}
extern_lite_download_
${
MODEL
}
_tar_gz
)
add_dependencies
(
${
TARGET
}
extern_lite_download_
${
MODEL
}
_tar_gz
)
if
(
NOT
${
DATA
}
STREQUAL
""
)
if
(
NOT
${
DATA
}
STREQUAL
""
)
...
@@ -26,8 +33,8 @@ if(LITE_WITH_XPU AND NOT LITE_WITH_XTCL)
...
@@ -26,8 +33,8 @@ if(LITE_WITH_XPU AND NOT LITE_WITH_XTCL)
xpu_x86_without_xtcl_test
(
test_resnet50_fp32_xpu resnet50 ILSVRC2012_small
)
xpu_x86_without_xtcl_test
(
test_resnet50_fp32_xpu resnet50 ILSVRC2012_small
)
xpu_x86_without_xtcl_test
(
test_googlenet_fp32_xpu GoogLeNet ILSVRC2012_small
)
xpu_x86_without_xtcl_test
(
test_googlenet_fp32_xpu GoogLeNet ILSVRC2012_small
)
xpu_x86_without_xtcl_test
(
test_vgg19_fp32_xpu VGG19 ILSVRC2012_small
)
xpu_x86_without_xtcl_test
(
test_vgg19_fp32_xpu VGG19 ILSVRC2012_small
)
xpu_x86_without_xtcl_test
(
test_ernie_fp32_xpu ernie
""
)
xpu_x86_without_xtcl_test
(
test_ernie_fp32_xpu ernie
bert_data
)
xpu_x86_without_xtcl_test
(
test_bert_fp32_xpu bert
""
)
xpu_x86_without_xtcl_test
(
test_bert_fp32_xpu bert
bert_data
)
endif
()
endif
()
if
(
LITE_WITH_RKNPU
)
if
(
LITE_WITH_RKNPU
)
...
...
lite/tests/api/bert_utility.h
0 → 100644
浏览文件 @
ca9ec692
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/io.h"
#include "lite/utils/string.h"
namespace
paddle
{
namespace
lite
{
template
<
class
T
=
int64_t
>
void
ReadRawData
(
const
std
::
string
&
input_data_dir
,
std
::
vector
<
std
::
vector
<
T
>>*
input0
,
std
::
vector
<
std
::
vector
<
T
>>*
input1
,
std
::
vector
<
std
::
vector
<
T
>>*
input2
,
std
::
vector
<
std
::
vector
<
T
>>*
input3
,
std
::
vector
<
std
::
vector
<
int64_t
>>*
input_shapes
)
{
auto
lines
=
ReadLines
(
input_data_dir
);
for
(
auto
line
:
lines
)
{
std
::
vector
<
std
::
string
>
shape_and_data
=
Split
(
line
,
";"
);
std
::
vector
<
int64_t
>
input_shape
=
Split
<
int64_t
>
(
Split
(
shape_and_data
[
0
],
":"
)[
0
],
" "
);
input_shapes
->
emplace_back
(
input_shape
);
std
::
vector
<
T
>
input0_data
=
Split
<
T
>
(
Split
(
shape_and_data
[
0
],
":"
)[
1
],
" "
);
input0
->
emplace_back
(
input0_data
);
std
::
vector
<
T
>
input1_data
=
Split
<
T
>
(
Split
(
shape_and_data
[
1
],
":"
)[
1
],
" "
);
input1
->
emplace_back
(
input1_data
);
std
::
vector
<
T
>
input2_data
=
Split
<
T
>
(
Split
(
shape_and_data
[
2
],
":"
)[
1
],
" "
);
input2
->
emplace_back
(
input2_data
);
std
::
vector
<
T
>
input3_data
=
Split
<
T
>
(
Split
(
shape_and_data
[
3
],
":"
)[
1
],
" "
);
input3
->
emplace_back
(
input3_data
);
}
}
// Resizes input tensor `tensor_id` of `predictor` to `tensor_shape` and fills
// it with `tensor_value`.
//
// The number of elements implied by `tensor_shape` (product of all dims) is
// verified with CHECK_EQ against `tensor_value.size()` before anything is
// copied.
template <class T = int64_t>
void FillTensor(const std::shared_ptr<lite_api::PaddlePredictor>& predictor,
                int tensor_id,
                const std::vector<int64_t>& tensor_shape,
                const std::vector<T>& tensor_value) {
  // Look the input tensor up once and reuse the handle for resize and fill.
  auto input_tensor = predictor->GetInput(tensor_id);
  input_tensor->Resize(tensor_shape);

  int64_t tensor_size = 1;
  for (const int64_t dim : tensor_shape) {
    tensor_size *= dim;
  }
  CHECK_EQ(static_cast<size_t>(tensor_size), tensor_value.size());

  // std::copy (from <algorithm>) replaces the former unqualified memcpy, which
  // relied on <cstring> being pulled in transitively and was undefined for an
  // empty `tensor_value` (null source pointer).
  std::copy(tensor_value.begin(),
            tensor_value.end(),
            input_tensor->mutable_data<T>());
}
// Computes the fraction of samples whose predicted class ranking matches the
// reference ranking.
//
// `out` holds one row of scores per sample. `out_file` is read with
// ReadLines(); each of its lines is parsed as space-separated floats and is the
// reference row for the sample with the same index. A sample counts as correct
// when sorting the indices {0, 1, 2} by descending score gives the same order
// for the prediction and for the reference.
//
// Samples that have no reference row, or whose prediction/reference row has
// fewer than three scores, are counted as wrong instead of being read out of
// bounds. Returns 0 when `out` is empty (previously 0/0, i.e. NaN).
//
// Declared inline because it is defined in a header that may be included from
// more than one translation unit.
inline float CalBertOutAccuracy(const std::vector<std::vector<float>>& out,
                                const std::string& out_file) {
  const size_t kNumScores = 3;

  const auto lines = ReadLines(out_file);
  std::vector<std::vector<float>> ref_out;
  for (const auto& line : lines) {
    ref_out.emplace_back(Split<float>(line, " "));
  }

  if (out.empty()) {
    return 0.f;
  }

  // Returns the indices {0, 1, 2} ordered by descending score.
  const auto ranking = [](const std::vector<float>& scores) {
    std::vector<size_t> order{0, 1, 2};
    std::sort(order.begin(), order.end(), [&scores](size_t a, size_t b) {
      return scores[a] > scores[b];
    });
    return order;
  };

  int right_num = 0;
  for (size_t i = 0; i < out.size(); i++) {
    const bool comparable = i < ref_out.size() &&
                            out[i].size() >= kNumScores &&
                            ref_out[i].size() >= kNumScores;
    if (!comparable) {
      continue;  // nothing valid to compare against: treat as a miss
    }
    right_num += (ranking(out[i]) == ranking(ref_out[i]));
  }

  return static_cast<float>(right_num) / static_cast<float>(out.size());
}
// Computes the fraction of samples whose first output value is within 0.01 of
// the reference value.
//
// `out` holds one row of outputs per sample. `out_file` is read with
// ReadLines(); each of its lines is parsed as space-separated floats and is the
// reference row for the sample with the same index. Only element 0 of each row
// is compared.
//
// Samples that have no reference row, or whose prediction/reference row is
// empty, are counted as wrong instead of being read out of bounds. Returns 0
// when `out` is empty (previously 0/0, i.e. NaN).
//
// Declared inline because it is defined in a header that may be included from
// more than one translation unit.
inline float CalErnieOutAccuracy(const std::vector<std::vector<float>>& out,
                                 const std::string& out_file) {
  const float kTolerance = 0.01f;

  const auto lines = ReadLines(out_file);
  std::vector<std::vector<float>> ref_out;
  for (const auto& line : lines) {
    ref_out.emplace_back(Split<float>(line, " "));
  }

  if (out.empty()) {
    return 0.f;
  }

  int right_num = 0;
  for (size_t i = 0; i < out.size(); i++) {
    const bool comparable =
        i < ref_out.size() && !out[i].empty() && !ref_out[i].empty();
    if (!comparable) {
      continue;  // nothing valid to compare against: treat as a miss
    }
    right_num += (std::fabs(out[i][0] - ref_out[i][0]) < kTolerance);
  }

  return static_cast<float>(right_num) / static_cast<float>(out.size());
}
}
// namespace lite
}
// namespace paddle
lite/tests/api/test_bert_fp32_xpu.cc
浏览文件 @
ca9ec692
...
@@ -21,23 +21,16 @@
...
@@ -21,23 +21,16 @@
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/bert_utility.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/cp_logging.h"
DEFINE_string
(
data_dir
,
""
,
"data dir"
);
DEFINE_int32
(
iteration
,
9
,
"iteration times to run"
);
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
template
<
typename
T
>
TEST
(
Bert
,
test_bert_fp32_xpu
)
{
lite
::
Tensor
GetTensorWithShape
(
std
::
vector
<
int64_t
>
shape
)
{
lite
::
Tensor
ret
;
ret
.
Resize
(
shape
);
T
*
ptr
=
ret
.
mutable_data
<
T
>
();
for
(
int
i
=
0
;
i
<
ret
.
numel
();
++
i
)
{
ptr
[
i
]
=
(
T
)
1
;
}
return
ret
;
}
TEST
(
Ernie
,
test_ernie_fp32_xpu
)
{
lite_api
::
CxxConfig
config
;
lite_api
::
CxxConfig
config
;
config
.
set_model_dir
(
FLAGS_model_dir
);
config
.
set_model_dir
(
FLAGS_model_dir
);
config
.
set_valid_places
({
lite_api
::
Place
{
TARGET
(
kXPU
),
PRECISION
(
kFloat
)},
config
.
set_valid_places
({
lite_api
::
Place
{
TARGET
(
kXPU
),
PRECISION
(
kFloat
)},
...
@@ -46,56 +39,58 @@ TEST(Ernie, test_ernie_fp32_xpu) {
...
@@ -46,56 +39,58 @@ TEST(Ernie, test_ernie_fp32_xpu) {
config
.
set_xpu_workspace_l3_size_per_thread
();
config
.
set_xpu_workspace_l3_size_per_thread
();
auto
predictor
=
lite_api
::
CreatePaddlePredictor
(
config
);
auto
predictor
=
lite_api
::
CreatePaddlePredictor
(
config
);
int64_t
batch_size
=
1
;
std
::
string
input_data_file
=
FLAGS_data_dir
+
std
::
string
(
"/bert_in.txt"
);
int64_t
seq_len
=
64
;
std
::
vector
<
std
::
vector
<
int64_t
>>
input0
;
Tensor
sample_input
=
GetTensorWithShape
<
int64_t
>
({
batch_size
,
seq_len
,
1
});
std
::
vector
<
std
::
vector
<
int64_t
>>
input1
;
std
::
vector
<
int64_t
>
input_shape
{
batch_size
,
seq_len
,
1
};
std
::
vector
<
std
::
vector
<
int64_t
>>
input2
;
predictor
->
GetInput
(
0
)
->
Resize
(
input_shape
);
std
::
vector
<
std
::
vector
<
int64_t
>>
input3
;
predictor
->
GetInput
(
1
)
->
Resize
(
input_shape
);
std
::
vector
<
std
::
vector
<
int64_t
>>
input_shapes
;
predictor
->
GetInput
(
2
)
->
Resize
(
input_shape
);
ReadRawData
(
predictor
->
GetInput
(
3
)
->
Resize
(
input_shape
);
input_data_file
,
&
input0
,
&
input1
,
&
input2
,
&
input3
,
&
input_shapes
);
memcpy
(
predictor
->
GetInput
(
0
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
memcpy
(
predictor
->
GetInput
(
1
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
memcpy
(
predictor
->
GetInput
(
2
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
memcpy
(
predictor
->
GetInput
(
3
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
std
::
vector
<
int64_t
>
shape
=
{
1
,
64
,
1
};
std
::
vector
<
int64_t
>
fill_value
(
64
,
0
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
FillTensor
(
predictor
,
j
,
shape
,
fill_value
);
}
predictor
->
Run
();
predictor
->
Run
();
}
}
auto
start
=
GetCurrentUS
();
std
::
vector
<
std
::
vector
<
float
>>
out_rets
;
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
out_rets
.
resize
(
FLAGS_iteration
);
double
cost_time
=
0
;
for
(
int
i
=
0
;
i
<
FLAGS_iteration
;
++
i
)
{
FillTensor
(
predictor
,
0
,
input_shapes
[
i
],
input0
[
i
]);
FillTensor
(
predictor
,
1
,
input_shapes
[
i
],
input1
[
i
]);
FillTensor
(
predictor
,
2
,
input_shapes
[
i
],
input2
[
i
]);
FillTensor
(
predictor
,
3
,
input_shapes
[
i
],
input3
[
i
]);
double
start
=
GetCurrentUS
();
predictor
->
Run
();
predictor
->
Run
();
cost_time
+=
GetCurrentUS
()
-
start
;
auto
output_tensor
=
predictor
->
GetOutput
(
0
);
auto
output_shape
=
output_tensor
->
shape
();
auto
output_data
=
output_tensor
->
data
<
float
>
();
ASSERT_EQ
(
output_shape
.
size
(),
2UL
);
ASSERT_EQ
(
output_shape
[
0
],
1
);
ASSERT_EQ
(
output_shape
[
1
],
3
);
int
output_size
=
output_shape
[
0
]
*
output_shape
[
1
];
out_rets
[
i
].
resize
(
output_size
);
memcpy
(
&
(
out_rets
[
i
].
at
(
0
)),
output_data
,
sizeof
(
float
)
*
output_size
);
}
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", warmup: "
<<
FLAGS_warmup
<<
",
spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
",
iteration: "
<<
FLAGS_iteration
<<
", spend "
<<
" ms in average."
;
<<
cost_time
/
FLAGS_iteration
/
1000.0
<<
" ms in average."
;
std
::
vector
<
std
::
vector
<
float
>>
results
;
std
::
string
ref_out_file
=
FLAGS_data_dir
+
std
::
string
(
"/bert_out.txt"
);
results
.
emplace_back
(
std
::
vector
<
float
>
({
0.278893
,
0.330888
,
0.39022
}));
float
out_accuracy
=
CalBertOutAccuracy
(
out_rets
,
ref_out_file
);
auto
out
=
predictor
->
GetOutput
(
0
);
ASSERT_GT
(
out_accuracy
,
0.95
f
);
ASSERT_EQ
(
out
->
shape
().
size
(),
2
);
ASSERT_EQ
(
out
->
shape
()[
0
],
1
);
ASSERT_EQ
(
out
->
shape
()[
1
],
3
);
for
(
size_t
i
=
0
;
i
<
results
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
results
[
i
].
size
();
++
j
)
{
EXPECT_NEAR
(
out
->
data
<
float
>
()[
j
+
(
out
->
shape
()[
1
]
*
i
)],
results
[
i
][
j
],
3e-5
);
}
}
}
}
}
// namespace lite
}
// namespace lite
...
...
lite/tests/api/test_ernie_fp32_xpu.cc
浏览文件 @
ca9ec692
...
@@ -21,8 +21,12 @@
...
@@ -21,8 +21,12 @@
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/api/test_helper.h"
#include "lite/tests/api/bert_utility.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/cp_logging.h"
DEFINE_string
(
data_dir
,
""
,
"data dir"
);
DEFINE_int32
(
iteration
,
9
,
"iteration times to run"
);
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -46,56 +50,58 @@ TEST(Ernie, test_ernie_fp32_xpu) {
...
@@ -46,56 +50,58 @@ TEST(Ernie, test_ernie_fp32_xpu) {
config
.
set_xpu_workspace_l3_size_per_thread
();
config
.
set_xpu_workspace_l3_size_per_thread
();
auto
predictor
=
lite_api
::
CreatePaddlePredictor
(
config
);
auto
predictor
=
lite_api
::
CreatePaddlePredictor
(
config
);
int64_t
batch_size
=
1
;
std
::
string
input_data_file
=
FLAGS_data_dir
+
std
::
string
(
"/bert_in.txt"
);
int64_t
seq_len
=
64
;
std
::
vector
<
std
::
vector
<
int64_t
>>
input0
;
Tensor
sample_input
=
GetTensorWithShape
<
int64_t
>
({
batch_size
,
seq_len
,
1
});
std
::
vector
<
std
::
vector
<
int64_t
>>
input1
;
std
::
vector
<
int64_t
>
input_shape
{
batch_size
,
seq_len
,
1
};
std
::
vector
<
std
::
vector
<
int64_t
>>
input2
;
predictor
->
GetInput
(
0
)
->
Resize
(
input_shape
);
std
::
vector
<
std
::
vector
<
int64_t
>>
input3
;
predictor
->
GetInput
(
1
)
->
Resize
(
input_shape
);
std
::
vector
<
std
::
vector
<
int64_t
>>
input_shapes
;
predictor
->
GetInput
(
2
)
->
Resize
(
input_shape
);
ReadRawData
(
predictor
->
GetInput
(
3
)
->
Resize
(
input_shape
);
input_data_file
,
&
input0
,
&
input1
,
&
input2
,
&
input3
,
&
input_shapes
);
memcpy
(
predictor
->
GetInput
(
0
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
memcpy
(
predictor
->
GetInput
(
1
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
memcpy
(
predictor
->
GetInput
(
2
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
memcpy
(
predictor
->
GetInput
(
3
)
->
mutable_data
<
int64_t
>
(),
sample_input
.
raw_data
(),
sizeof
(
int64_t
)
*
batch_size
*
seq_len
);
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
std
::
vector
<
int64_t
>
shape
=
{
1
,
64
,
1
};
std
::
vector
<
int64_t
>
fill_value
(
64
,
0
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
FillTensor
(
predictor
,
j
,
shape
,
fill_value
);
}
predictor
->
Run
();
predictor
->
Run
();
}
}
auto
start
=
GetCurrentUS
();
std
::
vector
<
std
::
vector
<
float
>>
out_rets
;
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
out_rets
.
resize
(
FLAGS_iteration
);
double
cost_time
=
0
;
for
(
int
i
=
0
;
i
<
FLAGS_iteration
;
++
i
)
{
FillTensor
(
predictor
,
0
,
input_shapes
[
i
],
input0
[
i
]);
FillTensor
(
predictor
,
1
,
input_shapes
[
i
],
input1
[
i
]);
FillTensor
(
predictor
,
2
,
input_shapes
[
i
],
input2
[
i
]);
FillTensor
(
predictor
,
3
,
input_shapes
[
i
],
input3
[
i
]);
double
start
=
GetCurrentUS
();
predictor
->
Run
();
predictor
->
Run
();
cost_time
+=
GetCurrentUS
()
-
start
;
auto
output_tensor
=
predictor
->
GetOutput
(
0
);
auto
output_shape
=
output_tensor
->
shape
();
auto
output_data
=
output_tensor
->
data
<
float
>
();
ASSERT_EQ
(
output_shape
.
size
(),
2UL
);
ASSERT_EQ
(
output_shape
[
0
],
1
);
ASSERT_EQ
(
output_shape
[
1
],
1
);
int
output_size
=
output_shape
[
0
]
*
output_shape
[
1
];
out_rets
[
i
].
resize
(
output_size
);
memcpy
(
&
(
out_rets
[
i
].
at
(
0
)),
output_data
,
sizeof
(
float
)
*
output_size
);
}
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", warmup: "
<<
FLAGS_warmup
<<
", spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
", iteration: "
<<
FLAGS_iteration
<<
", spend "
<<
" ms in average."
;
<<
cost_time
/
FLAGS_iteration
/
1000.0
<<
" ms in average."
;
std
::
vector
<
std
::
vector
<
float
>>
results
;
results
.
emplace_back
(
std
::
vector
<
float
>
({
0.108398
}));
auto
out
=
predictor
->
GetOutput
(
0
);
ASSERT_EQ
(
out
->
shape
().
size
(),
2
);
ASSERT_EQ
(
out
->
shape
()[
0
],
1
);
ASSERT_EQ
(
out
->
shape
()[
1
],
1
);
for
(
size_t
i
=
0
;
i
<
results
.
size
();
++
i
)
{
std
::
string
ref_out_file
=
FLAGS_data_dir
+
std
::
string
(
"/ernie_out.txt"
);
for
(
size_t
j
=
0
;
j
<
results
[
i
].
size
();
++
j
)
{
float
out_accuracy
=
CalErnieOutAccuracy
(
out_rets
,
ref_out_file
);
EXPECT_NEAR
(
ASSERT_GT
(
out_accuracy
,
0.95
f
);
out
->
data
<
float
>
()[
j
+
(
out
->
shape
()[
1
]
*
i
)],
results
[
i
][
j
],
2e-5
);
}
}
}
}
}
// namespace lite
}
// namespace lite
...
...
lite/tests/kernels/fc_compute_test.cc
浏览文件 @
ca9ec692
...
@@ -121,8 +121,8 @@ class FcOPTest : public arena::TestCase {
...
@@ -121,8 +121,8 @@ class FcOPTest : public arena::TestCase {
int
k
=
wdims_
[
0
];
int
k
=
wdims_
[
0
];
int
n
=
wdims_
[
1
];
int
n
=
wdims_
[
1
];
LOG
(
INFO
)
<<
"M="
<<
m
<<
", N="
<<
n
<<
", K="
<<
k
VLOG
(
4
)
<<
"M="
<<
m
<<
", N="
<<
n
<<
", K="
<<
k
<<
", bias="
<<
flag_bias
<<
", bias="
<<
flag_bias
<<
", with_relu="
<<
with_relu_
<<
", with_relu="
<<
with_relu_
<<
", padding_weights="
<<
padding_weights_
;
<<
", padding_weights="
<<
padding_weights_
;
if
(
m
==
1
)
{
if
(
m
==
1
)
{
...
...
lite/tests/kernels/prior_box_compute_test.cc
浏览文件 @
ca9ec692
...
@@ -738,7 +738,7 @@ TEST(PriorBox, precision) {
...
@@ -738,7 +738,7 @@ TEST(PriorBox, precision) {
}
}
TEST
(
DensityPriorBox
,
precision
)
{
TEST
(
DensityPriorBox
,
precision
)
{
#if
def LITE_WITH_X86
#if
defined(LITE_WITH_X86) && !defined(LITE_WITH_XPU)
Place
place
(
TARGET
(
kX86
));
Place
place
(
TARGET
(
kX86
));
test_density_prior_box
(
place
);
test_density_prior_box
(
place
);
#endif
#endif
...
...
lite/tests/math/gemm_int8_compute_test.cc
浏览文件 @
ca9ec692
...
@@ -104,7 +104,7 @@ bool test_gemm_int8(bool tra,
...
@@ -104,7 +104,7 @@ bool test_gemm_int8(bool tra,
scale_merge_int8
[
j
]
=
scale_merge_fp32
[
j
]
/
scale_c
[
0
];
scale_merge_int8
[
j
]
=
scale_merge_fp32
[
j
]
/
scale_c
[
0
];
}
}
LOG
(
INFO
)
<<
"gemm_int8 M: "
<<
m
<<
", N: "
<<
n
<<
", K: "
<<
k
VLOG
(
4
)
<<
"gemm_int8 M: "
<<
m
<<
", N: "
<<
n
<<
", K: "
<<
k
<<
", transA: "
<<
(
tra
?
"true"
:
"false"
)
<<
", transA: "
<<
(
tra
?
"true"
:
"false"
)
<<
", transB: "
<<
(
trb
?
"true"
:
"false"
)
<<
", transB: "
<<
(
trb
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
...
@@ -344,8 +344,7 @@ TEST(TestLiteGemmInt8, gemm_prepacked_int8) {
...
@@ -344,8 +344,7 @@ TEST(TestLiteGemmInt8, gemm_prepacked_int8) {
FLAGS_power_mode
,
FLAGS_power_mode
,
th
);
th
);
if
(
flag
)
{
if
(
flag
)
{
LOG
(
INFO
)
<<
"test m = "
<<
m
<<
", n="
<<
n
VLOG
(
4
)
<<
"test m = "
<<
m
<<
", n="
<<
n
<<
", k="
<<
k
<<
", k="
<<
k
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", trans A: "
<<
(
tra
?
"true"
:
"false"
)
<<
", trans A: "
<<
(
tra
?
"true"
:
"false"
)
...
...
lite/tests/math/gemv_int8_compute_test.cc
浏览文件 @
ca9ec692
...
@@ -97,7 +97,7 @@ bool test_gemv_int8(bool tra,
...
@@ -97,7 +97,7 @@ bool test_gemv_int8(bool tra,
scale_merge_int8
[
j
]
=
scale_merge_fp32
[
j
]
/
scale_c
[
0
];
scale_merge_int8
[
j
]
=
scale_merge_fp32
[
j
]
/
scale_c
[
0
];
}
}
LOG
(
INFO
)
<<
"gemv_int8 M: "
<<
m
<<
", N: "
<<
n
VLOG
(
4
)
<<
"gemv_int8 M: "
<<
m
<<
", N: "
<<
n
<<
", transA: "
<<
(
tra
?
"true"
:
"false"
)
<<
", act: "
<<
flag_act
<<
", transA: "
<<
(
tra
?
"true"
:
"false"
)
<<
", act: "
<<
flag_act
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
);
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
);
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
...
@@ -336,7 +336,7 @@ TEST(TestLiteGemvInt8, gemv_prepacked_int8) {
...
@@ -336,7 +336,7 @@ TEST(TestLiteGemvInt8, gemv_prepacked_int8) {
six
,
six
,
alpha
);
alpha
);
if
(
flag
)
{
if
(
flag
)
{
LOG
(
INFO
)
<<
"test m = "
<<
m
<<
", n="
<<
n
VLOG
(
4
)
<<
"test m = "
<<
m
<<
", n="
<<
n
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", trans A: "
<<
(
tra
?
"true"
:
"false"
)
<<
", trans A: "
<<
(
tra
?
"true"
:
"false"
)
...
...
lite/tests/math/sgemm_c4_compute_test.cc
浏览文件 @
ca9ec692
...
@@ -98,7 +98,7 @@ bool test_sgemm_c4(
...
@@ -98,7 +98,7 @@ bool test_sgemm_c4(
basic_trans_mat_to_c4
(
da
,
da_c4
,
k
,
m
,
k
,
true
);
basic_trans_mat_to_c4
(
da
,
da_c4
,
k
,
m
,
k
,
true
);
basic_trans_mat_to_c4
(
db
,
db_c4
,
n
,
k
,
n
,
false
);
basic_trans_mat_to_c4
(
db
,
db_c4
,
n
,
k
,
n
,
false
);
LOG
(
INFO
)
<<
"sgemm_c4 M: "
<<
m
<<
", N: "
<<
n
<<
", K: "
<<
k
VLOG
(
4
)
<<
"sgemm_c4 M: "
<<
m
<<
", N: "
<<
n
<<
", K: "
<<
k
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
);
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
);
...
@@ -331,7 +331,7 @@ TEST(TestSgemmC4, test_func_sgemm_c4_prepacked) {
...
@@ -331,7 +331,7 @@ TEST(TestSgemmC4, test_func_sgemm_c4_prepacked) {
auto
flag
=
test_sgemm_c4
(
auto
flag
=
test_sgemm_c4
(
m
,
n
,
k
,
has_bias
,
has_relu
,
FLAGS_power_mode
,
th
);
m
,
n
,
k
,
has_bias
,
has_relu
,
FLAGS_power_mode
,
th
);
if
(
flag
)
{
if
(
flag
)
{
LOG
(
INFO
)
<<
"test m = "
<<
m
<<
", n="
<<
n
<<
", k="
<<
k
VLOG
(
4
)
<<
"test m = "
<<
m
<<
", n="
<<
n
<<
", k="
<<
k
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
" passed
\n
"
;
<<
" passed
\n
"
;
...
@@ -364,7 +364,7 @@ TEST(TestSgemmC8, test_func_sgemm_c8_prepacked) {
...
@@ -364,7 +364,7 @@ TEST(TestSgemmC8, test_func_sgemm_c8_prepacked) {
auto
flag
=
test_sgemm_c8
(
auto
flag
=
test_sgemm_c8
(
m
,
n
,
k
,
has_bias
,
has_relu
,
FLAGS_power_mode
,
th
);
m
,
n
,
k
,
has_bias
,
has_relu
,
FLAGS_power_mode
,
th
);
if
(
flag
)
{
if
(
flag
)
{
LOG
(
INFO
)
<<
"test m = "
<<
m
<<
", n="
<<
n
<<
", k="
<<
k
VLOG
(
4
)
<<
"test m = "
<<
m
<<
", n="
<<
n
<<
", k="
<<
k
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
", relu: "
<<
(
has_relu
?
"true"
:
"false"
)
<<
" passed
\n
"
;
<<
" passed
\n
"
;
...
...
lite/tests/math/sgemv_compute_test.cc
浏览文件 @
ca9ec692
...
@@ -75,7 +75,7 @@ bool test_sgemv(bool tra,
...
@@ -75,7 +75,7 @@ bool test_sgemv(bool tra,
// fill_tensor_const(tb, 1.f);
// fill_tensor_const(tb, 1.f);
fill_tensor_rand
(
tbias
,
-
1.
f
,
1.
f
);
fill_tensor_rand
(
tbias
,
-
1.
f
,
1.
f
);
LOG
(
INFO
)
<<
"sgemv M: "
<<
m
<<
", K: "
<<
k
VLOG
(
4
)
<<
"sgemv M: "
<<
m
<<
", K: "
<<
k
<<
", transA: "
<<
(
tra
?
"true"
:
"false"
)
<<
", act: "
<<
flag_act
<<
", transA: "
<<
(
tra
?
"true"
:
"false"
)
<<
", act: "
<<
flag_act
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
);
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
);
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
...
@@ -209,7 +209,7 @@ TEST(TestLiteSgemv, Sgemv) {
...
@@ -209,7 +209,7 @@ TEST(TestLiteSgemv, Sgemv) {
six
,
six
,
alpha
);
alpha
);
if
(
flag
)
{
if
(
flag
)
{
LOG
(
INFO
)
<<
"test m = "
<<
m
<<
", k="
<<
k
VLOG
(
4
)
<<
"test m = "
<<
m
<<
", k="
<<
k
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", bias: "
<<
(
has_bias
?
"true"
:
"false"
)
<<
", flag act: "
<<
flag_act
<<
", flag act: "
<<
flag_act
<<
", trans A: "
<<
(
tra
?
"true"
:
"false"
)
<<
", trans A: "
<<
(
tra
?
"true"
:
"false"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录