Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
ac6c98f4
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
338
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ac6c98f4
编写于
8月 31, 2020
作者:
H
hong19860320
提交者:
GitHub
8月 31, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[XPU] Fix the compilation errors when XTCL is enabled (#4077)
上级
db98a6bb
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
99 addition
and
133 deletion
+99
-133
cmake/device/xpu.cmake
cmake/device/xpu.cmake
+16
-4
lite/backends/xpu/device.cc
lite/backends/xpu/device.cc
+1
-1
lite/kernels/xpu/subgraph_compute.cc
lite/kernels/xpu/subgraph_compute.cc
+35
-73
lite/kernels/xpu/subgraph_compute.h
lite/kernels/xpu/subgraph_compute.h
+2
-2
lite/tests/kernels/cast_compute_test.cc
lite/tests/kernels/cast_compute_test.cc
+2
-2
lite/tests/kernels/elementwise_compute_test.cc
lite/tests/kernels/elementwise_compute_test.cc
+2
-2
lite/tests/kernels/layer_norm_compute_test.cc
lite/tests/kernels/layer_norm_compute_test.cc
+3
-3
lite/tests/kernels/matmul_compute_test.cc
lite/tests/kernels/matmul_compute_test.cc
+4
-2
lite/tests/kernels/mul_compute_test.cc
lite/tests/kernels/mul_compute_test.cc
+1
-0
lite/tests/kernels/multiclass_nms_compute_test.cc
lite/tests/kernels/multiclass_nms_compute_test.cc
+2
-2
lite/tests/kernels/pool_compute_test.cc
lite/tests/kernels/pool_compute_test.cc
+2
-2
lite/tests/kernels/reshape_compute_test.cc
lite/tests/kernels/reshape_compute_test.cc
+2
-2
lite/tests/kernels/transpose_compute_test.cc
lite/tests/kernels/transpose_compute_test.cc
+3
-3
lite/tools/ci_build.sh
lite/tools/ci_build.sh
+24
-35
未找到文件。
cmake/device/xpu.cmake
浏览文件 @
ac6c98f4
...
@@ -62,7 +62,7 @@ if(LITE_WITH_XTCL)
...
@@ -62,7 +62,7 @@ if(LITE_WITH_XTCL)
include_directories
(
"
${
XPU_SDK_ROOT
}
/XTCL/include"
)
include_directories
(
"
${
XPU_SDK_ROOT
}
/XTCL/include"
)
find_library
(
XPU_SDK_XTCL_FILE NAMES xtcl
find_library
(
XPU_SDK_XTCL_FILE NAMES xtcl
PATHS
${
XPU_SDK_ROOT
}
/XTCL/
so
PATHS
${
XPU_SDK_ROOT
}
/XTCL/
lib
NO_DEFAULT_PATH
)
NO_DEFAULT_PATH
)
if
(
NOT XPU_SDK_XTCL_FILE
)
if
(
NOT XPU_SDK_XTCL_FILE
)
...
@@ -74,7 +74,7 @@ if(LITE_WITH_XTCL)
...
@@ -74,7 +74,7 @@ if(LITE_WITH_XTCL)
endif
()
endif
()
find_library
(
XPU_SDK_TVM_FILE NAMES tvm
find_library
(
XPU_SDK_TVM_FILE NAMES tvm
PATHS
${
XPU_SDK_ROOT
}
/XTCL/s
o
PATHS
${
XPU_SDK_ROOT
}
/XTCL/s
hlib
NO_DEFAULT_PATH
)
NO_DEFAULT_PATH
)
if
(
NOT XPU_SDK_TVM_FILE
)
if
(
NOT XPU_SDK_TVM_FILE
)
...
@@ -97,8 +97,20 @@ if(LITE_WITH_XTCL)
...
@@ -97,8 +97,20 @@ if(LITE_WITH_XTCL)
set_property
(
TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION
${
XPU_SDK_LLVM_FILE
}
)
set_property
(
TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION
${
XPU_SDK_LLVM_FILE
}
)
endif
()
endif
()
find_library
(
XPU_SDK_XPU_JITC_FILE NAMES xpujitc
PATHS
${
XPU_SDK_ROOT
}
/XTDK/runtime/shlib
${
XPU_SDK_ROOT
}
/XTDK/shlib
# libxpujitc.so may have been moved to XTDK/runtime/shlib
NO_DEFAULT_PATH
)
if
(
NOT XPU_SDK_XPU_JITC_FILE
)
message
(
FATAL_ERROR
"Can not find XPU JITC Library in
${
XPU_SDK_ROOT
}
"
)
else
()
message
(
STATUS
"Found XPU JITC Library:
${
XPU_SDK_XPU_JITC_FILE
}
"
)
add_library
(
xpu_sdk_xpu_jitc SHARED IMPORTED GLOBAL
)
set_property
(
TARGET xpu_sdk_xpu_jitc PROPERTY IMPORTED_LOCATION
${
XPU_SDK_XPU_JITC_FILE
}
)
endif
()
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-DDMLC_USE_GLOG=1"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-DDMLC_USE_GLOG=1"
)
set
(
xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL
"xpu runtime libs"
)
set
(
xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm
xpu_sdk_xpu_jitc
CACHE INTERNAL
"xpu runtime libs"
)
set
(
xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm CACHE INTERNAL
"xpu builder libs"
)
set
(
xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_llvm
xpu_sdk_xpu_jitc
CACHE INTERNAL
"xpu builder libs"
)
endif
()
endif
()
lite/backends/xpu/device.cc
浏览文件 @
ac6c98f4
...
@@ -34,7 +34,7 @@ std::unique_ptr<xtcl::network::xRuntimeInstance> Device::Build(
...
@@ -34,7 +34,7 @@ std::unique_ptr<xtcl::network::xRuntimeInstance> Device::Build(
for
(
size_t
i
=
0
;
i
<
outputs
->
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
outputs
->
size
();
i
++
)
{
all_outs
.
push_back
(
*
outputs
->
at
(
i
));
all_outs
.
push_back
(
*
outputs
->
at
(
i
));
}
}
xtcl
::
x
Network
network
=
xtcl
::
x
Function
network
=
builder
->
FinalizeNetwork
(
xtcl
::
relay
::
TupleNode
::
make
(
all_outs
));
builder
->
FinalizeNetwork
(
xtcl
::
relay
::
TupleNode
::
make
(
all_outs
));
auto
target
=
xtcl
::
NullValue
<
xtcl
::
Target
>
();
auto
target
=
xtcl
::
NullValue
<
xtcl
::
Target
>
();
if
(
!
target_
.
empty
())
{
if
(
!
target_
.
empty
())
{
...
...
lite/kernels/xpu/subgraph_compute.cc
浏览文件 @
ac6c98f4
...
@@ -35,27 +35,20 @@ bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
...
@@ -35,27 +35,20 @@ bool SubgraphEngine::PrepareWorkspaceForDeviceProgram() {
// Create the device input and output tensors, but don't initialize them
// Create the device input and output tensors, but don't initialize them
// with the dimensions
// with the dimensions
device_itensors_
.
resize
(
input_names_
.
size
());
device_itensors_
.
resize
(
input_names_
.
size
());
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
device_itensors_
[
i
].
reset
(
new
hiai
::
AiTensor
);
CHECK
(
device_itensors_
[
i
]);
}
device_otensors_
.
resize
(
output_names_
.
size
());
device_otensors_
.
resize
(
output_names_
.
size
());
for
(
int
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
device_otensors_
[
i
].
reset
(
new
hiai
::
AiTensor
);
CHECK
(
device_otensors_
[
i
]);
}
return
true
;
return
true
;
}
}
bool
SubgraphEngine
::
BuildDeviceProgram
()
{
bool
SubgraphEngine
::
BuildDeviceProgram
()
{
int
status
=
0
;
int
status
=
0
;
if
(
!
origin_program_
)
{
BuildOriginProgram
();
}
// Convert all of ops and their input vars and weights and added into the XPU
// Convert all of ops and their input vars and weights and added into the XPU
// IR graph
// IR graph
subgraph
::
xpu
::
Graph
graph
;
subgraph
::
xpu
::
Graph
graph
;
const
auto
&
bridges
=
subgraph
::
Registry
::
Instance
();
const
auto
&
bridges
=
subgraph
::
Registry
::
Instance
();
if
(
!
origin_program_
)
{
BuildOriginProgram
();
}
const
auto
&
insts
=
origin_program_
->
instructions
(
kRootBlockIdx
);
const
auto
&
insts
=
origin_program_
->
instructions
(
kRootBlockIdx
);
for
(
auto
&
inst
:
insts
)
{
for
(
auto
&
inst
:
insts
)
{
auto
op
=
const_cast
<
OpLite
*>
(
inst
.
op
());
auto
op
=
const_cast
<
OpLite
*>
(
inst
.
op
());
...
@@ -73,64 +66,38 @@ bool SubgraphEngine::BuildDeviceProgram() {
...
@@ -73,64 +66,38 @@ bool SubgraphEngine::BuildDeviceProgram() {
return
false
;
return
false
;
}
}
}
}
// Obtain the output nodes of the XPU IR graph and build the graph to the XPU
// Collect the input and output nodes of the XPU IR graph
// runtime
device_inames_
.
clear
();
device_onames_
.
clear
();
std
::
vector
<
xtcl
::
xExpr
*>
device_inodes
;
std
::
vector
<
xtcl
::
xExpr
*>
device_inodes
;
std
::
vector
<
xtcl
::
xExpr
*>
device_onodes
;
std
::
vector
<
xtcl
::
xExpr
*>
device_onodes
;
for
(
auto
&
input_name
:
input_names_
)
{
for
(
size_t
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
if
(
graph
.
Has
(
input_name
))
{
CHECK
(
graph
.
Has
(
input_names_
[
i
]));
if
(
graph
.
Get
(
input_name
)
->
is_data
())
{
CHECK
(
graph
.
Get
(
input_names_
[
i
])
->
is_data
());
device_inodes
.
push_back
(
graph
.
Get
(
input_name
)
->
data
().
get
());
device_inodes
.
push_back
(
graph
.
Get
(
input_names_
[
i
])
->
data
().
get
());
device_inames_
.
push_back
(
input_name
);
}
else
{
LOG
(
WARNING
)
<<
"[XPU] Input node "
<<
input_name
<<
" is ignored because it is not a data node."
;
}
}
else
{
LOG
(
WARNING
)
<<
"[XPU] Input node "
<<
input_name
<<
" is ignored because it does not exist."
;
}
}
}
for
(
auto
&
output_name
:
output_names_
)
{
for
(
size_t
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
if
(
graph
.
Has
(
output_name
))
{
CHECK
(
graph
.
Has
(
output_names_
[
i
]));
device_onodes
.
push_back
(
graph
.
Get
(
output_name
)
->
data
().
get
());
device_onodes
.
push_back
(
graph
.
Get
(
output_names_
[
i
])
->
data
().
get
());
device_onames_
.
push_back
(
output_name
);
}
else
{
LOG
(
WARNING
)
<<
"[XPU] Output node "
<<
output_name
<<
" is ignored because it does not exist."
;
}
}
}
CHECK
(
!
device_inames_
.
empty
())
// Build the XPU IR graph to the XPU runtime for inference
<<
"[XPU] No input nodes found for building XPU model"
;
CHECK
(
!
device_onames_
.
empty
())
<<
"[XPU] No output nodes found for building XPU model"
;
device_program_
=
lite
::
xpu
::
Device
::
Global
().
Build
(
device_program_
=
lite
::
xpu
::
Device
::
Global
().
Build
(
&
graph
.
builder_
,
&
graph
.
params_
,
&
device_onodes
);
&
graph
.
builder_
,
&
graph
.
params_
,
&
device_onodes
);
if
(
device_program_
==
nullptr
)
{
if
(
device_program_
==
nullptr
)
{
LOG
(
WARNING
)
<<
"[XPU] Build model failed!"
;
LOG
(
WARNING
)
<<
"[XPU] Build model failed!"
;
return
false
;
return
false
;
}
}
origin_otypes_
.
resize
(
output_names_
.
size
());
origin_odims_
.
resize
(
output_names_
.
size
());
for
(
size_t
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
origin_otypes_
[
i
]
=
graph
.
Get
(
output_names_
[
i
])
->
precision
();
origin_odims_
[
i
]
=
origin_otensors_
[
i
]
->
dims
().
Vectorize
();
}
// Query and check the dimensions of input and output tensors
// Query and check the dimensions of input and output tensors
origin_idims_
.
resize
(
device_inames_
.
size
());
CHECK_EQ
(
device_itensors_
.
size
(),
input_names_
.
size
());
origin_itensors_
.
resize
(
device_inames_
.
size
());
CHECK_EQ
(
device_otensors_
.
size
(),
output_names_
.
size
());
device_itensors_
.
resize
(
device_inames_
.
size
());
for
(
size_t
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
origin_odims_
.
resize
(
device_onames_
.
size
());
VLOG
(
3
)
<<
"[XPU] Inputs["
<<
i
<<
"] name: "
<<
input_names_
[
i
]
origin_otensors_
.
resize
(
device_onames_
.
size
());
<<
" dims: "
<<
DDim
(
origin_idims_
[
i
]).
repr
();
device_otensors_
.
resize
(
device_onames_
.
size
());
for
(
int
i
=
0
;
i
<
device_inames_
.
size
();
i
++
)
{
auto
node
=
graph
.
Get
(
device_inames_
[
i
]);
auto
precision
=
node
->
precision
();
auto
layout
=
node
->
layout
();
origin_itensors_
[
i
]
=
exec_scope_
->
FindMutableTensor
(
device_inames_
[
i
]);
CHECK
(
origin_itensors_
[
i
]);
origin_idims_
[
i
]
=
origin_itensors_
[
i
]
->
dims
();
VLOG
(
3
)
<<
"[XPU] Inputs["
<<
i
<<
"] name: "
<<
device_inames_
[
i
]
<<
" precision: "
<<
PrecisionToStr
(
precision
)
<<
" layout: "
<<
DataLayoutToStr
(
layout
)
<<
" dims: "
<<
origin_idims_
[
i
];
// Prepare the device input tensors which share data with the origin input
// Prepare the device input tensors which share data with the origin input
// tensors
// tensors
device_itensors_
[
i
].
data
=
nullptr
;
device_itensors_
[
i
].
data
=
nullptr
;
...
@@ -138,25 +105,20 @@ bool SubgraphEngine::BuildDeviceProgram() {
...
@@ -138,25 +105,20 @@ bool SubgraphEngine::BuildDeviceProgram() {
subgraph
::
xpu
::
CvtDLDeviceType
(
TARGET
(
kHost
));
subgraph
::
xpu
::
CvtDLDeviceType
(
TARGET
(
kHost
));
device_itensors_
[
i
].
ctx
.
device_id
=
0
;
device_itensors_
[
i
].
ctx
.
device_id
=
0
;
device_itensors_
[
i
].
ndim
=
origin_idims_
[
i
].
size
();
device_itensors_
[
i
].
ndim
=
origin_idims_
[
i
].
size
();
device_itensors_
[
i
].
dtype
=
subgraph
::
xpu
::
CvtDLDataType
(
precision
);
device_itensors_
[
i
].
dtype
=
subgraph
::
xpu
::
CvtDLDataType
(
origin_itensors_
[
i
]
->
precision
());
device_itensors_
[
i
].
shape
=
const_cast
<
int64_t
*>
(
device_itensors_
[
i
].
shape
=
const_cast
<
int64_t
*>
(
static_cast
<
const
int64_t
*>
(
origin_idims_
[
i
].
data
()
.
data
()
));
static_cast
<
const
int64_t
*>
(
origin_idims_
[
i
].
data
()));
device_itensors_
[
i
].
strides
=
nullptr
;
device_itensors_
[
i
].
strides
=
nullptr
;
device_itensors_
[
i
].
byte_offset
=
0
;
device_itensors_
[
i
].
byte_offset
=
0
;
}
}
for
(
int
i
=
0
;
i
<
device_onames_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
auto
node
=
graph
.
Get
(
device_onames_
[
i
]);
VLOG
(
3
)
<<
"[XPU] Outputs["
<<
i
<<
"] name: "
<<
output_names_
[
i
]
auto
precision
=
node
->
precision
();
<<
" dims: "
<<
DDim
(
origin_odims_
[
i
]).
repr
();
auto
layout
=
node
->
layout
();
origin_otensors_
[
i
]
=
exec_scope_
->
FindMutableTensor
(
device_onames_
[
i
]);
CHECK
(
origin_otensors_
[
i
]);
origin_odims_
[
i
]
=
origin_otensors_
[
i
]
->
dims
();
VLOG
(
3
)
<<
"[XPU] Outputs["
<<
i
<<
"] name: "
<<
device_onames_
[
i
]
<<
" precision: "
<<
PrecisionToStr
(
precision
)
<<
" layout: "
<<
DataLayoutToStr
(
layout
)
<<
" dims: "
<<
origin_odims_
[
i
];
// Prepare the device output tensors which share data with the origin output
// Prepare the device output tensors which share data with the origin output
// tensors
// tensors
origin_otensors_
[
i
]
->
Resize
(
origin_odims_
[
i
]);
auto
&
precision
=
origin_otypes_
[
i
];
switch
(
precision
)
{
switch
(
precision
)
{
case
PRECISION
(
kFloat
):
case
PRECISION
(
kFloat
):
origin_otensors_
[
i
]
->
mutable_data
<
float
>
();
origin_otensors_
[
i
]
->
mutable_data
<
float
>
();
...
@@ -174,7 +136,7 @@ bool SubgraphEngine::BuildDeviceProgram() {
...
@@ -174,7 +136,7 @@ bool SubgraphEngine::BuildDeviceProgram() {
origin_otensors_
[
i
]
->
mutable_data
<
int64_t
>
();
origin_otensors_
[
i
]
->
mutable_data
<
int64_t
>
();
break
;
break
;
default:
default:
LOG
(
FATAL
)
<<
"[XPU] "
<<
device_o
names_
[
i
]
LOG
(
FATAL
)
<<
"[XPU] "
<<
output_
names_
[
i
]
<<
" can't mutable data with precision type "
<<
" can't mutable data with precision type "
<<
PrecisionToStr
(
precision
);
<<
PrecisionToStr
(
precision
);
break
;
break
;
...
@@ -186,7 +148,7 @@ bool SubgraphEngine::BuildDeviceProgram() {
...
@@ -186,7 +148,7 @@ bool SubgraphEngine::BuildDeviceProgram() {
device_otensors_
[
i
].
ndim
=
origin_odims_
[
i
].
size
();
device_otensors_
[
i
].
ndim
=
origin_odims_
[
i
].
size
();
device_otensors_
[
i
].
dtype
=
subgraph
::
xpu
::
CvtDLDataType
(
precision
);
device_otensors_
[
i
].
dtype
=
subgraph
::
xpu
::
CvtDLDataType
(
precision
);
device_otensors_
[
i
].
shape
=
const_cast
<
int64_t
*>
(
device_otensors_
[
i
].
shape
=
const_cast
<
int64_t
*>
(
static_cast
<
const
int64_t
*>
(
origin_odims_
[
i
].
data
()
.
data
()
));
static_cast
<
const
int64_t
*>
(
origin_odims_
[
i
].
data
()));
device_otensors_
[
i
].
strides
=
nullptr
;
device_otensors_
[
i
].
strides
=
nullptr
;
device_otensors_
[
i
].
byte_offset
=
0
;
device_otensors_
[
i
].
byte_offset
=
0
;
}
}
...
@@ -198,7 +160,7 @@ bool SubgraphEngine::LaunchDeviceProgram() {
...
@@ -198,7 +160,7 @@ bool SubgraphEngine::LaunchDeviceProgram() {
// Update the data pointer of DLTensor to track the origin input tensors
// Update the data pointer of DLTensor to track the origin input tensors
device_itensors_
[
i
].
data
=
device_itensors_
[
i
].
data
=
const_cast
<
void
*>
(
origin_itensors_
[
i
]
->
raw_data
());
const_cast
<
void
*>
(
origin_itensors_
[
i
]
->
raw_data
());
device_program_
->
SetInput
(
device_i
names_
[
i
],
&
device_itensors_
[
i
]);
device_program_
->
SetInput
(
input_
names_
[
i
],
&
device_itensors_
[
i
]);
}
}
// Run the XPU model
// Run the XPU model
auto
GetCurrentUS
=
[]()
->
double
{
auto
GetCurrentUS
=
[]()
->
double
{
...
...
lite/kernels/xpu/subgraph_compute.h
浏览文件 @
ac6c98f4
...
@@ -47,10 +47,10 @@ class SubgraphEngine : public subgraph::Engine {
...
@@ -47,10 +47,10 @@ class SubgraphEngine : public subgraph::Engine {
bool
BuildDeviceProgram
()
override
;
bool
BuildDeviceProgram
()
override
;
bool
LaunchDeviceProgram
()
override
;
bool
LaunchDeviceProgram
()
override
;
std
::
vector
<
std
::
string
>
device_inames_
;
std
::
vector
<
std
::
string
>
device_onames_
;
std
::
vector
<
DLTensor
>
device_itensors_
{};
std
::
vector
<
DLTensor
>
device_itensors_
{};
std
::
vector
<
DLTensor
>
device_otensors_
{};
std
::
vector
<
DLTensor
>
device_otensors_
{};
std
::
vector
<
std
::
vector
<
int64_t
>>
origin_odims_
;
std
::
vector
<
PrecisionType
>
origin_otypes_
;
std
::
unique_ptr
<
xtcl
::
network
::
xRuntimeInstance
>
device_program_
{
nullptr
};
std
::
unique_ptr
<
xtcl
::
network
::
xRuntimeInstance
>
device_program_
{
nullptr
};
};
};
...
...
lite/tests/kernels/cast_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -135,8 +135,8 @@ TEST(Cast, precision) {
...
@@ -135,8 +135,8 @@ TEST(Cast, precision) {
float
abs_error
=
2e-5
;
float
abs_error
=
2e-5
;
#if defined(LITE_WITH_ARM)
#if defined(LITE_WITH_ARM)
place
=
TARGET
(
kARM
);
place
=
TARGET
(
kARM
);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
//
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
//
place = TARGET(kXPU);
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place
=
TARGET
(
kHuaweiAscendNPU
);
place
=
TARGET
(
kHuaweiAscendNPU
);
abs_error
=
1e-2
;
// precision_mode default is force_fp16
abs_error
=
1e-2
;
// precision_mode default is force_fp16
...
...
lite/tests/kernels/elementwise_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -231,8 +231,8 @@ TEST(Elementwise, precision) {
...
@@ -231,8 +231,8 @@ TEST(Elementwise, precision) {
abs_error
=
1e-2
;
// precision_mode default is force_fp16
abs_error
=
1e-2
;
// precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
#elif defined(LITE_WITH_ARM)
place
=
TARGET
(
kARM
);
place
=
TARGET
(
kARM
);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
//
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
//
place = TARGET(kXPU);
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tests/kernels/layer_norm_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -147,9 +147,7 @@ TEST(LayerNorm, precision) {
...
@@ -147,9 +147,7 @@ TEST(LayerNorm, precision) {
LOG
(
INFO
)
<<
"test layer_norm op"
;
LOG
(
INFO
)
<<
"test layer_norm op"
;
float
abs_error
=
2e-5
;
float
abs_error
=
2e-5
;
Place
place
;
Place
place
;
#if defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
#if defined(LITE_WITH_NPU)
place
=
TARGET
(
kXPU
);
#elif defined(LITE_WITH_NPU)
place
=
TARGET
(
kNPU
);
place
=
TARGET
(
kNPU
);
abs_error
=
1e-2
;
abs_error
=
1e-2
;
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
...
@@ -158,6 +156,8 @@ TEST(LayerNorm, precision) {
...
@@ -158,6 +156,8 @@ TEST(LayerNorm, precision) {
#elif defined(LITE_WITH_ARM)
#elif defined(LITE_WITH_ARM)
place
=
TARGET
(
kARM
);
place
=
TARGET
(
kARM
);
abs_error
=
6e-5
;
abs_error
=
6e-5
;
// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
// place = TARGET(kXPU);
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tests/kernels/matmul_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -460,8 +460,9 @@ TEST(Matmul2x2, precision) {
...
@@ -460,8 +460,9 @@ TEST(Matmul2x2, precision) {
abs_error
=
1e-2
;
// precision_mode default is force_fp16
abs_error
=
1e-2
;
// precision_mode default is force_fp16
#elif defined(LITE_WITH_ARM)
#elif defined(LITE_WITH_ARM)
place
=
TARGET
(
kARM
);
place
=
TARGET
(
kARM
);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
// place = TARGET(kXPU);
// abs_error = 1e-3; // use int16 in xpu
#else
#else
return
;
return
;
#endif
#endif
...
@@ -500,6 +501,7 @@ TEST(Matmul2x2_y_transpose, precision) {
...
@@ -500,6 +501,7 @@ TEST(Matmul2x2_y_transpose, precision) {
place
=
TARGET
(
kARM
);
place
=
TARGET
(
kARM
);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
place
=
TARGET
(
kXPU
);
abs_error
=
1e-3
;
// use int16 in xpu
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tests/kernels/mul_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -129,6 +129,7 @@ TEST(Mul, precision) {
...
@@ -129,6 +129,7 @@ TEST(Mul, precision) {
abs_error
=
1e-2
;
// use fp16 in npu
abs_error
=
1e-2
;
// use fp16 in npu
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
place
=
TARGET
(
kXPU
);
abs_error
=
1e-3
;
// use int16 in xpu
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tests/kernels/multiclass_nms_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -478,8 +478,8 @@ TEST(multiclass_nms, precision) {
...
@@ -478,8 +478,8 @@ TEST(multiclass_nms, precision) {
Place
place
;
Place
place
;
#if defined(LITE_WITH_ARM)
#if defined(LITE_WITH_ARM)
place
=
TARGET
(
kHost
);
place
=
TARGET
(
kHost
);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
//
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
//
place = TARGET(kXPU);
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tests/kernels/pool_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -384,8 +384,8 @@ TEST(Pool, precision) {
...
@@ -384,8 +384,8 @@ TEST(Pool, precision) {
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place
=
TARGET
(
kHuaweiAscendNPU
);
place
=
TARGET
(
kHuaweiAscendNPU
);
abs_error
=
1e-2
;
// precision_mode default is force_fp16
abs_error
=
1e-2
;
// precision_mode default is force_fp16
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) // NOLINT
place
=
TARGET
(
kXPU
);
//
place = TARGET(kXPU);
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tests/kernels/reshape_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -206,8 +206,8 @@ TEST(Reshape, precision) {
...
@@ -206,8 +206,8 @@ TEST(Reshape, precision) {
abs_error
=
1e-2
;
// Using fp16 in NPU
abs_error
=
1e-2
;
// Using fp16 in NPU
#elif defined(LITE_WITH_ARM)
#elif defined(LITE_WITH_ARM)
place
=
TARGET
(
kHost
);
place
=
TARGET
(
kHost
);
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
//
#elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
place
=
TARGET
(
kXPU
);
//
place = TARGET(kXPU);
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place
=
TARGET
(
kHuaweiAscendNPU
);
place
=
TARGET
(
kHuaweiAscendNPU
);
abs_error
=
1e-2
;
// precision_mode default is force_fp16
abs_error
=
1e-2
;
// precision_mode default is force_fp16
...
...
lite/tests/kernels/transpose_compute_test.cc
浏览文件 @
ac6c98f4
...
@@ -164,14 +164,14 @@ TEST(Transpose, precision) {
...
@@ -164,14 +164,14 @@ TEST(Transpose, precision) {
LOG
(
INFO
)
<<
"test Transpose op"
;
LOG
(
INFO
)
<<
"test Transpose op"
;
float
abs_error
=
2e-5
;
float
abs_error
=
2e-5
;
Place
place
;
Place
place
;
#if defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL)
#if defined(LITE_WITH_NPU)
place
=
TARGET
(
kXPU
);
#elif defined(LITE_WITH_NPU)
place
=
TARGET
(
kNPU
);
place
=
TARGET
(
kNPU
);
abs_error
=
1e-2
;
// Using fp16 in NPU
abs_error
=
1e-2
;
// Using fp16 in NPU
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
#elif defined(LITE_WITH_HUAWEI_ASCEND_NPU)
place
=
TARGET
(
kHuaweiAscendNPU
);
place
=
TARGET
(
kHuaweiAscendNPU
);
abs_error
=
1e-2
;
// precision_mode default is force_fp16
abs_error
=
1e-2
;
// precision_mode default is force_fp16
// #elif defined(LITE_WITH_XPU) && defined(LITE_WITH_XTCL) // NOLINT
// place = TARGET(kXPU);
#else
#else
return
;
return
;
#endif
#endif
...
...
lite/tools/ci_build.sh
浏览文件 @
ac6c98f4
...
@@ -342,24 +342,6 @@ function build_test_train {
...
@@ -342,24 +342,6 @@ function build_test_train {
}
}
function
cmake_xpu
{
export
LD_LIBRARY_PATH
=
"
$LD_LIBRARY_PATH
:
$PWD
/third_party/install/mklml/lib"
prepare_workspace
cmake ..
\
${
common_flags
}
\
-DWITH_GPU
=
OFF
\
-DWITH_MKLDNN
=
OFF
\
-DLITE_WITH_X86
=
ON
\
-DWITH_MKL
=
ON
\
-DLITE_BUILD_EXTRA
=
ON
\
-DLITE_WITH_XPU
=
ON
\
-DXPU_SDK_ROOT
=
"./output"
}
function
build_xpu
{
make lite_compile_deps
-j
$NUM_CORES_FOR_COMPILE
}
# It will eagerly test all lite related unittests.
# It will eagerly test all lite related unittests.
function
test_xpu
{
function
test_xpu
{
# Due to the missing of xpu kernels, we skip the following tests temporarily.
# Due to the missing of xpu kernels, we skip the following tests temporarily.
...
@@ -387,14 +369,25 @@ function test_xpu {
...
@@ -387,14 +369,25 @@ function test_xpu {
# Build the code and run lite server tests. This is executed in the CI system.
# Build the code and run lite server tests. This is executed in the CI system.
function
build_test_xpu
{
function
build_test_xpu
{
cur_dir
=
$(
pwd
)
local
with_xtcl
=
$1
if
[[
"
${
with_xtcl
}
x"
==
"x"
]]
;
then
build_dir
=
$cur_dir
/build.lite.xpu
with_xtcl
=
OFF
mkdir
-p
$build_dir
fi
cd
$build_dir
mkdir
-p
./build
cd
./build
cmake_xpu
export
LD_LIBRARY_PATH
=
"
$LD_LIBRARY_PATH
:
$PWD
/third_party/install/mklml/lib"
build_xpu
prepare_workspace
cmake ..
\
${
common_flags
}
\
-DWITH_GPU
=
OFF
\
-DWITH_MKLDNN
=
OFF
\
-DLITE_WITH_X86
=
ON
\
-DWITH_MKL
=
ON
\
-DLITE_BUILD_EXTRA
=
ON
\
-DLITE_WITH_XPU
=
ON
\
-DLITE_WITH_XTCL
=
$with_xtcl
\
-DXPU_SDK_ROOT
=
"./output"
make lite_compile_deps
-j
$NUM_CORES_FOR_COMPILE
test_xpu
test_xpu
}
}
...
@@ -1171,10 +1164,6 @@ function main {
...
@@ -1171,10 +1164,6 @@ function main {
cmake_x86
cmake_x86
shift
shift
;;
;;
cmake_xpu
)
cmake_xpu
shift
;;
cmake_opencl
)
cmake_opencl
)
cmake_opencl
$ARM_OS
$ARM_ABI
$ARM_LANG
cmake_opencl
$ARM_OS
$ARM_ABI
$ARM_LANG
shift
shift
...
@@ -1199,10 +1188,6 @@ function main {
...
@@ -1199,10 +1188,6 @@ function main {
test_server
test_server
shift
shift
;;
;;
test_xpu
)
test_xpu
shift
;;
test_arm
)
test_arm
)
test_arm
$ARM_OS
$ARM_ABI
$ARM_LANG
$ARM_PORT
test_arm
$ARM_OS
$ARM_ABI
$ARM_LANG
$ARM_PORT
shift
shift
...
@@ -1233,7 +1218,11 @@ function main {
...
@@ -1233,7 +1218,11 @@ function main {
shift
shift
;;
;;
build_test_xpu
)
build_test_xpu
)
build_test_xpu
build_test_xpu OFF
shift
;;
build_test_xpu_with_xtcl
)
build_test_xpu ON
shift
shift
;;
;;
build_test_huawei_ascend_npu
)
build_test_huawei_ascend_npu
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录