Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
9edfecaa
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9edfecaa
编写于
3月 16, 2020
作者:
Z
zhupengyang
提交者:
GitHub
3月 16, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] support increment, less than op bridge (#3147)
* [NPU] subgraph's precision register to kAny
上级
ff332144
变更
25
显示空白变更内容
内联
并排
Showing
25 changed file
with
367 addition
and
329 deletion
+367
-329
lite/core/arena/framework.cc
lite/core/arena/framework.cc
+2
-0
lite/core/op_lite.cc
lite/core/op_lite.cc
+10
-9
lite/kernels/arm/CMakeLists.txt
lite/kernels/arm/CMakeLists.txt
+0
-1
lite/kernels/arm/compare_compute.cc
lite/kernels/arm/compare_compute.cc
+42
-39
lite/kernels/arm/compare_compute.h
lite/kernels/arm/compare_compute.h
+0
-8
lite/kernels/npu/bridges/CMakeLists.txt
lite/kernels/npu/bridges/CMakeLists.txt
+5
-0
lite/kernels/npu/bridges/compare_op.cc
lite/kernels/npu/bridges/compare_op.cc
+76
-0
lite/kernels/npu/bridges/increment_op.cc
lite/kernels/npu/bridges/increment_op.cc
+74
-0
lite/kernels/npu/bridges/paddle_use_bridges.h
lite/kernels/npu/bridges/paddle_use_bridges.h
+4
-2
lite/kernels/npu/bridges/scale_op_test.cc
lite/kernels/npu/bridges/scale_op_test.cc
+0
-125
lite/kernels/npu/subgraph_compute.cc
lite/kernels/npu/subgraph_compute.cc
+8
-3
lite/kernels/npu/subgraph_compute.h
lite/kernels/npu/subgraph_compute.h
+1
-1
lite/tests/kernels/CMakeLists.txt
lite/tests/kernels/CMakeLists.txt
+1
-1
lite/tests/kernels/assign_compute_test.cc
lite/tests/kernels/assign_compute_test.cc
+1
-1
lite/tests/kernels/assign_value_compute_test.cc
lite/tests/kernels/assign_value_compute_test.cc
+1
-1
lite/tests/kernels/cast_compute_test.cc
lite/tests/kernels/cast_compute_test.cc
+1
-1
lite/tests/kernels/compare_compute_test.cc
lite/tests/kernels/compare_compute_test.cc
+123
-124
lite/tests/kernels/fill_constant_batch_size_like_compute_test.cc
...sts/kernels/fill_constant_batch_size_like_compute_test.cc
+1
-1
lite/tests/kernels/fill_constant_compute_test.cc
lite/tests/kernels/fill_constant_compute_test.cc
+1
-1
lite/tests/kernels/gather_compute_test.cc
lite/tests/kernels/gather_compute_test.cc
+1
-1
lite/tests/kernels/increment_compute_test.cc
lite/tests/kernels/increment_compute_test.cc
+11
-6
lite/tests/kernels/lookup_table_compute_test.cc
lite/tests/kernels/lookup_table_compute_test.cc
+1
-1
lite/tests/kernels/lookup_table_dequant_compute_test.cc
lite/tests/kernels/lookup_table_dequant_compute_test.cc
+1
-1
lite/tests/kernels/read_from_array_compute_test.cc
lite/tests/kernels/read_from_array_compute_test.cc
+1
-1
lite/tests/kernels/write_to_array_compute_test.cc
lite/tests/kernels/write_to_array_compute_test.cc
+1
-1
未找到文件。
lite/core/arena/framework.cc
浏览文件 @
9edfecaa
...
...
@@ -59,6 +59,8 @@ void TestCase::CreateInstruction() {
CHECK
(
it
!=
kernels
.
end
())
<<
"failed to create the kernel in "
<<
place_
.
DebugString
()
<<
" with alias: "
<<
alias_
;
// reset final place
place_
=
(
*
it
)
->
place
();
// prepare context
(
*
it
)
->
SetContext
(
std
::
move
(
ctx_
));
instruction_
.
reset
(
new
Instruction
(
op
,
std
::
move
(
*
it
)));
...
...
lite/core/op_lite.cc
浏览文件 @
9edfecaa
...
...
@@ -47,18 +47,19 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
return
kernels
;
}
std
::
set
<
Place
>
place_set
;
for
(
auto
place
:
places
)
{
place_set
.
insert
(
place
);
// Pick kernels those support any Precision and any DataLayout
place
.
precision
=
PRECISION
(
kAny
);
place_set
.
insert
(
place
);
place
.
layout
=
DATALAYOUT
(
kAny
);
place_set
.
insert
(
place
);
std
::
set
<
Place
>
expanded_places
(
places
.
begin
(),
places
.
end
());
for
(
auto
&
place
:
places
)
{
// Pick kernels those support any Precision and any DataLayout, For example:
// kARM,kFloat,kNCHW -> kARM,kFloat,kAny; kARM,kAny,kNCHW; kARM,kAny,kAny
expanded_places
.
insert
(
Place
(
place
.
target
,
place
.
precision
,
DATALAYOUT
(
kAny
)));
expanded_places
.
insert
(
Place
(
place
.
target
,
PRECISION
(
kAny
),
place
.
layout
));
expanded_places
.
insert
(
Place
(
place
.
target
,
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
)));
}
std
::
set
<
TargetType
>
targets
;
for
(
auto
place
:
place_set
)
{
for
(
auto
place
:
expanded_places
)
{
pick_kernel
(
place
);
targets
.
insert
(
place
.
target
);
}
...
...
lite/kernels/arm/CMakeLists.txt
浏览文件 @
9edfecaa
...
...
@@ -91,7 +91,6 @@ add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS
add_kernel
(
lookup_table_dequant_compute_arm ARM extra SRCS lookup_table_dequant_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
logical_compute_arm ARM extra SRCS logical_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
less_than_arm ARM extra SRCS compare_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
while_compute_arm ARM extra SRCS while_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
compare_compute_arm ARM extra SRCS compare_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
add_kernel
(
topk_compute_arm ARM extra SRCS topk_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
...
...
lite/kernels/arm/compare_compute.cc
浏览文件 @
9edfecaa
...
...
@@ -73,8 +73,6 @@ inline void get_mid_dims(const lite::DDim &x_dims,
(
*
post
)
*=
x_dims
[
i
];
}
}
template
<
template
<
typename
T
>
class
Functor
>
void
CompareCompute
<
Functor
>::
PrepareForRun
()
{}
template
<
template
<
typename
T
>
class
Functor
>
void
CompareCompute
<
Functor
>::
Run
()
{
...
...
@@ -177,7 +175,6 @@ void CompareCompute_int64<Functor>::Run() {
for
(
int
inner_id
=
0
;
inner_id
<
inner_num
;
++
inner_id
)
{
int
index
=
(
outer_id
*
mid_num
+
mid_id
)
*
inner_num
+
inner_id
;
z
[
index
]
=
CompareFunctor
()(
x
[
index
],
y_data
);
// z[index] = x[index] < y_data;
}
}
}
...
...
@@ -189,50 +186,78 @@ void CompareCompute_int64<Functor>::Run() {
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
less_than
,
REGISTER_LITE_KERNEL
(
equal
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute
<
paddle
::
lite
::
kernels
::
arm
::
_
LessThan
Functor
>
,
paddle
::
lite
::
kernels
::
arm
::
_
Equal
Functor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
less_than
,
REGISTER_LITE_KERNEL
(
equal
,
kARM
,
kInt
64
,
kInt
32
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int
64
<
paddle
::
lite
::
kernels
::
arm
::
_
LessThan
Functor
>
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int
32
<
paddle
::
lite
::
kernels
::
arm
::
_
Equal
Functor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt
64
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt
64
))})
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt
32
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt
32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
equal
,
REGISTER_LITE_KERNEL
(
not_equal
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute
<
paddle
::
lite
::
kernels
::
arm
::
_EqualFunctor
>
,
paddle
::
lite
::
kernels
::
arm
::
_
Not
EqualFunctor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
not_equal
,
REGISTER_LITE_KERNEL
(
less_than
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute
<
paddle
::
lite
::
kernels
::
arm
::
_
NotEqual
Functor
>
,
paddle
::
lite
::
kernels
::
arm
::
_
LessThan
Functor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
less_than
,
kARM
,
kInt32
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int32
<
paddle
::
lite
::
kernels
::
arm
::
_LessThanFunctor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
less_than
,
kARM
,
kInt64
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int64
<
paddle
::
lite
::
kernels
::
arm
::
_LessThanFunctor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt64
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt64
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
less_equal
,
kARM
,
kFloat
,
...
...
@@ -244,6 +269,7 @@ REGISTER_LITE_KERNEL(less_equal,
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
greater_than
,
kARM
,
kFloat
,
...
...
@@ -255,6 +281,7 @@ REGISTER_LITE_KERNEL(greater_than,
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
greater_equal
,
kARM
,
kFloat
,
...
...
@@ -266,27 +293,3 @@ REGISTER_LITE_KERNEL(greater_equal,
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
less_than
,
kARM
,
kInt32
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int32
<
paddle
::
lite
::
kernels
::
arm
::
_LessThanFunctor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
equal
,
kARM
,
kInt32
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
CompareCompute_int32
<
paddle
::
lite
::
kernels
::
arm
::
_EqualFunctor
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kInt32
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
),
PRECISION
(
kBool
))})
.
Finalize
();
lite/kernels/arm/compare_compute.h
浏览文件 @
9edfecaa
...
...
@@ -26,10 +26,6 @@ namespace arm {
template
<
template
<
typename
T
>
class
Functor
>
class
CompareCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
LogicalParam
;
void
PrepareForRun
()
override
;
void
Run
()
override
;
~
CompareCompute
()
{}
...
...
@@ -39,8 +35,6 @@ template <template <typename T> class Functor>
class
CompareCompute_int32
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt32
)
>
{
public:
using
param_t
=
operators
::
LogicalParam
;
void
Run
()
override
;
~
CompareCompute_int32
()
{}
...
...
@@ -50,8 +44,6 @@ template <template <typename T> class Functor>
class
CompareCompute_int64
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kInt64
)
>
{
public:
using
param_t
=
operators
::
LogicalParam
;
void
Run
()
override
;
~
CompareCompute_int64
()
{}
...
...
lite/kernels/npu/bridges/CMakeLists.txt
浏览文件 @
9edfecaa
...
...
@@ -46,6 +46,9 @@ lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_sub
lite_cc_library
(
subgraph_bridge_layer_norm_op_npu SRCS layer_norm_op.cc DEPS
${
npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_fill_constant_op_npu SRCS fill_constant_op.cc DEPS
${
npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_fill_constant_batch_size_like_op_npu SRCS fill_constant_batch_size_like_op.cc DEPS
${
npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_increment_op_npu SRCS increment_op.cc DEPS
${
npu_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_compare_op_npu SRCS compare_op.cc DEPS
${
npu_subgraph_bridge_deps
}
)
set
(
npu_subgraph_bridges
subgraph_bridge_registry
...
...
@@ -79,6 +82,8 @@ set(npu_subgraph_bridges
subgraph_bridge_layer_norm_op_npu
subgraph_bridge_fill_constant_op_npu
subgraph_bridge_fill_constant_batch_size_like_op_npu
subgraph_bridge_increment_op_npu
subgraph_bridge_compare_op_npu
CACHE INTERNAL
"npu_subgraph_bridges"
)
message
(
STATUS
"+++++ npu_subgraph_bridges:
${
npu_subgraph_bridges
}
"
)
lite/kernels/npu/bridges/compare_op.cc
0 → 100644
浏览文件 @
9edfecaa
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace
paddle
{
namespace
lite
{
namespace
subgraph
{
namespace
npu
{
// Converts a `less_than` op into a `ge::op::Less` node of the NPU subgraph
// that is currently being built.
//
// ctx    - type-erased pointer to the subgraph `Graph`; must not be null.
// op     - op being converted; provides the op_info (input/output variable
//          names) and the scope holding the tensors; must not be null.
// kernel - not used by this converter.
//
// The "X" and "Y" inputs are looked up in the graph by variable name and are
// added from the scope tensors when they are not present yet. The output node
// is created under the name of the "Out" variable with bool precision.
//
// Returns REBUILD_WHEN_SHAPE_CHANGED.
int LessThanConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input, output and op attributes.
  // The tensors are dereferenced below, so fail with a diagnosable CHECK
  // instead of a null-pointer dereference when a variable is missing.
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  CHECK(x != nullptr);
  auto y_name = op_info->Input("Y").front();
  auto y = scope->FindTensor(y_name);
  CHECK(y != nullptr);
  auto out_name = op_info->Output("Out").front();

  // X node: reuse the node if the graph already has it, otherwise add it.
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x);
  }

  // Y node: same lookup-or-add as for X.
  std::shared_ptr<Node> y_node = nullptr;
  if (graph->Has(y_name)) {
    y_node = graph->Get(y_name);
  } else {
    y_node = graph->Add(y_name, *y);
  }

  // Less node: out = (x < y), registered with bool precision.
  auto less_than_node = graph->Add<ge::op::Less>(out_name, PRECISION(kBool));
  auto less_than_op = less_than_node->data<ge::op::Less>();
  less_than_op->set_input_x1(*x_node->data());
  less_than_op->set_input_x2(*y_node->data());
  return REBUILD_WHEN_SHAPE_CHANGED;
}
}
// namespace npu
}
// namespace subgraph
}
// namespace lite
}
// namespace paddle
// Registers LessThanConverter as the subgraph bridge for the `less_than` op
// on the kNPU target.
REGISTER_SUBGRAPH_BRIDGE
(
less_than
,
kNPU
,
paddle
::
lite
::
subgraph
::
npu
::
LessThanConverter
);
lite/kernels/npu/bridges/increment_op.cc
0 → 100644
浏览文件 @
9edfecaa
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"
namespace
paddle
{
namespace
lite
{
namespace
subgraph
{
namespace
npu
{
// Converts an `increment` op into a `ge::op::Add` node of the NPU subgraph
// that is currently being built, i.e. Out = X + step.
//
// ctx    - type-erased pointer to the subgraph `Graph`; must not be null.
// op     - op being converted; provides the op_info (variable names and the
//          float "step" attribute) and the scope holding the tensors; must
//          not be null.
// kernel - not used by this converter.
//
// Returns REBUILD_WHEN_SHAPE_CHANGED.
int IncrementConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input, output and op attributes.
  // The tensor is dereferenced right below, so fail with a diagnosable CHECK
  // instead of a null-pointer dereference when the variable is missing.
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  CHECK(x != nullptr);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();
  float step = op_info->GetAttr<float>("step");

  // X node: reuse the node if the graph already has it, otherwise add it
  // with the shape converted from the tensor dims.
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x, CvtShape(x_dims));
  }

  // Y node: a one-element float tensor holding `step`, marked persistable
  // (presumably so the graph treats it as a constant - confirm against
  // Graph::Add).
  // NOTE(review): the step is always stored as float regardless of the
  // precision of X; confirm this is valid when X is an integer tensor.
  Tensor y;
  y.Resize({1});
  auto y_data = y.mutable_data<float>();
  y_data[0] = step;
  y.set_persistable(true);
  auto y_node = graph->Add(out_name + "/y", y);

  // Add node: out = x + y.
  auto increment_node = graph->Add<ge::op::Add>(out_name);
  auto increment_op = increment_node->data<ge::op::Add>();
  increment_op->set_input_x1(*x_node->data());
  increment_op->set_input_x2(*y_node->data());
  return REBUILD_WHEN_SHAPE_CHANGED;
}
}
// namespace npu
}
// namespace subgraph
}
// namespace lite
}
// namespace paddle
// Registers IncrementConverter as the subgraph bridge for the `increment` op
// on the kNPU target.
REGISTER_SUBGRAPH_BRIDGE
(
increment
,
kNPU
,
paddle
::
lite
::
subgraph
::
npu
::
IncrementConverter
);
lite/kernels/npu/bridges/paddle_use_bridges.h
浏览文件 @
9edfecaa
...
...
@@ -23,6 +23,7 @@ USE_SUBGRAPH_BRIDGE(softsign, kNPU);
USE_SUBGRAPH_BRIDGE
(
hard_sigmoid
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
batch_norm
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
less_than
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
concat
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
conv2d
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
depthwise_conv2d
,
kNPU
);
...
...
@@ -40,9 +41,12 @@ USE_SUBGRAPH_BRIDGE(fusion_elementwise_div_activation, kNPU);
USE_SUBGRAPH_BRIDGE
(
fill_constant
,
kNPU
)
USE_SUBGRAPH_BRIDGE
(
fill_constant_batch_size_like
,
kNPU
)
USE_SUBGRAPH_BRIDGE
(
increment
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
instance_norm
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
fc
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
bilinear_interp
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
nearest_interp
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
layer_norm
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
matmul
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
mul
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
pad2d
,
kNPU
);
...
...
@@ -60,5 +64,3 @@ USE_SUBGRAPH_BRIDGE(transpose, kNPU);
USE_SUBGRAPH_BRIDGE
(
transpose2
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
unsqueeze
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
unsqueeze2
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
instance_norm
,
kNPU
);
USE_SUBGRAPH_BRIDGE
(
layer_norm
,
kNPU
);
lite/kernels/npu/bridges/scale_op_test.cc
已删除
100644 → 0
浏览文件 @
ff332144
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/scale_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
npu
{
namespace
bridges
{
// Reference (host-side) implementation of the scale op used to validate the
// NPU result.
//
// Reads the "X" input and the "scale", "bias" and "bias_after_scale"
// attributes from the op, then writes x * scale + bias into the "Out" tensor.
// When bias_after_scale is false the bias is pre-multiplied by scale, which
// corresponds to (x + bias) * scale.
void scale_ref(const std::shared_ptr<operators::ScaleOp> op) {
  Scope* var_scope = op->scope();
  const OpInfo* info = op->op_info();

  // Resolve the input and output tensors through their variable names.
  auto in_name = info->Input("X").front();
  auto src = var_scope->FindVar(in_name)->GetMutable<Tensor>();
  auto out_name = info->Output("Out").front();
  auto dst = var_scope->FindVar(out_name)->GetMutable<Tensor>();

  // Fold the bias_after_scale flag into a single additive term.
  const float factor = info->GetAttr<float>("scale");
  const float raw_bias = info->GetAttr<float>("bias");
  const bool add_bias_last = info->GetAttr<bool>("bias_after_scale");
  const float offset = add_bias_last ? raw_bias : raw_bias * factor;

  auto src_data = src->data<float>();
  auto dst_data = dst->mutable_data<float>();

  // Input and output must hold the same number of elements.
  DDim src_dims = src->dims();
  DDim dst_dims = dst->dims();
  CHECK_EQ(src_dims.production(), dst_dims.production());

  const auto total = dst_dims.production();
  for (int idx = 0; idx < total; ++idx) {
    dst_data[idx] = src_data[idx] * factor + offset;
  }
}
void
test_scale
(
int
bs
,
int
ic
,
int
ih
,
int
iw
,
bool
bias_after_scale
,
float
scale
,
float
bias
)
{
// prepare input&output variables
Scope
scope
;
std
::
string
x_var_name
(
"x"
);
std
::
string
out_var_name
(
"out"
);
std
::
string
out_ref_var_name
(
"out_ref"
);
auto
*
x
=
scope
.
Var
(
x_var_name
)
->
GetMutable
<
Tensor
>
();
auto
*
out
=
scope
.
Var
(
out_var_name
)
->
GetMutable
<
Tensor
>
();
auto
*
out_ref
=
scope
.
Var
(
out_ref_var_name
)
->
GetMutable
<
Tensor
>
();
x
->
Resize
({
bs
,
ic
,
ih
,
iw
});
// initialize input&output data
FillTensor
<
float
,
int
>
(
x
);
// initialize op desc
cpp
::
OpDesc
opdesc
;
opdesc
.
SetType
(
"scale"
);
opdesc
.
SetInput
(
"X"
,
{
x_var_name
});
opdesc
.
SetOutput
(
"Out"
,
{
out_var_name
});
opdesc
.
SetAttr
(
"bias_after_scale"
,
bias_after_scale
);
opdesc
.
SetAttr
(
"scale"
,
scale
);
opdesc
.
SetAttr
(
"bias"
,
bias
);
// create and convert op to NPU model, then run it on NPU
auto
op
=
CreateOp
<
operators
::
ScaleOp
>
(
opdesc
,
&
scope
);
LauchOp
(
op
,
{
x_var_name
},
{
out_var_name
});
out_ref
->
CopyDataFrom
(
*
out
);
// execute reference implementation and save to output tensor('out')
scale_ref
(
op
);
// compare results
auto
*
out_data
=
out
->
mutable_data
<
float
>
();
auto
*
out_ref_data
=
out_ref
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
out
->
dims
().
production
();
i
++
)
{
VLOG
(
5
)
<<
i
;
EXPECT_NEAR
(
out_data
[
i
],
out_ref_data
[
i
],
1e-5
);
}
}
// Sweeps test_scale over every combination of input shape and scale
// attributes listed below.
TEST(NPUBridges, scale) {
  const int batch_sizes[] = {1, 3};
  const int channel_counts[] = {1, 3};
  const int heights[] = {3, 4};
  const int widths[] = {4, 3};
  const bool bias_orders[] = {true, false};
  const float scales[] = {-1.0f, 5.0f};
  const float biases[] = {-2.0f, 30.0f};

  for (int bs : batch_sizes) {
    for (int ic : channel_counts) {
      for (int ih : heights) {
        for (int iw : widths) {
          for (bool bias_after_scale : bias_orders) {
            for (float scale : scales) {
              for (float bias : biases) {
                VLOG(3) << "bs: " << bs << " ic: " << ic << " ih: " << ih
                        << " iw: " << iw
                        << " bias_after_scale: " << bias_after_scale
                        << " scale: " << scale << " bias: " << bias;
                test_scale(bs, ic, ih, iw, bias_after_scale, scale, bias);
              }
            }
          }
        }
      }
    }
  }
}
}
// namespace bridges
}
// namespace npu
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Declares use of the `scale` op and of its NPU bridge in this test binary
// (presumably so their registrations are linked in - confirm against the
// macro definitions).
USE_LITE_OP
(
scale
);
USE_NPU_BRIDGE
(
scale
);
lite/kernels/npu/subgraph_compute.cc
浏览文件 @
9edfecaa
...
...
@@ -149,6 +149,9 @@ int SubgraphEngine::BuildDeviceProgram() {
case
PRECISION
(
kFloat
):
origin_otensors_
[
i
]
->
mutable_data
<
float
>
();
break
;
case
PRECISION
(
kBool
):
origin_otensors_
[
i
]
->
mutable_data
<
bool
>
();
break
;
case
PRECISION
(
kInt8
):
origin_otensors_
[
i
]
->
mutable_data
<
int8_t
>
();
break
;
...
...
@@ -231,10 +234,12 @@ void SubgraphCompute::Run() {
REGISTER_LITE_KERNEL
(
subgraph
,
kNPU
,
k
Float
,
k
Any
,
kNCHW
,
paddle
::
lite
::
kernels
::
npu
::
SubgraphCompute
,
def
)
.
BindInput
(
"Inputs"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
BindOutput
(
"Outputs"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
BindInput
(
"Inputs"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
),
PRECISION
(
kAny
))})
.
BindOutput
(
"Outputs"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
),
PRECISION
(
kAny
))})
.
Finalize
();
lite/kernels/npu/subgraph_compute.h
浏览文件 @
9edfecaa
...
...
@@ -51,7 +51,7 @@ class SubgraphEngine : public subgraph::Engine {
std
::
unique_ptr
<
hiai
::
AiModelMngerClient
>
device_program_
{
nullptr
};
};
class
SubgraphCompute
:
public
KernelLite
<
TARGET
(
kNPU
),
PRECISION
(
k
Float
)
>
{
class
SubgraphCompute
:
public
KernelLite
<
TARGET
(
kNPU
),
PRECISION
(
k
Any
)
>
{
public:
using
param_t
=
operators
::
SubgraphParam
;
...
...
lite/tests/kernels/CMakeLists.txt
浏览文件 @
9edfecaa
...
...
@@ -19,7 +19,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_
lite_cc_test
(
test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
#lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework
${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test
(
test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
#lite_cc_test(test_kernel_logical_xor_compute SRCS logical_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test
(
test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
...
...
lite/tests/kernels/assign_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -69,7 +69,7 @@ void TestAssign(const Place& place) {
TEST
(
Assign
,
precision
)
{
Place
place
;
#ifdef LITE_WITH_ARM
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#else
return
;
#endif
...
...
lite/tests/kernels/assign_value_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -97,7 +97,7 @@ class AssignValueComputeTester : public arena::TestCase {
TEST
(
AssignValue
,
precision
)
{
Place
place
;
#ifdef LITE_WITH_ARM
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#else
return
;
#endif
...
...
lite/tests/kernels/cast_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -134,7 +134,7 @@ TEST(Cast, precision) {
Place
place
;
float
abs_error
=
2e-5
;
#if defined(LITE_WITH_ARM)
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#elif defined(LITE_WITH_XPU)
place
=
TARGET
(
kXPU
);
#else
...
...
lite/tests/kernels/compare_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -16,12 +16,14 @@
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/arena/framework.h"
#include "lite/tests/utils/fill_data.h"
namespace
paddle
{
namespace
lite
{
#define COMPARE_FUNCTOR(name, op) \
template <typename T> \
struct
_##name##Functor {
\
struct
name##Functor {
\
inline bool operator()(const T& a, const T& b) const { return a op b; } \
};
...
...
@@ -33,7 +35,7 @@ COMPARE_FUNCTOR(GreaterThan, >);
COMPARE_FUNCTOR
(
GreaterEqual
,
>=
);
template
<
>
struct
_
EqualFunctor
<
float
>
{
struct
EqualFunctor
<
float
>
{
inline
bool
operator
()(
const
float
&
a
,
const
float
&
b
)
const
{
// It is safe to cast a and b to double.
return
fabs
(
static_cast
<
double
>
(
a
-
b
))
<
1e-8
;
...
...
@@ -41,59 +43,56 @@ struct _EqualFunctor<float> {
};
template
<
>
struct
_
NotEqualFunctor
<
float
>
{
struct
NotEqualFunctor
<
float
>
{
inline
bool
operator
()(
const
float
&
a
,
const
float
&
b
)
const
{
return
!
_
EqualFunctor
<
float
>
()(
a
,
b
);
return
!
EqualFunctor
<
float
>
()(
a
,
b
);
}
};
template
<
t
emplate
<
typename
T
>
class
Functor
>
class
LessThan
Tester
:
public
arena
::
TestCase
{
template
<
t
ypename
T
,
template
<
typename
U
>
class
Functor
>
class
CompareCompute
Tester
:
public
arena
::
TestCase
{
protected:
std
::
string
input_x_
=
"x"
;
std
::
string
input_y_
=
"y"
;
std
::
string
output_
=
"out"
;
int
axis_
=
1
;
bool
force_cpu_
=
0
;
std
::
string
x_
=
"x"
;
std
::
string
y_
=
"y"
;
std
::
string
out_
=
"out"
;
std
::
string
op_
=
"less_than"
;
DDim
x_dims_
{{
3
,
5
,
4
,
4
}};
DDim
y_dims_
{{
4
}};
std
::
string
opname_
=
"less_than"
;
int
axis_
=
-
1
;
bool
force_cpu_
=
false
;
public:
LessThan
Tester
(
const
Place
&
place
,
CompareCompute
Tester
(
const
Place
&
place
,
const
std
::
string
&
alias
,
bool
force_cpu
,
int
axis
,
const
std
::
string
op
,
DDim
x_dims
,
DDim
y_dims
,
const
std
::
string
&
opname
)
int
axis
=
-
1
)
:
TestCase
(
place
,
alias
),
axis_
(
axis
),
force_cpu_
(
force_cpu
),
op_
(
op
),
x_dims_
(
x_dims
),
y_dims_
(
y_dims
),
opname_
(
opname
)
{}
axis_
(
axis
)
{}
void
RunBaseline
(
Scope
*
scope
)
override
{
auto
*
out
=
scope
->
NewTensor
(
out
put
_
);
auto
*
out
=
scope
->
NewTensor
(
out_
);
CHECK
(
out
);
out
->
Resize
(
x_dims_
);
auto
*
out_data
=
out
->
mutable_data
<
bool
>
();
auto
axis
=
axis_
;
auto
*
x
=
scope
->
FindTensor
(
input_
x_
);
const
auto
*
x_data
=
x
->
data
<
float
>
();
auto
*
y
=
scope
->
FindTensor
(
input_
y_
);
auto
*
y_data_in
=
y
->
data
<
float
>
();
auto
*
x
=
scope
->
FindTensor
(
x_
);
const
auto
*
x_data
=
x
->
data
<
T
>
();
auto
*
y
=
scope
->
FindTensor
(
y_
);
auto
*
y_data_in
=
y
->
data
<
T
>
();
using
CompareFunc
=
Functor
<
float
>
;
using
CompareFunc
=
Functor
<
T
>
;
if
(
x_dims_
.
size
()
==
y_dims_
.
size
())
{
for
(
int
i
=
0
;
i
<
x_dims_
.
production
();
i
++
)
{
// out_data[i] = x_data[i] < y_data[i];
out_data
[
i
]
=
CompareFunc
()(
x_data
[
i
],
y_data_in
[
i
]);
}
}
else
{
auto
*
y_data
=
reinterpret_cast
<
float
*>
(
malloc
(
x_dims_
.
production
()
*
sizeof
(
float
)));
auto
*
y_data
=
reinterpret_cast
<
T
*>
(
malloc
(
x_dims_
.
production
()
*
sizeof
(
T
)));
if
(
axis
<
0
)
{
axis
=
x_dims_
.
size
()
-
y_dims_
.
size
();
...
...
@@ -111,12 +110,12 @@ class LessThanTester : public arena::TestCase {
num
*=
x_dims_
[
i
];
}
int
ysize
=
channels
*
num
;
float
*
y_data_t
=
reinterpret_cast
<
float
*>
(
y_data
);
T
*
y_data_t
=
reinterpret_cast
<
T
*>
(
y_data
);
if
(
num
==
1
)
{
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
memcpy
(
reinterpret_cast
<
void
*>
(
y_data_t
),
reinterpret_cast
<
const
void
*>
(
&
y_data_in
[
0
]),
ysize
*
sizeof
(
float
));
ysize
*
sizeof
(
T
));
y_data_t
+=
ysize
;
}
...
...
@@ -126,118 +125,118 @@ class LessThanTester : public arena::TestCase {
y_data_t
[
i
*
num
+
j
]
=
y_data_in
[
i
];
}
}
float
*
tempptr
=
y_data_t
;
T
*
tempptr
=
y_data_t
;
for
(
int
i
=
0
;
i
<
batch
;
++
i
)
{
memcpy
(
y_data_t
,
tempptr
,
ysize
*
sizeof
(
float
));
memcpy
(
y_data_t
,
tempptr
,
ysize
*
sizeof
(
T
));
y_data_t
+=
ysize
;
}
}
for
(
int
i
=
0
;
i
<
x_dims_
.
production
();
i
++
)
{
// out_data[i] = x_data[i] < y_data[i];
out_data
[
i
]
=
CompareFunc
()(
x_data
[
i
],
y_data
[
i
]);
}
}
}
void
PrepareOpDesc
(
cpp
::
OpDesc
*
op_desc
)
{
op_desc
->
SetType
(
op
name
_
);
op_desc
->
SetInput
(
"X"
,
{
input_
x_
});
op_desc
->
SetInput
(
"Y"
,
{
input_
y_
});
op_desc
->
SetOutput
(
"Out"
,
{
out
put
_
});
op_desc
->
SetType
(
op_
);
op_desc
->
SetInput
(
"X"
,
{
x_
});
op_desc
->
SetInput
(
"Y"
,
{
y_
});
op_desc
->
SetOutput
(
"Out"
,
{
out_
});
op_desc
->
SetAttr
(
"axis"
,
axis_
);
op_desc
->
SetAttr
(
"force_cpu"
,
force_cpu_
);
}
void
PrepareData
()
override
{
std
::
vector
<
float
>
data
(
x_dims_
.
production
());
std
::
vector
<
float
>
datay
(
y_dims_
.
production
());
// datay(dims_.production());
for
(
int
i
=
0
;
i
<
x_dims_
.
production
();
i
++
)
{
data
[
i
]
=
1.1
;
}
for
(
int
i
=
0
;
i
<
y_dims_
.
production
();
i
++
)
{
datay
[
i
]
=
i
;
}
SetCommonTensor
(
input_x_
,
x_dims_
,
data
.
data
());
SetCommonTensor
(
input_y_
,
y_dims_
,
datay
.
data
());
std
::
vector
<
T
>
dx
(
x_dims_
.
production
());
std
::
vector
<
T
>
dy
(
y_dims_
.
production
());
fill_data_rand
<
T
>
(
dx
.
data
(),
-
5
,
5
,
x_dims_
.
production
());
fill_data_rand
<
T
>
(
dy
.
data
(),
-
5
,
5
,
y_dims_
.
production
());
SetCommonTensor
(
x_
,
x_dims_
,
dx
.
data
());
SetCommonTensor
(
y_
,
y_dims_
,
dy
.
data
());
}
};
void
test_compare
(
Place
place
)
{
for
(
bool
force_cpu
:
{
0
})
{
for
(
auto
n
:
{
1
,
3
,
4
})
{
for
(
auto
c
:
{
1
,
3
,
4
})
{
for
(
auto
h
:
{
1
,
3
,
4
})
{
for
(
auto
w
:
{
1
,
3
,
4
})
{
for
(
auto
axis
:
{
-
1
,
0
,
1
,
3
})
{
for
(
auto
yd
:
{
std
::
vector
<
int64_t
>
({
n
}),
std
::
vector
<
int64_t
>
({
c
}),
std
::
vector
<
int64_t
>
({
h
}),
std
::
vector
<
int64_t
>
({
w
}),
std
::
vector
<
int64_t
>
({
n
,
c
}),
std
::
vector
<
int64_t
>
({
h
,
w
}),
std
::
vector
<
int64_t
>
({
n
,
c
,
h
}),
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
})})
{
DDimLite
x_dims
=
DDim
(
std
::
vector
<
int64_t
>
({
n
,
c
,
h
,
w
}));
DDimLite
y_dims
=
DDim
(
yd
);
int
axis_t
=
axis
<
0
?
x_dims
.
size
()
-
y_dims
.
size
()
:
axis
;
if
(
axis_t
+
y_dims
.
size
()
>
4
)
continue
;
bool
flag
=
false
;
for
(
int
i
=
0
;
i
<
y_dims
.
size
();
i
++
)
{
if
(
x_dims
[
i
+
axis_t
]
!=
y_dims
[
i
])
flag
=
true
;
}
if
(
flag
)
continue
;
std
::
unique_ptr
<
arena
::
TestCase
>
less_than_tester
(
new
LessThanTester
<
paddle
::
lite
::
_LessThanFunctor
>
(
place
,
"def"
,
force_cpu
,
axis
,
x_dims
,
y_dims
,
"less_than"
));
arena
::
Arena
less_than_arena
(
std
::
move
(
less_than_tester
),
place
,
0.001
);
less_than_arena
.
TestPrecision
();
std
::
unique_ptr
<
arena
::
TestCase
>
equal_tester
(
new
LessThanTester
<
paddle
::
lite
::
_EqualFunctor
>
(
place
,
"def"
,
force_cpu
,
axis
,
x_dims
,
y_dims
,
"equal"
));
arena
::
Arena
equal_arena
(
std
::
move
(
equal_tester
),
place
,
0.001
);
equal_arena
.
TestPrecision
();
std
::
unique_ptr
<
arena
::
TestCase
>
greater_than_tester
(
new
LessThanTester
<
paddle
::
lite
::
_GreaterThanFunctor
>
(
place
,
"def"
,
force_cpu
,
axis
,
x_dims
,
y_dims
,
"greater_than"
));
arena
::
Arena
greater_than_arena
(
std
::
move
(
greater_than_tester
),
place
,
0.001
);
greater_than_arena
.
TestPrecision
();
}
}
}
}
template
<
typename
T
>
void
TestCompare
(
Place
place
,
float
abs_error
,
std
::
string
op
,
std
::
vector
<
int64_t
>
x_dims
,
std
::
vector
<
int64_t
>
y_dims
,
int
axis
)
{
if
(
typeid
(
T
)
==
typeid
(
float
))
{
place
.
precision
=
PRECISION
(
kFloat
);
}
else
if
(
typeid
(
T
)
==
typeid
(
int32_t
))
{
place
.
precision
=
PRECISION
(
kInt32
);
}
else
if
(
typeid
(
T
)
==
typeid
(
int64_t
))
{
place
.
precision
=
PRECISION
(
kInt64
);
}
else
{
LOG
(
FATAL
)
<<
"unsupported dtype"
;
}
std
::
unique_ptr
<
arena
::
TestCase
>
tester
=
nullptr
;
if
(
op
==
"equal"
)
{
tester
=
static_cast
<
std
::
unique_ptr
<
arena
::
TestCase
>>
(
new
CompareComputeTester
<
T
,
EqualFunctor
>
(
place
,
"def"
,
op
,
DDim
(
x_dims
),
DDim
(
y_dims
),
axis
));
}
else
if
(
op
==
"not_equal"
)
{
tester
=
static_cast
<
std
::
unique_ptr
<
arena
::
TestCase
>>
(
new
CompareComputeTester
<
T
,
NotEqualFunctor
>
(
place
,
"def"
,
op
,
DDim
(
x_dims
),
DDim
(
y_dims
),
axis
));
}
else
if
(
op
==
"less_than"
)
{
tester
=
static_cast
<
std
::
unique_ptr
<
arena
::
TestCase
>>
(
new
CompareComputeTester
<
T
,
LessThanFunctor
>
(
place
,
"def"
,
op
,
DDim
(
x_dims
),
DDim
(
y_dims
),
axis
));
}
else
if
(
op
==
"less_equal"
)
{
tester
=
static_cast
<
std
::
unique_ptr
<
arena
::
TestCase
>>
(
new
CompareComputeTester
<
T
,
LessEqualFunctor
>
(
place
,
"def"
,
op
,
DDim
(
x_dims
),
DDim
(
y_dims
),
axis
));
}
else
if
(
op
==
"greater_than"
)
{
tester
=
static_cast
<
std
::
unique_ptr
<
arena
::
TestCase
>>
(
new
CompareComputeTester
<
T
,
GreaterThanFunctor
>
(
place
,
"def"
,
op
,
DDim
(
x_dims
),
DDim
(
y_dims
),
axis
));
}
else
if
(
op
==
"greater_equal"
)
{
tester
=
static_cast
<
std
::
unique_ptr
<
arena
::
TestCase
>>
(
new
CompareComputeTester
<
T
,
GreaterEqualFunctor
>
(
place
,
"def"
,
op
,
DDim
(
x_dims
),
DDim
(
y_dims
),
axis
));
}
else
{
LOG
(
FATAL
)
<<
"unsupported type"
;
}
arena
::
Arena
arena
(
std
::
move
(
tester
),
place
,
abs_error
);
arena
.
TestPrecision
();
}
#if defined(LITE_WITH_NPU)
TEST
(
Compare_OP_NPU
,
precision
)
{
Place
place
{
TARGET
(
kNPU
)};
float
abs_error
=
1e-2
;
TestCompare
<
float
>
(
place
,
abs_error
,
"less_than"
,
{
2
,
3
,
4
,
5
},
{
2
,
3
,
4
,
5
},
-
1
);
TestCompare
<
float
>
(
place
,
abs_error
,
"less_than"
,
{
2
,
3
,
4
},
{
2
,
3
,
4
},
0
);
}
#elif defined(LITE_WITH_ARM)
TEST
(
Compare_OP_ARM
,
precision
)
{
Place
place
{
TARGET
(
kARM
)};
float
abs_error
=
1e-5
;
for
(
auto
op
:
std
::
vector
<
std
::
string
>
{
"equal"
,
"not_equal"
,
"less_than"
,
"less_equal"
,
"greater_than"
,
"greater_equal"
})
{
TestCompare
<
float
>
(
place
,
abs_error
,
op
,
{
2
,
3
,
4
,
5
},
{
2
,
3
,
4
,
5
},
-
1
);
TestCompare
<
float
>
(
place
,
abs_error
,
op
,
{
2
,
3
,
4
},
{
2
,
3
,
4
},
0
);
}
TestCompare
<
float
>
(
place
,
abs_error
,
"equal"
,
{
2
,
3
,
4
},
{
3
,
4
},
1
);
TestCompare
<
float
>
(
place
,
abs_error
,
"equal"
,
{
2
,
3
,
4
,
5
},
{
3
,
4
},
1
);
TestCompare
<
float
>
(
place
,
abs_error
,
"equal"
,
{
2
,
3
,
4
},
{
4
},
2
);
TestCompare
<
float
>
(
place
,
abs_error
,
"equal"
,
{
2
,
3
,
4
,
5
},
{
5
},
3
);
TestCompare
<
int32_t
>
(
place
,
abs_error
,
"less_than"
,
{
3
,
4
},
{
3
,
4
},
-
1
);
TestCompare
<
int64_t
>
(
place
,
abs_error
,
"less_than"
,
{
3
,
4
},
{
3
,
4
},
-
1
);
}
TEST
(
Compare_OP
,
precision
)
{
// #ifdef LITE_WITH_X86
// // Place place(TARGET(kX86));
// // #endif
#ifdef LITE_WITH_ARM
Place
place
(
TARGET
(
kARM
));
test_compare
(
place
);
#endif
}
}
// namespace lite
}
// namespace paddle
lite/tests/kernels/fill_constant_batch_size_like_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -136,7 +136,7 @@ TEST(fill_constant_batch_size_like, precision) {
place
=
TARGET
(
kNPU
);
abs_error
=
1e-2
;
// use fp16 in npu
#elif defined(LITE_WITH_ARM)
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#else
return
;
#endif
...
...
lite/tests/kernels/fill_constant_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -174,7 +174,7 @@ TEST(fill_constant, precision) {
place
=
TARGET
(
kNPU
);
abs_error
=
1e-2
;
// use fp16 in npu
#elif defined(LITE_WITH_ARM)
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#else
return
;
#endif
...
...
lite/tests/kernels/gather_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -95,7 +95,7 @@ TEST(Gather, precision) {
float
abs_error
=
2e-5
;
Place
place
;
#if defined(LITE_WITH_ARM)
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#elif defined(LITE_WITH_XPU)
place
=
TARGET
(
kXPU
);
#else
...
...
lite/tests/kernels/increment_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -66,12 +66,14 @@ class IncrementComputeTester : public arena::TestCase {
};
void
test_increment
(
Place
place
,
float
abs_error
)
{
DDimLite
dims_0
{{
3
,
5
,
4
,
4
}};
DDimLite
dims_1
{{
3
,
5
}};
for
(
auto
dims
:
{
dims_0
,
dims_1
})
{
std
::
vector
<
std
::
vector
<
int64_t
>>
x_dims
{{
3
,
5
,
4
,
4
},
{
3
,
5
},
{
1
}};
for
(
auto
dims
:
x_dims
)
{
for
(
float
step
:
{
1
,
2
})
{
#if LITE_WITH_NPU
if
(
dims
.
size
()
!=
1
)
continue
;
#endif
std
::
unique_ptr
<
arena
::
TestCase
>
tester
(
new
IncrementComputeTester
(
place
,
"def"
,
step
,
dims
));
new
IncrementComputeTester
(
place
,
"def"
,
step
,
DDim
(
dims
)
));
arena
::
Arena
arena
(
std
::
move
(
tester
),
place
,
abs_error
);
arena
.
TestPrecision
();
}
...
...
@@ -81,8 +83,11 @@ void test_increment(Place place, float abs_error) {
TEST
(
Increment
,
precision
)
{
Place
place
;
float
abs_error
=
2e-5
;
#if defined(LITE_WITH_ARM)
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)};
#if defined(LITE_WITH_NPU)
place
=
TARGET
(
kNPU
);
abs_error
=
1e-2
;
// use fp16 in npu
#elif defined(LITE_WITH_ARM)
place
=
TARGET
(
kARM
);
#else
return
;
#endif
...
...
lite/tests/kernels/lookup_table_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -112,7 +112,7 @@ TEST(LookupTable, precision) {
float
abs_error
=
2e-5
;
Place
place
;
#if defined(LITE_WITH_ARM)
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#elif defined(LITE_WITH_XPU)
place
=
TARGET
(
kXPU
);
#else
...
...
lite/tests/kernels/lookup_table_dequant_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -129,7 +129,7 @@ class LookupTableDequantComputeTest : public arena::TestCase {
TEST
(
LookupTableDequant
,
precision
)
{
#ifdef LITE_WITH_ARM
float
abs_error
=
2e-5
;
Place
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
Place
place
=
TARGET
(
kARM
)
;
for
(
auto
ids_dims
:
std
::
vector
<
std
::
vector
<
int64_t
>>
{{
5
,
2
,
3
,
1
},
{
2
,
3
,
1
},
{
3
,
1
}})
{
for
(
auto
w_dims
:
...
...
lite/tests/kernels/read_from_array_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -88,7 +88,7 @@ TEST(ReadFromArray, precision) {
Place
place
;
float
abs_error
=
1e-5
;
#ifdef LITE_WITH_ARM
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#else
return
;
#endif
...
...
lite/tests/kernels/write_to_array_compute_test.cc
浏览文件 @
9edfecaa
...
...
@@ -85,7 +85,7 @@ TEST(WriteToArray, precision) {
Place
place
;
float
abs_error
=
1e-5
;
#ifdef LITE_WITH_ARM
place
=
{
TARGET
(
kARM
),
PRECISION
(
kAny
)}
;
place
=
TARGET
(
kARM
)
;
#else
return
;
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录