Unverified commit b49d95b1 — [NPU] add topk, log op bridge (#3216)
Authored by zhupengyang on Mar 18, 2020; committed via GitHub on Mar 18, 2020.
Parent: 327c62e0
Showing 13 changed files with 296 additions and 635 deletions (+296, −635).
lite/kernels/arm/topk_compute.cc                 +1    −1
lite/kernels/host/use_kernels.h                  +0   −21   (deleted)
lite/kernels/npu/bridges/CMakeLists.txt          +2    −4
lite/kernels/npu/bridges/act_op.cc              +76   −26
lite/kernels/npu/bridges/paddle_use_bridges.h    +4    −2
lite/kernels/npu/bridges/sqrt_op_test.cc         +0   −93   (deleted)
lite/kernels/npu/bridges/square_op.cc            +0   −61   (deleted)
lite/kernels/npu/bridges/square_op_test.cc       +0   −92   (deleted)
lite/kernels/npu/bridges/topk_op.cc             +18   −10   (renamed from sqrt_op.cc)
lite/operators/topk_op.cc                       +13   −12
lite/tests/kernels/CMakeLists.txt                +1    −1
lite/tests/kernels/activation_compute_test.cc  +120  −258
lite/tests/kernels/topk_compute_test.cc         +61   −54
lite/kernels/arm/topk_compute.cc

@@ -44,5 +44,5 @@ REGISTER_LITE_KERNEL(
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindOutput("Indices", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindOutput("Indices", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
     .Finalize();
lite/kernels/host/use_kernels.h (deleted, 100644 → 0)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. (Apache-2.0 license header)
#pragma once
#include "lite/core/op_registry.h"

USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
USE_LITE_KERNEL(reshape, kHost, kAny, kAny, def);
USE_LITE_KERNEL(reshape2, kHost, kAny, kAny, def);
lite/kernels/npu/bridges/CMakeLists.txt

@@ -36,13 +36,12 @@ lite_cc_library(subgraph_bridge_split_op_npu SRCS split_op.cc DEPS ${npu_subgrap
 lite_cc_library(subgraph_bridge_concat_op_npu SRCS concat_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_shuffle_channel_op_npu SRCS shuffle_channel_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_pad2d_op_npu SRCS pad2d_op.cc DEPS ${npu_subgraph_bridge_deps})
-lite_cc_library(subgraph_bridge_square_op_npu SRCS square_op.cc DEPS ${npu_subgraph_bridge_deps})
-lite_cc_library(subgraph_bridge_sqrt_op_npu SRCS sqrt_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_reduce_mean_op_npu SRCS reduce_mean_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_unsqueeze_op_npu SRCS unsqueeze_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_argmax_op_npu SRCS argmax_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_instance_norm_op_npu SRCS instance_norm_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_dropout_op_npu SRCS dropout_op.cc DEPS ${npu_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_topk_op_npu SRCS topk_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_layer_norm_op_npu SRCS layer_norm_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_fill_constant_op_npu SRCS fill_constant_op.cc DEPS ${npu_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_fill_constant_batch_size_like_op_npu SRCS fill_constant_batch_size_like_op.cc DEPS ${npu_subgraph_bridge_deps})

@@ -72,13 +71,12 @@ set(npu_subgraph_bridges
         subgraph_bridge_concat_op_npu
         subgraph_bridge_shuffle_channel_op_npu
         subgraph_bridge_pad2d_op_npu
-        subgraph_bridge_square_op_npu
-        subgraph_bridge_sqrt_op_npu
         subgraph_bridge_reduce_mean_op_npu
         subgraph_bridge_unsqueeze_op_npu
         subgraph_bridge_argmax_op_npu
         subgraph_bridge_instance_norm_op_npu
         subgraph_bridge_dropout_op_npu
+        subgraph_bridge_topk_op_npu
         subgraph_bridge_layer_norm_op_npu
         subgraph_bridge_fill_constant_op_npu
         subgraph_bridge_fill_constant_batch_size_like_op_npu
lite/kernels/npu/bridges/act_op.cc

@@ -21,6 +21,7 @@ namespace lite {
 namespace subgraph {
 namespace npu {

+template <typename ActType>
 int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
@@ -30,6 +31,40 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto scope = op->scope();
   VLOG(3) << "[NPU] Converting " + op_type + "...";

+  // Get input and output vars and op attributes
+  auto x_name = op_info->Input("X").front();
+  auto x = scope->FindTensor(x_name);
+  auto out_name = op_info->Output("Out").front();
+
+  // X node
+  std::shared_ptr<Node> x_node = nullptr;
+  if (graph->Has(x_name)) {
+    x_node = graph->Get(x_name);
+  } else {
+    x_node = graph->Add(x_name, *x);
+  }
+
+  // Act node
+  auto act_node = graph->template Add<ActType>(out_name);
+  auto act_op = act_node->template data<ActType>();
+  act_op->set_input_x(*x_node->data());
+  return SUCCESS;
+}
+
+template <>
+int ActConverter<ge::op::Activation>(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto op_info = op->op_info();
+  auto op_type = op_info->Type();
+  auto scope = op->scope();
+  VLOG(3) << "[NPU] Converting " + op_type + "...";
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
   auto x = scope->FindMutableTensor(x_name);
@@ -45,8 +80,8 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   }

   // Act node
-  auto act_node = graph->Add<ge::op::Activation>(out_name);
-  auto act_op = act_node->data<ge::op::Activation>();
+  auto act_node = graph->template Add<ge::op::Activation>(out_name);
+  auto act_op = act_node->template data<ge::op::Activation>();
   act_op->set_input_x(*x_node->data());
   // TODO(hong19860320) set the coef value for act Ops, such as leaky_relu,
   // clipped_relu etc.
@@ -74,27 +109,42 @@
 }  // namespace lite
 }  // namespace paddle

-REGISTER_SUBGRAPH_BRIDGE(sigmoid, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(relu, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(tanh, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(relu_clipped, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(relu6, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(leaky_relu, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(abs, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(softsign, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(softplus, kNPU, paddle::lite::subgraph::npu::ActConverter);
-REGISTER_SUBGRAPH_BRIDGE(hard_sigmoid, kNPU, paddle::lite::subgraph::npu::ActConverter);
+REGISTER_SUBGRAPH_BRIDGE(sigmoid, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(relu, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(tanh, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(relu_clipped, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(relu6, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(leaky_relu, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(abs, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(softsign, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(softplus, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(hard_sigmoid, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Activation>);
+REGISTER_SUBGRAPH_BRIDGE(log, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Log>);
+REGISTER_SUBGRAPH_BRIDGE(square, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Square>);
+REGISTER_SUBGRAPH_BRIDGE(sqrt, kNPU, paddle::lite::subgraph::npu::ActConverter<ge::op::Sqrt>);
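
Note: one subtlety in the templated converter above is the `template` keyword in `graph->template Add<ActType>(...)`: `Add` and `data` are member templates, and inside a function template such calls can be dependent names. A minimal standalone sketch of that rule — `DemoGraph`, `Convert`, and `Add` here are hypothetical stand-ins, not Paddle-Lite API:

#include <iostream>

// Hypothetical stand-in for the bridge graph: a class exposing a member
// template Add<T>(), like graph->Add<ActType>(...) in act_op.cc.
struct DemoGraph {
  template <typename T>
  void Add(const char* name) { std::cout << "add node " << name << "\n"; }
};

// When the object's type is itself a template parameter, g->Add<T>(...) is a
// dependent name; without "template" the "<" would parse as a less-than
// comparison instead of opening a template argument list.
template <typename GraphT, typename T>
void Convert(GraphT* g) {
  g->template Add<T>("out");
}

int main() {
  DemoGraph g;
  Convert<DemoGraph, int>(&g);  // prints "add node out"
}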
lite/kernels/npu/bridges/paddle_use_bridges.h

@@ -21,6 +21,9 @@ USE_SUBGRAPH_BRIDGE(relu_clipped, kNPU);
 USE_SUBGRAPH_BRIDGE(leaky_relu, kNPU);
 USE_SUBGRAPH_BRIDGE(softsign, kNPU);
 USE_SUBGRAPH_BRIDGE(hard_sigmoid, kNPU);
+USE_SUBGRAPH_BRIDGE(log, kNPU);
+USE_SUBGRAPH_BRIDGE(sqrt, kNPU);
+USE_SUBGRAPH_BRIDGE(square, kNPU);
 USE_SUBGRAPH_BRIDGE(batch_norm, kNPU);
 USE_SUBGRAPH_BRIDGE(less_than, kNPU);
@@ -58,8 +61,7 @@ USE_SUBGRAPH_BRIDGE(scale, kNPU);
 USE_SUBGRAPH_BRIDGE(shuffle_channel, kNPU);
 USE_SUBGRAPH_BRIDGE(softmax, kNPU);
 USE_SUBGRAPH_BRIDGE(split, kNPU);
-USE_SUBGRAPH_BRIDGE(sqrt, kNPU);
-USE_SUBGRAPH_BRIDGE(square, kNPU);
+// USE_SUBGRAPH_BRIDGE(top_k, kNPU);
 USE_SUBGRAPH_BRIDGE(transpose, kNPU);
 USE_SUBGRAPH_BRIDGE(transpose2, kNPU);
 USE_SUBGRAPH_BRIDGE(unsqueeze, kNPU);
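
Note: each `USE_SUBGRAPH_BRIDGE` entry pairs with a `REGISTER_SUBGRAPH_BRIDGE` call in a bridge source file. A self-contained imitation of that static-registration pattern — the `_DEMO` macros below are hypothetical stand-ins, not the real definitions from the registry headers — sketches why the `USE_` side exists: it references a symbol defined by the `REGISTER_` side, so the linker cannot discard the bridge's registration object:

#include <cstdio>

// REGISTER_ defines a symbol (with a side-effecting initializer) in the
// bridge's translation unit; USE_ declares and touches that symbol so the
// bridge object file is pulled into the final binary.
#define REGISTER_SUBGRAPH_BRIDGE_DEMO(op) \
  int subgraph_bridge_##op##_registered = \
      (std::printf(#op " bridge registered\n"), 1);

#define USE_SUBGRAPH_BRIDGE_DEMO(op)            \
  extern int subgraph_bridge_##op##_registered; \
  static int touch_##op = subgraph_bridge_##op##_registered;

REGISTER_SUBGRAPH_BRIDGE_DEMO(top_k)  // normally lives in topk_op.cc
USE_SUBGRAPH_BRIDGE_DEMO(top_k)       // normally lives in paddle_use_bridges.h

int main() { return 0; }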
lite/kernels/npu/bridges/sqrt_op_test.cc (deleted, 100644 → 0)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. (Apache-2.0 license header)
#include <gtest/gtest.h>
#include <cmath>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
#include "lite/operators/activation_ops.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {

template <typename dtype>
void sqrt_ref(const std::shared_ptr<operators::ActivationOp> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto x = scope->FindTensor("x");
  auto out = scope->FindMutableTensor("out_ref");
  out->Resize(x->dims());
  auto x_data = x->data<dtype>();
  auto out_data = out->mutable_data<dtype>();
  for (size_t i = 0; i < x->numel(); i++) {
    out_data[i] = std::sqrtf(x_data[i]);
  }
}

void test_sqrt(const std::vector<int64_t>& input_shape) {
  // prepare input&output variables
  Scope scope;
  std::string x_var_name = "x";
  std::string out_var_name = "out";
  std::string out_ref_var_name = "out_ref";
  auto* x = scope.NewTensor(x_var_name);
  auto* out = scope.NewTensor(out_var_name);
  auto* out_ref = scope.NewTensor(out_ref_var_name);
  x->Resize(input_shape);

  // initialize input&output data
  FillTensor<float>(x, 0, 5);

  // initialize op desc
  cpp::OpDesc opdesc;
  opdesc.SetType("sqrt");
  opdesc.SetInput("X", {x_var_name});
  opdesc.SetOutput("Out", {out_var_name});

  // create and convert op to NPU model, then run it on NPU
  auto op = CreateOp<operators::ActivationOp>(opdesc, &scope);
  LauchOp(op, {x_var_name}, {out_var_name});

  // execute reference implementation and save to output tensor
  sqrt_ref<float>(op);

  // compare results
  auto* out_data = out->mutable_data<float>();
  auto* out_ref_data = out_ref->mutable_data<float>();
  for (int i = 0; i < out->dims().production(); i++) {
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
  }
}

TEST(NPUBridges, sqrt) {
  test_sqrt({2});
  test_sqrt({2, 3});
  test_sqrt({1, 2, 3, 4});
  test_sqrt({5, 6, 7, 8});
}

}  // namespace bridges
}  // namespace npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

USE_LITE_OP(sqrt);
USE_NPU_BRIDGE(sqrt);
lite/kernels/npu/bridges/square_op.cc (deleted, 100644 → 0)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. (Apache-2.0 license header)
#include "lite/kernels/npu/bridges/graph.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/utility.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace npu {

int SquareConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[NPU] Converting " + op_type + "...";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindMutableTensor(x_name);
  auto x_dims = x->dims();
  auto out_name = op_info->Output("Out").front();

  // X node
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    x_node = graph->Get(x_name);
  } else {
    x_node = graph->Add(x_name, *x);
  }

  // Square node
  auto square_node = graph->Add<ge::op::Square>(out_name);
  auto square_op = square_node->data<ge::op::Square>();
  square_op->set_input_x(*x_node->data());
  return SUCCESS;
}

}  // namespace npu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle

REGISTER_SUBGRAPH_BRIDGE(square, kNPU, paddle::lite::subgraph::npu::SquareConverter);
lite/kernels/npu/bridges/square_op_test.cc (deleted, 100644 → 0)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. (Apache-2.0 license header)
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
#include "lite/operators/activation_ops.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace npu {
namespace bridges {

template <typename dtype>
void square_ref(const std::shared_ptr<operators::ActivationOp> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto x = scope->FindTensor("x");
  auto out = scope->FindMutableTensor("out_ref");
  out->Resize(x->dims());
  auto x_data = x->data<dtype>();
  auto out_data = out->mutable_data<dtype>();
  for (size_t i = 0; i < x->numel(); i++) {
    out_data[i] = x_data[i] * x_data[i];
  }
}

void test_square(const std::vector<int64_t>& input_shape) {
  // prepare input&output variables
  Scope scope;
  std::string x_var_name = "x";
  std::string out_var_name = "out";
  std::string out_ref_var_name = "out_ref";
  auto* x = scope.NewTensor(x_var_name);
  auto* out = scope.NewTensor(out_var_name);
  auto* out_ref = scope.NewTensor(out_ref_var_name);
  x->Resize(input_shape);

  // initialize input&output data
  FillTensor<float>(x);

  // initialize op desc
  cpp::OpDesc opdesc;
  opdesc.SetType("square");
  opdesc.SetInput("X", {x_var_name});
  opdesc.SetOutput("Out", {out_var_name});

  // create and convert op to NPU model, then run it on NPU
  auto op = CreateOp<operators::ActivationOp>(opdesc, &scope);
  LauchOp(op, {x_var_name}, {out_var_name});

  // execute reference implementation and save to output tensor
  square_ref<float>(op);

  // compare results
  auto* out_data = out->mutable_data<float>();
  auto* out_ref_data = out_ref->mutable_data<float>();
  for (int i = 0; i < out->dims().production(); i++) {
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-2);
  }
}

TEST(NPUBridges, square) {
  test_square({2});
  test_square({2, 3});
  test_square({1, 2, 3, 4});
  test_square({5, 6, 7, 8});
}

}  // namespace bridges
}  // namespace npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

USE_LITE_OP(square);
USE_NPU_BRIDGE(square);
lite/kernels/npu/bridges/sqrt_op.cc → lite/kernels/npu/bridges/topk_op.cc (renamed)

-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ namespace lite {
 namespace subgraph {
 namespace npu {

-int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+int TopkConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   CHECK(ctx != nullptr);
   CHECK(op != nullptr);
   auto graph = static_cast<Graph*>(ctx);
@@ -32,10 +32,12 @@ int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   // Get input and output vars and op attributes
   auto x_name = op_info->Input("X").front();
-  auto x = scope->FindMutableTensor(x_name);
-  auto x_dims = x->dims();
+  auto x = scope->FindTensor(x_name);
   auto out_name = op_info->Output("Out").front();
+  int k = op_info->GetAttr<int>("k");

   // X node
   std::shared_ptr<Node> x_node = nullptr;
   if (graph->Has(x_name)) {
@@ -44,10 +46,16 @@ int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     x_node = graph->Add(x_name, *x);
   }

-  // Sqrt node
-  auto sqrt_node = graph->Add<ge::op::Sqrt>(out_name);
-  auto sqrt_op = sqrt_node->data<ge::op::Sqrt>();
-  sqrt_op->set_input_x(*x_node->data());
+  // k node
+  std::shared_ptr<Node> k_node = graph->Add<int>(out_name + "/k", k);
+
+  // topk node
+  auto topk_node = graph->Add<ge::op::TopK>(out_name);
+  auto topk_op = topk_node->data<ge::op::TopK>();
+  topk_op->set_input_x(*x_node->data());
+  topk_op->set_input_k(*k_node->data());
+  topk_op->set_attr_format(0);
   return SUCCESS;
 }
@@ -56,6 +64,6 @@ int SqrtConverter(void* ctx, OpLite* op, KernelBase* kernel) {
 }  // namespace lite
 }  // namespace paddle

-REGISTER_SUBGRAPH_BRIDGE(sqrt,
+REGISTER_SUBGRAPH_BRIDGE(top_k,
                          kNPU,
-                         paddle::lite::subgraph::npu::SqrtConverter);
+                         paddle::lite::subgraph::npu::TopkConverter);
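
Note: the semantics being bridged here — the k largest values along the last axis, together with their indices — match the baseline that the updated topk_compute_test.cc below computes with std::partial_sort. A standalone distillation of that reference, using plain vectors instead of lite tensors:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // One row of n = 4 values; ask for the k = 2 largest with their indices.
  std::vector<float> x = {0.3f, 0.9f, 0.1f, 0.5f};
  const int k = 2;
  std::vector<std::pair<float, int>> vec;
  for (int j = 0; j < static_cast<int>(x.size()); ++j) vec.push_back({x[j], j});
  // Sort only the first k positions, descending by value.
  std::partial_sort(vec.begin(), vec.begin() + k, vec.end(),
                    [](const std::pair<float, int>& a,
                       const std::pair<float, int>& b) { return a.first > b.first; });
  for (int q = 0; q < k; ++q)
    std::printf("val=%.1f idx=%d\n", vec[q].first, vec[q].second);
  // prints: val=0.9 idx=1
  //         val=0.5 idx=3
}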
lite/operators/topk_op.cc

@@ -20,6 +20,8 @@ namespace operators {
 bool TopkOp::CheckShape() const {
   CHECK_OR_FALSE(param_.X);
   CHECK_OR_FALSE(param_.Out);
+  CHECK_OR_FALSE(param_.Indices);
   return true;
 }
@@ -28,26 +30,25 @@ bool TopkOp::InferShape() const {
   out_dims[out_dims.size() - 1] = param_.K;
   auto out = param_.Out;
   out->Resize(out_dims);
-  auto out_lod = out->mutable_lod();
-  *out_lod = param_.X->lod();
-  auto ind = param_.Indices;
-  ind->Resize(out_dims);
-  auto ind_lod = out->mutable_lod();
-  *ind_lod = param_.X->lod();
+  out->set_lod(param_.X->lod());
+
+  auto indices = param_.Indices;
+  indices->Resize(out_dims);
+  indices->set_lod(param_.X->lod());
   return true;
 }

 bool TopkOp::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
   auto x = op_desc.Input("X").front();
-  param_.X = scope->FindVar(x)->GetMutable<Tensor>();
-  auto outputs0 = op_desc.Output("Out").front();
-  auto outputs1 = op_desc.Output("Indices").front();
-  param_.Out = scope->FindVar(outputs0)->GetMutable<lite::Tensor>();
-  param_.Indices = scope->FindVar(outputs1)->GetMutable<lite::Tensor>();
+  param_.X = scope->FindTensor(x);
+  auto output0 = op_desc.Output("Out").front();
+  auto output1 = op_desc.Output("Indices").front();
+  param_.Out = scope->FindMutableTensor(output0);
+  param_.Indices = scope->FindMutableTensor(output1);
   param_.K = op_desc.GetAttr<int>("k");
+  CHECK(param_.X);
   CHECK_GE(param_.K, 1) << "topK param is not valid";
   return true;
 }
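
Note: the InferShape change keeps the rule that both outputs take the input shape with the last dimension replaced by k, and now propagates the input LoD to Out and Indices via set_lod (the old code mistakenly read Indices' LoD slot from out). A tiny sketch of the shape rule, with a plain std::vector standing in for DDim:

#include <cassert>
#include <cstdint>
#include <vector>

// Shape rule from TopkOp::InferShape: out dims = x dims with the last
// dimension replaced by k.
std::vector<int64_t> TopkOutDims(std::vector<int64_t> x_dims, int64_t k) {
  x_dims.back() = k;
  return x_dims;
}

int main() {
  // e.g. the tester's default x_dims {3, 5, 4, 4} with k = 2
  assert((TopkOutDims({3, 5, 4, 4}, 2) == std::vector<int64_t>{3, 5, 4, 2}));
}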
lite/tests/kernels/CMakeLists.txt

@@ -21,7 +21,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_
     #lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     #lite_cc_test(test_kernel_logical_xor_compute SRCS logical_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
-    #lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
+    lite_cc_test(test_kernel_topk_compute SRCS topk_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
     lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${xpu_kernels} ${npu_kernels} ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite/tests/kernels/activation_compute_test.cc

@@ -271,25 +271,12 @@ TEST(Activation_relu, precision) {
   return;
 #endif

-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "relu", RELU));
-          arena::Arena arena(std::move(tester), place, abs_error);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "relu", RELU));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
 }
@@ -306,26 +293,21 @@ TEST(Activation_leaky_relu, precision) {
   return;
 #endif

-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          for (auto slope : {0.01, 0.1}) {
-            std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-                place, "def", slope, 6., "all", 0.,
-                DDim(std::vector<int64_t>({n, c, h, w})), "leaky_relu", LEAKY_RELU));
-            arena::Arena arena(std::move(tester), place, abs_error);
-            arena.TestPrecision();
-          }
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    for (auto slope : {0.01, 0.1}) {
+      std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+          place, "def", slope, 6., "all", 0., DDim(dims), "leaky_relu", LEAKY_RELU));
+      arena::Arena arena(std::move(tester), place, abs_error);
+      arena.TestPrecision();
+    }
+  }
 }
@@ -343,26 +325,21 @@ TEST(Activation_relu_clipped, precision) {
   return;
 #endif

-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          for (auto coef : {0.5, 6.}) {
-            std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-                place, "def", 0.01, coef, "all", 0.,
-                DDim(std::vector<int64_t>({n, c, h, w})), "relu_clipped", RELU_CLIPPED));
-            arena::Arena arena(std::move(tester), place, abs_error);
-            arena.TestPrecision();
-          }
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    for (auto coef : {0.5, 6.}) {
+      std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+          place, "def", 0.01, coef, "all", 0., DDim(dims), "relu_clipped", RELU_CLIPPED));
+      arena::Arena arena(std::move(tester), place, abs_error);
+      arena.TestPrecision();
+    }
+  }
 }
@@ -372,26 +349,12 @@ TEST(Activation_prelu, precision) {
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          for (auto mode : {"all", "channel", "element"}) {
-            std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-                place, "def", 0.01, 6, mode, 0.,
-                DDim(std::vector<int64_t>({n, c, h, w})), "prelu", PRELU));
-            arena::Arena arena(std::move(tester), place, 2e-5);
-            arena.TestPrecision();
-          }
-        }
-      }
-    }
-  }
+  for (auto dims : std::vector<std::vector<int64_t>>{{1, 3, 2, 4}}) {
+    for (auto mode : {"all", "channel", "element"}) {
+      std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+          place, "def", 0.01, 6, mode, 0., DDim(dims), "prelu", PRELU));
+      arena::Arena arena(std::move(tester), place, 2e-5);
+      arena.TestPrecision();
+    }
+  }
 #endif
@@ -410,25 +373,12 @@ TEST(Activation_sigmoid, precision) {
   return;
 #endif

-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "sigmoid", SIGMOID));
-          arena::Arena arena(std::move(tester), place, abs_error);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "sigmoid", SIGMOID));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
 }
@@ -447,25 +397,12 @@ TEST(Activation_tanh, precision) {
   return;
 #endif

-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "tanh", TANH));
-          arena::Arena arena(std::move(tester), place, abs_error);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "tanh", TANH));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
 }
@@ -474,26 +411,13 @@ TEST(Activation_swish, precision) {
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          for (auto coef : {0.01, 0.1}) {
-            std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-                place, "def", 0.01, 6, "all", coef,
-                DDim(std::vector<int64_t>({n, c, h, w})), "swish", SWISH));
-            arena::Arena arena(std::move(tester), place, 2e-5);
-            arena.TestPrecision();
-          }
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    for (auto coef : {0.01, 0.1}) {
+      std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+          place, "def", 0.01, 6, "all", coef, DDim(dims), "swish", SWISH));
+      arena::Arena arena(std::move(tester), place, 2e-5);
+      arena.TestPrecision();
+    }
+  }
 #endif
@@ -504,26 +428,13 @@ TEST(Activation_relu6, precision) {
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          for (auto slope : {0.01, 0.1}) {
-            std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-                place, "def", 0.01, 6., "all", 0.,
-                DDim(std::vector<int64_t>({n, c, h, w})), "relu6", RELU6));
-            arena::Arena arena(std::move(tester), place, 2e-5);
-            arena.TestPrecision();
-          }
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    for (auto slope : {0.01, 0.1}) {
+      std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+          place, "def", 0.01, 6., "all", 0., DDim(dims), "relu6", RELU6));
+      arena::Arena arena(std::move(tester), place, 2e-5);
+      arena.TestPrecision();
+    }
+  }
 #endif
@@ -531,30 +442,24 @@
 TEST(Activation_log, precision) {
   LOG(INFO) << "test log op";
-#ifdef LITE_WITH_ARM
-  Place place(TARGET(kARM));
+  Place place;
+  float abs_error = 2e-5;
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // Using fp16 in NPU
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#else
+  return;
+#endif
-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "log", LOG));
-          arena::Arena arena(std::move(tester), place, 2e-5);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "log", LOG));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
-#endif
 }

 TEST(Activation_exp, precision) {
@@ -562,25 +467,12 @@ TEST(Activation_exp, precision) {
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "exp", EXP));
-          arena::Arena arena(std::move(tester), place, 2e-5);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "exp", EXP));
+    arena::Arena arena(std::move(tester), place, 2e-5);
+    arena.TestPrecision();
+  }
 #endif
 }
@@ -589,26 +481,14 @@ TEST(Activation_floor, precision) {
   LOG(INFO) << "test floor op";
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  for (auto n : {1, 3}) {
-    for (auto c : {3, 6}) {
-      for (auto h : {9, 18}) {
-        for (auto w : {9, 18}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "floor", FLOOR));
-          arena::Arena arena(std::move(tester), place, 2e-5);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "floor", FLOOR));
+    arena::Arena arena(std::move(tester), place, 2e-5);
+    arena.TestPrecision();
+  }
 #endif
 }
@@ -616,54 +496,36 @@ TEST(Activation_rsqrt, precision) {
   LOG(INFO) << "test rsqrt op";
 #ifdef LITE_WITH_ARM
   Place place(TARGET(kARM));
-  for (auto n : {2}) {
-    for (auto c : {2}) {
-      for (auto h : {2}) {
-        for (auto w : {2}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "rsqrt", RSQRT));
-          arena::Arena arena(std::move(tester), place, 2e-5);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "rsqrt", RSQRT));
+    arena::Arena arena(std::move(tester), place, 2e-5);
+    arena.TestPrecision();
+  }
 #endif
 }

 TEST(Activation_square, precision) {
   LOG(INFO) << "test square op";
-#ifdef LITE_WITH_ARM
-  Place place(TARGET(kARM));
-  for (auto n : {2}) {
-    for (auto c : {2}) {
-      for (auto h : {2}) {
-        for (auto w : {2}) {
-          std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
-              place, "def", 0.01, 6., "all", 0.,
-              DDim(std::vector<int64_t>({n, c, h, w})), "square", SQUARE));
-          arena::Arena arena(std::move(tester), place, 2e-5);
-          arena.TestPrecision();
-        }
-      }
-    }
-  }
-#endif
+  Place place;
+  float abs_error = 2e-5;
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-2;  // Using fp16 in NPU
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#else
+  return;
+#endif
+
+  for (auto dims :
+       std::vector<std::vector<int64_t>>{{1, 3, 2, 4}, {2, 3, 4}, {5, 4}, {8}}) {
+    std::unique_ptr<arena::TestCase> tester(new ActivationComputeTester(
+        place, "def", 0.01, 6., "all", 0., DDim(dims), "square", SQUARE));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
 }

 TEST(Activation_gelu, precision) {
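
Note: a pattern repeated across these tests is that the place and comparison tolerance are chosen at compile time, with a much looser bound when the kernels run on the NPU. Condensed into a standalone sketch — the LITE_WITH_* macros are the real build flags from the diffs above; everything else here is illustrative:

#include <cstdio>

int main() {
  float abs_error = 2e-5f;  // tight fp32 default
#if defined(LITE_WITH_NPU)
  abs_error = 1e-2f;        // NPU computes in fp16, so loosen the bound
#elif defined(LITE_WITH_ARM)
  // fp32 on ARM keeps the tight default
#else
  return 0;                 // no supported place: skip the test
#endif
  std::printf("abs_error = %g\n", abs_error);
  return 0;
}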
lite/tests/kernels/topk_compute_test.cc

@@ -16,102 +16,109 @@
 #include "lite/api/paddle_use_kernels.h"
 #include "lite/api/paddle_use_ops.h"
 #include "lite/core/arena/framework.h"
+#include "lite/tests/utils/fill_data.h"

 namespace paddle {
 namespace lite {

-bool comp_func(std::pair<float, int> a, std::pair<float, int> b) {
+template <typename T1, typename T2>
+bool comp_func(std::pair<T1, T2> a, std::pair<T1, T2> b) {
   return (a.first > b.first);
 }

+template <typename T1, typename T2>
 class TopkComputeTester : public arena::TestCase {
  protected:
   // common attributes for this op.
-  std::string input_ = "x";
-  std::string out_val_ = "out_val";
-  std::string out_ind_ = "out_ind";
-  int K_ = 1;
-  DDim dims_{{3, 5, 4, 4}};
+  std::string x_ = "x";
+  std::string out_ = "out";
+  std::string indices_ = "indices";
+  DDim x_dims_{{3, 5, 4, 4}};
+  int k_ = 1;

  public:
   TopkComputeTester(const Place& place,
                     const std::string& alias,
-                    int K,
-                    DDim dims)
-      : TestCase(place, alias), K_(K), dims_(dims) {}
+                    DDim x_dims,
+                    int k = 1)
+      : TestCase(place, alias), x_dims_(x_dims), k_(k) {}

   void RunBaseline(Scope* scope) override {
-    auto* out_val = scope->NewTensor(out_val_);
-    auto* out_ind = scope->NewTensor(out_ind_);
-    CHECK(out_val);
-    CHECK(out_ind);
-    DDim out_dims = dims_;
-    out_dims[out_dims.size() - 1] = K_;
+    auto* out_val = scope->NewTensor(out_);
+    auto* out_ind = scope->NewTensor(indices_);
+    DDim out_dims = x_dims_;
+    out_dims[out_dims.size() - 1] = k_;
     out_val->Resize(out_dims);
     out_ind->Resize(out_dims);
-    auto* out_val_data = out_val->mutable_data<float>();
-    auto* out_ind_data = out_ind->mutable_data<int>();
-    auto* x = scope->FindTensor(input_);
-    const auto* x_data = x->data<float>();
-    int m = out_dims.production() / K_;
-    int n = dims_[dims_.size() - 1];
+    auto* out_val_data = out_val->mutable_data<T1>();
+    auto* out_ind_data = out_ind->mutable_data<T2>();
+    auto* x = scope->FindTensor(x_);
+    const auto* x_data = x->data<T1>();
+    int m = out_dims.production() / k_;
+    int n = x_dims_[x_dims_.size() - 1];

     for (int i = 0; i < m; i++) {
-      const float* in_tmp = x_data + i * n;
-      float* out_val_tmp = out_val_data + i * K_;
-      int* out_ind_tmp = out_ind_data + i * K_;
-      std::vector<std::pair<float, int>> vec;
+      const T1* in_tmp = x_data + i * n;
+      T1* out_val_tmp = out_val_data + i * k_;
+      T2* out_ind_tmp = out_ind_data + i * k_;
+      std::vector<std::pair<T1, T2>> vec;
       for (int j = 0; j < n; j++) {
-        vec.push_back(std::make_pair(in_tmp[j], j));
+        vec.push_back(std::make_pair(in_tmp[j], static_cast<T2>(j)));
       }
-      std::partial_sort(vec.begin(), vec.begin() + K_, vec.end(), comp_func);
-      for (int q = 0; q < K_; q++) {
+      std::partial_sort(
+          vec.begin(), vec.begin() + k_, vec.end(), comp_func<T1, T2>);
+      for (int q = 0; q < k_; q++) {
         out_val_tmp[q] = vec[q].first;
         out_ind_tmp[q] = vec[q].second;
         LOG(INFO) << "out:" << i << " " << q << " " << out_val_tmp[q] << " "
                   << out_ind_tmp[q];
       }
     }
   }

   void PrepareOpDesc(cpp::OpDesc* op_desc) {
-    op_desc->SetType("topk");
-    op_desc->SetInput("X", {input_});
-    op_desc->SetOutput("Out", {out_val_, out_ind_});
-    op_desc->SetAttr("K", K_);
+    op_desc->SetType("top_k");
+    op_desc->SetInput("X", {x_});
+    op_desc->SetOutput("Out", {out_});
+    op_desc->SetOutput("Indices", {indices_});
+    op_desc->SetAttr("k", k_);
   }

   void PrepareData() override {
-    std::vector<float> data(dims_.production());
-    for (int i = 0; i < dims_.production(); i++) {
-      data[i] = std::rand() * 1.0f / RAND_MAX;
-    }
-    SetCommonTensor(input_, dims_, data.data());
+    std::vector<T1> dx(x_dims_.production());
+    fill_data_rand<T1>(dx.data(), -1, 1, x_dims_.production());
+    SetCommonTensor(x_, x_dims_, dx.data());
   }
 };

-void test_topk(Place place) {
-  DDimLite dims_0{{3, 5}};
-  DDimLite dims_1{{8}};
-  for (int K : {1, 2}) {
-    for (auto dims : {dims_0, dims_1}) {
+template <typename T1, typename T2>
+void test_topk(Place place, float abs_error) {
+  for (auto x_shape :
+       std::vector<std::vector<int64_t>>{{2, 3, 4, 5}, {3, 4, 5}, {4, 5}, {5}}) {
+    for (int k : {2, 5}) {
       std::unique_ptr<arena::TestCase> tester(
-          new TopkComputeTester(place, "def", K, dims));
-      arena::Arena arena(std::move(tester), place, 2e-5);
+          new TopkComputeTester<T1, T2>(place, "def", DDim(x_shape), k));
+      arena::Arena arena(std::move(tester), place, abs_error);
       arena.TestPrecision();
     }
   }
 }

 TEST(Topk, precision) {
-  // #ifdef LITE_WITH_X86
-  //   Place place(TARGET(kX86));
-  // #endif
-#ifdef LITE_WITH_ARM
-  Place place(TARGET(kARM));
-  test_topk(place);
+  Place place;
+  float abs_error = 2e-5;
+#if defined(LITE_WITH_NPU)
+  place = TARGET(kNPU);
+  abs_error = 1e-3;  // Using fp16 in NPU
+#elif defined(LITE_WITH_ARM)
+  place = TARGET(kARM);
+#else
+  return;
+#endif
+
+#if defined(LITE_WITH_NPU)
+  test_topk<float, int>(place, abs_error);
+#else
+  test_topk<float, int64_t>(place, abs_error);
 #endif
 }