Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
69ad4b80
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
69ad4b80
编写于
1月 24, 2020
作者:
Z
zhupengyang
提交者:
hong19860320
1月 24, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] clean code (#2798)
上级
124c43a0
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
27 addition
and
524 deletion
+27
-524
lite/kernels/npu/bridges/fc_op_test.cc
lite/kernels/npu/bridges/fc_op_test.cc
+0
-140
lite/kernels/npu/bridges/reshape_op_test.cc
lite/kernels/npu/bridges/reshape_op_test.cc
+0
-204
lite/kernels/npu/bridges/softmax_op_test.cc
lite/kernels/npu/bridges/softmax_op_test.cc
+0
-153
lite/tests/kernels/CMakeLists.txt
lite/tests/kernels/CMakeLists.txt
+27
-27
未找到文件。
lite/kernels/npu/bridges/fc_op_test.cc
已删除
100644 → 0
浏览文件 @
124c43a0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/fc_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
npu
{
namespace
bridges
{
// Reference implementation of the fc op, evaluated on the host.
//
// Looks up the "Input", "W" and (optional) "Bias" tensors of `op` in the op's
// scope and overwrites the "Out" tensor with
//   Out[m, n] = sum_k Input[m, k] * W[k, n]  (+ Bias[n] when a bias is bound)
// where the input is viewed as a 2-D matrix flattened at `in_num_col_dims`.
// The "Out" tensor is not resized here; it is written with M * N floats.
void fc_ref(const std::shared_ptr<operators::FcOpLite> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();

  auto input =
      scope->FindVar(op_info->Input("Input").front())->GetMutable<Tensor>();
  auto w = scope->FindVar(op_info->Input("W").front())->GetMutable<Tensor>();
  auto out =
      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
  int32_t in_num_col_dims = op_info->GetAttr<int32_t>("in_num_col_dims");

  // The bias is optional: it is used only if the op declares a "Bias" input
  // that actually names at least one variable.
  Tensor* bias = nullptr;
  float* bias_data = nullptr;
  if (op_info->HasInput("Bias")) {
    auto bias_names = op_info->Input("Bias");
    if (bias_names.size() > 0) {
      auto first_bias_name = bias_names.front();
      bias = scope->FindVar(first_bias_name)->GetMutable<lite::Tensor>();
      bias_data = bias->mutable_data<float>();
    }
  }

  auto input_data = input->data<float>();
  auto w_data = w->mutable_data<float>();
  auto out_data = out->mutable_data<float>();

  auto in_mat_dims = input->dims().Flatten2D(in_num_col_dims);
  int out_num_classes = w->dims()[1];
  const int M = in_mat_dims[0];
  const int K = in_mat_dims[1];
  const int N = out_num_classes;

  // Pass 1: plain matrix product, accumulated directly in the output buffer.
  for (int row = 0; row < M; ++row) {
    for (int col = 0; col < N; ++col) {
      const int out_idx = row * N + col;
      out_data[out_idx] = 0;
      for (int inner = 0; inner < K; ++inner) {
        out_data[out_idx] += input_data[row * K + inner] * w_data[inner * N + col];
      }
    }
  }

  // Pass 2: add the per-column bias to every row, if there is one.
  if (bias_data == nullptr) {
    return;
  }
  for (int row = 0; row < M; ++row) {
    for (int col = 0; col < N; ++col) {
      out_data[row * N + col] += bias_data[col];
    }
  }
}
// Runs one fc test case: executes the op through LauchOp and compares the
// result element-wise against fc_ref.
//
// input_shape      shape of the "Input" tensor.
// w_shape          shape of the "W" tensor; must have exactly two dimensions.
// in_num_col_dims  value of the op's "in_num_col_dims" attribute.
// has_bias         when true, a "Bias" tensor of shape {w_shape[1]} is bound.
void test_fc(const std::vector<int64_t>& input_shape,
             const std::vector<int64_t>& w_shape,
             int in_num_col_dims,
             bool has_bias) {
  CHECK_EQ(w_shape.size(), 2UL);

  // The fc bridge has to be registered, otherwise there is nothing to test.
  const auto& bridge_factory =
      lite::kernels::npu::bridges::Factory::Instance();
  CHECK(bridge_factory.HasType("fc"));

  Scope scope;
  std::string input_var_name("Input");
  std::string w_var_name("W");
  std::string bias_var_name("Bias");
  std::string out_var_name("Out");
  std::string out_ref_var_name("out_ref");
  auto* input = scope.Var(input_var_name)->GetMutable<Tensor>();
  auto* w = scope.Var(w_var_name)->GetMutable<Tensor>();
  auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
  auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
  input->Resize(input_shape);
  w->Resize(w_shape);

  FillTensor<float, int>(input);
  FillTensor<float, int>(w);

  // create fc op
  cpp::OpDesc fc_op_desc;
  fc_op_desc.SetType("fc");
  fc_op_desc.SetInput("Input", {input_var_name});
  fc_op_desc.SetInput("W", {w_var_name});
  fc_op_desc.SetOutput("Out", {out_var_name});
  fc_op_desc.SetAttr("in_num_col_dims", static_cast<int>(in_num_col_dims));
  if (has_bias) {
    auto* bias = scope.Var(bias_var_name)->GetMutable<Tensor>();
    bias->Resize({w_shape[1]});
    FillTensor<float, int>(bias);
    fc_op_desc.SetInput("Bias", {bias_var_name});
  }

  auto fc_op = CreateOp<operators::FcOpLite>(fc_op_desc, &scope);
  LauchOp(fc_op, {input_var_name}, {out_var_name});
  // Save the launched result before fc_ref overwrites "Out": from here on
  // `out_ref` holds the LauchOp output and `out` holds the reference output.
  out_ref->CopyDataFrom(*out);

  // compare results
  fc_ref(fc_op);
  auto* out_data = out->mutable_data<float>();
  auto* out_ref_data = out_ref->mutable_data<float>();
  const auto out_size = out->dims().production();
  for (int i = 0; i < out_size; i++) {
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
  }
}
// Exercises test_fc over a fixed set of shapes, once with a bias and once
// without.
TEST(NPUBridges, fc) {
  struct FcCase {
    std::vector<int64_t> input_shape;
    std::vector<int64_t> w_shape;
    int in_num_col_dims;
  };
  const std::vector<FcCase> fc_cases = {
      {{1, 8, 8, 1}, {8, 4}, 2},
      {{1, 5, 5, 1}, {5, 7}, 2},
      {{1, 4, 1, 1}, {4, 8}, 1},
      {{1, 1024, 1, 1}, {1024, 1000}, 1},
  };

  for (bool use_bias : {true, false}) {
    for (const auto& fc_case : fc_cases) {
      test_fc(fc_case.input_shape,
              fc_case.w_shape,
              fc_case.in_num_col_dims,
              use_bias);
    }
  }
}
}
// namespace bridges
}
// namespace npu
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_OP
(
fc
);
USE_NPU_BRIDGE
(
fc
);
lite/kernels/npu/bridges/reshape_op_test.cc
已删除
100644 → 0
浏览文件 @
124c43a0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/reshape_op.h"
#include <gtest/gtest.h>
#include <random>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
npu
{
namespace
bridges
{
// Reference implementation of reshape / reshape2, evaluated on the host.
//
// Reads the "X" tensor of `op`, makes "Out" either share X's data (attribute
// "inplace" is true) or hold a copy of it, and resizes "Out" to the dims
// returned by operators::ValidateShape. The target shape comes from the
// "shape" attribute, unless the op has a non-empty "Shape" input, in which
// case the int contents of that tensor are used instead.
void reshape_ref(const std::shared_ptr<lite::OpLite> op) {
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
  auto out =
      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
  // Captured before "Out" is touched, since "Out" may end up sharing X's data.
  auto x_dims = x->dims();
  auto shape = op_info->GetAttr<std::vector<int>>("shape");
  auto inplace = op_info->GetAttr<bool>("inplace");

  // A bound "Shape" input overrides the "shape" attribute.
  if (op_info->HasInput("Shape")) {
    auto actual_shape_var_names = op_info->Input("Shape");
    if (actual_shape_var_names.size() > 0) {
      auto actual_shape = scope->FindVar(actual_shape_var_names.front())
                              ->GetMutable<lite::Tensor>();
      auto actual_shape_dims = actual_shape->dims();
      auto* actual_shape_data = actual_shape->data<int>();
      shape = std::vector<int>(
          actual_shape_data,
          actual_shape_data + actual_shape_dims.production());
    }
  }

  if (inplace) {
    out->ShareDataWith(*x);
  } else {
    out->CopyDataFrom(*x);
  }
  auto out_dims = operators::ValidateShape(shape, x_dims);
  out->Resize(out_dims);
}
// Runs one reshape / reshape2 test case: computes the reference result with
// reshape_ref, then converts the op to an NPU model, runs it through LauchOp,
// and compares dims and data.
//
// x_shape    shape of the input tensor "x".
// shape      value of the op's "shape" attribute.
// act_shape  when non-empty, bound as the op's "Shape" input tensor.
// inplace    value of the op's "inplace" attribute.
// reshape2   selects the "reshape2" op (with an "XShape" output) instead of
//            "reshape".
void test_reshape(const std::vector<int64_t>& x_shape,
                  const std::vector<int>& shape,
                  const std::vector<int>& act_shape,
                  bool inplace,
                  bool reshape2) {
  // prepare input&output variables
  Scope scope;
  std::string x_var_name("x");
  std::string actual_shape_var_name("actual_shape");
  std::string out_var_name("out");
  std::string out_ref_var_name("out_ref");
  std::string xshape_var_name("xshape");
  std::string xshape_ref_var_name("xshape_ref");
  auto x = scope.Var(x_var_name)->GetMutable<Tensor>();
  auto actual_shape = scope.Var(actual_shape_var_name)->GetMutable<Tensor>();
  auto out = scope.Var(out_var_name)->GetMutable<Tensor>();
  auto out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
  auto xshape = scope.Var(xshape_var_name)->GetMutable<Tensor>();
  auto xshape_ref = scope.Var(xshape_ref_var_name)->GetMutable<Tensor>();
  x->Resize(x_shape);

  // initialize input&output data
  FillTensor<float, int>(x);

  // initialize op desc
  cpp::OpDesc opdesc;
  opdesc.SetType(reshape2 ? "reshape2" : "reshape");
  opdesc.SetInput("X", {x_var_name});
  opdesc.SetOutput("Out", {out_var_name});
  opdesc.SetAttr("shape", shape);
  opdesc.SetAttr("inplace", inplace);
  if (!act_shape.empty()) {
    // Materialize act_shape as a 1-D int tensor and bind it as "Shape".
    int64_t act_shape_size = act_shape.size();
    actual_shape->Resize({act_shape_size});
    memcpy(actual_shape->mutable_data<int>(),
           act_shape.data(),
           act_shape_size * sizeof(int));
    opdesc.SetInput("Shape", {actual_shape_var_name});
  }
  if (reshape2) {
    opdesc.SetOutput("XShape", {xshape_var_name});
  }

  // create op and execute reference implementation
  auto op = reshape2 ? CreateOp<operators::Reshape2Op>(opdesc, &scope)
                     : CreateOp<operators::ReshapeOp>(opdesc, &scope);
  reshape_ref(op);
  out_ref->CopyDataFrom(*out);
  if (reshape2) {
    xshape_ref->CopyDataFrom(*xshape);
  }

  // convert op to NPU model, then run it on NPU
  LauchOp(op, {x_var_name}, {out_var_name});

  // TODO(hong19860320) support XShape for reshape2
  // compare results
  auto out_dims = out->dims();
  auto out_ref_dims = out_ref->dims();
  CHECK_EQ(out_dims.size(), out_ref_dims.size());
  // size_t index: avoids comparing a signed counter against size().
  for (size_t i = 0; i < out_dims.size(); i++) {
    CHECK_EQ(out_dims[i], out_ref_dims[i]);
  }
  auto out_data = out->mutable_data<float>();
  auto out_ref_data = out_ref->mutable_data<float>();
  const auto out_size = out->dims().production();
  for (int i = 0; i < out_size; i++) {
    VLOG(5) << i;
    EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
  }

  // XShape comparison is disabled until XShape is supported (see TODO above).
  // if (reshape2) {
  //   auto xshape_dims = xshape->dims();
  //   auto xshape_ref_dims = xshape_ref->dims();
  //   CHECK_EQ(xshape_dims.size(), xshape_ref_dims.size());
  //   for (size_t i = 0; i < xshape_dims.size(); i++) {
  //     CHECK_EQ(xshape_dims[i], xshape_ref_dims[i]);
  //   }
  // }
}
// Runs test_reshape over every combination of
//   (input shape) x (non-empty target shape) x (actual shape, possibly empty)
//   x (inplace) x (reshape2).
// The same list of candidate shapes serves as both the "shape" attribute and
// the "Shape" input; an empty entry is skipped as an attribute and means
// "no Shape input" when used as the actual shape.
TEST(NPUBridges, reshape) {
#if 1
  std::map<std::vector<int64_t>, std::vector<std::vector<int>>> cases = {
      {{1, 2, 4, 6},
       {{},
        {-1},
        {48},
        {-1, 48},
        {1, 48},
        {0, 48},
        {48, -1},
        {48, 1},
        {-1, 24},
        {2, 24},
        {24, 0},
        {-1, 0, 3, 2},
        {4, 2, 3, 2},
        {0, -1, 3, 2},
        {1, 8, 3, 2}}}};

  for (const auto& entry : cases) {
    const auto& x_shape = entry.first;
    const auto& candidate_shapes = entry.second;
    for (const auto& shape : candidate_shapes) {
      if (shape.empty()) {
        continue;
      }
      for (const auto& act_shape : candidate_shapes) {
        // The textual description only depends on the three shapes, so it is
        // assembled once and reused for every inplace/reshape2 combination.
        std::stringstream ss;
        ss << "x:{ ";
        for (auto dim : x_shape) {
          ss << dim << " ";
        }
        ss << "} shape:{ ";
        for (auto dim : shape) {
          ss << dim << " ";
        }
        ss << "} act_shape:{ ";
        for (auto dim : act_shape) {
          ss << dim << " ";
        }
        const std::string description = ss.str();

        for (bool inplace : {true, false}) {
          for (bool reshape2 : {true, false}) {
            VLOG(3) << description << "} inplace:" << inplace
                    << " reshape2:" << reshape2;
            test_reshape(x_shape, shape, act_shape, inplace, reshape2);
          }
        }
      }
    }
  }
#else
  test_reshape({2, 4, 6}, {-1, 0, 4, 3}, {}, true, true);
  test_reshape({1, 232, 14, 14}, {-1, 2, 116, 14, 14}, {}, true, true);
#endif
}
}
// namespace bridges
}
// namespace npu
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_OP
(
reshape
);
USE_NPU_BRIDGE
(
reshape
);
USE_LITE_OP
(
reshape2
);
USE_NPU_BRIDGE
(
reshape2
);
lite/kernels/npu/bridges/softmax_op_test.cc
已删除
100644 → 0
浏览文件 @
124c43a0
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/softmax_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/test_helper.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
npu
{
namespace
bridges
{
// Reference implementation of softmax, evaluated on the host.
//
// Reads the "X" tensor of `op` and writes softmax along the "axis" attribute
// into the "Out" tensor (a negative axis counts from the last dimension).
// For every 1-D slice along the axis the maximum is subtracted before
// exponentiation, then the slice is divided by the sum of its exponentials.
template <typename dtype>
void softmax_ref(const std::shared_ptr<operators::SoftmaxOp> op) {
  Scope* scope = op->scope();
  const OpInfo* op_info = op->op_info();
  auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
  auto out =
      scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
  auto x_data = x->data<dtype>();
  auto out_data = out->mutable_data<dtype>();
  DDim x_dims = x->dims();

  auto x_rank = x_dims.size();
  int axis = op_info->GetAttr<int>("axis");
  if (axis < 0) {
    axis += x_rank;
  }

  // View the tensor as [outer_num, axis_size, inner_num]; consecutive
  // elements of one softmax slice are inner_num apart in memory.
  int axis_size = x_dims[axis];
  int outer_num = x_dims.Slice(0, axis).production();
  int inner_num = x_dims.Slice(axis + 1, x_rank).production();
  int compute_size = outer_num * inner_num;

  for (int slice = 0; slice < compute_size; slice++) {
    // Index of the first element of this slice.
    const int start =
        (slice / inner_num) * axis_size * inner_num + slice % inner_num;

    // 1) maximum of the slice
    dtype max_data = std::numeric_limits<dtype>::lowest();
    for (int j = 0; j < axis_size; j++) {
      const int pos = start + j * inner_num;
      if (x_data[pos] > max_data) {
        max_data = x_data[pos];
      }
    }

    // 2) shifted exponentials and their sum
    dtype sum_data = static_cast<dtype>(0);
    for (int j = 0; j < axis_size; j++) {
      const int pos = start + j * inner_num;
      out_data[pos] = exp(x_data[pos] - max_data);
      sum_data += out_data[pos];
    }

    // 3) normalization
    for (int j = 0; j < axis_size; j++) {
      out_data[start + j * inner_num] /= sum_data;
    }
  }
}
void
test_softmax
(
const
std
::
vector
<
int64_t
>&
input_shape
,
int
axis
)
{
// prepare input&output variables
Scope
scope
;
std
::
string
x_var_name
=
"x"
;
std
::
string
out_var_name
=
"out"
;
std
::
string
out_ref_var_name
=
"out_ref"
;
auto
*
x
=
scope
.
Var
(
x_var_name
)
->
GetMutable
<
Tensor
>
();
auto
*
out
=
scope
.
Var
(
out_var_name
)
->
GetMutable
<
Tensor
>
();
auto
*
out_ref
=
scope
.
Var
(
out_ref_var_name
)
->
GetMutable
<
Tensor
>
();
x
->
Resize
(
input_shape
);
// initialize input&output data
FillTensor
<
float
>
(
x
);
// initialize op desc
cpp
::
OpDesc
opdesc
;
opdesc
.
SetType
(
"softmax"
);
opdesc
.
SetInput
(
"X"
,
{
x_var_name
});
opdesc
.
SetOutput
(
"Out"
,
{
out_var_name
});
opdesc
.
SetAttr
(
"axis"
,
axis
);
// create and convert op to NPU model, then run it on NPU
auto
op
=
CreateOp
<
operators
::
SoftmaxOp
>
(
opdesc
,
&
scope
);
LauchOp
(
op
,
{
x_var_name
},
{
out_var_name
});
out_ref
->
CopyDataFrom
(
*
out
);
// execute reference implementation and save to output tensor
softmax_ref
<
float
>
(
op
);
// compare results
auto
*
out_data
=
out
->
mutable_data
<
float
>
();
auto
*
out_ref_data
=
out_ref
->
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
out
->
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
out_ref_data
[
i
],
1e-2
);
}
}
// Exercises test_softmax over 2-D, 3-D and 4-D inputs for every axis.
// Cases known to hit HiAI DDK bugs are kept in the table but commented out.
TEST(NPUBridges, softmax) {
  struct SoftmaxCase {
    std::vector<int64_t> input_shape;
    int axis;
  };
  const std::vector<SoftmaxCase> softmax_cases = {
      {{1, 4}, -1},
      // Bug exists in HiAI DDK when the number of items > 16500
      // {{1, 16500}, -1},
      {{1, 4}, 0},
      {{1, 4}, 1},
      {{3, 4}, -1},
      {{3, 4}, 0},
      {{3, 4}, 1},
      {{1, 4, 7}, -1},
      {{1, 4, 7}, 0},
      // Bug exists in HiAI DDK when axis is 1 and iw > 1
      // {{1, 4, 7}, 1},
      {{1, 4, 1}, 1},
      {{1, 4, 7}, 2},
      {{3, 4, 7}, -1},
      {{3, 4, 7}, 0},
      {{3, 4, 1}, 1},
      {{3, 4, 7}, 2},
      {{1, 4, 7, 9}, -1},
      {{1, 4, 7, 9}, 0},
      {{1, 4, 7, 9}, 1},
      // Bug exists in HiAI DDK when axis is 2 and iw > 1
      // {{1, 4, 7, 9}, 2},
      {{1, 4, 7, 1}, 2},
      {{1, 4, 7, 9}, 3},
      {{3, 4, 7, 9}, -1},
      {{3, 4, 7, 9}, 0},
      {{3, 4, 7, 9}, 1},
      {{3, 4, 7, 1}, 2},
      {{3, 4, 7, 9}, 3},
  };

  for (const auto& softmax_case : softmax_cases) {
    test_softmax(softmax_case.input_shape, softmax_case.axis);
  }
}
}
// namespace bridges
}
// namespace npu
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
USE_LITE_OP
(
softmax
);
USE_NPU_BRIDGE
(
softmax
);
lite/tests/kernels/CMakeLists.txt
浏览文件 @
69ad4b80
...
...
@@ -2,21 +2,21 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_
lite_cc_test
(
test_kernel_conv_compute SRCS conv_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_conv_transpose_compute SRCS conv_transpose_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_scale_compute SRCS scale_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_power_compute SRCS power_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_shuffle_channel_compute SRCS shuffle_channel_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_yolo_box_compute SRCS yolo_box_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_fc_compute SRCS fc_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_elementwise_compute SRCS elementwise_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework
${
npu_kernels
}
${
x
pu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_lrn_compute SRCS lrn_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_decode_bboxes_compute SRCS decode_bboxes_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_box_coder_compute SRCS box_coder_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_activation_compute SRCS activation_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
n
pu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_argmax_compute SRCS argmax_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_axpy_compute SRCS axpy_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_norm_compute SRCS norm_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_cast_compute SRCS cast_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_instance_norm_compute SRCS instance_norm_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_grid_sampler_compute SRCS grid_sampler_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
#lite_cc_test(test_kernel_sequence_softmax_compute SRCS sequence_softmax_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_im2sequence_compute SRCS im2sequence_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_compare_compute SRCS compare_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
...
...
@@ -31,28 +31,28 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_BM) AND (LITE_
lite_cc_test
(
test_kernel_layer_norm_compute SRCS layer_norm_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_dropout_compute SRCS dropout_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_softmax_compute SRCS softmax_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_mul_compute SRCS mul_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_batch_norm_compute SRCS batch_norm_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_pool_compute SRCS pool_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
if
(
LITE_BUILD_EXTRA
)
lite_cc_test
(
test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework
${
x86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS
${
bm_kernels
}
arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework
${
bm_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework
${
bm_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS
${
bm_kernels
}
arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework
${
bm_kernels
}
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_unsqueeze_compute SRCS unsqueeze_compute_test.cc DEPS arena_framework
${
bm_kernels
}
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_assign_compute SRCS assign_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_assign_value_compute SRCS assign_value_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_box_clip_compute SRCS box_clip_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_reduce_mean_compute SRCS reduce_mean_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_reduce_prod_compute SRCS reduce_prod_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_stack_compute SRCS stack_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_range_compute SRCS range_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_affine_channel_compute SRCS affine_channel_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_anchor_generator_compute SRCS anchor_generator_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
#lite_cc_test(test_kernel_generate_proposals_compute SRCS generate_proposals_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_roi_align_compute SRCS roi_align_compute_test.cc DEPS arena_framework ${x86_kernels} ${cuda_kernels} ${bm_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test
(
test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework
${
x86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_search_aligned_mat_mul_compute SRCS search_aligned_mat_mul_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_search_seq_fc_compute SRCS search_seq_fc_compute_test.cc DEPS arena_framework
${
x
pu_kernels
}
${
npu_kernels
}
${
x
86_kernels
}
${
cuda_kernels
}
${
bm_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_lookup_table_compute SRCS lookup_table_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
bm_kernels
}
${
x86_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
lite_cc_test
(
test_kernel_gather_compute SRCS gather_compute_test.cc DEPS arena_framework
${
xpu_kernels
}
${
npu_kernels
}
${
x86_kernels
}
${
bm_kernels
}
${
cuda_kernels
}
${
arm_kernels
}
${
lite_ops
}
${
host_kernels
}
)
endif
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录