Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
cf027d49
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
cf027d49
编写于
6月 12, 2019
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use arm kernel
上级
926ab88e
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
62 addition
and
108 deletion
+62
-108
paddle/fluid/lite/kernels/arm/CMakeLists.txt
paddle/fluid/lite/kernels/arm/CMakeLists.txt
+2
-1
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+0
-4
paddle/fluid/lite/kernels/arm/fc_compute.h
paddle/fluid/lite/kernels/arm/fc_compute.h
+0
-3
paddle/fluid/lite/kernels/arm/mul_compute.cc
paddle/fluid/lite/kernels/arm/mul_compute.cc
+34
-38
paddle/fluid/lite/kernels/arm/mul_compute.h
paddle/fluid/lite/kernels/arm/mul_compute.h
+2
-40
paddle/fluid/lite/kernels/arm/mul_compute_test.cc
paddle/fluid/lite/kernels/arm/mul_compute_test.cc
+24
-22
未找到文件。
paddle/fluid/lite/kernels/arm/CMakeLists.txt
浏览文件 @
cf027d49
...
...
@@ -6,7 +6,7 @@ message(STATUS "compile with lite ARM kernels")
cc_library
(
fc_compute_arm SRCS fc_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
relu_compute_arm SRCS relu_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mul_compute_arm SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
eigen3
)
cc_library
(
mul_compute_arm SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
scale_compute_arm SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
softmax_compute_arm SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
cc_library
(
conv_compute_arm SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
math_arm
)
...
...
@@ -19,6 +19,7 @@ lite_cc_test(test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_
lite_cc_test
(
test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm
)
lite_cc_test
(
test_elementwise_add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_arm
)
lite_cc_test
(
test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm
)
lite_cc_test
(
test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm
)
set
(
arm_kernels
fc_compute_arm
...
...
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
cf027d49
...
...
@@ -63,10 +63,6 @@ void FcCompute::Run() {
}
}
TargetType
FcCompute
::
target
()
const
{
return
TARGET
(
kARM
);
}
PrecisionType
FcCompute
::
precision
()
const
{
return
PRECISION
(
kFloat
);
}
}
// namespace arm
}
// namespace kernels
}
// namespace lite
...
...
paddle/fluid/lite/kernels/arm/fc_compute.h
浏览文件 @
cf027d49
...
...
@@ -29,9 +29,6 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
void
Run
()
override
;
TargetType
target
()
const
override
;
PrecisionType
precision
()
const
override
;
virtual
~
FcCompute
()
=
default
;
};
...
...
paddle/fluid/lite/kernels/arm/mul_compute.cc
浏览文件 @
cf027d49
...
...
@@ -12,57 +12,53 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include
<Eigen/Core>
#include "paddle/fluid/lite/
core/kernel
.h"
#include
"paddle/fluid/lite/kernels/arm/mul_compute.h"
#include "paddle/fluid/lite/
arm/math/funcs
.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type
s
.h"
#include "paddle/fluid/lite/core/type
_system
.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
template
<
typename
T
>
void
mul_compute_eigen
(
const
T
*
x
,
int
x_h
,
int
x_w
,
const
T
*
y
,
int
y_h
,
int
y_w
,
T
*
out
)
{
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
void
MulCompute
::
PrepareForRun
()
{
// TODO(TJ): transpose x or y if necessary
}
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
Eigen
::
Map
<
const
matrix_t
>
Y
(
y
,
y_h
,
y_w
);
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
y_w
);
void
MulCompute
::
Run
()
{
auto
&
param
=
Param
<
param_t
>
();
Out
=
X
*
Y
;
}
const
auto
*
x_data
=
param
.
x
->
data
<
float
>
();
const
auto
*
y_data
=
param
.
y
->
data
<
float
>
();
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
class
MulCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
MulParam
;
int
x_h
=
static_cast
<
int
>
(
param
.
x
->
dims
().
Slice
(
0
,
param
.
x_num_col_dims
).
production
());
int
x_w
=
static_cast
<
int
>
(
param
.
x
->
dims
()
.
Slice
(
param
.
x_num_col_dims
,
param
.
x
->
dims
().
size
())
.
production
());
int
y_h
=
static_cast
<
int
>
(
param
.
y
->
dims
().
Slice
(
0
,
param
.
y_num_col_dims
).
production
());
int
y_w
=
static_cast
<
int
>
(
param
.
y
->
dims
()
.
Slice
(
param
.
y_num_col_dims
,
param
.
y
->
dims
().
size
())
.
production
());
void
Run
()
override
{
auto
&
param
=
Param
<
operators
::
MulParam
>
();
core
::
dim2
x_shape
(
{
static_cast
<
int
>
(
param
.
x
->
dims
().
Slice
(
0
,
param
.
x_num_col_dims
).
production
()),
static_cast
<
int
>
(
param
.
x
->
dims
()
.
Slice
(
param
.
x_num_col_dims
,
param
.
x
->
dims
().
size
())
.
production
())});
core
::
dim2
y_shape
(
{
static_cast
<
int
>
(
param
.
y
->
dims
().
Slice
(
0
,
param
.
y_num_col_dims
).
production
()),
static_cast
<
int
>
(
param
.
y
->
dims
()
.
Slice
(
param
.
y_num_col_dims
,
param
.
y
->
dims
().
size
())
.
production
())});
CHECK_EQ
(
x_w
,
y_h
)
<<
"x_w must be equal with y_h"
;
if
(
y_w
==
1
||
x_h
==
1
)
{
lite
::
arm
::
math
::
sgemv
(
x_data
,
y_data
,
o_data
,
false
,
x_h
,
x_w
,
false
,
nullptr
,
false
);
mul_compute_eigen
(
param
.
x
->
data
<
float
>
(),
x_shape
.
x
,
x_shape
.
y
,
//
param
.
y
->
data
<
float
>
(),
y_shape
.
x
,
y_shape
.
y
,
//
param
.
output
->
mutable_data
<
float
>
());
}
}
else
{
constexpr
bool
is_tranposed_y
=
false
;
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
virtual
~
MulCompute
()
=
default
;
};
lite
::
arm
::
math
::
sgemm_prepack
(
x_data
,
y_data
,
nullptr
,
o_data
,
x_h
,
y_w
,
x_w
,
false
,
false
,
is_tranposed_y
,
&
ctx
);
}
}
}
// namespace arm
}
// namespace kernels
...
...
paddle/fluid/lite/kernels/arm/mul_compute.h
浏览文件 @
cf027d49
...
...
@@ -22,44 +22,13 @@ namespace lite {
namespace
kernels
{
namespace
arm
{
template
<
typename
T
>
void
mul_compute_eigen
(
const
T
*
x
,
int
x_h
,
int
x_w
,
const
T
*
y
,
int
y_h
,
int
y_w
,
T
*
out
)
{
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
Eigen
::
Map
<
const
matrix_t
>
Y
(
y
,
y_h
,
y_w
);
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
y_w
);
Out
=
X
*
Y
;
}
class
MulCompute
:
public
KernelLite
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
MulParam
;
void
Run
()
override
{
auto
&
param
=
Param
<
operators
::
MulParam
>
();
core
::
dim2
x_shape
(
{
static_cast
<
int
>
(
param
.
x
->
dims
().
Slice
(
0
,
param
.
x_num_col_dims
).
production
()),
static_cast
<
int
>
(
param
.
x
->
dims
()
.
Slice
(
param
.
x_num_col_dims
,
param
.
x
->
dims
().
size
())
.
production
())});
core
::
dim2
y_shape
(
{
static_cast
<
int
>
(
param
.
y
->
dims
().
Slice
(
0
,
param
.
y_num_col_dims
).
production
()),
static_cast
<
int
>
(
param
.
y
->
dims
()
.
Slice
(
param
.
y_num_col_dims
,
param
.
y
->
dims
().
size
())
.
production
())});
void
PrepareForRun
()
override
;
mul_compute_eigen
(
param
.
x
->
data
<
float
>
(),
x_shape
.
x
,
x_shape
.
y
,
//
param
.
y
->
data
<
float
>
(),
y_shape
.
x
,
y_shape
.
y
,
//
param
.
output
->
mutable_data
<
float
>
());
}
void
Run
()
override
;
virtual
~
MulCompute
()
=
default
;
};
...
...
@@ -68,10 +37,3 @@ class MulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
arm
::
MulCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kARM
))})
.
Finalize
();
paddle/fluid/lite/kernels/arm/mul_compute_test.cc
浏览文件 @
cf027d49
...
...
@@ -12,31 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/mul_compute.h"
#include <gtest/gtest.h>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
arm
{
TEST
(
fc
_arm
,
retrive_op
)
{
auto
fc
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"
fc
"
);
ASSERT_FALSE
(
fc
.
empty
());
ASSERT_TRUE
(
fc
.
front
());
TEST
(
mul
_arm
,
retrive_op
)
{
auto
mul
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"
mul
"
);
ASSERT_FALSE
(
mul
.
empty
());
ASSERT_TRUE
(
mul
.
front
());
}
TEST
(
fc
_arm
,
init
)
{
FcCompute
fc
;
ASSERT_EQ
(
fc
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
fc
.
target
(),
TARGET
(
kARM
));
TEST
(
mul
_arm
,
init
)
{
FcCompute
mul
;
ASSERT_EQ
(
mul
.
precision
(),
PRECISION
(
kFloat
));
ASSERT_EQ
(
mul
.
target
(),
TARGET
(
kARM
));
}
TEST
(
fc
_arm
,
compare_test
)
{
TEST
(
mul
_arm
,
compare_test
)
{
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
constexpr
int
batch_size
=
2
;
x
.
Resize
({
batch_size
,
3
});
...
...
@@ -65,8 +67,8 @@ TEST(fc_arm, compare_test) {
w_data
,
3
,
4
,
//
b_data
,
ref_data
);
//
fc
compute kernel
FcCompute
fc
;
//
mul
compute kernel
FcCompute
mul
;
operators
::
FcParam
param
;
param
.
in_num_col_dims
=
1
;
...
...
@@ -79,9 +81,9 @@ TEST(fc_arm, compare_test) {
DeviceInfo
::
Init
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
Run
();
mul
.
SetParam
(
param
);
mul
.
SetContext
(
std
::
move
(
ctx
));
mul
.
Run
();
VLOG
(
3
)
<<
"output vs ref"
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
i
++
)
{
...
...
@@ -93,8 +95,8 @@ TEST(fc_arm, compare_test) {
}
}
TEST
(
fc
_arm
,
num_col_dims
)
{
FcCompute
fc
;
TEST
(
mul
_arm
,
num_col_dims
)
{
FcCompute
mul
;
operators
::
FcParam
param
;
lite
::
Tensor
x
;
...
...
@@ -136,9 +138,9 @@ TEST(fc_arm, num_col_dims) {
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
Run
();
mul
.
SetParam
(
param
);
mul
.
SetContext
(
std
::
move
(
ctx
));
mul
.
Run
();
}
}
// namespace arm
...
...
@@ -146,4 +148,4 @@ TEST(fc_arm, num_col_dims) {
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
fc
,
kARM
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kARM
,
kFloat
,
kNCHW
,
def
);
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录