Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
7ee1308a
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7ee1308a
编写于
6月 15, 2019
作者:
C
Chunwei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'tangjian/incubate/lite' into 'incubate/lite'
refine fc prepare and test See merge request inference/paddlelite!5
上级
5f833603
6e5a7c6b
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
166 addition
and
114 deletion
+166
-114
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+35
-16
paddle/fluid/lite/kernels/arm/fc_compute.h
paddle/fluid/lite/kernels/arm/fc_compute.h
+9
-1
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+122
-97
未找到文件。
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
7ee1308a
...
@@ -23,10 +23,6 @@ namespace kernels {
...
@@ -23,10 +23,6 @@ namespace kernels {
namespace
arm
{
namespace
arm
{
void
FcCompute
::
PrepareForRun
()
{
void
FcCompute
::
PrepareForRun
()
{
// TODO(TJ): transpose weight
}
void
FcCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
auto
x_dims
=
param
.
input
->
dims
();
auto
x_dims
=
param
.
input
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
...
@@ -35,29 +31,52 @@ void FcCompute::Run() {
...
@@ -35,29 +31,52 @@ void FcCompute::Run() {
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
m_
=
x_dims
.
Slice
(
0
,
param
.
in_num_col_dims
).
production
();
k_
=
x_dims
.
Slice
(
param
.
in_num_col_dims
,
x_dims
.
size
()).
production
();
n_
=
w_dims
[
1
];
CHECK_EQ
(
k_
,
static_cast
<
int
>
(
w_dims
[
0
]));
if
(
m_
==
1
)
{
if
(
!
transed_weight_
)
{
transed_weight_
=
new
Tensor
;
}
transed_weight_
->
Resize
({
n_
,
k_
});
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
auto
*
t_data
=
transed_weight_
->
mutable_data
<
float
>
();
int
i
=
0
;
for
(
int
nn
=
0
;
nn
<
n_
;
++
nn
)
{
for
(
int
kk
=
0
;
kk
<
k_
;
++
kk
)
{
t_data
[
i
++
]
=
w_data
[
kk
*
n_
+
nn
];
}
}
}
}
void
FcCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
const
auto
*
i_data
=
param
.
input
->
data
<
float
>
();
const
auto
*
i_data
=
param
.
input
->
data
<
float
>
();
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
const
auto
*
b_data
=
param
.
bias
?
param
.
bias
->
data
<
float
>
()
:
nullptr
;
const
auto
*
b_data
=
param
.
bias
?
param
.
bias
->
data
<
float
>
()
:
nullptr
;
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
int
x_h
=
x_dims
.
Slice
(
0
,
param
.
in_num_col_dims
).
production
();
int
x_w
=
x_dims
.
Slice
(
param
.
in_num_col_dims
,
x_dims
.
size
()).
production
();
int
n
=
w_dims
[
1
];
CHECK_EQ
(
x_w
,
static_cast
<
int
>
(
w_dims
[
0
]));
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
if
(
x_h
>
1
)
{
if
(
m_
>
1
)
{
float
*
packed_in
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
float
*
packed_in
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
x_w
,
0
,
x_h
,
0
,
x_w
,
false
,
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
k_
,
0
,
m_
,
0
,
k_
,
false
,
&
ctx
);
&
ctx
);
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
m_
,
n_
,
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
x_h
,
n
,
k_
,
false
,
false
,
false
,
&
ctx
);
x_w
,
false
,
false
,
false
,
&
ctx
);
if
(
param
.
bias
)
{
if
(
param
.
bias
)
{
CHECK_EQ
(
param
.
bias
->
numel
(),
n
);
CHECK_EQ
(
param
.
bias
->
numel
(),
n
_
);
lite
::
arm
::
math
::
fill_bias_fc
(
o_data
,
b_data
,
x_h
,
n
);
lite
::
arm
::
math
::
fill_bias_fc
(
o_data
,
b_data
,
m_
,
n_
);
}
}
}
else
{
}
else
{
lite
::
arm
::
math
::
sgemv
(
w_data
,
i_data
,
o_data
,
false
,
n
,
x_w
,
CHECK
(
transed_weight_
);
const
auto
*
t_data
=
transed_weight_
->
data
<
float
>
();
lite
::
arm
::
math
::
sgemv
(
t_data
,
i_data
,
o_data
,
false
,
n_
,
k_
,
b_data
!=
nullptr
,
b_data
,
false
);
b_data
!=
nullptr
,
b_data
,
false
);
}
}
}
}
...
...
paddle/fluid/lite/kernels/arm/fc_compute.h
浏览文件 @
7ee1308a
...
@@ -29,7 +29,15 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
...
@@ -29,7 +29,15 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
void
Run
()
override
;
void
Run
()
override
;
virtual
~
FcCompute
()
=
default
;
~
FcCompute
()
override
{
if
(
transed_weight_
)
{
delete
transed_weight_
;
}
};
private:
lite
::
Tensor
*
transed_weight_
{
nullptr
};
int
m_
,
n_
,
k_
;
};
};
}
// namespace arm
}
// namespace arm
...
...
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
浏览文件 @
7ee1308a
...
@@ -14,6 +14,11 @@
...
@@ -14,6 +14,11 @@
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <random>
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/op_registry.h"
...
@@ -23,6 +28,17 @@ namespace lite {
...
@@ -23,6 +28,17 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
arm
{
namespace
arm
{
template
<
typename
T
>
void
FillData
(
T
*
a
,
const
int
n
,
const
T
lower
=
static_cast
<
T
>
(
-
2.
f
),
const
T
upper
=
static_cast
<
T
>
(
2.
f
))
{
static
unsigned
int
seed
=
100
;
std
::
mt19937
rng
(
seed
++
);
std
::
uniform_real_distribution
<
double
>
uniform_dist
(
0
,
1
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
a
[
i
]
=
static_cast
<
T
>
(
uniform_dist
(
rng
)
*
(
upper
-
lower
)
+
lower
);
}
}
TEST
(
fc_arm
,
retrive_op
)
{
TEST
(
fc_arm
,
retrive_op
)
{
auto
fc
=
auto
fc
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"fc"
);
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"fc"
);
...
@@ -37,108 +53,117 @@ TEST(fc_arm, init) {
...
@@ -37,108 +53,117 @@ TEST(fc_arm, init) {
}
}
TEST
(
fc_arm
,
compare_test
)
{
TEST
(
fc_arm
,
compare_test
)
{
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
using
T
=
float
;
constexpr
int
batch_size
=
2
;
x
.
Resize
({
batch_size
,
3
});
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
w
.
Resize
({
3
,
4
});
for
(
int
n
:
{
1
,
2
,
3
,
4
})
{
b
.
Resize
({
1
,
4
});
for
(
int
k
:
{
1
,
2
,
3
,
4
})
{
out
.
Resize
({
batch_size
,
4
});
for
(
bool
with_bias
:
{
true
,
false
})
{
ref
.
Resize
({
batch_size
,
4
});
VLOG
(
3
)
<<
"m: "
<<
m
<<
", n: "
<<
n
<<
", k: "
<<
k
<<
(
with_bias
?
", with bias"
:
""
);
auto
x_data
=
x
.
mutable_data
<
float
>
();
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
auto
w_data
=
w
.
mutable_data
<
float
>
();
auto
b_data
=
b
.
mutable_data
<
float
>
();
x
.
Resize
({
m
,
k
});
auto
out_data
=
out
.
mutable_data
<
float
>
();
w
.
Resize
({
k
,
n
});
auto
ref_data
=
ref
.
mutable_data
<
float
>
();
b
.
Resize
({
1
,
n
});
out
.
Resize
({
m
,
n
});
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
product
();
i
++
)
{
ref
.
Resize
({
m
,
n
});
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
auto
*
x_data
=
x
.
mutable_data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
product
();
i
++
)
{
auto
*
w_data
=
w
.
mutable_data
<
T
>
();
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
auto
*
b_data
=
with_bias
?
b
.
mutable_data
<
T
>
()
:
nullptr
;
}
for
(
int64_t
i
=
0
;
i
<
b
.
dims
().
product
();
i
++
)
{
auto
*
out_data
=
out
.
mutable_data
<
T
>
();
b_data
[
i
]
=
static_cast
<
float
>
(
i
);
auto
*
ref_data
=
ref
.
mutable_data
<
T
>
();
}
FillData
<
T
>
(
x_data
,
x
.
dims
().
production
());
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
batch_size
,
3
,
//
FillData
<
T
>
(
w_data
,
w
.
dims
().
production
());
w_data
,
3
,
4
,
//
FillData
<
T
>
(
out_data
,
out
.
dims
().
production
(),
0
,
0
);
b_data
,
ref_data
);
FillData
<
T
>
(
ref_data
,
ref
.
dims
().
production
(),
0
,
0
);
// fc compute kernel
if
(
with_bias
)
{
FcCompute
fc
;
FillData
<
T
>
(
b_data
,
b
.
dims
().
production
());
operators
::
FcParam
param
;
}
param
.
in_num_col_dims
=
1
;
FcCompute
fc
;
param
.
input
=
&
x
;
operators
::
FcParam
param
;
param
.
w
=
&
w
;
param
.
bias
=
&
b
;
param
.
input
=
&
x
;
param
.
output
=
&
out
;
param
.
w
=
&
w
;
param
.
in_mat_dims
=
x
.
dims
();
param
.
bias
=
with_bias
?
&
b
:
nullptr
;
param
.
output
=
&
out
;
DeviceInfo
::
Init
();
param
.
in_num_col_dims
=
1
;
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
param
.
in_mat_dims
=
x
.
dims
();
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
DeviceInfo
::
Init
();
fc
.
SetContext
(
std
::
move
(
ctx
));
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
fc
.
Run
();
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
VLOG
(
3
)
<<
"output vs ref"
;
fc
.
SetContext
(
std
::
move
(
ctx
));
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
i
++
)
{
fc
.
PrepareForRun
();
VLOG
(
3
)
<<
out_data
[
i
]
<<
" vs "
<<
ref_data
[
i
];
fc
.
Run
();
}
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
m
,
k
,
w_data
,
k
,
n
,
b_data
,
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
++
i
)
{
ref_data
);
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-5
);
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-3
);
}
}
}
}
}
}
}
}
TEST
(
fc_arm
,
num_col_dims
)
{
TEST
(
fc_arm
,
num_col_dims
)
{
FcCompute
fc
;
using
T
=
float
;
operators
::
FcParam
param
;
for
(
bool
with_bias
:
{
true
,
false
})
{
lite
::
Tensor
x
;
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
lite
::
Tensor
w
;
lite
::
Tensor
bias
;
x
.
Resize
({
1
,
2
,
3
});
lite
::
Tensor
output
;
w
.
Resize
({
3
,
4
});
b
.
Resize
({
1
,
4
});
x
.
Resize
({
1
,
2
,
3
});
out
.
Resize
({
2
,
4
});
w
.
Resize
({
3
,
4
});
ref
.
Resize
({
2
,
4
});
bias
.
Resize
({
1
,
4
});
output
.
Resize
({
2
,
4
});
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
w_data
=
w
.
mutable_data
<
float
>
();
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
b_data
=
with_bias
?
b
.
mutable_data
<
T
>
()
:
nullptr
;
auto
*
w_data
=
w
.
mutable_data
<
float
>
();
auto
*
bias_data
=
bias
.
mutable_data
<
float
>
();
auto
*
out_data
=
out
.
mutable_data
<
T
>
();
auto
*
output_data
=
output
.
mutable_data
<
float
>
();
auto
*
ref_data
=
ref
.
mutable_data
<
T
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
product
();
i
++
)
{
FillData
<
T
>
(
x_data
,
x
.
dims
().
production
());
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
FillData
<
T
>
(
w_data
,
w
.
dims
().
production
());
}
FillData
<
T
>
(
out_data
,
out
.
dims
().
production
(),
0
,
0
);
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
product
();
i
++
)
{
FillData
<
T
>
(
ref_data
,
ref
.
dims
().
production
(),
0
,
0
);
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
if
(
with_bias
)
{
FillData
<
T
>
(
b_data
,
b
.
dims
().
production
());
}
FcCompute
fc
;
operators
::
FcParam
param
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
with_bias
?
&
b
:
nullptr
;
param
.
output
=
&
out
;
param
.
in_num_col_dims
=
2
;
param
.
in_mat_dims
=
x
.
dims
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
PrepareForRun
();
fc
.
Run
();
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
2
,
3
,
w_data
,
3
,
4
,
b_data
,
ref_data
);
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-3
);
}
}
}
for
(
int64_t
i
=
0
;
i
<
bias
.
dims
().
product
();
i
++
)
{
bias_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
output
.
dims
().
product
();
i
++
)
{
output_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
param
.
in_num_col_dims
=
2
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
&
bias
;
param
.
output
=
&
output
;
param
.
in_mat_dims
=
x
.
dims
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
Run
();
}
}
}
// namespace arm
}
// namespace arm
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录