Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b69262cf
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b69262cf
编写于
6月 15, 2019
作者:
S
superjomn
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'incubate/lite' of
http://10.87.145.36/inference/paddlelite
into hongming/arm-fix
上级
89c48e60
8749f4fc
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
187 addition
and
124 deletion
+187
-124
.gitlab-ci.yml
.gitlab-ci.yml
+14
-4
CMakeLists.txt
CMakeLists.txt
+1
-0
paddle/fluid/lite/core/naive_test_model.py
paddle/fluid/lite/core/naive_test_model.py
+6
-6
paddle/fluid/lite/kernels/arm/fc_compute.cc
paddle/fluid/lite/kernels/arm/fc_compute.cc
+35
-16
paddle/fluid/lite/kernels/arm/fc_compute.h
paddle/fluid/lite/kernels/arm/fc_compute.h
+9
-1
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
+122
-97
未找到文件。
.gitlab-ci.yml
浏览文件 @
b69262cf
...
...
@@ -9,6 +9,8 @@ stages:
-
build_mobile
check:prebuilt:
tags
:
-
lite
stage
:
ci
script
:
#- pip3 install pre-commit
...
...
@@ -24,17 +26,21 @@ check:prebuilt:
-
/root/.cache
build:server:
tags
:
-
lite
image
:
$SERVER_LITE_DOCKER_IMAGE
stage
:
build_server
cache
:
key
:
server_thirdparty
paths
:
-
build/third_party
-
/root/.ccache
script
:
#- export http_proxy=http://172.19.57.45:3128
#- export https_proxy=http://172.19.57.45:3128
-
export http_proxy=http://agent.baidu.com:8118
-
export https_proxy=http://agent.baidu.com:8118
-
apt install ccache
-
export http_proxy=http://172.19.57.45:3128
-
export https_proxy=http://172.19.57.45:3128
#- export http_proxy=http://agent.baidu.com:8118
#- export https_proxy=http://agent.baidu.com:8118
-
mkdir -p build
-
cd build
-
../paddle/fluid/lite/tools/build.sh cmake_x86
...
...
@@ -49,6 +55,8 @@ build:server:
-
check:prebuilt
build:mobile:
tags
:
-
lite
stage
:
build_mobile
image
:
$MOBILE_LITE_DOCKER_IMAGE
cache
:
...
...
@@ -56,7 +64,9 @@ build:mobile:
paths
:
-
$MOBILE_LITE_CACHE0
-
$MOBILE_LITE_CACHE1
-
/root/.ccache
script
:
-
apt install ccache
-
export http_proxy=http://172.19.57.45:3128
-
export https_proxy=http://172.19.57.45:3128
-
./paddle/fluid/lite/tools/build.sh build_test_arm
...
...
CMakeLists.txt
浏览文件 @
b69262cf
...
...
@@ -166,6 +166,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
#include(external/zlib) # download, build, install gtest
include
(
external/protobuf
)
# download, build, install protobuf
include
(
external/eigen
)
# download eigen3
include
(
ccache
)
# set ccache for compilation
include
(
generic
)
# simplify cmake module
include
(
configure
)
# add paddle env configuration
...
...
paddle/fluid/lite/core/naive_test_model.py
浏览文件 @
b69262cf
...
...
@@ -18,10 +18,10 @@ import numpy as np
import
paddle.fluid
as
fluid
from
paddle.fluid.backward
import
append_backward
a
=
fluid
.
layers
.
data
(
name
=
"a"
,
shape
=
[
100
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
10
0
],
dtype
=
'float32'
)
a
=
fluid
.
layers
.
data
(
name
=
"a"
,
shape
=
[
2
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
"label"
,
shape
=
[
10
],
dtype
=
'float32'
)
a1
=
fluid
.
layers
.
fc
(
input
=
a
,
size
=
500
,
act
=
None
,
bias_attr
=
False
)
a1
=
fluid
.
layers
.
fc
(
input
=
a
,
size
=
3
,
act
=
None
,
bias_attr
=
False
)
cost
=
fluid
.
layers
.
square_error_cost
(
a1
,
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
...
...
@@ -36,7 +36,7 @@ exe.run(fluid.default_startup_program())
with
open
(
'startup_program.pb'
,
'wb'
)
as
f
:
f
.
write
(
fluid
.
default_startup_program
().
desc
.
serialize_to_string
())
data_1
=
np
.
array
(
numpy
.
random
.
random
([
100
,
100
]),
dtype
=
'float32'
)
#
data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')
#fluid.default_main_program().desc.
...
...
@@ -50,7 +50,7 @@ with open('main_program.pb', 'wb') as f:
#outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])
sys
.
exit
(
0
)
#
sys.exit(0)
fluid
.
io
.
save_inference_model
(
"./model2"
,
[
a
.
name
],
[
a1
],
exe
)
print
(
numpy
.
array
(
outs
))
#
print(numpy.array(outs))
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
b69262cf
...
...
@@ -23,10 +23,6 @@ namespace kernels {
namespace
arm
{
void
FcCompute
::
PrepareForRun
()
{
// TODO(TJ): transpose weight
}
void
FcCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
auto
x_dims
=
param
.
input
->
dims
();
auto
w_dims
=
param
.
w
->
dims
();
...
...
@@ -35,29 +31,52 @@ void FcCompute::Run() {
CHECK_EQ
(
w_dims
.
size
(),
2UL
);
CHECK_EQ
(
param
.
output
->
dims
().
size
(),
2UL
);
m_
=
x_dims
.
Slice
(
0
,
param
.
in_num_col_dims
).
production
();
k_
=
x_dims
.
Slice
(
param
.
in_num_col_dims
,
x_dims
.
size
()).
production
();
n_
=
w_dims
[
1
];
CHECK_EQ
(
k_
,
static_cast
<
int
>
(
w_dims
[
0
]));
if
(
m_
==
1
)
{
if
(
!
transed_weight_
)
{
transed_weight_
=
new
Tensor
;
}
transed_weight_
->
Resize
({
n_
,
k_
});
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
auto
*
t_data
=
transed_weight_
->
mutable_data
<
float
>
();
int
i
=
0
;
for
(
int
nn
=
0
;
nn
<
n_
;
++
nn
)
{
for
(
int
kk
=
0
;
kk
<
k_
;
++
kk
)
{
t_data
[
i
++
]
=
w_data
[
kk
*
n_
+
nn
];
}
}
}
}
void
FcCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
operators
::
FcParam
>
();
const
auto
*
i_data
=
param
.
input
->
data
<
float
>
();
const
auto
*
w_data
=
param
.
w
->
data
<
float
>
();
const
auto
*
b_data
=
param
.
bias
?
param
.
bias
->
data
<
float
>
()
:
nullptr
;
auto
*
o_data
=
param
.
output
->
mutable_data
<
float
>
();
int
x_h
=
x_dims
.
Slice
(
0
,
param
.
in_num_col_dims
).
production
();
int
x_w
=
x_dims
.
Slice
(
param
.
in_num_col_dims
,
x_dims
.
size
()).
production
();
int
n
=
w_dims
[
1
];
CHECK_EQ
(
x_w
,
static_cast
<
int
>
(
w_dims
[
0
]));
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
if
(
x_h
>
1
)
{
if
(
m_
>
1
)
{
float
*
packed_in
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
x_w
,
0
,
x_h
,
0
,
x_w
,
false
,
&
ctx
);
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
x_h
,
n
,
x_w
,
false
,
false
,
false
,
&
ctx
);
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
k_
,
0
,
m_
,
0
,
k_
,
false
,
&
ctx
);
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
m_
,
n_
,
k_
,
false
,
false
,
false
,
&
ctx
);
if
(
param
.
bias
)
{
CHECK_EQ
(
param
.
bias
->
numel
(),
n
);
lite
::
arm
::
math
::
fill_bias_fc
(
o_data
,
b_data
,
x_h
,
n
);
CHECK_EQ
(
param
.
bias
->
numel
(),
n
_
);
lite
::
arm
::
math
::
fill_bias_fc
(
o_data
,
b_data
,
m_
,
n_
);
}
}
else
{
lite
::
arm
::
math
::
sgemv
(
w_data
,
i_data
,
o_data
,
false
,
n
,
x_w
,
CHECK
(
transed_weight_
);
const
auto
*
t_data
=
transed_weight_
->
data
<
float
>
();
lite
::
arm
::
math
::
sgemv
(
t_data
,
i_data
,
o_data
,
false
,
n_
,
k_
,
b_data
!=
nullptr
,
b_data
,
false
);
}
}
...
...
paddle/fluid/lite/kernels/arm/fc_compute.h
浏览文件 @
b69262cf
...
...
@@ -29,7 +29,15 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
void
Run
()
override
;
virtual
~
FcCompute
()
=
default
;
~
FcCompute
()
override
{
if
(
transed_weight_
)
{
delete
transed_weight_
;
}
};
private:
lite
::
Tensor
*
transed_weight_
{
nullptr
};
int
m_
,
n_
,
k_
;
};
}
// namespace arm
...
...
paddle/fluid/lite/kernels/arm/fc_compute_test.cc
浏览文件 @
b69262cf
...
...
@@ -14,6 +14,11 @@
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <memory>
#include <random>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/core/op_registry.h"
...
...
@@ -23,6 +28,17 @@ namespace lite {
namespace
kernels
{
namespace
arm
{
template
<
typename
T
>
void
FillData
(
T
*
a
,
const
int
n
,
const
T
lower
=
static_cast
<
T
>
(
-
2.
f
),
const
T
upper
=
static_cast
<
T
>
(
2.
f
))
{
static
unsigned
int
seed
=
100
;
std
::
mt19937
rng
(
seed
++
);
std
::
uniform_real_distribution
<
double
>
uniform_dist
(
0
,
1
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
a
[
i
]
=
static_cast
<
T
>
(
uniform_dist
(
rng
)
*
(
upper
-
lower
)
+
lower
);
}
}
TEST
(
fc_arm
,
retrive_op
)
{
auto
fc
=
KernelRegistry
::
Global
().
Create
<
TARGET
(
kARM
),
PRECISION
(
kFloat
)
>
(
"fc"
);
...
...
@@ -37,108 +53,117 @@ TEST(fc_arm, init) {
}
TEST
(
fc_arm
,
compare_test
)
{
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
constexpr
int
batch_size
=
2
;
x
.
Resize
({
batch_size
,
3
});
w
.
Resize
({
3
,
4
});
b
.
Resize
({
1
,
4
});
out
.
Resize
({
batch_size
,
4
});
ref
.
Resize
({
batch_size
,
4
});
auto
x_data
=
x
.
mutable_data
<
float
>
();
auto
w_data
=
w
.
mutable_data
<
float
>
();
auto
b_data
=
b
.
mutable_data
<
float
>
();
auto
out_data
=
out
.
mutable_data
<
float
>
();
auto
ref_data
=
ref
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
product
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
product
();
i
++
)
{
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
b
.
dims
().
product
();
i
++
)
{
b_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
batch_size
,
3
,
//
w_data
,
3
,
4
,
//
b_data
,
ref_data
);
// fc compute kernel
FcCompute
fc
;
operators
::
FcParam
param
;
param
.
in_num_col_dims
=
1
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
&
b
;
param
.
output
=
&
out
;
param
.
in_mat_dims
=
x
.
dims
();
DeviceInfo
::
Init
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
Run
();
VLOG
(
3
)
<<
"output vs ref"
;
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
i
++
)
{
VLOG
(
3
)
<<
out_data
[
i
]
<<
" vs "
<<
ref_data
[
i
];
}
for
(
int
i
=
0
;
i
<
out
.
dims
().
product
();
++
i
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-5
);
using
T
=
float
;
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
for
(
int
n
:
{
1
,
2
,
3
,
4
})
{
for
(
int
k
:
{
1
,
2
,
3
,
4
})
{
for
(
bool
with_bias
:
{
true
,
false
})
{
VLOG
(
3
)
<<
"m: "
<<
m
<<
", n: "
<<
n
<<
", k: "
<<
k
<<
(
with_bias
?
", with bias"
:
""
);
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
x
.
Resize
({
m
,
k
});
w
.
Resize
({
k
,
n
});
b
.
Resize
({
1
,
n
});
out
.
Resize
({
m
,
n
});
ref
.
Resize
({
m
,
n
});
auto
*
x_data
=
x
.
mutable_data
<
T
>
();
auto
*
w_data
=
w
.
mutable_data
<
T
>
();
auto
*
b_data
=
with_bias
?
b
.
mutable_data
<
T
>
()
:
nullptr
;
auto
*
out_data
=
out
.
mutable_data
<
T
>
();
auto
*
ref_data
=
ref
.
mutable_data
<
T
>
();
FillData
<
T
>
(
x_data
,
x
.
dims
().
production
());
FillData
<
T
>
(
w_data
,
w
.
dims
().
production
());
FillData
<
T
>
(
out_data
,
out
.
dims
().
production
(),
0
,
0
);
FillData
<
T
>
(
ref_data
,
ref
.
dims
().
production
(),
0
,
0
);
if
(
with_bias
)
{
FillData
<
T
>
(
b_data
,
b
.
dims
().
production
());
}
FcCompute
fc
;
operators
::
FcParam
param
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
with_bias
?
&
b
:
nullptr
;
param
.
output
=
&
out
;
param
.
in_num_col_dims
=
1
;
param
.
in_mat_dims
=
x
.
dims
();
DeviceInfo
::
Init
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
PrepareForRun
();
fc
.
Run
();
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
m
,
k
,
w_data
,
k
,
n
,
b_data
,
ref_data
);
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-3
);
}
}
}
}
}
}
TEST
(
fc_arm
,
num_col_dims
)
{
FcCompute
fc
;
operators
::
FcParam
param
;
lite
::
Tensor
x
;
lite
::
Tensor
w
;
lite
::
Tensor
bias
;
lite
::
Tensor
output
;
x
.
Resize
({
1
,
2
,
3
});
w
.
Resize
({
3
,
4
});
bias
.
Resize
({
1
,
4
});
output
.
Resize
({
2
,
4
});
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
w_data
=
w
.
mutable_data
<
float
>
();
auto
*
bias_data
=
bias
.
mutable_data
<
float
>
();
auto
*
output_data
=
output
.
mutable_data
<
float
>
();
for
(
int64_t
i
=
0
;
i
<
x
.
dims
().
product
();
i
++
)
{
x_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
w
.
dims
().
product
();
i
++
)
{
w_data
[
i
]
=
static_cast
<
float
>
(
i
);
using
T
=
float
;
for
(
bool
with_bias
:
{
true
,
false
})
{
lite
::
Tensor
x
,
w
,
b
,
out
,
ref
;
x
.
Resize
({
1
,
2
,
3
});
w
.
Resize
({
3
,
4
});
b
.
Resize
({
1
,
4
});
out
.
Resize
({
2
,
4
});
ref
.
Resize
({
2
,
4
});
auto
*
x_data
=
x
.
mutable_data
<
float
>
();
auto
*
w_data
=
w
.
mutable_data
<
float
>
();
auto
*
b_data
=
with_bias
?
b
.
mutable_data
<
T
>
()
:
nullptr
;
auto
*
out_data
=
out
.
mutable_data
<
T
>
();
auto
*
ref_data
=
ref
.
mutable_data
<
T
>
();
FillData
<
T
>
(
x_data
,
x
.
dims
().
production
());
FillData
<
T
>
(
w_data
,
w
.
dims
().
production
());
FillData
<
T
>
(
out_data
,
out
.
dims
().
production
(),
0
,
0
);
FillData
<
T
>
(
ref_data
,
ref
.
dims
().
production
(),
0
,
0
);
if
(
with_bias
)
{
FillData
<
T
>
(
b_data
,
b
.
dims
().
production
());
}
FcCompute
fc
;
operators
::
FcParam
param
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
with_bias
?
&
b
:
nullptr
;
param
.
output
=
&
out
;
param
.
in_num_col_dims
=
2
;
param
.
in_mat_dims
=
x
.
dims
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
PrepareForRun
();
fc
.
Run
();
lite
::
arm
::
math
::
fc_compute_eigen
(
x_data
,
2
,
3
,
w_data
,
3
,
4
,
b_data
,
ref_data
);
for
(
int
i
=
0
;
i
<
out
.
dims
().
production
();
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
ref_data
[
i
],
1e-3
);
}
}
for
(
int64_t
i
=
0
;
i
<
bias
.
dims
().
product
();
i
++
)
{
bias_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
for
(
int64_t
i
=
0
;
i
<
output
.
dims
().
product
();
i
++
)
{
output_data
[
i
]
=
static_cast
<
float
>
(
i
);
}
param
.
in_num_col_dims
=
2
;
param
.
input
=
&
x
;
param
.
w
=
&
w
;
param
.
bias
=
&
bias
;
param
.
output
=
&
output
;
param
.
in_mat_dims
=
x
.
dims
();
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
ctx
->
As
<
ARMContext
>
();
DeviceInfo
::
Init
();
fc
.
SetParam
(
param
);
fc
.
SetContext
(
std
::
move
(
ctx
));
fc
.
Run
();
}
}
// namespace arm
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录