Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
fe589d43
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fe589d43
编写于
10月 13, 2018
作者:
qnqinan
提交者:
GitHub
10月 13, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1067 from zhangyang0701/develop
add FC format functions for FPGA track close
#1066
上级
9301c53b
ad57b923
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
69 addition
and
10 deletion
+69
-10
src/fpga/api.cpp
src/fpga/api.cpp
+16
-2
src/fpga/api.h
src/fpga/api.h
+3
-2
src/fpga/filter.cpp
src/fpga/filter.cpp
+39
-0
src/fpga/filter.h
src/fpga/filter.h
+6
-1
src/operators/kernel/fpga/fc_relu_kernel.cpp
src/operators/kernel/fpga/fc_relu_kernel.cpp
+1
-1
src/operators/kernel/fpga/fusion_fc_kernel.cpp
src/operators/kernel/fpga/fusion_fc_kernel.cpp
+1
-1
test/net/test_resnet.cpp
test/net/test_resnet.cpp
+3
-3
未找到文件。
src/fpga/api.cpp
浏览文件 @
fe589d43
...
...
@@ -104,7 +104,7 @@ int fpga_invalidate(void *address, size_t size) {
}
half
fp32_2_fp16
(
float
fp32_num
)
{
unsigned
long
tmp
=
*
(
unsigned
long
*
)(
&
fp32_num
);
unsigned
long
tmp
=
*
(
unsigned
long
*
)(
&
fp32_num
);
// NOLINT
half
t
=
((
tmp
&
0x007fffff
)
>>
13
)
|
((
tmp
&
0x80000000
)
>>
16
)
|
(((
tmp
&
0x7f800000
)
>>
13
)
-
(
112
<<
10
));
if
(
tmp
&
0x1000
)
{
...
...
@@ -120,7 +120,7 @@ float fp16_2_fp32(half fp16_num) {
int
tmp
=
0
;
float
fp32_num
;
tmp
=
s
<<
16
|
exp
<<
23
|
frac
<<
13
;
fp32_num
=
*
(
float
*
)
&
tmp
;
fp32_num
=
*
(
float
*
)
&
tmp
;
// NOLINT
return
fp32_num
;
}
...
...
@@ -347,6 +347,20 @@ void format_filter(framework::Tensor *filter_tensor, float max_value,
filter_tensor
->
reset_data_ptr
(
new_data
);
}
// Formats the weights of a fully-connected layer held in `filter_tensor` and
// re-points the tensor at the formatted buffer.
//
// filter_tensor: tensor whose float payload is read via data<float>() and
//                whose dims() supply the four extents at indices 0..3.
// max_value:     stored into the tensor as scale[0] = max_value / 127 and
//                scale[1] = 127 / max_value, and forwarded to
//                filter::format_fc_filter.
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
  // The divisions are carried out in double (127.0 is a double literal) and
  // only then narrowed to float.
  filter_tensor->scale[0] = static_cast<float>(max_value / 127.0);
  filter_tensor->scale[1] = static_cast<float>(127.0 / max_value);

  auto shape = filter_tensor->dims();
  auto num = shape[0];
  auto channel = shape[1];
  auto height = shape[2];
  auto width = shape[3];

  auto source = filter_tensor->data<float>();
  size_t byte_count = num * channel * height * width * sizeof(float);

  // Work on a private copy obtained from fpga_malloc; the formatter below
  // receives the address of this pointer and may replace it.
  auto formatted = static_cast<float *>(fpga_malloc(byte_count));
  fpga_copy(formatted, source, byte_count);

  // Group count is fixed to 1 for the fully-connected path.
  filter::format_fc_filter(&formatted, num, channel, height, width, 1,
                           max_value);
  filter_tensor->reset_data_ptr(formatted);
}
void
format_bias_scale_array
(
float
**
bias_scale_array
,
int
element_num_per_division
,
int
num
)
{
bias_scale
::
format_bias_scale_array
(
bias_scale_array
,
...
...
src/fpga/api.h
浏览文件 @
fe589d43
...
...
@@ -109,8 +109,8 @@ struct PoolingArgs {
struct
EWAddArgs
{
bool
relu_enabled
;
half
const0
;
// output0 = const0 x input0 + const1 x input1;
half
const1
;
uint32_t
const0
;
// output0 = const0 x input0 + const1 x input1;
uint32_t
const1
;
struct
ImageInputArgs
image0
;
struct
ImageInputArgs
image1
;
struct
ImageOutputArgs
output
;
...
...
@@ -214,6 +214,7 @@ int get_aligned_filter_element_num(int chw);
int
get_aligned_filter_num
(
int
num
);
void
format_filter
(
framework
::
Tensor
*
filter_tensor
,
float
max_value
,
int
group_num
);
// Formats the weights of a fully-connected layer stored in filter_tensor.
// max_value is used to fill the tensor's two scale entries
// (max_value / 127 and 127 / max_value) and is passed on to the filter-level
// formatter.
void
format_fc_filter
(
framework
::
Tensor
*
filter_tensor
,
float
max_value
);
void
format_bias_scale_array
(
float
**
bias_scale_array
,
int
element_num_per_division
,
int
num
);
void
format_concat_output
(
framework
::
Tensor
*
out
,
int
height
,
int
width
,
...
...
src/fpga/filter.cpp
浏览文件 @
fe589d43
...
...
@@ -225,6 +225,45 @@ void format_filter(float **data_in, int num, int channel, int height, int width,
num_after_alignment
*
sizeof
(
char
));
}
// Transposes the byte buffer referenced by *data_in.
//
// The input is read as element (c, n) at offset `num * c + n`; the output
// stores the same element at offset `n * chw + c`. A new buffer of
// chw * num bytes is obtained from fpga_malloc, *data_in is redirected to it,
// and the previous buffer is released with fpga_free.
//
// data_in: in/out pointer to the buffer pointer.
// num:     extent of the `n` index.
// chw:     extent of the `c` index.
void convert_fc_filter(char **data_in, int num, int chw) {
  char *const source = *data_in;
  char *const transposed =
      static_cast<char *>(fpga_malloc(chw * num * sizeof(char)));

  // The destination is filled strictly in order (n-major, then c), so a
  // running cursor is equivalent to indexing with n * chw + c.
  char *cursor = transposed;
  for (int row = 0; row < num; ++row) {
    for (int col = 0; col < chw; ++col) {
      *cursor++ = source[num * col + row];
    }
  }

  *data_in = transposed;
  fpga_free(source);
}
// Runs the fully-connected filter formatting pipeline on the buffer behind
// *data_in: quantize, convert_fc_filter, align_element, align_num, reorder,
// interleave, and finally fpga_flush on the resulting buffer.
//
// data_in:   in/out pointer to the float buffer; after quantize() it is
//            reinterpreted as a pointer to a char buffer for the later steps.
// num, channel, height, width: filter extents; channel * height * width is
//            the per-filter element count used throughout.
// group_num: forwarded to calc_num_per_div.
// max:       forwarded to quantize.
void format_fc_filter(float **data_in, int num, int channel, int height,
                      int width, int group_num, float max) {
  const int chw = channel * height * width;
  const int data_size = chw * num;

  // Work out how the filters are split into divisions and how many filters
  // exist once each division is padded to FILTER_NUM_ALIGNMENT.
  const int division_capacity = calc_division_capacity(chw);
  const int filters_per_div =
      calc_num_per_div(num, group_num, division_capacity);
  const int aligned_filters_per_div =
      align_to_x(filters_per_div, FILTER_NUM_ALIGNMENT);
  const int div_num = (num + filters_per_div - 1) / filters_per_div;
  const int num_after_alignment = aligned_filters_per_div * div_num;

  quantize(data_in, data_size, max);

  // From here on the same pointer slot is handled as a char buffer.
  char **quantized = reinterpret_cast<char **>(data_in);
  convert_fc_filter(quantized, num, chw);
  align_element(quantized, num, chw);
  align_num(quantized, filters_per_div, num, chw);
  reorder(quantized, num_after_alignment, chw);
  interleave(quantized, num_after_alignment, chw);

  fpga_flush(*quantized, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) *
                             num_after_alignment * sizeof(char));
}
}
// namespace filter
}
// namespace fpga
}
// namespace paddle_mobile
src/fpga/filter.h
浏览文件 @
fe589d43
...
...
@@ -25,7 +25,7 @@ int calc_division_capacity(int chw);
int
calc_split_num
(
int
num
,
int
division_capacity
);
int
calc_division_number
(
int
num
,
int
group_num
,
int
division_capacity
);
int
calc_num_per_div
(
int
num
,
int
group_num
,
int
division_capacity
);
void
convert_to_hwc
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
void
convert_to_hwc
(
char
**
data_in
,
int
num
,
int
channel
,
int
height
,
int
width
);
float
find_max
(
float
*
data_in
,
int
data_size
);
void
quantize
(
float
**
data_in
,
int
data_size
,
float
max
);
...
...
@@ -36,6 +36,11 @@ void reorder(float** data_in, int num_after_alignment, int chw);
void
interleave
(
float
**
data_in
,
int
num_after_alignment
,
int
chw
);
void
format_filter
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
int
width
,
int
group_num
,
float
max
);
// Transposes the byte buffer behind *data_in: the element read at offset
// num * c + n is written at offset n * chw + c in a newly allocated buffer,
// *data_in is redirected to that buffer and the old one is freed.
void
convert_fc_filter
(
char
**
data_in
,
int
num
,
int
chw
);
// Formats a fully-connected filter buffer in place through *data_in:
// quantizes it using `max`, then converts, aligns, reorders and interleaves
// the quantized data before flushing it. chw is channel * height * width.
void
format_fc_filter
(
float
**
data_in
,
int
num
,
int
channel
,
int
height
,
int
width
,
int
group_num
,
float
max
);
}
// namespace filter
}
// namespace fpga
}
// namespace paddle_mobile
src/operators/kernel/fpga/fc_relu_kernel.cpp
浏览文件 @
fe589d43
...
...
@@ -46,7 +46,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
filter
->
Resize
(
framework
::
make_ddim
({
num
,
filter_channel
,
height
,
width
}));
float
max_value
=
fpga
::
filter_find_max
(
filter
);
fpga
::
format_f
ilter
(
filter
,
max_value
,
1
);
fpga
::
format_f
c_filter
(
filter
,
max_value
);
int
element_num_per_div
=
fpga
::
get_filter_num_per_div
(
filter
,
1
);
fpga
::
format_bias_scale_array
(
&
bs_ptr
,
element_num_per_div
,
channel
);
...
...
src/operators/kernel/fpga/fusion_fc_kernel.cpp
浏览文件 @
fe589d43
...
...
@@ -47,7 +47,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
filter
->
Resize
(
framework
::
make_ddim
({
num
,
filter_channel
,
height
,
width
}));
float
max_value
=
fpga
::
filter_find_max
(
filter
);
fpga
::
format_f
ilter
(
filter
,
max_value
,
1
);
fpga
::
format_f
c_filter
(
filter
,
max_value
);
int
element_num_per_div
=
fpga
::
get_filter_num_per_div
(
filter
,
1
);
fpga
::
format_bias_scale_array
(
&
bs_ptr
,
element_num_per_div
,
channel
);
...
...
test/net/test_resnet.cpp
浏览文件 @
fe589d43
...
...
@@ -52,8 +52,8 @@ int main() {
#else
auto
time3
=
time
();
paddle_mobile
.
FeedData
(
input_tensor
);
paddle_mobile
.
Predict_To
(
10
);
paddle_mobile
.
Predict_From
(
10
);
paddle_mobile
.
Predict_To
(
-
1
);
/*
paddle_mobile.Predict_From(10);
auto tensor_ptr = paddle_mobile.FetchResult(9);
std::cout << "Tensor element number for op[9]: " << tensor_ptr->numel()
<< std::endl;
...
...
@@ -63,7 +63,7 @@ int main() {
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<<
std
::
endl
;
<< std::endl;
*/
#endif
}
return
0
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录