PaddlePaddle / Paddle-Lite
Commit 660ee569: fix bugs
Authored by zhangyang on Sep 07, 2018
Parent commit: 250969bb
Showing 15 changed files with 383 additions and 284 deletions.
src/fpga/api.cpp                                           +38  -23
src/fpga/api.h                                             +13  -1
src/framework/dim.h                                        +1   -0
src/operators/kernel/fpga/conv_add_bn_kernel.cpp           +38  -24
src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp      +36  -19
src/operators/kernel/fpga/conv_add_relu_kernel.cpp         +36  -21
src/operators/kernel/fpga/conv_bn_kernel.cpp               +34  -20
src/operators/kernel/fpga/conv_bn_relu_kernel.cpp          +35  -19
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp  +12  -12
src/operators/kernel/fpga/fc_relu_kernel.cpp               +46  -35
src/operators/kernel/fpga/fusion_fc_kernel.cpp             +50  -32
src/operators/kernel/fpga/pool_kernel.cpp                  +11  -11
src/operators/kernel/fpga/softmax_kernel.cpp               +2   -2
src/operators/math/gemm.cpp                                +1   -1
src/operators/op_param.h                                   +30  -64
src/fpga/api.cpp

@@ -68,26 +68,26 @@ void fpga_copy(void *dest, const void *src, size_t num) {
   memcpy(dest, src, num);
 }
 
-int ComputeFpgaConv(const struct ConvArgs &args) {
+int ComputeFpgaConv(const struct WrapperConvArgs &args) {
 #ifdef FPGA_TEST_MODE
-  DLOG << " relu_enabled:" << args.relu_enabled
-       << " sb_address:" << args.sb_address
-       << " filter_address:" << args.filter_address
-       << " filter_num:" << args.filter_num
-       << " group_num:" << args.group_num;
-  DLOG << " image_address:" << args.image.address
-       << " image_scale_address:" << args.image.scale_address
-       << " image_channels:" << args.image.channels
-       << " image_height:" << args.image.height
-       << " image_width:" << args.image.width
-       << " pad_height:" << args.image.pad_height
-       << " pad_width:" << args.image.pad_width;
-  DLOG << " kernel_height:" << args.kernel.height
-       << " kernel_width:" << args.kernel.width
-       << " stride_h:" << args.kernel.stride_h
-       << " stride_w:" << args.kernel.stride_w;
-  DLOG << " out_address:" << args.output.address
-       << " out_scale_address:" << args.output.scale_address;
+  /* DLOG << " relu_enabled:" << args.relu_enabled
+          << " sb_address:" << args.sb_address
+          << " filter_address:" << args.filter_address
+          << " filter_num:" << args.filter_num
+          << " group_num:" << args.group_num;
+     DLOG << " image_address:" << args.image.address
+          << " image_scale_address:" << args.image.scale_address
+          << " image_channels:" << args.image.channels
+          << " image_height:" << args.image.height
+          << " image_width:" << args.image.width
+          << " pad_height:" << args.image.pad_height
+          << " pad_width:" << args.image.pad_width;
+     DLOG << " kernel_height:" << args.kernel.height
+          << " kernel_width:" << args.kernel.width
+          << " stride_h:" << args.kernel.stride_h
+          << " stride_w:" << args.kernel.stride_w;
+     DLOG << " out_address:" << args.output.address
+          << " out_scale_address:" << args.output.scale_address; */
 #endif
   return do_ioctl(IOCTL_CONFIG_CONV, &args);

@@ -178,16 +178,31 @@ float filter_find_max(framework::Tensor *filter_tensor) {
   auto filter_ptr = filter_tensor->data<float>();
   return filter::find_max(filter_ptr, filter_tensor->numel());
 }
 
+int get_plit_num(framework::Tensor *filter_tensor) {
+  auto dims = filter_tensor->dims();
+  int chw = dims[1] * dims[2] * dims[3];
+  int num = dims[0];
+  int div_capacity = filter::calc_division_capacity(chw);
+  return filter::calc_split_num(num, div_capacity);
+}
+
 int get_element_num_per_div(framework::Tensor *filter_tensor, int group_num) {
   auto dims = filter_tensor->dims();
-  PADDLE_MOBILE_ENFORCE(dims.size() == 4 || dims.size() == 2,
-                        "Filter order should be 4 or 2");
-  int chw = dims.size() == 4 ? dims[1] * dims[2] * dims[3] : dims[1];
-  int num = dims.size() == 4 ? dims[0] : dims[1];
+  int chw = dims[1] * dims[2] * dims[3];
+  int num = dims[0];
   int div_capacity = filter::calc_division_capacity(chw);
   return filter::calc_num_per_div(num, group_num, div_capacity);
 }
 
+int get_aligned_filter_element_num(int chw) {
+  return align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
+}
+
+int get_aligned_filter_num(int num) {
+  return align_to_x(num, FILTER_NUM_ALIGNMENT);
+}
+
 void format_filter(framework::Tensor *filter_tensor, float max_value,
                    int group_num) {
   auto dims = filter_tensor->dims();
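The reworked ComputeFpgaConv above still hands the whole argument block to do_ioctl, even though WrapperConvArgs now carries one ConvArgs per split. A split-aware dispatcher would presumably walk that array instead; the sketch below is an illustration only, not code from this commit, and its function name is hypothetical.

// Illustration only (not in this commit): issue one IOCTL_CONFIG_CONV per
// split, using the WrapperConvArgs layout added in src/fpga/api.h. Assumes
// fpga/api.h (WrapperConvArgs, do_ioctl, IOCTL_CONFIG_CONV) is in scope.
int ComputeFpgaSplitConv(const struct WrapperConvArgs &wrapper) {
  int ret = 0;
  for (uint32_t i = 0; i < wrapper.split_num; i++) {
    // wrapper.args was allocated as fpga_malloc(split_num * sizeof(ConvArgs))
    ret = do_ioctl(IOCTL_CONFIG_CONV, &wrapper.args[i]);
    if (ret != 0) {
      return ret;  // stop at the first split the driver rejects
    }
  }
  return ret;
}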
src/fpga/api.h

@@ -92,6 +92,14 @@ struct ConvArgs {
   struct ImageOutputArgs output;
 };
 
+struct WrapperConvArgs {
+  uint32_t split_num;
+  uint32_t group_num;
+  uint32_t filter_num;
+  struct ImageOutputArgs output;
+  struct ConvArgs *args;
+};
+
 struct PoolingArgs {
   struct KernelArgs kernel;
   struct ImageInputArgs image;  // input image;

@@ -165,7 +173,7 @@ enum FPGA_ERR_TYPE {
 //============================== API =============================
 
 int PerformBypass(const struct BypassArgs& args);
-int ComputeFpgaConv(const struct ConvArgs& args);
+int ComputeFpgaConv(const struct WrapperConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
 int ComputeFpgaEWAdd(const struct EWAddArgs& args);

@@ -174,6 +182,10 @@ void format_image(framework::Tensor* image_tensor);
 void format_ofm(framework::Tensor* ofm_tensor);  // only allocate memory
 
 float filter_find_max(framework::Tensor* filter_tensor);
+int get_element_num_per_div(framework::Tensor* filter_tensor, int group_num);
+int get_plit_num(framework::Tensor* filter_tensor);
+int get_aligned_filter_element_num(int chw);
+int get_aligned_filter_num(int num);
 void format_filter(framework::Tensor* filter_tensor, float max_value,
                    int group_num);
 void format_fc_matrix(framework::Tensor* filter_tensor, float max_value,
src/framework/dim.h

@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 
 #include <cstdlib>
 #include <string>
+#include "common/enforce.h"
 
 namespace paddle_mobile {
 namespace framework {
src/operators/kernel/fpga/conv_add_bn_kernel.cpp

@@ -15,7 +15,6 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBN_OP
 
 #include "operators/kernel/conv_add_bn_kernel.h"
-#include "fpga/api.h"
 
 namespace paddle_mobile {
 namespace operators {

@@ -23,11 +22,11 @@ namespace operators {
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  auto *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();

@@ -41,10 +40,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
                         "Output channel should be equal to bias number");
 
   const int channel = out->dims()[1];
-  float *bs_ptr =
+  auto *bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  auto *new_scale = new Tensor();
+  auto *new_bias = new Tensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({channel});
   auto new_bias_ptr = new_bias->mutable_data<float>({channel});

@@ -70,27 +69,42 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
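Every kernel in this commit repeats the same per-split sizing: ordinary splits carry element_num_per_div filters, while the last split carries the remainder rounded up to the FPGA filter alignment. The small, self-contained illustration below reproduces that formula; the capacity and alignment values are invented stand-ins, not values from the code, and the ceiling division stands in for fpga::get_plit_num().

#include <cstdint>
#include <cstdio>

// Stand-in for align_to_x(n, FILTER_NUM_ALIGNMENT); the real alignment value
// is defined in the FPGA headers and is not visible in this diff.
static int align_to(int n, int x) { return (n + x - 1) / x * x; }

int main() {
  int channel = 100;             // out->dims()[1] in the kernels (example value)
  int element_num_per_div = 40;  // assumed filters-per-pass capacity
  int n = (channel + element_num_per_div - 1) / element_num_per_div;  // 3 splits
  for (int i = 0; i < n; i++) {
    uint32_t filter_num = (uint32_t)(
        i == n - 1 ? align_to(channel - (n - 1) * element_num_per_div, 4)
                   : element_num_per_div);
    printf("split %d carries %u filters\n", i, filter_num);  // 40, 40, 20
  }
  return 0;
}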
src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp

@@ -27,7 +27,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  Tensor *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();

@@ -67,26 +67,43 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
src/operators/kernel/fpga/conv_add_relu_kernel.cpp

@@ -26,13 +26,13 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto input_ptr = input->data<float>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  Tensor *filter = param->Filter();
+  auto *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
 
   PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
                         "Output channel should be equal to bias number");
   int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = bias_ptr[i];

@@ -49,27 +49,42 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
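The bias/scale buffer these kernels hand to the FPGA packs two arrays of length channel back to back: bs_ptr[0..channel) holds the per-channel bias (or the folded batch-norm bias), and bs_ptr[channel..2*channel), set to 1 here, presumably carries the per-channel scale. A compact sketch of that layout, using std::vector purely for illustration:

#include <vector>

// Sketch of the bs_ptr layout used by the FPGA kernels in this diff:
// [ bias_0 ... bias_{C-1} | scale_0 ... scale_{C-1} ], one contiguous block.
std::vector<float> MakeBiasScale(const std::vector<float> &bias) {
  const size_t channel = bias.size();
  std::vector<float> bs(2 * channel);
  for (size_t i = 0; i < channel; i++) {
    bs[i] = bias[i];        // matches bs_ptr[i] = bias_ptr[i] in the diff
    bs[i + channel] = 1.f;  // matches bs_ptr[i + channel] = 1 (assumed scale)
  }
  return bs;  // the kernels then pass this through fpga::format_bias_scale_array
}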
src/operators/kernel/fpga/conv_bn_kernel.cpp

@@ -25,8 +25,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   bool relu_enabled = false;
   Tensor *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  Tensor *filter = param->Filter();
+  Tensor *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();

@@ -65,27 +64,42 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
src/operators/kernel/fpga/conv_bn_relu_kernel.cpp

@@ -24,7 +24,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   bool relu_enabled = true;
   Tensor *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
-  Tensor *filter = param->Filter();
+  Tensor *filter = const_cast<Tensor *>(param->Filter());
   Tensor *out = param->Output();
   auto bn_mean_ptr = param->InputMean()->data<float>();
   auto bn_var_ptr = param->InputVariance()->data<float>();

@@ -61,26 +61,42 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)filter_ptr;
-  convArgs.filter_num = filter->dims()[0];
-  convArgs.group_num = param->Groups();
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_h = param->Strides()[0];
-  convArgs.kernel.stride_w = param->Strides()[1];
-  convArgs.kernel.height = filter->dims()[2];
-  convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = (void *)input_ptr;
-  convArgs.image.channels = input->dims()[1];
-  convArgs.image.height = input->dims()[2];
-  convArgs.image.width = input->dims()[3];
-  convArgs.image.pad_height = param->Paddings()[0];
-  convArgs.image.pad_width = param->Paddings()[1];
-  convArgs.image.scale_address = input->scale;
-  convArgs.output.address = (void *)out_ptr;
+  fpga::WrapperConvArgs convArgs;
+  convArgs.group_num = (uint32_t)param->Groups();
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = (uint32_t)param->Groups();
+    convArgs.args[i].kernel.stride_h = (uint32_t)param->Strides()[0];
+    convArgs.args[i].kernel.stride_w = (uint32_t)param->Strides()[1];
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input->dims()[3];
+    convArgs.args[i].image.pad_height = (uint32_t)param->Paddings()[0];
+    convArgs.args[i].image.pad_width = (uint32_t)param->Paddings()[1];
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
src/operators/kernel/fpga/elementwise_add_relu_kernel.cpp

@@ -22,9 +22,9 @@ template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
-  Tensor *input_y = const_cast<Tensor *>(param->InputY());
-  Tensor *out = param->Out();
+  auto *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_y = const_cast<Tensor *>(param->InputY());
+  auto *out = param->Out();
   auto input_x_ptr = input_x->data<float>();
   auto input_y_ptr = input_y->data<float>();
   fpga::format_ofm(out);

@@ -34,22 +34,22 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   ewaddArgs.relu_enabled = relu_enabled;
   ewaddArgs.const0 = 1;
   ewaddArgs.const1 = 1;
-  ewaddArgs.image0.address = (void *)input_x_ptr;
-  ewaddArgs.image0.channels = input_x->dims()[1];
+  ewaddArgs.image0.address = input_x_ptr;
+  ewaddArgs.image0.channels = (uint32_t)input_x->dims()[1];
   ewaddArgs.image0.scale_address = input_x->scale;
-  ewaddArgs.image0.height = input_x->dims()[2];
-  ewaddArgs.image0.width = input_x->dims()[3];
+  ewaddArgs.image0.height = (uint32_t)input_x->dims()[2];
+  ewaddArgs.image0.width = (uint32_t)input_x->dims()[3];
   ewaddArgs.image0.pad_height = 0;
   ewaddArgs.image0.pad_width = 0;
-  ewaddArgs.image1.address = (void *)input_y_ptr;
-  ewaddArgs.image1.channels = input_y->dims()[1];
+  ewaddArgs.image1.address = input_y_ptr;
+  ewaddArgs.image1.channels = (uint32_t)input_y->dims()[1];
   ewaddArgs.image1.scale_address = input_y->scale;
-  ewaddArgs.image1.height = input_y->dims()[2];
-  ewaddArgs.image1.width = input_y->dims()[3];
+  ewaddArgs.image1.height = (uint32_t)input_y->dims()[2];
+  ewaddArgs.image1.width = (uint32_t)input_y->dims()[3];
   ewaddArgs.image1.pad_height = 0;
   ewaddArgs.image1.pad_width = 0;
   ewaddArgs.output.scale_address = out->scale;
-  ewaddArgs.output.address = (void *)out_ptr;
+  ewaddArgs.output.address = out_ptr;
   param->SetFpgaArgs(ewaddArgs);
   return true;
 }
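Most of the edits in this kernel, and in pool_kernel.cpp and softmax_kernel.cpp below, simply make the narrowing from tensor extents to the driver's uint32_t fields explicit. Assuming dims() yields a wider signed integer type, the pattern amounts to:

#include <cstdint>

// Illustration of the explicit-narrowing pattern used throughout this diff.
// int64_t is an assumption about what Tensor::dims() elements are; the diff
// itself uses C-style casts such as (uint32_t)input->dims()[1].
uint32_t ToDriverExtent(int64_t dim) { return static_cast<uint32_t>(dim); }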
src/operators/kernel/fpga/fc_relu_kernel.cpp

@@ -14,71 +14,82 @@ limitations under the License. */
 #ifdef FUSION_FCRELU_OP
 
 #include "operators/kernel/fc_relu_kernel.h"
 #include "fpga/api.h"
 
 namespace paddle_mobile {
 namespace operators {
 
 template <>
 bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   bool relu_enabled = true;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_x = const_cast<Tensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
-  Tensor *input_y = param->InputY();
+  auto *filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  int channel = (uint32_t)out->dims()[1];
+  auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = input_z_ptr[i];
   }
-  int num = input_y->dims()[1];
-  int chw = input_y->dims()[0];
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
   PADDLE_MOBILE_ENFORCE(chw == input_x->numel(),
                         "Filter element num should be equal to IFM element num");
-  int height = input_x->dims()[2];
-  int width = input_x->dims()[3];
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
   int filter_channel = chw / height / width;
-  input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(input_y);
-  fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_filter(filter, max_value, 1);
+  auto filter_ptr = filter->data<float>();
 
-  int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
+  int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   fpga::format_ofm(out);
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)input_y_ptr;
-  convArgs.filter_num = out->dims()[1];
+  fpga::WrapperConvArgs convArgs;
   convArgs.group_num = 1;
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_w = 1;
-  convArgs.kernel.stride_h = 1;
-  convArgs.kernel.height = input_x->dims()[2];
-  convArgs.kernel.width = input_x->dims()[3];
-  convArgs.image.address = (void *)input_x_ptr;
-  convArgs.image.channels = input_x->dims()[1];
-  convArgs.image.height = input_x->dims()[2];
-  convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 0;
-  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address = input_x->scale;
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = 1;
+    convArgs.args[i].kernel.stride_h = 1;
+    convArgs.args[i].kernel.stride_w = 1;
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_x_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input_x->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input_x->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input_x->dims()[3];
+    convArgs.args[i].image.pad_height = 0;
+    convArgs.args[i].image.pad_width = 0;
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
 
 template <>
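Both FC kernels now treat the weight matrix as a convolution filter: the 2-D weight of shape {chw, num} is reshaped to {num, chw / (H * W), H, W}, with H and W taken from the input feature map. A worked example with assumed shapes (the concrete numbers are illustrative, not from the diff):

#include <cstdio>

int main() {
  // Assumed example shapes (not from the diff): input 1x64x7x7, weight 3136x10.
  int height = 7, width = 7;  // input_x->dims()[2], input_x->dims()[3]
  int chw = 3136, num = 10;   // filter->dims()[0], filter->dims()[1] pre-Resize
  int filter_channel = chw / height / width;  // 64, as computed in the kernels
  // filter->Resize({num, filter_channel, height, width}) -> {10, 64, 7, 7},
  // i.e. each output neuron becomes one 64x7x7 filter with stride 1 and pad 0.
  printf("filter reshaped to {%d, %d, %d, %d}\n", num, filter_channel, height,
         width);
  return 0;
}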
src/operators/kernel/fpga/fusion_fc_kernel.cpp

@@ -21,58 +21,76 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   bool relu_enabled = false;
-  Tensor *input_x = const_cast<Tensor *>(param->InputX());
+  auto *input_x = const_cast<Tensor *>(param->InputX());
   auto input_x_ptr = input_x->data<float>();
-  Tensor *input_y = param->InputY();
+  auto *filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
-  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
+  PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == filter->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = out->dims()[1];
-  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  int channel = (uint32_t)out->dims()[1];
+  auto *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i + channel] = 1;
     bs_ptr[i] = input_z_ptr[i];
   }
-  int num = input_y->dims()[1];
-  int chw = input_y->dims()[0];
+  int num = (uint32_t)filter->dims()[1];
+  int chw = (uint32_t)filter->dims()[0];
   PADDLE_MOBILE_ENFORCE(chw == input_x->numel(),
                         "Filter element num should be equal to IFM element num");
-  int height = input_x->dims()[2];
-  int width = input_x->dims()[3];
+  int height = (uint32_t)input_x->dims()[2];
+  int width = (uint32_t)input_x->dims()[3];
   int filter_channel = chw / height / width;
-  input_y->Resize(framework::make_ddim({num, filter_channel, height, width}));
-  float max_value = fpga::filter_find_max(input_y);
-  fpga::format_filter(input_y, max_value, 1);
-  auto input_y_ptr = input_y->data<float>();
-  int element_num_per_div = fpga::get_element_num_per_div(input_y, 1);
+  filter->Resize(framework::make_ddim({num, filter_channel, height, width}));
+  float max_value = fpga::filter_find_max(filter);
+  fpga::format_filter(filter, max_value, 1);
+  auto filter_ptr = filter->data<float>();
+  int element_num_per_div = fpga::get_element_num_per_div(filter, 1);
   fpga::format_bias_scale_array(&bs_ptr, element_num_per_div, channel);
 
   auto out_ptr = out->mutable_data<float>();
 
-  fpga::ConvArgs convArgs;
-  convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = (void *)input_y_ptr;
-  convArgs.filter_num = out->dims()[1];
+  fpga::WrapperConvArgs convArgs;
   convArgs.group_num = 1;
-  convArgs.sb_address = (void *)bs_ptr;
-  convArgs.kernel.stride_w = 1;
-  convArgs.kernel.stride_h = 1;
-  convArgs.kernel.height = input_x->dims()[2];
-  convArgs.kernel.width = input_x->dims()[3];
-  convArgs.image.address = (void *)input_x_ptr;
-  convArgs.image.channels = input_x->dims()[1];
-  convArgs.image.height = input_x->dims()[2];
-  convArgs.image.width = input_x->dims()[3];
-  convArgs.image.pad_height = 0;
-  convArgs.image.pad_width = 0;
-  convArgs.image.scale_address = input_x->scale;
-  convArgs.output.address = (void *)out_ptr;
+  convArgs.split_num = (uint32_t)fpga::get_plit_num(filter);
+  convArgs.filter_num = (uint32_t)filter->dims()[0];
+  convArgs.output.address = out_ptr;
+  convArgs.output.scale_address = out->scale;
+  convArgs.args = (fpga::ConvArgs *)fpga::fpga_malloc(convArgs.split_num *
+                                                      sizeof(fpga::ConvArgs));
   param->SetFpgaArgs(convArgs);
+
+  int element_num = fpga::get_aligned_filter_element_num(
+      filter->dims()[1] * filter->dims()[2] * filter->dims()[3]);
+
+  int n = convArgs.split_num;
+  for (int i = 0; i < n; i++) {
+    convArgs.args[i].relu_enabled = relu_enabled;
+    convArgs.args[i].group_num = 1;
+    convArgs.args[i].kernel.stride_h = 1;
+    convArgs.args[i].kernel.stride_w = 1;
+    convArgs.args[i].kernel.height = (uint32_t)filter->dims()[2];
+    convArgs.args[i].kernel.width = (uint32_t)filter->dims()[3];
+    convArgs.args[i].image.address = input_x_ptr;
+    convArgs.args[i].image.channels = (uint32_t)input_x->dims()[1];
+    convArgs.args[i].image.height = (uint32_t)input_x->dims()[2];
+    convArgs.args[i].image.width = (uint32_t)input_x->dims()[3];
+    convArgs.args[i].image.pad_height = 0;
+    convArgs.args[i].image.pad_width = 0;
+    convArgs.args[i].filter_address = &((int8_t *)filter_ptr)[i * element_num];
+    convArgs.args[i].sb_address = &((int8_t *)bs_ptr)[i * element_num];
+    convArgs.args[i].filter_num =
+        (uint32_t)(i == n - 1 ? fpga::get_aligned_filter_num(
+                                    channel - (n - 1) * element_num_per_div)
+                              : element_num_per_div);
+    convArgs.args[i].image.scale_address =
+        (float *)fpga::fpga_malloc(2 * sizeof(float));
+  }
   return true;
 }
src/operators/kernel/fpga/pool_kernel.cpp

@@ -21,7 +21,7 @@ namespace operators {
 template <>
 bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
-  Tensor *input = const_cast<Tensor *>(param->Input());
+  auto *input = const_cast<Tensor *>(param->Input());
   auto input_ptr = input->data<float>();
   Tensor *output = param->Output();
   fpga::format_ofm(output);

@@ -31,19 +31,19 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   vector<int> paddings = param->Paddings();
 
   fpga::PoolingArgs poolArgs;
-  poolArgs.image.address = (void *)input_ptr;
-  poolArgs.image.channels = input->dims()[1];
-  poolArgs.image.height = input->dims()[2];
-  poolArgs.image.width = input->dims()[3];
-  poolArgs.image.pad_height = paddings[0];
-  poolArgs.image.pad_width = paddings[1];
+  poolArgs.image.address = input_ptr;
+  poolArgs.image.channels = (uint32_t)input->dims()[1];
+  poolArgs.image.height = (uint32_t)input->dims()[2];
+  poolArgs.image.width = (uint32_t)input->dims()[3];
+  poolArgs.image.pad_height = (uint32_t)paddings[0];
+  poolArgs.image.pad_width = (uint32_t)paddings[1];
   poolArgs.image.scale_address = input->scale;
   poolArgs.output.address = output_ptr;
   poolArgs.output.scale_address = input->scale;
-  poolArgs.kernel.height = ksize[0];
-  poolArgs.kernel.width = ksize[1];
-  poolArgs.kernel.stride_h = strides[0];
-  poolArgs.kernel.stride_w = strides[1];
+  poolArgs.kernel.height = (uint32_t)ksize[0];
+  poolArgs.kernel.width = (uint32_t)ksize[1];
+  poolArgs.kernel.stride_h = (uint32_t)strides[0];
+  poolArgs.kernel.stride_w = (uint32_t)strides[1];
   param->SetFpgaArgs(poolArgs);
   return true;
 }
src/operators/kernel/fpga/softmax_kernel.cpp

@@ -33,8 +33,8 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.convert_type = fpga::DATA_FP16_TO_FP32;
   args.layout_type = fpga::LAYOUT_NO_CONVERT;
   args.image.address = (void *)(input_ptr);
-  args.image.height = input->dims()[0];
-  args.image.width = input->dims()[1];
+  args.image.height = (uint32_t)input->dims()[0];
+  args.image.width = (uint32_t)input->dims()[1];
   args.image.channels = 1;
   args.output.address = output_ptr;
   param->SetFpgaArgs(args);
src/operators/math/gemm.cpp

@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "operators/math/gemm.h"
-#include <string>
+#include <string.h>
 #include "common/log.h"
 #include "memory/t_malloc.h"
 #if __ARM_NEON
src/operators/op_param.h

@@ -56,7 +56,7 @@ struct DtypeTensorTrait<CPU> {
 template <>
 struct DtypeTensorTrait<FPGA> {
   // This is the type we obtained in variable.
-  typedef framework::LoDTensor gtype;
+  typedef framework::Tensor gtype;
   // This type will be the parent class type
   // or the same type.
   typedef framework::Tensor rtype;

@@ -1232,11 +1232,7 @@ class FusionFcParam : public OpParam {
   }
 
   const GType *InputX() const { return input_x_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *InputY() const { return input_y_; }
-#else
   const RType *InputY() const { return input_y_; }
-#endif
 
   const RType *InputZ() const { return input_z_; }

@@ -1259,11 +1255,11 @@ class FusionFcParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };

@@ -1297,11 +1293,7 @@ class FusionConvAddParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_; }

@@ -1326,11 +1318,11 @@ class FusionConvAddParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };

@@ -1379,11 +1371,7 @@ class FusionConvAddPReluParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_; }

@@ -1410,11 +1398,11 @@ class FusionConvAddPReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

@@ -1461,11 +1449,7 @@ class FusionConvAddAddPReluParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_; }

@@ -1496,11 +1480,11 @@ class FusionConvAddAddPReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

@@ -1538,11 +1522,7 @@ class FusionConvAddBNReluParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_; }

@@ -1598,11 +1578,11 @@ class FusionConvAddBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
 public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

@@ -1648,11 +1628,7 @@ class FusionConvBNAddReluParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_; }

@@ -1711,11 +1687,11 @@ class FusionConvBNAddReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
  public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

@@ -1748,11 +1724,8 @@ class FusionConvBNParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_y_; }
 
   const vector<int> &Strides() const { return strides_; }

@@ -1805,11 +1778,11 @@ class FusionConvBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
 public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

@@ -1847,11 +1820,8 @@ class FusionConvAddBNParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_y_; }
 
   const vector<int> &Strides() const { return strides_; }

@@ -1906,11 +1876,11 @@ class FusionConvAddBNParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
 public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif

@@ -2027,11 +1997,7 @@ class FusionConvBNReluParam : public OpParam {
   const RType *Input() const { return input_; }
 
-#ifdef PADDLE_MOBILE_FPGA
-  RType *Filter() const { return filter_; }
-#else
   const RType *Filter() const { return filter_; }
-#endif
 
   RType *Output() const { return output_; }

@@ -2085,11 +2051,11 @@ class FusionConvBNReluParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA
 
  private:
-  fpga::ConvArgs fpga_conv_args;
+  fpga::WrapperConvArgs fpga_conv_args;
 
 public:
-  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
-  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+  const fpga::WrapperConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::WrapperConvArgs &args) { fpga_conv_args = args; }
 #endif
 };
 #endif
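Each fused-op parameter class now caches a fpga::WrapperConvArgs behind the same FpgaArgs()/SetFpgaArgs() pair, so the split between Init-time setup and Compute-time use is unchanged. A hedged sketch of how a Compute() body is expected to consume it; the Compute() implementations are not part of this diff, and the helper name below is hypothetical:

// Sketch only. fpga::ComputeFpgaConv is the entry point declared in
// src/fpga/api.h; how it walks the splits is not shown in this commit.
template <typename ParamType>
void RunFusedConvOnFpga(const ParamType &param) {
  const paddle_mobile::fpga::WrapperConvArgs &args = param.FpgaArgs();
  paddle_mobile::fpga::ComputeFpgaConv(args);  // covers all split_num splits
}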