PaddlePaddle / Paddle-Lite
Commit 29f2fd79
Authored Jan 28, 2019 by qnqinan
add activation in FPGA track
Parent: 9da5e094

Showing 24 changed files with 301 additions and 142 deletions (+301, -142)
src/fpga/V1/api.cpp (+39, -16)
src/fpga/V1/api.h (+16, -8)
src/fpga/V1/deconv_filter.cpp (+0, -1)
src/fpga/V1/pe.cpp (+91, -8)
src/fpga/common/driver.cpp (+0, -4)
src/fpga/common/driver.h (+1, -1)
src/fpga/common/fpga_common.h (+4, -3)
src/operators/activation_op.cpp (+4, -0)
src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp (+8, -5)
src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp (+10, -7)
src/operators/kernel/fpga/V1/conv_add_kernel.cpp (+8, -5)
src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp (+8, -5)
src/operators/kernel/fpga/V1/conv_bn_kernel.cpp (+8, -5)
src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp (+8, -5)
src/operators/kernel/fpga/V1/deconv_add_kernel.cpp (+10, -6)
src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp (+10, -6)
src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp (+8, -2)
src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp (+8, -2)
src/operators/kernel/fpga/V1/fetch_kernel.cpp (+22, -0)
src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp (+6, -3)
src/operators/kernel/fpga/V1/reshape_kernel.cpp (+6, -0)
src/operators/kernel/fpga/V1/sigmoid_kernel.cpp (+11, -43)
src/operators/kernel/fpga/V1/softmax_kernel.cpp (+1, -2)
src/operators/op_param.h (+14, -5)
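All 24 files follow one theme: the single on/off `bool relu_enabled` flag is replaced by an explicit activation descriptor (an `ActivationType` plus a leaky-ReLU negative slope). A minimal sketch of the migration, with simplified stand-in declarations (the real entry points live in src/fpga/V1/api.h and also take tensor, group, stride, and padding arguments):

    // Hypothetical, simplified sketch of the signature change in this commit.
    #include <cstdint>

    enum ActivationType { NONE = 0, LEAKYRELU = 1, SIGMOID = 2, TANH = 3, SOFTMAX = 4 };

    // Before: void fill_split_arg(..., bool relu_enabled, int group_num, ...);
    // After:  void fill_split_arg(..., ActivationType activation_enable,
    //                             int16_t leaky_relu_negative_slope,
    //                             int group_num, ...);

    // A caller that used to pass relu_enabled = true now requests LEAKYRELU:
    void migrate_relu_caller() {
      ActivationType activation_enable = LEAKYRELU;  // was: bool relu_enabled = true;
      int16_t leaky_relu_negative_slope = 0;         // fp16 bit pattern; 0 keeps plain ReLU
      (void)activation_enable;                       // would be forwarded to fill_split_arg
      (void)leaky_relu_negative_slope;
    }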
src/fpga/V1/api.cpp

@@ -346,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
   auto filter_pad_width_mul_channel =
       args.image.pad_width * args.image.channels;
   auto image_amount_per_row_multi_win_first =
-      image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height);
+      image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
   auto image_amount_per_row_multi_win =
-      image_amount_per_row * (4 * args.kernel.stride_h);
+      image_amount_per_row * (2 * args.kernel.stride_h);
   auto image_block_num = block_num;
   auto image_block_len =

@@ -375,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
       (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
           ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
           : 0;
-  auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  auto cmd = 0UL | USE_BIAS;
   auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
                       ((args.deconv_tx_param.sub_conv_num) << 16) |

@@ -413,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
 void expand_EW_arg(EWAddArgs *arg) {
   EWAddArgs args = *arg;
-  uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  // uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  uint64_t cmd = 0;
   uint64_t datalen = (uint64_t)args.image0.width *
                      (uint64_t)args.image0.height *
                      (uint64_t)args.image0.channels;

@@ -441,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float *bs_ptr) {
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
   auto out_ptr = out->data<float>();

@@ -488,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                                    filter->dims()[3]));
   for (int i = 0; i < n; i++) {
-    arg->conv_arg[i].relu_enabled = relu_enabled;
+    // arg->conv_arg[i].relu_enabled = relu_enabled;
+    arg->conv_arg[i].output.activation.activation_type = activation_enable;
+    arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->conv_arg[i].group_num = (uint32_t)group_num;
     arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
     arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;

@@ -560,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
 void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w, float *bs_ptr) {
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
+                     float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();

@@ -687,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
   }
   for (int j = 0; j < split_num; ++j) {
-    arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+    // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+    arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
+        activation_enable;
+    arg->split_conv_args[i]
+        ->conv_arg[j]
+        .output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
     arg->split_conv_args[i]->conv_arg[j].kernel.width =

@@ -800,13 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
 void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float *bias_ptr) {
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();
   arg->sub_conv_num = 1;
-  arg->relu_enabled = relu_enabled;
+  // arg->relu_enabled = relu_enabled;
+  arg->output.activation.activation_type = activation_enable;
+  arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   arg->bias_address = bias_ptr;
   arg->filter_address = filter_ptr;
   arg->kernel.height = (uint32_t)filter->dims()[2];

@@ -826,8 +844,10 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
 void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                        framework::Tensor *out, framework::Tensor *filter,
-                       bool relu_enabled, int stride_h, int stride_w,
-                       int padding_h, int padding_w, float *bias_ptr) {
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();

@@ -884,7 +904,10 @@ void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
     arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
     arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
-    arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    // arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    arg->dw_conv_args[i]->output.activation.activation_type =
+        activation_enable;
+    arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->dw_conv_args[i]->bias_address = bias_ptr;
     arg->dw_conv_args[i]->filter_address =
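The command word assembled by `expand_conv_arg` and `expand_EW_arg` no longer carries a ReLU bit; activation is instead programmed through a dedicated register in pe.cpp. A sketch of the before/after command composition, with placeholder mask values (USE_RELU and USE_BIAS are real macros in this codebase, but their numeric values here are assumptions for illustration only):

    #include <cstdint>

    constexpr uint64_t USE_BIAS = 0x1;  // placeholder value, not the real macro
    constexpr uint64_t USE_RELU = 0x2;  // placeholder value, not the real macro

    // Old: the activation request rode along in the command word.
    uint64_t make_cmd_old(bool relu_enabled) {
      return 0UL | (relu_enabled ? USE_RELU : 0) | USE_BIAS;
    }

    // New: the command word only requests bias; the activation mode is written
    // to REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR separately (see pe.cpp).
    uint64_t make_cmd_new() { return 0UL | USE_BIAS; }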
src/fpga/V1/api.h

@@ -47,20 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float *bs_ptr);
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float *bs_ptr);
 void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w, float *bs_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
+                     float *bs_ptr);
 void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float *bias_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float *bias_ptr);
 void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                        framework::Tensor *out, framework::Tensor *filter,
-                       bool relu_enabled, int stride_h, int stride_w,
-                       int padding_h, int padding_w, float *bs_ptr);
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float *bs_ptr);
 void format_deconv_filter(framework::Tensor *filter_tensor, float max_value,
                           int group_num, int stride);
src/fpga/V1/deconv_filter.cpp

@@ -19,7 +19,6 @@ limitations under the License. */
 #include "fpga/V1/filter.h"
 // #include "filter.h"
 #include "fpga/V1/api.h"
-// #include "fpga_api.h"
 namespace paddle_mobile {
 namespace fpga {
src/fpga/V1/pe.cpp

@@ -63,6 +63,7 @@ using namespace std;  // NOLINT
 #define REG_TIMER_COUNTER 0x070
 #define REG_SCALE_PARAMETER 0x080
+#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
 #define REG_FLASH_CMD 0x200
 #define REG_FLASH_DATA 0x208

@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
 int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "======Compute Basic Conv======";
-  DLOG << " relu_enabled:" << args.relu_enabled
-       << " sb_address:" << args.sb_address
+  //  DLOG << " relu_enabled:" << args.relu_enabled
+  DLOG << " sb_address:" << args.sb_address
        << " filter_address:" << args.filter_address
        << " filter_num:" << args.filter_num
        << " group_num:" << args.group_num;

@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+  uint64_t reg_ActivationArgs = 0;
+  // active function:{none,leakeyrelu,sigmoid,tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << " activation_type:" << active_args.activation_type
+       << " leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
     ret = -EIO;

@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(((uint64_t)args.image.height) |
                  (((uint64_t)args.image.width) << 32),

@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;

@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
+  uint64_t reg_ActivationArgs = 0;
+  // active function:{none,leakeyrelu,sigmoid,tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << " activation_type:" << active_args.activation_type
+       << " leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   image_physical_address = vaddr_to_paddr_driver(args.image.address);
   output_physical_address = vaddr_to_paddr_driver(args.output.address);
   uint32_t output_height = (uint32_t)(

@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
   reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);

@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;

@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeFpgaEWAdd===========";
-  DLOG << " relu_enabled:" << args.relu_enabled
-       << " const0:" << fp16_2_fp32(int16_t(args.const0))
+  //  DLOG << " relu_enabled:" << args.relu_enabled
+  DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0))
        << " const1:" << fp16_2_fp32(int16_t(args.const1));
   DLOG << " image0_address:" << args.image0.address
        << " image0_scale_address:" << args.image0.scale_address

@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << " activation_type:" << active_args.activation_type
+       << " leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
     ret = -EIO;

@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
   reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);

@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif

@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
   uint8_t data_cell_in = 0;
   uint8_t data_cell_out = 0;
   int ret = 0;
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
   datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
             (uint64_t)args.image.channels;
   datalen = align_to_x(datalen, 16);

@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
       (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
     return -EFAULT;
   }
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
     ret = -EIO;

@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
   reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);

@@ -585,6 +666,7 @@ int PerformBypass(const struct BypassArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif

@@ -808,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
 int ComputeDWConv(const struct DWconvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeDWConv===========";
-  DLOG << " mode:" << args.relu_enabled;
+  //  DLOG << " mode:" << args.relu_enabled;
   DLOG << " image_address:" << args.image.address
        << " image_scale_address:" << args.image.scale_address
        << " image_channels:" << args.image.channels

@@ -831,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
   uint64_t output_scale = 0;
   uint64_t timer_cnt = 0;
   int ret = 0;
-  uint64_t cmd = args.relu_enabled;
+  // uint64_t cmd = args.relu_enabled;
+  uint64_t cmd = 0;
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
   uint64_t filter_physical_address = 0;
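Every compute path in this file (basic conv, pooling, elementwise add, bypass) now performs the same three steps: pack the activation type and slope into one 64-bit value, write it to REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR before kicking off the job, and write the register again before releasing the mutex (conv, pool, and EW add first reset `active_args.activation_type` to NONE). A minimal sketch of the packing, assuming the layout implied by the expression in the diff (type in the upper 32 bits, fp16 slope bits in the lower half); the slope is taken as an unsigned 16-bit value here to sidestep the sign extension a negative `int16_t` would introduce:

    #include <cassert>
    #include <cstdint>

    // Sketch of (uint64_t(activation_type) << 32) | leaky_relu_negative_slope.
    uint64_t pack_activation(uint32_t activation_type, uint16_t slope_fp16_bits) {
      return (uint64_t(activation_type) << 32) | slope_fp16_bits;
    }

    int main() {
      assert(pack_activation(1, 0) == (1ULL << 32));  // LEAKYRELU, zero slope
      assert(pack_activation(2, 0) == (2ULL << 32));  // SIGMOID
      return 0;
    }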
src/fpga/common/driver.cpp

@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
   unsigned int nr = (unsigned int)_nr;
   int ret = 0;
   uint64_t a_size = FPGA_PAGE_SIZE * nr;
-  DLOG << a_size;
   pthread_mutex_lock(&memory->mutex);

@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
 void fpga_copy_driver(void *dest, const void *src, size_t num) {
   uint64_t i;
-  DLOG << "dest:" << dest << " src:" << src << " size:" << num;
   for (i = 0; i < num; i++) {
     *((int8_t *)dest + i) = *((int8_t *)src + i);  // NOLINT
   }
src/fpga/common/driver.h

@@ -29,7 +29,7 @@ namespace driver {
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
-#define FPGA_REG_PHY_ADDR 0xa0000000
+#define FPGA_REG_PHY_ADDR 0x80000000
 #define FPGA_REG_SIZE 0x1000
 #define FPGA_MEM_PHY_ADDR 0x40000000
 #define FPGA_MEM_SIZE 0x80000000
src/fpga/common/fpga_common.h

@@ -45,6 +45,7 @@ enum ActivationType {
   LEAKYRELU = 1,
   SIGMOID = 2,
   TANH = 3,
+  SOFTMAX = 4,
 };

 struct ActivationArgs {

@@ -132,7 +133,7 @@ struct DeconvTxParm {
 #endif
 struct ConvArgs {
-  bool relu_enabled;
+  //  bool relu_enabled;
   void* sb_address;  // scale and bias
   void* filter_address;
   float* filter_scale_address;

@@ -198,7 +199,7 @@ struct PoolingArgs {
 };
 struct EWAddArgs {
-  bool relu_enabled;
+  //  bool relu_enabled;
   uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
   uint32_t const1;
   struct ImageInputArgs image0;

@@ -230,7 +231,7 @@ struct DeconvArgs {
 };
 struct DWconvArgs {
   uint32_t sub_conv_num;
-  bool relu_enabled;
+  //  bool relu_enabled;
   void* bias_address;
   void* filter_address;
   struct KernelArgs kernel;
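With `relu_enabled` commented out of `ConvArgs`, `EWAddArgs`, and `DWconvArgs`, the activation request now travels inside the output descriptor. A minimal sketch of the relationship, with field names as in this header but the surrounding output struct reduced to the essentials (the real one carries more members):

    #include <cstdint>

    enum ActivationType { NONE = 0, LEAKYRELU = 1, SIGMOID = 2, TANH = 3, SOFTMAX = 4 };

    struct ActivationArgs {
      ActivationType activation_type = NONE;
      int16_t leaky_relu_negative_slope = 0;  // fp16 bit pattern; 0 means plain ReLU
    };

    // Simplified stand-in for the output-args struct: each operation's output
    // carries its own activation instead of a per-op boolean flag.
    struct OutputArgsSketch {
      void* address = nullptr;
      float* scale_address = nullptr;
      ActivationArgs activation;
    };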
src/operators/activation_op.cpp

@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
 #ifdef SIGMOID_OP
 DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
+#endif
 #endif  // SIGMOID_OP

 #ifdef TANH_OP
src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp

@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();

@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp

@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();

@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
-    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
-                          strides[0], strides[1], paddings[0], paddings[1],
-                          new_bias_ptr);
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, strides[0], strides[1],
+                          paddings[0], paddings[1], new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
-    fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                         param->Groups(), strides[0], strides[1], paddings[0],
-                         paddings[1], bs_ptr);
+    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                         leaky_relu_negative_slope, param->Groups(),
+                         strides[0], strides[1], paddings[0], paddings[1],
+                         bs_ptr);
     param->SetFpgaArgs(conv_arg);
   }
   return true;
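Note the mapping used by all the `*_relu` kernels: the old `relu_enabled = true` becomes LEAKYRELU with a zero slope, which is numerically identical to standard ReLU, since leaky ReLU is f(x) = x for x > 0 and slope * x otherwise. A small self-check:

    #include <cassert>

    float leaky_relu(float x, float slope) { return x > 0.0f ? x : slope * x; }

    int main() {
      assert(leaky_relu(3.0f, 0.0f) == 3.0f);
      assert(leaky_relu(-2.0f, 0.0f) == 0.0f);   // slope 0 collapses to ReLU
      assert(leaky_relu(-2.0f, 0.1f) == -0.2f);  // nonzero slope leaks negatives
      return 0;
    }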
src/operators/kernel/fpga/V1/conv_add_kernel.cpp (mode changed 100755 → 100644)

@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();

@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp (mode changed 100755 → 100644)

@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();

@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
src/operators/kernel/fpga/V1/conv_bn_kernel.cpp

@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();

@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp

@@ -22,7 +22,10 @@ namespace operators {
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();

@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
src/operators/kernel/fpga/V1/deconv_add_kernel.cpp

@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();

@@ -53,17 +56,18 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
-    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
-                            param->Strides()[0], param->Strides()[1],
-                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
-    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                          param->Groups(), param->Strides()[0],
-                          param->Strides()[1], param->Paddings()[0],
-                          param->Paddings()[1], bs_ptr);
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(deconv_arg);
   }
src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp

@@ -24,7 +24,10 @@ namespace operators {
 template <>
 bool DeconvAddReluKernel<FPGA, float>::Init(
     FusionDeconvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();

@@ -54,17 +57,18 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
     fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
                                sub_conv_n);
     fpga::DWDeconvArgs DWDeconv_arg = {0};
-    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, relu_enabled,
-                            param->Strides()[0], param->Strides()[1],
-                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(DWDeconv_arg);
   } else {
     fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
     fpga::DeconvArgs deconv_arg = {0};
-    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                          param->Groups(), param->Strides()[0],
-                          param->Strides()[1], param->Paddings()[0],
-                          param->Paddings()[1], bs_ptr);
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
     param->SetFpgaArgs(deconv_arg);
   }
   return true;
src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp

@@ -20,7 +20,10 @@ namespace operators {
 template <>
 bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();

@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
   auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
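Both elementwise kernels program `const0` and `const1` with the fp16 bit pattern 0x3c00, annotated "=1": in IEEE-754 half precision (1 sign, 5 exponent, 10 mantissa bits), 0x3c00 has exponent 15 (the bias) and zero mantissa, i.e. 1.0 x 2^0, so the hardware computes output = 1 * input0 + 1 * input1. A minimal decoder covering the normal-number case confirms it:

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Decode an IEEE-754 binary16 normal number (no subnormal/inf/NaN handling).
    float fp16_to_float_normal(uint16_t h) {
      int sign = (h >> 15) & 0x1;
      int exponent = (h >> 10) & 0x1f;
      int mantissa = h & 0x3ff;
      float value = (1.0f + mantissa / 1024.0f) * std::ldexp(1.0f, exponent - 15);
      return sign ? -value : value;
    }

    int main() {
      assert(fp16_to_float_normal(0x3c00) == 1.0f);
      return 0;
    }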
src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp

@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();

@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   auto out_ptr = out->mutable_data<float>();
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // ewaddArgs.relu_enabled = relu_enabled;
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
src/operators/kernel/fpga/V1/fetch_kernel.cpp

@@ -19,12 +19,34 @@ namespace operators {
 template <>
 bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
+  Tensor *output = param->Out();
+  // fpga::format_fp16_ofm(output);
   return true;
 }

 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   param.Out()->ShareDataWith(*(param.InputX()));
+  /*auto input =
+      reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
+  fpga::format_image(input);
+  auto input_ptr = input->data<float>();
+  Tensor *output = param.Out();
+  auto output_ptr = output->data<float>();
+
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.input_layout_type = fpga::LAYOUT_CHW;
+  args.output_layout_type = fpga::LAYOUT_HWC;
+  args.image.address = reinterpret_cast<void *>(input_ptr);
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
+  args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
+  args.image.pad_height = 0;
+  args.image.pad_width = 0;
+  args.output.address = output_ptr;
+  args.output.scale_address = output->scale;
+  fpga::PerformBypass(args);*/
 }
 template class FetchKernel<FPGA, float>;
src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp

@@ -20,7 +20,10 @@ namespace operators {
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
   auto filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();

@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);

   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
+                       leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
src/operators/kernel/fpga/V1/reshape_kernel.cpp

@@ -22,6 +22,12 @@ namespace operators {
 template <>
 bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
   param->Out()->ShareDataWith(*param->InputX());
+  const int in_n = param->InputX()->dims()[0];
+  const int in_c = param->InputX()->dims()[1];
+  const int in_h = param->InputX()->dims()[2];
+  const int in_w = param->InputX()->dims()[3];
+  auto out = param->Out();
+  out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
   return true;
 }
src/operators/kernel/fpga/V1/sigmoid_kernel.cpp

@@ -15,73 +15,41 @@ limitations under the License. */
 #ifdef SIGMOID_OP

 #include "operators/kernel/activation_kernel.h"

 namespace paddle_mobile {
 namespace operators {

 using framework::DDim;
 using framework::Tensor;

 template <>
 bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->InputX());
   auto input_ptr = input->data<float>();
   auto out = param->Out();
-  fpga::format_fp32_ofm(out);
+  fpga::format_fp16_ofm(out);

-  auto float_input = new Tensor;
-  if (input->dims().size() == 2) {
-    float_input->mutable_data<float>({1, input->dims()[1]});
-  } else if (input->dims().size() == 4) {
-    float_input->mutable_data<float>(
-        {1, input->dims()[2], input->dims()[3], input->dims()[1]});
-  } else {
-    DLOG << "wrong dimension of softmax input";
-  }
-  fpga::format_fp32_ofm(float_input);

   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;
   args.output_layout_type = fpga::LAYOUT_CHW;
   args.input_data_type = fpga::DATA_TYPE_FP16;
-  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.output_data_type = fpga::DATA_TYPE_FP16;
   args.image.address = input_ptr;
   args.image.height =
       (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
   args.image.width =
       (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
   args.image.channels = (uint32_t)input->dims()[1];
-  args.output.address = float_input->data<float>();
-  args.output.scale_address = float_input->scale;
-  param->SetFloatInput(float_input);
+  args.output.address = out->data<float>();
+  args.output.scale_address = out->scale;
+  args.output.activation.activation_type = activation_enable;
+  args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   param->SetFpgaArgs(args);
   return true;
 }

-template <typename T>
-T Sigmoid(const T a) {
-  T tmp = -1.0f * a;
-  return (1.0 / (1.0 + exp(tmp)));
-}
-template <typename T>
-void sigmoidFuntor(Tensor *input, Tensor *output) {
-  auto *input_ptr = input->data<T>();
-  auto *output_ptr = output->mutable_data<T>();
-  for (int i = 0; i < input->numel(); i++) {
-    *(output_ptr + i) = Sigmoid<T>(*(input_ptr + i));
-  }
-}
 template <>
 void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
-  Tensor *in_x = param.FloatInput();
-  Tensor *out = param.Out();
   fpga::PerformBypass(param.FpgaArgs());
-  fpga::fpga_invalidate((void *)in_x->data<float>(),  // NOLINT
-                        in_x->numel() * sizeof(float));
-  // TODO: In general case, 0 should be squeezed before softmax input // NOLINT
-  sigmoidFuntor<float>(in_x, out);
-  fpga::fpga_flush(out->data<float>(), out->memory_size());
 }
 }  // namespace operators
 }  // namespace paddle_mobile
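After this change the sigmoid runs on the device: Init tags the bypass output with the SIGMOID activation type, and Compute shrinks to a single `PerformBypass` call. The deleted CPU fallback remains useful as an off-line reference when validating device output; reproduced here from the removed lines above, with the include it needs:

    #include <cmath>

    // CPU reference for the sigmoid now computed by the FPGA activation unit.
    template <typename T>
    T Sigmoid(const T a) {
      T tmp = -1.0f * a;
      return (1.0 / (1.0 + std::exp(tmp)));
    }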
src/operators/kernel/fpga/V1/softmax_kernel.cpp

@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto input_ptr = input->data<float>();
   auto out = param->Out();
-  fpga::format_fp32_ofm(out);
   auto float_input = new Tensor;
   if (input->dims().size() == 2) {
     float_input->mutable_data<float>({1, input->dims()[1]});

@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   } else {
     DLOG << "wrong dimension of softmax input";
   }
   fpga::format_fp32_ofm(float_input);
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;

@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.output.scale_address = float_input->scale;
   param->SetFloatInput(float_input);
   param->SetFpgaArgs(args);
   return true;
 }
src/operators/op_param.h

@@ -1081,14 +1081,9 @@ class SigmoidParam : public OpParam {
 #ifdef PADDLE_MOBILE_FPGA

  private:
-  std::shared_ptr<RType> float_input_x_;
   fpga::BypassArgs fpga_bypass_args;

  public:
-  RType *FloatInput() const {
-    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
-  }
-  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
   const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
   void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
 #endif

@@ -1214,6 +1209,20 @@ class FetchParam : public OpParam {
  private:
   RType *input_x_;
   Tensor *out_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  std::shared_ptr<RType> float_input_x_;
+  fpga::BypassArgs fpga_bypass_args;
+
+ public:
+  RType *FloatInput() const {
+    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
+  }
+  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };

 #ifdef FILL_CONSTANT_OP