Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
8e699af1
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8e699af1
编写于
5月 15, 2020
作者:
C
chonwhite
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ReNext Pass
上级
337eb1bf
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
100 addition
and
101 deletion
+100
-101
lite/backends/fpga/KD/debugger.hpp
lite/backends/fpga/KD/debugger.hpp
+22
-15
lite/backends/fpga/KD/llapi/filter.cpp
lite/backends/fpga/KD/llapi/filter.cpp
+2
-0
lite/backends/fpga/KD/pe_params.hpp
lite/backends/fpga/KD/pe_params.hpp
+31
-23
lite/backends/fpga/KD/pes/conv_pe.hpp
lite/backends/fpga/KD/pes/conv_pe.hpp
+2
-10
lite/backends/fpga/KD/pes/relu_pe.hpp
lite/backends/fpga/KD/pes/relu_pe.hpp
+17
-34
lite/backends/fpga/KD/pes/scale_pe.hpp
lite/backends/fpga/KD/pes/scale_pe.hpp
+1
-0
lite/backends/fpga/KD/tensor.hpp
lite/backends/fpga/KD/tensor.hpp
+1
-5
lite/kernels/fpga/CMakeLists.txt
lite/kernels/fpga/CMakeLists.txt
+2
-2
lite/kernels/fpga/activation_compute.cc
lite/kernels/fpga/activation_compute.cc
+3
-3
lite/kernels/fpga/elementwise_compute.cc
lite/kernels/fpga/elementwise_compute.cc
+15
-9
lite/kernels/fpga/elementwise_compute.h
lite/kernels/fpga/elementwise_compute.h
+3
-0
lite/kernels/fpga/fetch_compute.cc
lite/kernels/fpga/fetch_compute.cc
+1
-0
未找到文件。
lite/backends/fpga/KD/debugger.hpp
浏览文件 @
8e699af1
...
...
@@ -48,26 +48,33 @@ class Debugger {
void
tock
(
std
::
string
key
)
{}
void
setEnable
(
bool
en
)
{
enabled_
=
en
;
}
private:
bool
enabled_
=
false
;
std
::
unordered_map
<
std
::
string
,
bool
>
op_config
;
std
::
unordered_map
<
std
::
string
,
float
>
tick_tock_map
;
Debugger
()
{
op_config
[
"concat"
]
=
true
;
op_config
[
"pooling"
]
=
true
;
op_config
[
"conv"
]
=
true
;
op_config
[
"dwconv"
]
=
true
;
op_config
[
"ew_add"
]
=
true
;
op_config
[
"crop"
]
=
true
;
op_config
[
"feed"
]
=
true
;
op_config
[
"mul"
]
=
true
;
op_config
[
"fetch"
]
=
true
;
op_config
[
"boxes"
]
=
true
;
op_config
[
"scores"
]
=
true
;
op_config
[
"nms"
]
=
true
;
op_config
[
"pb_boxes"
]
=
true
;
op_config
[
"pb_variances"
]
=
true
;
// op_config["concat"] = true;
// op_config["pooling"] = true;
// op_config["conv"] = true;
// op_config["dropout"] = true;
// op_config["dwconv"] = true;
// op_config["ew_add"] = true;
// op_config["ew_mul"] = true;
// op_config["crop"] = true;
// op_config["feed"] = true;
// op_config["fc"] = true;
op_config
[
"softmax"
]
=
true
;
// op_config["mul"] = true;
// op_config["fetch"] = true;
// op_config["boxes"] = true;
// op_config["scores"] = true;
// op_config["nms"] = true;
// op_config["pb_boxes"] = true;
// op_config["pb_variances"] = true;
// op_config["softmax"] = true;
}
};
...
...
lite/backends/fpga/KD/llapi/filter.cpp
浏览文件 @
8e699af1
...
...
@@ -240,6 +240,8 @@ int8_t* format_filter(float* data_in,
for
(
int
n
=
0
;
n
<
num
;
n
++
)
{
float
*
filter_start
=
data_in
+
n
*
chw
;
int8_t
*
quantized_start
=
quantized_data
+
n
*
chw
;
// float f_max = find_max(filter_start, chw);
float
f_max
=
max
;
quantize
(
filter_start
,
quantized_start
,
chw
,
f_max
);
filter_max
.
push_back
(
f_max
);
}
...
...
lite/backends/fpga/KD/pe_params.hpp
浏览文件 @
8e699af1
...
...
@@ -83,26 +83,34 @@ struct ConvParam : PEParam {
std
::
vector
<
int
>
kernelSize
;
std
::
vector
<
int
>
dilations
;
Tensor
*
scale
()
{
return
scale_
;
}
Tensor
*
scale
()
{
return
&
scale_
;
}
Tensor
*
bias
()
{
return
bias_
;
}
Tensor
*
bias
()
{
return
&
bias_
;
}
std
::
vector
<
BasicConvParam
*>&
splitParams
()
{
return
splitParams_
;
}
~
ConvParam
()
{
for
(
int
i
=
0
;
i
<
splitParams_
.
size
();
i
++
)
{
BasicConvParam
*
basic_param
=
splitParams_
[
i
];
delete
basic_param
;
}
splitParams_
.
clear
();
}
protected:
std
::
vector
<
BasicConvParam
*>
splitParams_
;
Tensor
*
scale_
=
new
Tensor
()
;
Tensor
*
bias_
=
new
Tensor
()
;
Tensor
scale_
;
Tensor
bias_
;
};
struct
DepthwiseConvParam
:
ConvParam
{
public:
Tensor
*
quantizedFilter
()
{
return
quantizedFilter_
;
}
Tensor
*
quantizedFilter
()
{
return
&
quantizedFilter_
;
}
DWconvArgs
args
;
protected:
Tensor
*
quantizedFilter_
=
new
Tensor
()
;
Tensor
quantizedFilter_
;
};
enum
PoolingType
:
int
{
...
...
@@ -142,7 +150,7 @@ struct ElementwiseAddParam : PEParam {
struct
ElementwiseMulParam
:
PEParam
{
public:
Tensor
*
input_x
;
Tensor
*
input_x
=
nullptr
;
Tensor
*
input_y
=
nullptr
;
Tensor
*
output
=
nullptr
;
};
...
...
@@ -154,13 +162,13 @@ struct FullyConnectedParam : PEParam {
Tensor
*
bias
=
nullptr
;
Tensor
*
output
=
nullptr
;
Tensor
*
quantizedFilter
()
{
return
quantizedFilter_
;
}
Tensor
*
quantizedFilter
()
{
return
&
quantizedFilter_
;
}
Tensor
*
biasScale
()
{
return
biasScale_
;
}
Tensor
*
biasScale
()
{
return
&
biasScale_
;
}
protected:
Tensor
*
quantizedFilter_
=
new
Tensor
()
;
Tensor
*
biasScale_
=
new
Tensor
()
;
Tensor
quantizedFilter_
;
Tensor
biasScale_
;
};
struct
SoftmaxParam
:
PEParam
{
...
...
@@ -193,10 +201,10 @@ struct NormParam : PEParam {
};
struct
PriorBoxParam
:
PEParam
{
Tensor
*
input
;
Tensor
*
image
;
Tensor
*
outputBoxes
;
Tensor
*
outputVariances
;
Tensor
*
input
=
nullptr
;
Tensor
*
image
=
nullptr
;
Tensor
*
outputBoxes
=
nullptr
;
Tensor
*
outputVariances
=
nullptr
;
std
::
vector
<
float
>
minSizes
;
std
::
vector
<
float
>
maxSizes
;
...
...
@@ -212,10 +220,10 @@ struct PriorBoxParam : PEParam {
};
struct
YoloBoxParam
:
PEParam
{
Tensor
*
input
;
Tensor
*
imgSize
;
Tensor
*
outputBoxes
;
Tensor
*
outputScores
;
Tensor
*
input
=
nullptr
;
Tensor
*
imgSize
=
nullptr
;
Tensor
*
outputBoxes
=
nullptr
;
Tensor
*
outputScores
=
nullptr
;
int
downsampleRatio
;
std
::
vector
<
int
>
anchors
;
int
classNum
;
...
...
@@ -229,15 +237,15 @@ struct ScaleParam : PEParam {
Tensor
*
scale
=
nullptr
;
Tensor
*
bias
=
nullptr
;
Tensor
*
alignedScale
()
{
return
alignedScale_
;
}
Tensor
*
alignedScale
()
{
return
&
alignedScale_
;
}
Tensor
*
alignedBias
()
{
return
alignedBias_
;
}
Tensor
*
alignedBias
()
{
return
&
alignedBias_
;
}
ScaleArgs
args
=
{
0
};
protected:
Tensor
*
alignedScale_
=
new
Tensor
()
;
Tensor
*
alignedBias_
=
new
Tensor
()
;
Tensor
alignedScale_
;
Tensor
alignedBias_
;
};
struct
ResizeParam
:
PEParam
{
...
...
lite/backends/fpga/KD/pes/conv_pe.hpp
浏览文件 @
8e699af1
...
...
@@ -195,16 +195,6 @@ class ConvPE : public PE {
addPE_
.
init
();
addPE_
.
apply
();
addPE_
.
dispatch
();
// param_.output->printScale();
// params[0]->input.saveToFile("conv_1.txt");
// params[1]->input.saveToFile("conv_2.txt");
// params[0]->output.saveToFile("ew_o1.txt");
// params[1]->output.saveToFile("ew_o2.txt");
// std::cout << "\n ================== EW ================== \n";
// }
}
return
ret
==
0
;
...
...
@@ -212,6 +202,8 @@ class ConvPE : public PE {
ConvParam
&
param
()
{
return
param_
;
}
~
ConvPE
()
{}
private:
bool
use_cpu_
=
false
;
bool
split_channel
=
false
;
...
...
lite/backends/fpga/KD/pes/relu_pe.hpp
浏览文件 @
8e699af1
...
...
@@ -23,43 +23,27 @@ class ReluPE : public PE {
public:
bool
init
()
{
Tensor
*
output
=
param_
.
output
;
output
->
setAligned
(
true
);
output
->
setDataLocation
(
Device
);
output
->
setAligned
(
param_
.
input
->
aligned
()
);
output
->
setDataLocation
(
CPU
);
return
true
;
}
void
apply
()
{
Tensor
*
src
=
param_
.
input
;
args_
.
input_data_type
=
DATA_TYPE_FP16
;
args_
.
output_data_type
=
DATA_TYPE_FP16
;
args_
.
input_layout_type
=
LAYOUT_HWC
;
args_
.
output_layout_type
=
LAYOUT_HWC
;
args_
.
image
=
{.
address
=
src
->
data
<
void
>
(),
.
scale_address
=
src
->
scale
(),
.
channels
=
(
uint32_t
)
src
->
shape
().
channel
(),
.
width
=
(
uint32_t
)
src
->
shape
().
width
(),
.
height
=
(
uint32_t
)
src
->
shape
().
height
(),
.
pad_width
=
0u
,
.
pad_height
=
0u
};
args_
.
output
=
{
.
address
=
param_
.
output
->
data
<
void
>
(),
.
scale_address
=
param_
.
output
->
scale
(),
};
inplace_
.
relu_enable
=
false
;
inplace_
.
power_enable
=
false
;
inplace_
.
normalize_enable
=
false
;
}
void
apply
()
{}
bool
dispatch
()
{
inplace_
.
relu_enable
=
true
;
config_inplace
(
inplace_
);
param_
.
input
->
syncToDevice
();
param_
.
output
->
copyFrom
(
param_
.
input
);
param_
.
output
->
invalidate
();
inplace_
.
relu_enable
=
false
;
config_inplace
(
inplace_
);
param_
.
input
->
invalidate
();
int16_t
*
input_data
=
param_
.
input
->
data
<
int16_t
>
();
float16
*
out_data
=
param_
.
output
->
data
<
float16
>
();
for
(
int
i
=
0
;
i
<
param_
.
input
->
shape
().
alignedElementCount
();
i
++
)
{
int16_t
v
=
param_
.
input
->
data
<
float16
>
()[
i
];
if
(
v
>
0
)
{
out_data
[
i
]
=
input_data
[
i
];
}
else
{
out_data
[
i
]
=
zero
;
}
}
param_
.
output
->
copyScaleFrom
(
param_
.
input
);
param_
.
output
->
flush
();
return
true
;
}
...
...
@@ -67,8 +51,7 @@ class ReluPE : public PE {
private:
InputParam
param_
;
BypassArgs
args_
;
InplaceArgs
inplace_
;
float16
zero
=
float_to_half
(
0.0
f
);
};
}
// namespace zynqmp
...
...
lite/backends/fpga/KD/pes/scale_pe.hpp
浏览文件 @
8e699af1
...
...
@@ -36,6 +36,7 @@ class ScalePE : public PE {
}
inline
int
lcm
(
int
a
,
int
b
)
{
return
a
*
b
/
gcd
(
a
,
b
);
}
bool
init
()
{
Tensor
*
output
=
param_
.
output
;
output
->
setAligned
(
true
);
...
...
lite/backends/fpga/KD/tensor.hpp
浏览文件 @
8e699af1
...
...
@@ -283,7 +283,6 @@ class Tensor {
.
address
=
data
<
void
>
(),
.
scale_address
=
scale
(),
};
args
.
output
=
output
;
src
->
syncToDevice
();
size_t
aligned_remainder
=
src
->
shape
().
numel
()
%
16
;
if
(
aligned_remainder
>
0
)
{
size_t
dtype_size
=
...
...
@@ -293,7 +292,6 @@ class Tensor {
fpga_flush
(
dst
,
aligned_remainder
*
dtype_size
);
}
src
->
syncToDevice
();
this
->
invalidate
();
perform_bypass
(
args
);
this
->
invalidate
();
}
...
...
@@ -303,8 +301,7 @@ class Tensor {
return
;
}
size_t
memorySize
=
shape_
->
memorySize
(
CellSize
(
dataType_
))
*
mem_scale_factor_
;
size_t
memorySize
=
placeHolder_
->
memorySize
();
fpga_flush
(
placeHolder_
->
data
(),
memorySize
);
}
...
...
@@ -384,7 +381,6 @@ class Tensor {
}
void
save_file_with_name
(
std
::
string
path
)
{
invalidate
();
std
::
ofstream
ofs
;
ofs
.
open
(
path
);
ofs
<<
scale
()[
0
]
<<
" / "
<<
scale
()[
1
]
<<
std
::
endl
;
...
...
lite/kernels/fpga/CMakeLists.txt
浏览文件 @
8e699af1
...
...
@@ -5,7 +5,7 @@ endif()
set
(
fpga_deps fpga_target_wrapper kernel_fpga
)
#
add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
add_kernel
(
activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS
${
fpga_deps
}
)
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
# add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
...
...
@@ -25,7 +25,7 @@ add_kernel(norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS ${fpga_deps})
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
add_kernel
(
pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS
${
fpga_deps
}
)
#
add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
add_kernel
(
reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS
${
fpga_deps
}
reshape_op
)
# add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps})
add_kernel
(
scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS
${
fpga_deps
}
)
# add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
...
...
lite/kernels/fpga/activation_compute.cc
浏览文件 @
8e699af1
...
...
@@ -25,10 +25,10 @@ using float16 = zynqmp::float16;
void
ReluCompute
::
PrepareForRun
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
auto
output_data
=
param
.
Out
->
mutable_data
<
float16
>
();
zynqmp
::
InputParam
&
input
_param
=
pe_
.
param
();
zynqmp
::
InputParam
&
relu
_param
=
pe_
.
param
();
input
_param
.
input
=
param
.
X
->
ZynqTensor
();
input
_param
.
output
=
param
.
Out
->
ZynqTensor
();
relu
_param
.
input
=
param
.
X
->
ZynqTensor
();
relu
_param
.
output
=
param
.
Out
->
ZynqTensor
();
pe_
.
init
();
pe_
.
apply
();
}
...
...
lite/kernels/fpga/elementwise_compute.cc
浏览文件 @
8e699af1
...
...
@@ -40,6 +40,7 @@ void ElementwiseAddCompute::PrepareForRun() {
pe_
.
apply
();
}
void
ElementwiseAddCompute
::
Run
()
{
usleep
(
50
*
100
*
1000
);
pe_
.
dispatch
();
#ifdef FPGA_PRINT_TENSOR
zynqmp
::
ElementwiseAddParam
&
ew_param
=
pe_
.
param
();
...
...
@@ -62,6 +63,7 @@ void ElementwiseAddActivationCompute::PrepareForRun() {
pe_
.
apply
();
}
void
ElementwiseAddActivationCompute
::
Run
()
{
usleep
(
500
*
100
*
1000
);
pe_
.
dispatch
();
#ifdef FPGA_PRINT_TENSOR
zynqmp
::
ElementwiseAddParam
&
ew_param
=
pe_
.
param
();
...
...
@@ -80,21 +82,21 @@ void ElementwiseMulCompute::PrepareForRun() {
scale_param
.
activeParam
.
type
=
zynqmp
::
TYPE_NONE
;
int
channel
=
scale_param
.
input
->
shape
().
channel
();
zynqmp
::
Tensor
*
scale
=
new
zynqmp
::
Tensor
();
zynqmp
::
Tensor
*
bias
=
new
zynqmp
::
Tensor
();
scale_param
.
scale
=
scale
;
scale_param
.
bias
=
bias
;
scale_param
.
scale
=
&
scale_
;
scale_param
.
bias
=
&
bias_
;
zynqmp
::
Shape
shape
(
zynqmp
::
N
,
{
channel
});
float
*
scale_data
=
scale
->
mutableData
<
float
>
(
zynqmp
::
FP32
,
shape
);
float
*
bias_data
=
bias
->
mutableData
<
float
>
(
zynqmp
::
FP32
,
shape
);
zynqmp
::
float16
*
scale_data
=
scale_
.
mutableData
<
zynqmp
::
float16
>
(
zynqmp
::
FP16
,
shape
);
zynqmp
::
float16
*
bias_data
=
bias_
.
mutableData
<
zynqmp
::
float16
>
(
zynqmp
::
FP16
,
shape
);
float
scale_value
=
param
.
Y
->
data
<
float
>
()[
0
];
for
(
int
i
=
0
;
i
<
channel
;
++
i
)
{
for
(
int
i
=
0
;
i
<
channel
;
i
++
)
{
if
(
param
.
Y
->
dims
().
production
()
!=
1
)
{
scale_value
=
param
.
Y
->
ZynqTensor
()
->
data
<
float
>
()[
i
];
}
scale_data
[
i
]
=
scale_value
;
bias_data
[
i
]
=
0
;
scale_data
[
i
]
=
zynqmp
::
float_to_half
(
scale_value
)
;
bias_data
[
i
]
=
zero_
;
}
pe_
.
init
();
...
...
@@ -102,6 +104,10 @@ void ElementwiseMulCompute::PrepareForRun() {
}
void
ElementwiseMulCompute
::
Run
()
{
auto
&
param
=
Param
<
operators
::
ElementwiseParam
>
();
param
.
Y
->
ZynqTensor
()
->
flush
();
scale_
.
copyFrom
(
param
.
Y
->
ZynqTensor
());
scale_
.
invalidate
();
pe_
.
dispatch
();
#ifdef FPGA_PRINT_TENSOR
zynqmp
::
ScaleParam
&
scale_param
=
pe_
.
param
();
...
...
lite/kernels/fpga/elementwise_compute.h
浏览文件 @
8e699af1
...
...
@@ -61,6 +61,9 @@ class ElementwiseMulCompute
private:
zynqmp
::
ScalePE
pe_
;
zynqmp
::
Tensor
scale_
;
zynqmp
::
Tensor
bias_
;
zynqmp
::
float16
zero_
=
zynqmp
::
float_to_half
(
0.0
f
);
};
}
// namespace fpga
...
...
lite/kernels/fpga/fetch_compute.cc
浏览文件 @
8e699af1
...
...
@@ -55,6 +55,7 @@ void FetchCompute::Run() {
#ifdef FPGA_PRINT_TENSOR
zynqmp
::
OutputParam
&
fetch_param
=
pe_
.
param
();
Debugger
::
get_instance
().
registerOutput
(
"fetch"
,
fetch_param
.
output
);
Debugger
::
get_instance
().
setEnable
(
true
);
#endif
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录