Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
b02b1822
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b02b1822
编写于
12月 18, 2019
作者:
C
chonwhite
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
shared library works
上级
a8b4a533
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
174 addition
and
19 deletion
+174
-19
.gitignore
.gitignore
+7
-0
lite/api/cxx_api.cc
lite/api/cxx_api.cc
+25
-0
lite/backends/fpga/KD/debugger.hpp
lite/backends/fpga/KD/debugger.hpp
+13
-10
lite/kernels/arm/layout_compute.cc
lite/kernels/arm/layout_compute.cc
+5
-0
lite/kernels/fpga/CMakeLists.txt
lite/kernels/fpga/CMakeLists.txt
+6
-6
lite/kernels/fpga/io_copy_compute.cc
lite/kernels/fpga/io_copy_compute.cc
+18
-0
lite/kernels/fpga/reshape_compute.cc
lite/kernels/fpga/reshape_compute.cc
+31
-0
lite/kernels/fpga/reshape_compute.h
lite/kernels/fpga/reshape_compute.h
+8
-0
lite/kernels/fpga/transpose_compute.cc
lite/kernels/fpga/transpose_compute.cc
+59
-1
lite/tools/build_fpga.sh
lite/tools/build_fpga.sh
+2
-2
未找到文件。
.gitignore
浏览文件 @
b02b1822
...
...
@@ -104,3 +104,10 @@ metal/paddle-mobile-demo/paddle-mobile-demo/Resources
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/images
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/models
metal/MobileNetDemo/MobileNetDemo/Resources
# generated files
lite/api/paddle_use_kernels.h
lite/api/paddle_use_ops.h
lite/backends/arm/math/dotprod/gemm_sdot.h
lite/tools/cmake_tools/ast.pyc
lite/api/cxx_api.cc
浏览文件 @
b02b1822
...
...
@@ -186,6 +186,8 @@ void Predictor::PrepareFeedFetch() {
}
}
#ifndef LITE_WITH_FPGA
const
lite
::
Tensor
*
Predictor
::
GetOutput
(
size_t
offset
)
const
{
CHECK
(
output_names_
.
size
()
>
offset
)
<<
"The network has "
<<
output_names_
.
size
()
<<
" outputs"
...
...
@@ -205,6 +207,29 @@ std::vector<const lite::Tensor *> Predictor::GetOutputs() const {
}
return
outputs
;
}
#else
// FPGA build of GetOutput(): looks up the "fetch" variable in exec_scope_,
// interprets it as a std::vector<lite::Tensor>, and returns a pointer to the
// element at `offset`. Aborts through CHECK when the variable is missing and
// through CHECK_LT when `offset` is not smaller than the vector size.
const lite::Tensor *Predictor::GetOutput(size_t offset) const {
  auto *fetch_var = exec_scope_->FindVar("fetch");
  CHECK(fetch_var) << "no fatch variable in exec_scope";

  auto *fetch_tensors = fetch_var->GetMutable<std::vector<lite::Tensor>>();
  CHECK_LT(offset, fetch_tensors->size()) << "offset " << offset << " overflow";

  // The returned pointer refers to an element owned by the fetch vector.
  return &fetch_tensors->at(offset);
}
std
::
vector
<
const
lite
::
Tensor
*>
Predictor
::
GetOutputs
()
const
{
auto
*
_fetch_list
=
exec_scope_
->
FindVar
(
"fetch"
);
CHECK
(
_fetch_list
)
<<
"no fatch variable in exec_scope"
;
auto
&
fetch_list
=
*
_fetch_list
->
GetMutable
<
std
::
vector
<
lite
::
Tensor
>>
();
std
::
vector
<
const
lite
::
Tensor
*>
outputs
;
for
(
auto
out
:
fetch_list
)
{
outputs
.
push_back
(
&
out
);
}
return
outputs
;
}
#endif
const
cpp
::
ProgramDesc
&
Predictor
::
program_desc
()
const
{
return
program_desc_
;
...
...
lite/backends/fpga/KD/debugger.hpp
浏览文件 @
b02b1822
...
...
@@ -33,10 +33,6 @@ class Debugger {
}
void
registerOutput
(
std
::
string
op_type
,
zynqmp
::
Tensor
*
tensor
)
{
// tensor->printScale();
// if (op_type != "conv") {
// // tensor->saveToFile(op_type, true);
// }
if
(
op_config
[
op_type
])
{
tensor
->
saveToFile
(
op_type
,
true
);
}
...
...
@@ -45,12 +41,19 @@ class Debugger {
private:
std
::
unordered_map
<
std
::
string
,
bool
>
op_config
;
Debugger
()
{
op_config
[
"concat"
]
=
true
;
op_config
[
"conv"
]
=
true
;
op_config
[
"crop"
]
=
true
;
op_config
[
"fetch"
]
=
true
;
op_config
[
"fc"
]
=
true
;
op_config
[
"softmax"
]
=
true
;
// op_config["concat"] = true;
// op_config["pooling"] = true;
// op_config["conv"] = true;
// op_config["crop"] = true;
// op_config["feed"] = true;
// op_config["fetch"] = true;
// op_config["boxes"] = true;
// op_config["scores"] = true;
// op_config["nms"] = true;
// op_config["pb_boxes"] = true;
// op_config["pb_variances"] = true;
// // op_config["fc"] = true;
// op_config["softmax"] = true;
}
};
...
...
lite/kernels/arm/layout_compute.cc
浏览文件 @
b02b1822
...
...
@@ -59,6 +59,8 @@ namespace arm {
// Explicit specialization of NCHWToNHWCCompute<PRECISION(kFloat)>::Run().
// The body consists solely of the NCHWTONHWC macro invoked with float; the
// macro is defined outside this view, so its exact behaviour must be checked
// at its definition.
template
<
>
void
NCHWToNHWCCompute
<
PRECISION
(
kFloat
)
>::
Run
()
{
NCHWTONHWC
(
float
);
// Disabled alternative kept for reference: fetch the kernel param and copy x
// into y through their Zynq tensors instead of calling the macro.
// auto& param = this->template Param<param_t>();
// param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
}
template
<
>
...
...
@@ -69,6 +71,9 @@ void NCHWToNHWCCompute<PRECISION(kInt8)>::Run() {
// Explicit specialization of NHWCToNCHWCompute<PRECISION(kFloat)>::Run().
// The body consists solely of the NHWCTONCHW macro invoked with float; the
// macro is defined outside this view, so its exact behaviour must be checked
// at its definition.
template
<
>
void
NHWCToNCHWCompute
<
PRECISION
(
kFloat
)
>::
Run
()
{
NHWCTONCHW
(
float
);
// Disabled alternative kept for reference: fetch the kernel param, allocate y
// as float, and copy x into y through their Zynq tensors instead of calling
// the macro.
// auto& param = this->template Param<param_t>();
// param.y->mutable_data<float>();
// param.y->ZynqTensor()->copyFrom(param.x->ZynqTensor());
}
template
<
>
...
...
lite/kernels/fpga/CMakeLists.txt
浏览文件 @
b02b1822
...
...
@@ -7,7 +7,7 @@ set(fpga_deps fpga_target_wrapper kernel_fpga)
# add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
# add_kernel(box_coder_compute_fpga FPGA basic SRCS box_coder_compute.cc DEPS ${fpga_deps})
add_kernel
(
concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS
${
fpga_deps
}
)
#
add_kernel(concat_compute_fpga FPGA basic SRCS concat_compute.cc DEPS ${fpga_deps})
add_kernel
(
conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS
${
fpga_deps
}
)
# add_kernel(density_prior_box_compute_fpga FPGA basic SRCS density_prior_box_compute.cc DEPS ${fpga_deps})
add_kernel
(
dropout_compute_fpga FPGA basic SRCS dropout_compute.cc DEPS
${
fpga_deps
}
)
...
...
@@ -15,18 +15,18 @@ add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS
# add_kernel(feed_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel
(
fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS
${
fpga_deps
}
)
#
add_kernel(gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS ${fpga_deps})
add_kernel
(
mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
gru_compute_fpga FPGA extra SRCS gru_compute.cc DEPS
${
fpga_deps
}
)
#
add_kernel(mul_compute_fpga FPGA basic SRCS mul_compute.cc DEPS ${fpga_deps})
add_kernel
(
multiclass_nms_compute_fpga FPGA basic SRCS multiclass_nms_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
norm_compute_fpga FPGA basic SRCS norm_compute.cc DEPS
${
fpga_deps
}
)
# add_kernel(im2sequence_compute_fpga FPGA basic SRCS im2sequence_compute.cc DEPS ${fpga_deps})
add_kernel
(
pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
prior_box_compute_fpga FPGA basic SRCS prior_box_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS
${
fpga_deps
}
reshape_op
)
#
add_kernel(reshape_compute_fpga FPGA basic SRCS reshape_compute.cc DEPS ${fpga_deps} reshape_op)
# add_kernel(sequence_pool_compute_fpga FPGA basic SRCS sequence_pool_compute.cc DEPS ${fpga_deps})
add_kernel
(
scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
transpose_compute_fpga FPGA basic SRCS transpose_compute.cc DEPS
${
fpga_deps
}
)
#
add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
#
add_kernel(transpose_compute_fpga FPGA basic SRCS transpose_compute.cc DEPS ${fpga_deps})
add_kernel
(
io_copy_compute_fpga FPGA basic SRCS io_copy_compute.cc DEPS
${
fpga_deps
}
)
add_kernel
(
calib_compute_fpga FPGA basic SRCS calib_compute.cc DEPS
${
fpga_deps
}
)
...
...
lite/kernels/fpga/io_copy_compute.cc
浏览文件 @
b02b1822
...
...
@@ -118,6 +118,9 @@ class IoCopyFpgaToHostCompute
param
.
y
->
ZynqTensor
()
->
flush
();
auto
out_lod
=
param
.
y
->
mutable_lod
();
*
out_lod
=
param
.
x
->
lod
();
// param.x->ZynqTensor()->saveToFile("io_x", true);
// param.y->ZynqTensor()->saveToFile("io_y", true);
}
std
::
string
doc
()
const
override
{
return
"Copy IO from FPGA to HOST"
;
}
...
...
@@ -144,6 +147,21 @@ REGISTER_LITE_KERNEL(io_copy,
DATALAYOUT
(
kAny
))})
.
Finalize
();
// Registers another io_copy kernel variant: target kFPGA, precision kAny,
// layout kAny, implemented by IoCopyHostToFpgaCompute under the alias
// host_to_device_any_any.
//   "Input": tensor typed kHost / kAny precision / kAny layout, with -1 as the
//            last GetTensorTy argument (presumably the device id - confirm).
//   "Out":   tensor typed kFPGA / kFP16 / kNHWC.
// NOTE(review): the kernel itself is registered as kAny/kAny while its output
// is bound to kFP16/kNHWC - confirm this mismatch is intentional.
REGISTER_LITE_KERNEL
(
io_copy
,
kFPGA
,
kAny
,
kAny
,
paddle
::
lite
::
kernels
::
fpga
::
IoCopyHostToFpgaCompute
,
host_to_device_any_any
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
),
PRECISION
(
kAny
),
DATALAYOUT
(
kAny
),
-
1
)})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kFPGA
),
PRECISION
(
kFP16
),
DATALAYOUT
(
kNHWC
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
io_copy
,
kFPGA
,
kAny
,
...
...
lite/kernels/fpga/reshape_compute.cc
100755 → 100644
浏览文件 @
b02b1822
...
...
@@ -46,9 +46,40 @@ void ReshapeCompute::Run() {
}
else
{
output
->
CopyDataFrom
(
*
x
);
}
param
.
x
->
ZynqTensor
()
->
saveToFile
(
"reshape_in"
,
true
);
output
->
ZynqTensor
()
->
saveToFile
(
"reshape_out"
,
true
);
output
->
Resize
(
output_dims
);
}
// void ReshapeComputeFpgaToHost::Run() {
// auto& param = Param<operators::ReshapeParam>();
// param.output->mutable_data<float>();
// auto x = param.x;
// // auto actual_shape = param.actual_shape;
// Tensor* actual_shape = nullptr; // TODO(chonwhite) change it.
// auto output = param.output;
// bool inplace = param.inplace;
// auto x_dims = x->dims();
// auto output_dims = output->dims();
// if (actual_shape) {
// auto actual_shape_dims = actual_shape->dims();
// auto* actual_shape_data = actual_shape->data<int>();
// auto shape = std::vector<int>(
// actual_shape_data, actual_shape_data +
// actual_shape_dims.production());
// output_dims = lite::operators::ValidateShape(shape, x_dims);
// output->Resize(output_dims);
// }
// if (inplace) {
// output->ShareDataWith(*x);
// } else {
// output->CopyDataFrom(*x);
// }
// output->Resize(output_dims);
// }
}
// namespace fpga
}
// namespace kernels
}
// namespace lite
...
...
lite/kernels/fpga/reshape_compute.h
浏览文件 @
b02b1822
...
...
@@ -30,6 +30,14 @@ class ReshapeCompute
virtual
~
ReshapeCompute
()
=
default
;
};
// Reshape kernel variant declared on top of
// KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)>.
// Only the declaration of Run() lives here.
// NOTE(review): the only ReshapeComputeFpgaToHost::Run() body visible in this
// change is commented out - confirm a definition exists before this class is
// instantiated, otherwise the link will fail.
class ReshapeComputeFpgaToHost
    : public KernelLite<TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)> {
 public:
  virtual ~ReshapeComputeFpgaToHost() = default;

  // Kernel entry point; overrides the base-class Run().
  void Run() override;
};
}
// namespace fpga
}
// namespace kernels
}
// namespace lite
...
...
lite/kernels/fpga/transpose_compute.cc
100755 → 100644
浏览文件 @
b02b1822
...
...
@@ -27,6 +27,62 @@ namespace fpga {
using
float16
=
zynqmp
::
float16
;
// Generic N-dimensional transpose executed on the host in float.
//
// Steps, in order:
//   1. Calls invalidate() and unalignImage() on the input's Zynq tensor.
//   2. Copies the input into a temporary float tensor of the same dims via
//      ZynqTensor()->copyFrom(), and reads the float data from that copy.
//   3. Allocates the output as float and fills it element by element, walking
//      the input with strides derived from param.axis.
//
// param.x      - input tensor.
// param.output - output tensor; only its data is written here, its dims are
//                not changed by this function.
// param.axis   - permutation: output dimension i takes input dimension axis[i].
//                Assumed to have one entry per input dimension - the code does
//                not verify it (TODO confirm at the call sites).
//
// When param.axis is empty the function returns after allocating the output and
// writes nothing: there is no dimension to iterate over, and indexing the
// (empty) stride tables would be out of bounds.
void transposeCompute(operators::TransposeParam param) {
  const auto* input_x = param.x;
  const auto input_x_dims = input_x->dims();

  input_x->ZynqTensor()->invalidate();
  input_x->ZynqTensor()->unalignImage();

  // Stage the input in a float tensor so it can be indexed as plain floats.
  Tensor float_input;
  float_input.Resize(input_x_dims);
  float_input.mutable_data<float>();
  float_input.ZynqTensor()->copyFrom(input_x->ZynqTensor());
  const auto* input_x_data = float_input.data<float>();

  auto* out = param.output;
  const auto& axis = param.axis;
  auto* out_data = out->mutable_data<float>();

  const size_t ndim = axis.size();
  if (ndim == 0) {
    return;
  }

  // All three tables are stored innermost-output-dimension first (index 0 is
  // the last entry of axis):
  //   xdim[j]    - extent of that output dimension,
  //   xstride[j] - distance in the row-major input between neighbours along it,
  //   xout[j]    - xstride[j] * xdim[j], the offset accumulated by one full
  //                sweep of the dimension (subtracted again when it wraps).
  std::vector<int> xdim(ndim);
  std::vector<int> xstride(ndim);
  std::vector<int> xout(ndim);
  for (size_t i = 0; i < ndim; ++i) {
    const size_t j = ndim - 1 - i;
    xdim[j] = input_x_dims[axis[i]];
    xstride[j] = 1;
    for (int k = axis[i] + 1; static_cast<size_t>(k) < ndim; ++k) {
      xstride[j] *= input_x_dims[k];
    }
    xout[j] = xstride[j] * xdim[j];
  }

  auto numel = input_x->numel();
  size_t pind = 0;             // current read offset into the input
  std::vector<int> ind(ndim);  // current output index, innermost first

  // The counter uses numel's own type so large element counts cannot overflow
  // a narrower int.
  for (decltype(numel) i = 0; i < numel; ++i) {
    out_data[i] = input_x_data[pind];

    // Advance the innermost output index, then carry into outer dimensions
    // the way an odometer does.
    ind[0]++;
    pind += xstride[0];
    for (size_t j = 0; j + 1 < ndim; ++j) {
      if (ind[j] != xdim[j]) {
        break;
      }
      ind[j + 1]++;
      ind[j] = 0;
      pind += xstride[j + 1];
      pind -= xout[j];
    }
  }
}
// Transpose
void
TransposeCompute
::
Run
()
{
auto
&
param
=
this
->
Param
<
param_t
>
();
...
...
@@ -40,7 +96,7 @@ void Transpose2Compute::Run() {
param
.
x
->
ZynqTensor
()
->
invalidate
();
param
.
x
->
ZynqTensor
()
->
unalignImage
();
if
(
param
.
x
->
dims
().
size
()
!=
4
)
{
// TransposeCompute<float>
(param);
transposeCompute
(
param
);
// auto out = param.Out();
// auto out_data = out->data<half>();
...
...
@@ -54,6 +110,8 @@ void Transpose2Compute::Run() {
// index++;
// }
// }
// param.output->ZynqTensor()->copyFrom(param.x->ZynqTensor());
}
else
{
param
.
x
->
ZynqTensor
()
->
saveToFile
(
"tx"
,
true
);
param
.
output
->
ZynqTensor
()
->
copyFrom
(
param
.
x
->
ZynqTensor
());
...
...
lite/tools/build_fpga.sh
浏览文件 @
b02b1822
...
...
@@ -22,10 +22,10 @@ cmake .. \
-DLITE_WITH_FPGA
=
ON
\
-DLITE_WITH_OPENMP
=
ON
\
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK
=
ON
\
-DWITH_TESTING
=
O
N
\
-DWITH_TESTING
=
O
FF
\
-DARM_TARGET_OS
=
armlinux
\
-DLITE_BUILD_EXTRA
=
ON
\
-DLITE_WITH_PROFILE
=
O
N
-DLITE_WITH_PROFILE
=
O
FF
make
-j42
cd
-
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录